001/*
002 * Copyright 2022-2025 Revetware LLC.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.soklet;
018
019import javax.annotation.Nonnull;
020import javax.annotation.Nullable;
021import javax.annotation.concurrent.ThreadSafe;
022import java.io.ByteArrayOutputStream;
023import java.lang.Thread.UncaughtExceptionHandler;
024import java.lang.invoke.MethodHandle;
025import java.lang.invoke.MethodHandles;
026import java.lang.invoke.MethodHandles.Lookup;
027import java.lang.invoke.MethodType;
028import java.net.MalformedURLException;
029import java.net.URI;
030import java.net.URISyntaxException;
031import java.net.URL;
032import java.net.URLDecoder;
033import java.nio.charset.Charset;
034import java.nio.charset.IllegalCharsetNameException;
035import java.nio.charset.StandardCharsets;
036import java.nio.charset.UnsupportedCharsetException;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.LinkedHashMap;
040import java.util.LinkedHashSet;
041import java.util.List;
042import java.util.Locale;
043import java.util.Locale.LanguageRange;
044import java.util.Map;
045import java.util.Map.Entry;
046import java.util.Optional;
047import java.util.Set;
048import java.util.concurrent.ExecutorService;
049import java.util.concurrent.Executors;
050import java.util.concurrent.ThreadFactory;
051import java.util.regex.Pattern;
052import java.util.stream.Collectors;
053
054import static java.lang.String.format;
055import static java.util.Objects.requireNonNull;
056
057/**
058 * A non-instantiable collection of utility methods.
059 *
060 * @author <a href="https://www.revetkn.com">Mark Allen</a>
061 */
062@ThreadSafe
063public final class Utilities {
064        @Nonnull
065        private static final boolean VIRTUAL_THREADS_AVAILABLE;
066        @Nonnull
067        private static final byte[] EMPTY_BYTE_ARRAY;
068        @Nonnull
069        private static final Map<String, Locale> LOCALES_BY_LANGUAGE_RANGE_RANGE;
070        @Nonnull
071        private static final Pattern HEAD_WHITESPACE_PATTERN;
072        @Nonnull
073        private static final Pattern TAIL_WHITESPACE_PATTERN;
074
075        static {
076                EMPTY_BYTE_ARRAY = new byte[0];
077
078                Locale[] locales = Locale.getAvailableLocales();
079                Map<String, Locale> localesByLanguageRangeRange = new LinkedHashMap<>(locales.length);
080
081                for (Locale locale : locales) {
082                        LanguageRange languageRange = new LanguageRange(locale.toLanguageTag());
083                        localesByLanguageRangeRange.put(languageRange.getRange(), locale);
084                }
085
086                LOCALES_BY_LANGUAGE_RANGE_RANGE = Collections.unmodifiableMap(localesByLanguageRangeRange);
087
088                boolean virtualThreadsAvailable = false;
089
090                try {
091                        // Detect if Virtual Threads are usable by feature testing via reflection.
092                        // Hat tip to https://github.com/javalin/javalin for this technique
093                        Class.forName("java.lang.Thread$Builder$OfVirtual");
094                        virtualThreadsAvailable = true;
095                } catch (Exception ignored) {
096                        // We don't care why this failed, but if we're here we know JVM does not support virtual threads
097                }
098
099                VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable;
100
101                // See https://www.regular-expressions.info/unicode.html
102                // \p{Z} or \p{Separator}: any kind of whitespace or invisible separator.
103                //
104                // First pattern matches all whitespace at the head of a string, second matches the same for tail.
105                // Useful for a "stronger" trim() function, which is almost always what we want in a web context
106                // with user-supplied input.
107                HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+");
108                TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$");
109        }
110
111        private Utilities() {
112                // Non-instantiable
113        }
114
115        /**
116         * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)?
117         *
118         * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise
119         */
120        @Nonnull
121        public static Boolean virtualThreadsAvailable() {
122                return VIRTUAL_THREADS_AVAILABLE;
123        }
124
125        /**
126         * Provides a virtual-thread-per-task executor service if supported by the runtime.
127         * <p>
128         * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+),
129         * we compile Soklet with a source level &lt; 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references.
130         * <p>
131         * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong>
132         * <pre>{@code  // This method is effectively equivalent to this code
133         * return Executors.newThreadPerTaskExecutor(
134         *   Thread.ofVirtual()
135         *    .name(threadNamePrefix)
136         *    .uncaughtExceptionHandler(uncaughtExceptionHandler)
137         *    .factory()
138         * );}</pre>
139         *
140         * @param threadNamePrefix         thread name prefix for the virtual thread factory builder
141         * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder
142         * @return a virtual-thread-per-task executor service
143         * @throws IllegalStateException if the runtime environment does not support virtual threads
144         */
145        @Nonnull
146        public static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@Nonnull String threadNamePrefix,
147                                                                                                                                                                                                                                                                                                                 @Nonnull UncaughtExceptionHandler uncaughtExceptionHandler) {
148                requireNonNull(threadNamePrefix);
149                requireNonNull(uncaughtExceptionHandler);
150
151                if (!virtualThreadsAvailable())
152                        throw new IllegalStateException("Virtual threads are not available. Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+");
153
154                // Hat tip to https://github.com/javalin/javalin for this technique
155                Class<?> threadBuilderOfVirtualClass;
156
157                try {
158                        threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual");
159                } catch (ClassNotFoundException e) {
160                        throw new IllegalStateException("Unable to load virtual thread builder class", e);
161                }
162
163                Lookup lookup = MethodHandles.publicLookup();
164
165                MethodHandle methodHandleThreadOfVirtual;
166                MethodHandle methodHandleThreadBuilderOfVirtualName;
167                MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler;
168                MethodHandle methodHandleThreadBuilderOfVirtualFactory;
169                MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor;
170
171                try {
172                        methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass));
173                        methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class));
174                        methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class));
175                        methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class));
176                        methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class));
177                } catch (NoSuchMethodException | IllegalAccessException e) {
178                        throw new IllegalStateException("Unable to load method handle for virtual thread factory", e);
179                }
180
181                try {
182                        // Thread.ofVirtual()
183                        Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke();
184                        // .name(threadNamePrefix, start)
185                        methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1);
186                        // .uncaughtExceptionHandler(uncaughtExceptionHandler)
187                        methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler);
188                        // .factory();
189                        ThreadFactory threadFactory = (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder);
190
191                        // return Executors.newThreadPerTaskExecutor(threadFactory);
192                        return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory);
193                } catch (Throwable t) {
194                        throw new IllegalStateException("Unable to create virtual thread executor service", t);
195                }
196        }
197
198        /**
199         * Returns a shared zero-length {@code byte[]} instance.
200         * <p>
201         * Useful as a sentinel when you need a non-{@code null} byte array but have no content.
202         *
203         * @return a zero-length byte array (never {@code null})
204         */
205        @Nonnull
206        public static byte[] emptyByteArray() {
207                return EMPTY_BYTE_ARRAY;
208        }
209
210        /**
211         * Parses an {@code application/x-www-form-urlencoded} query string into a multimap of names to values.
212         * <p>
213         * Decodes percent-escapes and {@code '+'} as space using UTF-8. Pairs missing a name or value are ignored.
214         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
215         *
216         * @param query a raw query string such as {@code "a=1&b=2&b=3"} (must be non-{@code null})
217         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none
218         * @see #extractQueryParametersFromUrl(String)
219         */
220        @Nonnull
221        public static Map<String, Set<String>> extractQueryParametersFromQuery(@Nonnull String query) {
222                requireNonNull(query);
223
224                // For form parameters, body will look like "One=Two&Three=Four" ...a query string.
225                String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain
226                return extractQueryParametersFromUrl(syntheticUrl);
227        }
228
229        /**
230         * Extracts query parameters from a URL (or URI string) into a multimap of names to values.
231         * <p>
232         * If the input is not a valid {@link URI}, an empty map is returned. The raw query is split on {@code '&'} into
233         * name/value pairs, values are split on the first {@code '='}, and both name and value are UTF-8 decoded
234         * (percent-escapes and {@code '+'} → space). Blank pairs and pairs missing either name or value are ignored.
235         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
236         *
237         * @param url an absolute or relative URL/URI string (must be non-{@code null})
238         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none/invalid
239         */
240        @Nonnull
241        public static Map<String, Set<String>> extractQueryParametersFromUrl(@Nonnull String url) {
242                requireNonNull(url);
243
244                URI uri;
245
246                try {
247                        uri = new URI(url);
248                } catch (URISyntaxException e) {
249                        return Map.of();
250                }
251
252                String query = trimAggressivelyToNull(uri.getRawQuery());
253
254                if (query == null)
255                        return Map.of();
256
257                Map<String, Set<String>> queryParameters = new LinkedHashMap<>();
258                for (String pair : query.split("&")) {
259                        if (pair.isEmpty())
260                                continue;
261
262                        String[] nv = pair.split("=", 2);
263                        String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null);
264                        String rawValue = trimAggressivelyToNull(nv.length > 1 ? nv[1] : null);
265
266                        if (rawName == null || rawValue == null)
267                                continue;
268
269                        String name = URLDecoder.decode(rawName, StandardCharsets.UTF_8);
270                        String value = URLDecoder.decode(rawValue, StandardCharsets.UTF_8);
271
272                        queryParameters.computeIfAbsent(name, k -> new LinkedHashSet<>()).add(value);
273                }
274
275                return queryParameters;
276        }
277
278        /**
279         * Parses {@code Cookie} request headers into a map of cookie names to values.
280         * <p>
281         * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>.
282         * Values are parsed per the following liberal rules:
283         * <ul>
284         *   <li>Components are split on {@code ';'} unless inside a quoted string.</li>
285         *   <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li>
286         *   <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li>
287         * </ul>
288         * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order.
289         *
290         * @param headers request headers as a multimap of header name to values (must be non-{@code null})
291         * @return a map of cookie name to distinct values; empty if no valid cookies are present
292         */
293        @Nonnull
294        public static Map<String, Set<String>> extractCookiesFromHeaders(@Nonnull Map<String, Set<String>> headers) {
295                requireNonNull(headers);
296
297                // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive)
298                Map<String, Set<String>> cookies = new LinkedHashMap<>();
299
300                for (Entry<String, Set<String>> entry : headers.entrySet()) {
301                        String headerName = entry.getKey();
302                        if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim()))
303                                continue;
304
305                        Set<String> values = entry.getValue();
306                        if (values == null) continue;
307
308                        for (String headerValue : values) {
309                                headerValue = trimAggressivelyToNull(headerValue);
310                                if (headerValue == null) continue;
311
312                                // Split on ';' only when NOT inside a quoted string
313                                List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue);
314
315                                for (String cookieComponent : cookieComponents) {
316                                        cookieComponent = trimAggressivelyToNull(cookieComponent);
317                                        if (cookieComponent == null) continue;
318
319                                        String[] cookiePair = cookieComponent.split("=", 2);
320                                        String rawName = trimAggressivelyToNull(cookiePair[0]);
321                                        String rawValue = (cookiePair.length == 2 ? trimAggressivelyToNull(cookiePair[1]) : null);
322
323                                        if (rawName == null) continue;
324
325                                        // DO NOT decode the name; cookie names are case-sensitive and rarely encoded
326                                        String cookieName = rawName;
327
328                                        String cookieValue = null;
329                                        if (rawValue != null) {
330                                                // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space)
331                                                String unquoted = unquoteCookieValueIfNeeded(rawValue);
332                                                cookieValue = percentDecodeCookieValue(unquoted);
333                                        }
334
335                                        cookies.computeIfAbsent(cookieName, key -> new LinkedHashSet<>());
336                                        if (cookieValue != null)
337                                                cookies.get(cookieName).add(cookieValue);
338                                }
339                        }
340                }
341
342                return cookies;
343        }
344
345        /**
346         * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially.
347         */
348        @Nonnull
349        private static String percentDecodeCookieValue(@Nonnull String cookieValue) {
350                requireNonNull(cookieValue);
351
352                ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length());
353
354                for (int i = 0; i < cookieValue.length(); ) {
355                        char c = cookieValue.charAt(i);
356                        if (c == '%' && i + 2 < cookieValue.length()) {
357                                int hi = Character.digit(cookieValue.charAt(i + 1), 16);
358                                int lo = Character.digit(cookieValue.charAt(i + 2), 16);
359                                if (hi >= 0 && lo >= 0) {
360                                        out.write((hi << 4) + lo);
361                                        i += 3;
362                                        continue;
363                                }
364                        }
365
366                        out.write((byte) c);
367                        i++;
368                }
369
370                return out.toString(StandardCharsets.UTF_8);
371        }
372
373        /**
374         * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value.
375         * Supports backslash-escaped quotes within quoted strings.
376         */
377        private static List<String> splitCookieHeaderRespectingQuotes(@Nonnull String headerValue) {
378                List<String> parts = new ArrayList<>();
379                StringBuilder cur = new StringBuilder(headerValue.length());
380                boolean inQuotes = false;
381                boolean escape = false;
382
383                for (int i = 0; i < headerValue.length(); i++) {
384                        char c = headerValue.charAt(i);
385
386                        if (escape) {
387                                // keep escaped char literally (e.g., \" \; \\)
388                                cur.append(c);
389                                escape = false;
390                                continue;
391                        }
392
393                        if (c == '\\') {
394                                escape = true;
395                                // keep the backslash for now; unquote step will handle unescaping
396                                cur.append(c);
397                                continue;
398                        }
399
400                        if (c == '"') {
401                                inQuotes = !inQuotes;
402                                cur.append(c);
403                                continue;
404                        }
405
406                        if (c == ';' && !inQuotes) {
407                                parts.add(cur.toString());
408                                cur.setLength(0);
409                                continue;
410                        }
411
412                        cur.append(c);
413                }
414
415                if (cur.length() > 0)
416                        parts.add(cur.toString());
417
418                return parts;
419        }
420
421        /**
422         * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; .
423         * Otherwise returns the input as-is.
424         */
425        @Nonnull
426        private static String unquoteCookieValueIfNeeded(@Nonnull String rawValue) {
427                requireNonNull(rawValue);
428
429                if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') {
430                        // Strip the surrounding quotes
431                        String inner = rawValue.substring(1, rawValue.length() - 1);
432
433                        // Unescape \" \\ and \; (common patterns seen in the wild)
434                        // Order matters: unescape backslash-escape sequences, then leave other chars intact.
435                        StringBuilder sb = new StringBuilder(inner.length());
436                        boolean escape = false;
437
438                        for (int i = 0; i < inner.length(); i++) {
439                                char c = inner.charAt(i);
440                                if (escape) {
441                                        // Only special-case a few common escapes; otherwise keep the char
442                                        if (c == '"' || c == '\\' || c == ';')
443                                                sb.append(c);
444                                        else
445                                                sb.append(c); // unknown escape -> keep literally (liberal in what we accept)
446
447                                        escape = false;
448                                } else if (c == '\\') {
449                                        escape = true;
450                                } else {
451                                        sb.append(c);
452                                }
453                        }
454
455                        // If string ended with a dangling backslash, keep it literally
456                        if (escape)
457                                sb.append('\\');
458
459                        return sb.toString();
460                }
461
462                return rawValue;
463        }
464
465        /**
466         * Normalizes a URL or path into a canonical request path.
467         * <p>
468         * Behavior:
469         * <ul>
470         *   <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li>
471         *   <li>Ensures the result begins with {@code '/'}.</li>
472         *   <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li>
473         *   <li>Strips any query string.</li>
474         *   <li>Applies aggressive trimming of Unicode whitespace.</li>
475         * </ul>
476         *
477         * @param url a URL or path to normalize (must be non-{@code null})
478         * @return the normalized path (never {@code null}); {@code "/"} for empty input
479         */
480        @Nonnull
481        public static String normalizedPathForUrl(@Nonnull String url) {
482                requireNonNull(url);
483
484                url = trimAggressively(url);
485
486                if (url.length() == 0)
487                        return "/";
488
489                if (url.startsWith("http://") || url.startsWith("https://")) {
490                        try {
491                                URL absoluteUrl = new URL(url);
492                                url = absoluteUrl.getPath();
493                        } catch (MalformedURLException e) {
494                                throw new RuntimeException(format("Malformed URL: %s", url), e);
495                        }
496                }
497
498                if (!url.startsWith("/"))
499                        url = format("/%s", url);
500
501                if ("/".equals(url))
502                        return url;
503
504                while (url.endsWith("/"))
505                        url = url.substring(0, url.length() - 1);
506
507                int queryIndex = url.indexOf("?");
508
509                if (queryIndex != -1)
510                        url = url.substring(0, queryIndex);
511
512                return url;
513        }
514
515        /**
516         * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s.
517         * <p>
518         * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then mapped to available
519         * JVM locales. Unknown or unavailable language ranges are skipped. On parse failure, an empty list is returned.
520         *
521         * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null})
522         * @return locales in descending preference order; empty if none could be resolved
523         */
524        @Nonnull
525        public static List<Locale> localesFromAcceptLanguageHeaderValue(@Nonnull String acceptLanguageHeaderValue) {
526                requireNonNull(acceptLanguageHeaderValue);
527
528                try {
529                        List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue);
530
531                        return languageRanges.stream()
532                                        .map(languageRange -> LOCALES_BY_LANGUAGE_RANGE_RANGE.get(languageRange.getRange()))
533                                        .filter(locale -> locale != null)
534                                        .collect(Collectors.toList());
535                } catch (Exception ignored) {
536                        return List.of();
537                }
538        }
539
540        /**
541         * Best-effort attempt to determine a client's URL prefix by examining request headers.
542         * <p>
543         * A URL prefix in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components.
544         * <p>
         * Soklet is generally the "last hop" behind a load balancer/reverse proxy and does not get accessed directly by clients.
546         * <p>
547         * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the
548         * request through headers like the following:
549         * <ul>
550         *   <li>{@code Host}</li>
551         *   <li>{@code Forwarded}</li>
552         *   <li>{@code Origin}</li>
553         *   <li>{@code X-Forwarded-Proto}</li>
554         *   <li>{@code X-Forwarded-Protocol}</li>
555         *   <li>{@code X-Url-Scheme}</li>
556         *   <li>{@code Front-End-Https}</li>
557         *   <li>{@code X-Forwarded-Ssl}</li>
558         *   <li>{@code X-Forwarded-Host}</li>
559         *   <li>{@code X-Forwarded-Port}</li>
560         * </ul>
561         * <p>
562         * This method may take these and other headers into account when determining URL prefix.
563         * <p>
564         * For example, the following would be legal URL prefixes returned from this method:
565         * <ul>
566         *   <li>{@code https://www.soklet.com}</li>
567         *   <li>{@code http://www.fake.com:1234}</li>
568         * </ul>
569         * <p>
570         * The following would NOT be legal URL prefixes:
571         * <ul>
572         *   <li>{@code www.soklet.com} (missing protocol) </li>
573         *   <li>{@code https://www.soklet.com/} (trailing slash)</li>
574         *   <li>{@code https://www.soklet.com/test} (trailing slash, path)</li>
575         *   <li>{@code https://www.soklet.com/test?abc=1234} (trailing slash, path, query)</li>
576         * </ul>
577         *
578         * @param headers HTTP request headers
579         * @return the URL prefix, or {@link Optional#empty()} if it could not be determined
580         */
581        @Nonnull
582        public static Optional<String> extractClientUrlPrefixFromHeaders(@Nonnull Map<String, Set<String>> headers) {
583                requireNonNull(headers);
584
585                // Host                   developer.mozilla.org OR developer.mozilla.org:443
586                // Forwarded              by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
587                // Origin                 null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
588                // X-Forwarded-Proto      https
589                // X-Forwarded-Protocol   https (Microsoft's alternate name)
590                // X-Url-Scheme           https (Microsoft's alternate name)
591                // Front-End-Https        on (Microsoft's alternate name)
592                // X-Forwarded-Ssl        on (Microsoft's alternate name)
593                // X-Forwarded-Host       id42.example-cdn.com
594                // X-Forwarded-Port       443
595
596                String protocol = null;
597                String host = null;
598                String portAsString = null;
599
600                // Host: developer.mozilla.org OR developer.mozilla.org:443
601                Set<String> hostHeaders = headers.get("Host");
602
603                if (hostHeaders != null && hostHeaders.size() > 0) {
604                        String hostHeader = trimAggressivelyToNull(hostHeaders.stream().findFirst().get());
605
606                        if (hostHeader != null) {
607                                if (hostHeader.contains(":")) {
608                                        String[] hostHeaderComponents = hostHeader.split(":");
609                                        if (hostHeaderComponents.length == 2) {
610                                                host = trimAggressivelyToNull(hostHeaderComponents[0]);
611                                                portAsString = trimAggressivelyToNull(hostHeaderComponents[1]);
612                                        }
613                                } else {
614                                        host = hostHeader;
615                                }
616                        }
617                }
618
619                // Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
620                Set<String> forwardedHeaders = headers.get("Forwarded");
621
622                if (forwardedHeaders != null && forwardedHeaders.size() > 0) {
623                        String forwardedHeader = trimAggressivelyToNull(forwardedHeaders.stream().findFirst().get());
624
625                        // If there are multiple comma-separated components, pick the first one
626                        String[] forwardedHeaderComponents = forwardedHeader.split(",");
627                        forwardedHeader = trimAggressivelyToNull(forwardedHeaderComponents[0]);
628
629                        if (forwardedHeader != null) {
630                                // Each field component might look like "by=<identifier>"
631                                String[] forwardedHeaderFieldComponents = forwardedHeader.split(";");
632
633                                for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) {
634                                        forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent);
635
636                                        if (forwardedHeaderFieldComponent == null)
637                                                continue;
638
639                                        // Break "by=<identifier>" into "by" and "<identifier>" pieces
640                                        String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */));
641                                        if (forwardedHeaderFieldNameAndValue.length != 2)
642                                                continue;
643
644                                        // e.g. "by"
645                                        String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]);
646                                        // e.g. "<identifier>"
647                                        String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]);
648
649                                        if (name == null || value == null)
650                                                continue;
651
652                                        // We only care about the "Host" and "Proto" components here.
653                                        if ("host".equalsIgnoreCase(name)) {
654                                                if (host == null)
655                                                        host = value;
656                                        } else if ("proto".equalsIgnoreCase(name)) {
657                                                if (protocol == null)
658                                                        protocol = value;
659                                        }
660                                }
661                        }
662                }
663
664                // Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
665                if (protocol == null || host == null || portAsString == null) {
666                        Set<String> originHeaders = headers.get("Origin");
667
668                        if (originHeaders != null && originHeaders.size() > 0) {
669                                String originHeader = trimAggressivelyToNull(originHeaders.stream().findFirst().get());
670                                String[] originHeaderComponents = originHeader.split("://");
671
672                                if (originHeaderComponents.length == 2) {
673                                        protocol = trimAggressivelyToNull(originHeaderComponents[0]);
674                                        String originHostAndMaybePort = trimAggressivelyToNull(originHeaderComponents[1]);
675
676                                        if (originHostAndMaybePort != null) {
677                                                if (originHostAndMaybePort.contains(":")) {
678                                                        String[] originHostAndPortComponents = originHostAndMaybePort.split(":");
679
680                                                        if (originHostAndPortComponents.length == 2) {
681                                                                host = trimAggressivelyToNull(originHostAndPortComponents[0]);
682                                                                portAsString = trimAggressivelyToNull(originHostAndPortComponents[1]);
683                                                        }
684                                                } else {
685                                                        host = originHostAndMaybePort;
686                                                }
687                                        }
688                                }
689                        }
690                }
691
692                // X-Forwarded-Proto: https
693                if (protocol == null) {
694                        Set<String> xForwardedProtoHeaders = headers.get("X-Forwarded-Proto");
695                        if (xForwardedProtoHeaders != null && xForwardedProtoHeaders.size() > 0) {
696                                String xForwardedProtoHeader = trimAggressivelyToNull(xForwardedProtoHeaders.stream().findFirst().get());
697                                protocol = xForwardedProtoHeader;
698                        }
699                }
700
701                // X-Forwarded-Protocol: https (Microsoft's alternate name)
702                if (protocol == null) {
703                        Set<String> xForwardedProtocolHeaders = headers.get("X-Forwarded-Protocol");
704                        if (xForwardedProtocolHeaders != null && xForwardedProtocolHeaders.size() > 0) {
705                                String xForwardedProtocolHeader = trimAggressivelyToNull(xForwardedProtocolHeaders.stream().findFirst().get());
706                                protocol = xForwardedProtocolHeader;
707                        }
708                }
709
710                // X-Url-Scheme: https (Microsoft's alternate name)
711                if (protocol == null) {
712                        Set<String> xUrlSchemeHeaders = headers.get("X-Url-Scheme");
713                        if (xUrlSchemeHeaders != null && xUrlSchemeHeaders.size() > 0) {
714                                String xUrlSchemeHeader = trimAggressivelyToNull(xUrlSchemeHeaders.stream().findFirst().get());
715                                protocol = xUrlSchemeHeader;
716                        }
717                }
718
719                // Front-End-Https: on (Microsoft's alternate name)
720                if (protocol == null) {
721                        Set<String> frontEndHttpsHeaders = headers.get("Front-End-Https");
722                        if (frontEndHttpsHeaders != null && frontEndHttpsHeaders.size() > 0) {
723                                String frontEndHttpsHeader = trimAggressivelyToNull(frontEndHttpsHeaders.stream().findFirst().get());
724
725                                if (frontEndHttpsHeader != null)
726                                        protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http";
727                        }
728                }
729
730                // X-Forwarded-Ssl: on (Microsoft's alternate name)
731                if (protocol == null) {
732                        Set<String> xForwardedSslHeaders = headers.get("X-Forwarded-Ssl");
733                        if (xForwardedSslHeaders != null && xForwardedSslHeaders.size() > 0) {
734                                String xForwardedSslHeader = trimAggressivelyToNull(xForwardedSslHeaders.stream().findFirst().get());
735
736                                if (xForwardedSslHeader != null)
737                                        protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? "https" : "http";
738                        }
739                }
740
741                // X-Forwarded-Host: id42.example-cdn.com
742                if (host == null) {
743                        Set<String> xForwardedHostHeaders = headers.get("X-Forwarded-Host");
744                        if (xForwardedHostHeaders != null && xForwardedHostHeaders.size() > 0) {
745                                String xForwardedHostHeader = trimAggressivelyToNull(xForwardedHostHeaders.stream().findFirst().get());
746                                host = xForwardedHostHeader;
747                        }
748                }
749
750                // X-Forwarded-Port: 443
751                if (portAsString == null) {
752                        Set<String> xForwardedPortHeaders = headers.get("X-Forwarded-Port");
753                        if (xForwardedPortHeaders != null && xForwardedPortHeaders.size() > 0) {
754                                String xForwardedPortHeader = trimAggressivelyToNull(xForwardedPortHeaders.stream().findFirst().get());
755                                portAsString = xForwardedPortHeader;
756                        }
757                }
758
759                Integer port = null;
760
761                if (portAsString != null) {
762                        try {
763                                port = Integer.parseInt(portAsString, 10);
764                        } catch (Exception ignored) {
765                                // Not an integer; ignore it
766                        }
767                }
768
769                if (protocol != null && host != null && port == null)
770                        return Optional.of(format("%s://%s", protocol, host));
771
772                if (protocol != null && host != null && port != null) {
773                        boolean usingDefaultPort = ("http".equalsIgnoreCase(protocol) && port.equals(80))
774                                        || ("https".equalsIgnoreCase(protocol) && port.equals(443));
775
776                        // Only include the port number if it's nonstandard for the protocol
777                        String clientUrlPrefix = usingDefaultPort
778                                        ? format("%s://%s", protocol, host)
779                                        : format("%s://%s:%s", protocol, host, port);
780
781                        return Optional.of(clientUrlPrefix);
782                }
783
784                return Optional.empty();
785        }
786
787        /**
788         * Extracts the media type (without parameters) from the first {@code Content-Type} header.
789         * <p>
790         * For example, {@code "text/html; charset=utf-8"} → {@code "text/html"}.
791         *
792         * @param headers request/response headers (must be non-{@code null})
793         * @return the media type if present; otherwise {@link Optional#empty()}
794         * @see #extractContentTypeFromHeaderValue(String)
795         */
796        @Nonnull
797        public static Optional<String> extractContentTypeFromHeaders(@Nonnull Map<String, Set<String>> headers) {
798                requireNonNull(headers);
799
800                Set<String> contentTypeHeaderValues = headers.get("Content-Type");
801
802                if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0)
803                        return Optional.empty();
804
805                return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get());
806        }
807
808        /**
809         * Extracts the media type (without parameters) from a {@code Content-Type} header value.
810         * <p>
811         * For example, {@code "application/json; charset=utf-8"} → {@code "application/json"}.
812         *
813         * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank
814         * @return the media type if present; otherwise {@link Optional#empty()}
815         */
816        @Nonnull
817        public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) {
818                contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue);
819
820                if (contentTypeHeaderValue == null)
821                        return Optional.empty();
822
823                // Examples
824                // Content-Type: text/html; charset=utf-8
825                // Content-Type: multipart/form-data; boundary=something
826
827                int indexOfSemicolon = contentTypeHeaderValue.indexOf(";");
828
829                // Simple case, e.g. "text/html"
830                if (indexOfSemicolon == -1)
831                        return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue));
832
833                // More complex case, e.g. "text/html; charset=utf-8"
834                return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon)));
835        }
836
837        /**
838         * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid.
839         * <p>
840         * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}.
841         *
842         * @param headers request/response headers (must be non-{@code null})
843         * @return the charset declared by the header; otherwise {@link Optional#empty()}
844         * @see #extractCharsetFromHeaderValue(String)
845         */
846        @Nonnull
847        public static Optional<Charset> extractCharsetFromHeaders(@Nonnull Map<String, Set<String>> headers) {
848                requireNonNull(headers);
849
850                Set<String> contentTypeHeaderValues = headers.get("Content-Type");
851
852                if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0)
853                        return Optional.empty();
854
855                return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get());
856        }
857
858        /**
859         * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value.
860         * <p>
861         * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found,
862         * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}.
863         *
864         * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank
865         * @return the resolved charset if present and valid; otherwise {@link Optional#empty()}
866         */
867        @Nonnull
868        public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) {
869                contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue);
870
871                if (contentTypeHeaderValue == null)
872                        return Optional.empty();
873
874                // Examples
875                // Content-Type: text/html; charset=utf-8
876                // Content-Type: multipart/form-data; boundary=something
877
878                int indexOfSemicolon = contentTypeHeaderValue.indexOf(";");
879
880                // Simple case, e.g. "text/html"
881                if (indexOfSemicolon == -1)
882                        return Optional.empty();
883
884                // More complex case, e.g. "text/html; charset=utf-8" or "multipart/form-data; charset=utf-8; boundary=something"
885                boolean finishedContentType = false;
886                boolean finishedCharsetName = false;
887                StringBuilder buffer = new StringBuilder();
888                String charsetName = null;
889
890                for (int i = 0; i < contentTypeHeaderValue.length(); i++) {
891                        char c = contentTypeHeaderValue.charAt(i);
892
893                        if (Character.isWhitespace(c))
894                                continue;
895
896                        if (c == ';') {
897                                // No content type yet?  This just be it...
898                                if (!finishedContentType) {
899                                        finishedContentType = true;
900                                        buffer = new StringBuilder();
901                                } else if (!finishedCharsetName) {
902                                        if (buffer.indexOf("charset=") == 0) {
903                                                charsetName = buffer.toString();
904                                                finishedCharsetName = true;
905                                                break;
906                                        }
907                                }
908                        } else {
909                                buffer.append(Character.toLowerCase(c));
910                        }
911                }
912
913                // Handle case where charset is the end of the string, e.g. "whatever;charset=utf-8"
914                if (!finishedCharsetName) {
915                        String potentialCharset = trimAggressivelyToNull(buffer.toString());
916                        if (potentialCharset != null && potentialCharset.startsWith("charset=")) {
917                                finishedCharsetName = true;
918                                charsetName = potentialCharset;
919                        }
920                }
921
922                if (finishedCharsetName) {
923                        // e.g. "charset=utf-8" -> "utf-8"
924                        charsetName = trimAggressivelyToNull(charsetName.replace("charset=", ""));
925
926                        if (charsetName != null) {
927                                try {
928                                        return Optional.of(Charset.forName(charsetName));
929                                } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) {
930                                        return Optional.empty();
931                                }
932                        }
933                }
934
935                return Optional.empty();
936        }
937
938        /**
939         * A "stronger" version of {@link String#trim()} which discards any kind of whitespace or invisible separator.
940         * <p>
941         * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time.
942         * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F
943         * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing.
944         * <p>
945         * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details.
946         *
947         * @param string the string to trim
948         * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0}
949         */
950        @Nullable
951        public static String trimAggressively(@Nullable String string) {
952                if (string == null)
953                        return null;
954
955                string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll("");
956
957                if (string.length() == 0)
958                        return string;
959
960                string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll("");
961
962                return string;
963        }
964
965        /**
966         * Aggressively trims Unicode whitespace from the given string and returns {@code null} if the result is empty.
967         * <p>
968         * See {@link #trimAggressively(String)} for details on which code points are removed.
969         *
970         * @param string the input string; may be {@code null}
971         * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty
972         */
973        @Nullable
974        public static String trimAggressivelyToNull(@Nullable String string) {
975                if (string == null)
976                        return null;
977
978                string = trimAggressively(string);
979                return string.length() == 0 ? null : string;
980        }
981
982        /**
983         * Aggressively trims Unicode whitespace from the given string and returns {@code ""} if the input is {@code null}.
984         * <p>
985         * See {@link #trimAggressively(String)} for details on which code points are removed.
986         *
987         * @param string the input string; may be {@code null}
988         * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null}
989         */
990        @Nonnull
991        public static String trimAggressivelyToEmpty(@Nullable String string) {
992                if (string == null)
993                        return "";
994
995                return trimAggressively(string);
996        }
997}