Source code

001/*
002 * Copyright 2022-2025 Revetware LLC.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.soklet;
018
019import com.soklet.internal.spring.LinkedCaseInsensitiveMap;
020
021import javax.annotation.Nonnull;
022import javax.annotation.Nullable;
023import javax.annotation.concurrent.ThreadSafe;
024import java.io.ByteArrayOutputStream;
025import java.lang.Thread.UncaughtExceptionHandler;
026import java.lang.invoke.MethodHandle;
027import java.lang.invoke.MethodHandles;
028import java.lang.invoke.MethodHandles.Lookup;
029import java.lang.invoke.MethodType;
030import java.net.URI;
031import java.net.URISyntaxException;
032import java.net.URLEncoder;
033import java.nio.charset.Charset;
034import java.nio.charset.IllegalCharsetNameException;
035import java.nio.charset.StandardCharsets;
036import java.nio.charset.UnsupportedCharsetException;
037import java.util.ArrayDeque;
038import java.util.ArrayList;
039import java.util.Arrays;
040import java.util.Collections;
041import java.util.Deque;
042import java.util.LinkedHashMap;
043import java.util.LinkedHashSet;
044import java.util.List;
045import java.util.Locale;
046import java.util.Locale.LanguageRange;
047import java.util.Map;
048import java.util.Map.Entry;
049import java.util.Optional;
050import java.util.Set;
051import java.util.concurrent.ExecutorService;
052import java.util.concurrent.Executors;
053import java.util.concurrent.ThreadFactory;
054import java.util.regex.Matcher;
055import java.util.regex.Pattern;
056import java.util.stream.Collectors;
057
058import static java.lang.String.format;
059import static java.util.Objects.requireNonNull;
060
061/**
062 * A non-instantiable collection of utility methods.
063 *
064 * @author <a href="https://www.revetkn.com">Mark Allen</a>
065 */
066@ThreadSafe
067public final class Utilities {
        // Result of the one-time virtual-thread feature probe performed in the static initializer below.
        @Nonnull
        private static final boolean VIRTUAL_THREADS_AVAILABLE;
        // Single shared zero-length byte array, allocated once in the static initializer.
        @Nonnull
        private static final byte[] EMPTY_BYTE_ARRAY;
        // Unmodifiable map of language-range string -> Locale, built from every locale the JVM reports as available.
        @Nonnull
        private static final Map<String, Locale> LOCALES_BY_LANGUAGE_RANGE_RANGE;
        // Matches one or more Unicode separator characters (\p{Z}) at the start of a string.
        @Nonnull
        private static final Pattern HEAD_WHITESPACE_PATTERN;
        // Matches one or more Unicode separator characters (\p{Z}) at the end of a string.
        @Nonnull
        private static final Pattern TAIL_WHITESPACE_PATTERN;
        // Matches a '%' followed by exactly two hexadecimal digits; the two digits are captured as group 1.
        @Nonnull
        private static final Pattern HEADER_PERCENT_ENCODING_PATTERN;

        // One-time initialization of every constant declared above.
        static {
                EMPTY_BYTE_ARRAY = new byte[0];

                Locale[] locales = Locale.getAvailableLocales();
                Map<String, Locale> localesByLanguageRangeRange = new LinkedHashMap<>(locales.length);

                // Key each locale by the range string that LanguageRange derives from its language tag.
                // If two locales yield the same range string, the later one replaces the earlier one (Map.put semantics).
                for (Locale locale : locales) {
                        LanguageRange languageRange = new LanguageRange(locale.toLanguageTag());
                        localesByLanguageRangeRange.put(languageRange.getRange(), locale);
                }

                // Publish a read-only view so the shared map cannot be modified after class initialization
                LOCALES_BY_LANGUAGE_RANGE_RANGE = Collections.unmodifiableMap(localesByLanguageRangeRange);

                boolean virtualThreadsAvailable = false;

                try {
                        // Detect if Virtual Threads are usable by feature testing via reflection.
                        // Hat tip to https://github.com/javalin/javalin for this technique
                        Class.forName("java.lang.Thread$Builder$OfVirtual");
                        virtualThreadsAvailable = true;
                } catch (Exception ignored) {
                        // We don't care why this failed, but if we're here we know JVM does not support virtual threads
                }

                VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable;

                // See https://www.regular-expressions.info/unicode.html
                // \p{Z} or \p{Separator}: any kind of whitespace or invisible separator.
                //
                // First pattern matches all whitespace at the head of a string, second matches the same for tail.
                // Useful for a "stronger" trim() function, which is almost always what we want in a web context
                // with user-supplied input.
                HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+");
                TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$");

                // A literal '%' followed by two hex digits (either case)
                HEADER_PERCENT_ENCODING_PATTERN = Pattern.compile("%([0-9A-Fa-f]{2})");
        }
118
        /**
         * Private so that no instances can be created; this class is a holder for static utility methods.
         */
        private Utilities() {
                // Non-instantiable
        }
122
123        /**
124         * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)?
125         *
126         * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise
127         */
128        @Nonnull
129        static Boolean virtualThreadsAvailable() {
130                return VIRTUAL_THREADS_AVAILABLE;
131        }
132
133        /**
134         * Provides a virtual-thread-per-task executor service if supported by the runtime.
135         * <p>
136         * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+),
137         * we compile Soklet with a source level &lt; 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references.
138         * <p>
139         * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong>
140         * <pre>{@code // This method is effectively equivalent to this code
141         * return Executors.newThreadPerTaskExecutor(
142         *   Thread.ofVirtual()
143         *    .name(threadNamePrefix)
144         *    .uncaughtExceptionHandler(uncaughtExceptionHandler)
145         *    .factory()
146         * );}</pre>
147         *
148         * @param threadNamePrefix         thread name prefix for the virtual thread factory builder
149         * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder
150         * @return a virtual-thread-per-task executor service
151         * @throws IllegalStateException if the runtime environment does not support virtual threads
152         */
153        @Nonnull
154        static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@Nonnull String threadNamePrefix,
155                                                                                                                                                                                                                                                                                        @Nonnull UncaughtExceptionHandler uncaughtExceptionHandler) {
156                requireNonNull(threadNamePrefix);
157                requireNonNull(uncaughtExceptionHandler);
158
159                if (!virtualThreadsAvailable())
160                        throw new IllegalStateException("Virtual threads are not available. Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+");
161
162                // Hat tip to https://github.com/javalin/javalin for this technique
163                Class<?> threadBuilderOfVirtualClass;
164
165                try {
166                        threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual");
167                } catch (ClassNotFoundException e) {
168                        throw new IllegalStateException("Unable to load virtual thread builder class", e);
169                }
170
171                Lookup lookup = MethodHandles.publicLookup();
172
173                MethodHandle methodHandleThreadOfVirtual;
174                MethodHandle methodHandleThreadBuilderOfVirtualName;
175                MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler;
176                MethodHandle methodHandleThreadBuilderOfVirtualFactory;
177                MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor;
178
179                try {
180                        methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass));
181                        methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class));
182                        methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class));
183                        methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class));
184                        methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class));
185                } catch (NoSuchMethodException | IllegalAccessException e) {
186                        throw new IllegalStateException("Unable to load method handle for virtual thread factory", e);
187                }
188
189                try {
190                        // Thread.ofVirtual()
191                        Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke();
192                        // .name(threadNamePrefix, start)
193                        methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1);
194                        // .uncaughtExceptionHandler(uncaughtExceptionHandler)
195                        methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler);
196                        // .factory();
197                        ThreadFactory threadFactory = (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder);
198
199                        // return Executors.newThreadPerTaskExecutor(threadFactory);
200                        return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory);
201                } catch (Throwable t) {
202                        throw new IllegalStateException("Unable to create virtual thread executor service", t);
203                }
204        }
205
206        /**
207         * Returns a shared zero-length {@code byte[]} instance.
208         * <p>
209         * Useful as a sentinel when you need a non-{@code null} byte array but have no content.
210         *
211         * @return a zero-length byte array (never {@code null})
212         */
213        @Nonnull
214        static byte[] emptyByteArray() {
215                return EMPTY_BYTE_ARRAY;
216        }
217
218        /**
219         * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values.
220         * <p>
221         * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromQuery(String, QueryFormat, Charset)} if you need to specify a different charset).
222         * <p>
223         * Pairs missing a name are ignored.
224         * <p>
225         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
226         *
227         * @param query       a raw query string such as {@code "a=1&b=2&c=%20"}
228         * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986
229         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none
230         */
231        @Nonnull
232        public static Map<String, Set<String>> extractQueryParametersFromQuery(@Nonnull String query,
233                                                                                                                                                                                                                                                                                                 @Nonnull QueryFormat queryFormat) {
234                requireNonNull(query);
235                requireNonNull(queryFormat);
236
237                return extractQueryParametersFromQuery(query, queryFormat, StandardCharsets.UTF_8);
238        }
239
240        /**
241         * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values.
242         * <p>
243         * Decodes percent-escapes using the specified charset.
244         * <p>
245         * Pairs missing a name are ignored.
246         * <p>
247         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
248         *
249         * @param query       a raw query string such as {@code "a=1&b=2&c=%20"}
250         * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986
251         * @param charset     the charset to use when decoding percent-escapes
252         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none
253         */
254        @Nonnull
255        public static Map<String, Set<String>> extractQueryParametersFromQuery(@Nonnull String query,
256                                                                                                                                                                                                                                                                                                 @Nonnull QueryFormat queryFormat,
257                                                                                                                                                                                                                                                                                                 @Nonnull Charset charset) {
258                requireNonNull(query);
259                requireNonNull(queryFormat);
260                requireNonNull(charset);
261
262                // For form parameters, body will look like "One=Two&Three=Four" ...a query string.
263                String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain
264                return extractQueryParametersFromUrl(syntheticUrl, queryFormat, charset);
265        }
266
267        /**
268         * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values.
269         * <p>
270         * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromUrl(String, QueryFormat, Charset)} if you need to specify a different charset).
271         * <p>
272         * Pairs missing a name are ignored.
273         * <p>
274         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
275         *
276         * @param url         a relative or absolute URL/URI string
277         * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986
278         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none/invalid
279         */
280        @Nonnull
281        public static Map<String, Set<String>> extractQueryParametersFromUrl(@Nonnull String url,
282                                                                                                                                                                                                                                                                                         @Nonnull QueryFormat queryFormat) {
283                requireNonNull(url);
284                requireNonNull(queryFormat);
285
286                return extractQueryParametersFromUrl(url, queryFormat, StandardCharsets.UTF_8);
287        }
288
289        /**
290         * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values.
291         * <p>
292         * Decodes percent-escapes using the specified charset.
293         * <p>
294         * Pairs missing a name are ignored.
295         * <p>
296         * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated).
297         *
298         * @param url         a relative or absolute URL/URI string
299         * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986
300         * @param charset     the charset to use when decoding percent-escapes
301         * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none/invalid
302         */
303        @Nonnull
304        public static Map<String, Set<String>> extractQueryParametersFromUrl(@Nonnull String url,
305                                                                                                                                                                                                                                                                                         @Nonnull QueryFormat queryFormat,
306                                                                                                                                                                                                                                                                                         @Nonnull Charset charset) {
307                requireNonNull(url);
308                requireNonNull(queryFormat);
309                requireNonNull(charset);
310
311                URI uri;
312
313                try {
314                        uri = new URI(url);
315                } catch (URISyntaxException e) {
316                        return Map.of();
317                }
318
319                String query = trimAggressivelyToNull(uri.getRawQuery());
320
321                if (query == null)
322                        return Map.of();
323
324                Map<String, Set<String>> queryParameters = new LinkedHashMap<>();
325                for (String pair : query.split("&")) {
326                        if (pair.isEmpty())
327                                continue;
328
329                        String[] nv = pair.split("=", 2);
330                        String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null);
331                        String rawValue = trimAggressivelyToNull(nv.length > 1 ? nv[1] : null);
332
333                        if (rawName == null)
334                                continue;
335
336                        // Preserve empty values; it's what users probably expect
337                        if (rawValue == null)
338                                rawValue = "";
339
340                        String name = decodeQueryComponent(rawName, queryFormat, charset);
341                        String value = decodeQueryComponent(rawValue, queryFormat, charset);
342
343                        queryParameters.computeIfAbsent(name, k -> new LinkedHashSet<>()).add(value);
344                }
345
346                return queryParameters;
347        }
348
349        /**
350         * Decodes a single key or value using the given mode and charset.
351         */
352        @Nonnull
353        private static String decodeQueryComponent(@Nonnull String string,
354                                                                                                                                                                                 @Nonnull QueryFormat queryFormat,
355                                                                                                                                                                                 @Nonnull Charset charset) {
356                requireNonNull(string);
357                requireNonNull(queryFormat);
358                requireNonNull(charset);
359
360                if (string.isEmpty())
361                        return "";
362
363                // Step 1: in form mode, '+' means space
364                String prepped = (queryFormat == QueryFormat.X_WWW_FORM_URLENCODED) ? string.replace('+', ' ') : string;
365                // Step 2: percent-decode bytes, then interpret bytes with the provided charset
366                return percentDecode(prepped, charset);
367        }
368
369        /**
370         * Percent-decodes a string into bytes, then constructs a String using the provided charset.
371         * One pass only: invalid %xy sequences are left as literal '%' + chars.
372         */
373        @Nonnull
374        private static String percentDecode(@Nonnull String string,
375                                                                                                                                                        @Nonnull Charset charset) {
376                requireNonNull(string);
377                requireNonNull(charset);
378
379                if (string.isEmpty())
380                        return "";
381
382                ByteArrayOutputStream out = new ByteArrayOutputStream(string.length());
383                for (int i = 0; i < string.length(); i++) {
384                        char c = string.charAt(i);
385                        if (c == '%' && i + 2 < string.length()) {
386                                int hi = hex(string.charAt(i + 1));
387                                int lo = hex(string.charAt(i + 2));
388                                if (hi >= 0 && lo >= 0) {
389                                        out.write((hi << 4) | lo);
390                                        i += 2;
391                                        continue;
392                                }
393                                // fall through: invalid percent triplet, treat '%' literally
394                        }
395                        // Write this character's bytes in the given charset (ASCII-fast path is fine too)
396                        byte[] bs = String.valueOf(c).getBytes(charset);
397                        out.write(bs, 0, bs.length);
398                }
399
400                return new String(out.toByteArray(), charset);
401        }
402
403        private static int hex(char c) {
404                if (c >= '0' && c <= '9') return c - '0';
405                if (c >= 'A' && c <= 'F') return c - 'A' + 10;
406                if (c >= 'a' && c <= 'f') return c - 'a' + 10;
407                return -1;
408        }
409
410        /**
411         * Parses {@code Cookie} request headers into a map of cookie names to values.
412         * <p>
413         * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>.
414         * Values are parsed per the following liberal rules:
415         * <ul>
416         *   <li>Components are split on {@code ';'} unless inside a quoted string.</li>
417         *   <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li>
418         *   <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li>
419         * </ul>
420         * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order.
421         *
422         * @param headers request headers as a multimap of header name to values (must be non-{@code null})
423         * @return a map of cookie name to distinct values; empty if no valid cookies are present
424         */
425        @Nonnull
426        public static Map<String, Set<String>> extractCookiesFromHeaders(@Nonnull Map<String, Set<String>> headers) {
427                requireNonNull(headers);
428
429                // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive)
430                Map<String, Set<String>> cookies = new LinkedHashMap<>();
431
432                for (Entry<String, Set<String>> entry : headers.entrySet()) {
433                        String headerName = entry.getKey();
434                        if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim()))
435                                continue;
436
437                        Set<String> values = entry.getValue();
438                        if (values == null) continue;
439
440                        for (String headerValue : values) {
441                                headerValue = trimAggressivelyToNull(headerValue);
442                                if (headerValue == null) continue;
443
444                                // Split on ';' only when NOT inside a quoted string
445                                List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue);
446
447                                for (String cookieComponent : cookieComponents) {
448                                        cookieComponent = trimAggressivelyToNull(cookieComponent);
449                                        if (cookieComponent == null) continue;
450
451                                        String[] cookiePair = cookieComponent.split("=", 2);
452                                        String rawName = trimAggressivelyToNull(cookiePair[0]);
453                                        String rawValue = (cookiePair.length == 2 ? trimAggressivelyToNull(cookiePair[1]) : null);
454
455                                        if (rawName == null) continue;
456
457                                        // DO NOT decode the name; cookie names are case-sensitive and rarely encoded
458                                        String cookieName = rawName;
459
460                                        String cookieValue = null;
461                                        if (rawValue != null) {
462                                                // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space)
463                                                String unquoted = unquoteCookieValueIfNeeded(rawValue);
464                                                cookieValue = percentDecodeCookieValue(unquoted);
465                                        }
466
467                                        cookies.computeIfAbsent(cookieName, key -> new LinkedHashSet<>());
468                                        if (cookieValue != null)
469                                                cookies.get(cookieName).add(cookieValue);
470                                }
471                        }
472                }
473
474                return cookies;
475        }
476
477        /**
478         * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially.
479         */
480        @Nonnull
481        private static String percentDecodeCookieValue(@Nonnull String cookieValue) {
482                requireNonNull(cookieValue);
483
484                ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length());
485
486                for (int i = 0; i < cookieValue.length(); ) {
487                        char c = cookieValue.charAt(i);
488                        if (c == '%' && i + 2 < cookieValue.length()) {
489                                int hi = Character.digit(cookieValue.charAt(i + 1), 16);
490                                int lo = Character.digit(cookieValue.charAt(i + 2), 16);
491                                if (hi >= 0 && lo >= 0) {
492                                        out.write((hi << 4) + lo);
493                                        i += 3;
494                                        continue;
495                                }
496                        }
497
498                        out.write((byte) c);
499                        i++;
500                }
501
502                return out.toString(StandardCharsets.UTF_8);
503        }
504
505        /**
506         * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value.
507         * Supports backslash-escaped quotes within quoted strings.
508         */
509        private static List<String> splitCookieHeaderRespectingQuotes(@Nonnull String headerValue) {
510                List<String> parts = new ArrayList<>();
511                StringBuilder cur = new StringBuilder(headerValue.length());
512                boolean inQuotes = false;
513                boolean escape = false;
514
515                for (int i = 0; i < headerValue.length(); i++) {
516                        char c = headerValue.charAt(i);
517
518                        if (escape) {
519                                // keep escaped char literally (e.g., \" \; \\)
520                                cur.append(c);
521                                escape = false;
522                                continue;
523                        }
524
525                        if (c == '\\') {
526                                escape = true;
527                                // keep the backslash for now; unquote step will handle unescaping
528                                cur.append(c);
529                                continue;
530                        }
531
532                        if (c == '"') {
533                                inQuotes = !inQuotes;
534                                cur.append(c);
535                                continue;
536                        }
537
538                        if (c == ';' && !inQuotes) {
539                                parts.add(cur.toString());
540                                cur.setLength(0);
541                                continue;
542                        }
543
544                        cur.append(c);
545                }
546
547                if (cur.length() > 0)
548                        parts.add(cur.toString());
549
550                return parts;
551        }
552
553        /**
554         * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; .
555         * Otherwise returns the input as-is.
556         */
557        @Nonnull
558        private static String unquoteCookieValueIfNeeded(@Nonnull String rawValue) {
559                requireNonNull(rawValue);
560
561                if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') {
562                        // Strip the surrounding quotes
563                        String inner = rawValue.substring(1, rawValue.length() - 1);
564
565                        // Unescape \" \\ and \; (common patterns seen in the wild)
566                        // Order matters: unescape backslash-escape sequences, then leave other chars intact.
567                        StringBuilder sb = new StringBuilder(inner.length());
568                        boolean escape = false;
569
570                        for (int i = 0; i < inner.length(); i++) {
571                                char c = inner.charAt(i);
572                                if (escape) {
573                                        // Only special-case a few common escapes; otherwise keep the char
574                                        if (c == '"' || c == '\\' || c == ';')
575                                                sb.append(c);
576                                        else
577                                                sb.append(c); // unknown escape -> keep literally (liberal in what we accept)
578
579                                        escape = false;
580                                } else if (c == '\\') {
581                                        escape = true;
582                                } else {
583                                        sb.append(c);
584                                }
585                        }
586
587                        // If string ended with a dangling backslash, keep it literally
588                        if (escape)
589                                sb.append('\\');
590
591                        return sb.toString();
592                }
593
594                return rawValue;
595        }
596
597        /**
598         * Normalizes a URL or path into a canonical request path and optionally performs percent-decoding on the path.
599         * <p>
600         * For example, {@code "https://www.soklet.com/ab%20c?one=two"} would be normalized to {@code "/ab c"}.
601         * <p>
602         * The {@code OPTIONS *} special case returns {@code "*"}.
603         * <p>
604         * Behavior:
605         * <ul>
606         *   <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li>
607         *   <li>Ensures the result begins with {@code '/'}.</li>
608         *   <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li>
609         *   <li>Safely normalizes path traversals, e.g. path {@code '/a/../b'} would be normalized to {@code '/b'}</li>
610         *   <li>Strips any query string.</li>
611         *   <li>Applies aggressive trimming of Unicode whitespace.</li>
612         * </ul>
613         *
614         * @param url             a URL or path to normalize
615         * @param performDecoding {@code true} if decoding should be performed on the path (e.g. replace {@code %20} with a space character), {@code false} otherwise
616         * @return the normalized path, {@code "/"} for empty input
617         */
618        @Nonnull
619        public static String extractPathFromUrl(@Nonnull String url,
620                                                                                                                                                                        @Nonnull Boolean performDecoding) {
621                requireNonNull(url);
622
623                url = trimAggressivelyToEmpty(url);
624
625                // Special case for OPTIONS * requests
626                if (url.equals("*"))
627                        return "*";
628
629                // Parse with java.net.URI to isolate raw path; then percent-decode only the path
630                try {
631                        URI uri = new URI(url);
632
633                        String rawPath = uri.getRawPath(); // null => "/"
634
635                        if (rawPath == null || rawPath.isEmpty())
636                                rawPath = "/";
637
638                        String decodedPath = performDecoding ? percentDecode(rawPath, StandardCharsets.UTF_8) : rawPath;
639
640                        // Sanitize path traversal (e.g. /a/../b -> /b)
641                        decodedPath = removeDotSegments(decodedPath);
642
643                        // Normalize trailing slashes like normalizedPathForUrl currently does
644                        if (!decodedPath.startsWith("/"))
645                                decodedPath = "/" + decodedPath;
646
647                        if (!"/".equals(decodedPath))
648                                while (decodedPath.endsWith("/"))
649                                        decodedPath = decodedPath.substring(0, decodedPath.length() - 1);
650
651                        return decodedPath;
652                } catch (URISyntaxException e) {
653                        // If it's not an absolute URL, treat the whole string as a path and percent-decode
654                        String path = url;
655                        int q = path.indexOf('?');
656
657                        if (q != -1)
658                                path = path.substring(0, q);
659
660                        String decodedPath = performDecoding ? percentDecode(path, StandardCharsets.UTF_8) : path;
661
662                        // Sanitize path traversal (e.g. /a/../b -> /b)
663                        decodedPath = removeDotSegments(decodedPath);
664
665                        if (!decodedPath.startsWith("/"))
666                                decodedPath = "/" + decodedPath;
667
668                        if (!"/".equals(decodedPath))
669                                while (decodedPath.endsWith("/"))
670                                        decodedPath = decodedPath.substring(0, decodedPath.length() - 1);
671
672                        return decodedPath;
673                }
674        }
675
676        /**
677         * Extracts the raw (un-decoded) query component from a URL.
678         * <p>
679         * For example, {@code "/path?a=b&c=d%20e"} would return {@code "a=b&c=d%20e"}.
680         *
681         * @param url a raw URL or path
682         * @return the raw query component, or {@link Optional#empty()} if none
683         */
684        @Nonnull
685        public static Optional<String> extractRawQueryFromUrl(@Nonnull String url) {
686                requireNonNull(url);
687
688                url = trimAggressivelyToEmpty(url);
689
690                if ("*".equals(url))
691                        return Optional.empty();
692
693                try {
694                        URI uri = new URI(url);
695                        return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery()));
696                } catch (URISyntaxException e) {
697                        // Not a valid URI, try to extract query manually
698                        int q = url.indexOf('?');
699                        if (q == -1)
700                                return Optional.empty();
701
702                        String query = trimAggressivelyToNull(url.substring(q + 1));
703                        return Optional.ofNullable(query);
704                }
705        }
706
707        /**
708         * Encodes decoded query parameters into a raw query string.
709         * <p>
710         * For example, given {@code {a=[b], c=[d e]}} and {@link QueryFormat#RFC_3986_STRICT},
711         * returns {@code "a=b&c=d%20e"}.
712         *
713         * @param queryParameters the decoded query parameters
714         * @param queryFormat     the encoding strategy
715         * @return the encoded query string, or the empty string if no parameters
716         */
717        @Nonnull
718        public static String encodeQueryParameters(@Nonnull Map<String, Set<String>> queryParameters,
719                                                                                                                                                                                 @Nonnull QueryFormat queryFormat) {
720                requireNonNull(queryParameters);
721                requireNonNull(queryFormat);
722
723                if (queryParameters.isEmpty())
724                        return "";
725
726                StringBuilder sb = new StringBuilder();
727                boolean first = true;
728
729                for (Entry<String, Set<String>> entry : queryParameters.entrySet()) {
730                        String encodedName = encodeQueryComponent(entry.getKey(), queryFormat);
731
732                        for (String value : entry.getValue()) {
733                                if (!first)
734                                        sb.append('&');
735
736                                sb.append(encodedName);
737                                sb.append('=');
738                                sb.append(encodeQueryComponent(value, queryFormat));
739
740                                first = false;
741                        }
742                }
743
744                return sb.toString();
745        }
746
747        @Nonnull
748        static String encodeQueryComponent(@Nonnull String queryComponent,
749                                                                                                                                                 @Nonnull QueryFormat queryFormat) {
750                requireNonNull(queryComponent);
751                requireNonNull(queryFormat);
752
753                String encoded = URLEncoder.encode(queryComponent, StandardCharsets.UTF_8);
754
755                if (queryFormat == QueryFormat.RFC_3986_STRICT)
756                        encoded = encoded.replace("+", "%20");
757
758                return encoded;
759        }
760
761        @Nonnull
762        static String encodePath(@Nonnull String path) {
763                requireNonNull(path);
764
765                if ("*".equals(path))
766                        return path;
767
768                // Encode each path segment individually, preserving '/' separators.
769                // RFC 3986 is used for path encoding (spaces as %20, not +).
770                return Arrays.stream(path.split("/", -1))
771                                .map(segment -> URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20"))
772                                .collect(Collectors.joining("/"));
773        }
774
775        /**
776         * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s.
777         * <p>
778         * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then mapped to available
779         * JVM locales. Unknown or unavailable language ranges are skipped. On parse failure, an empty list is returned.
780         *
781         * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null})
782         * @return locales in descending preference order; empty if none could be resolved
783         */
784        @Nonnull
785        public static List<Locale> extractLocalesFromAcceptLanguageHeaderValue(@Nonnull String acceptLanguageHeaderValue) {
786                requireNonNull(acceptLanguageHeaderValue);
787
788                try {
789                        List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue);
790
791                        return languageRanges.stream()
792                                        .map(languageRange -> LOCALES_BY_LANGUAGE_RANGE_RANGE.get(languageRange.getRange()))
793                                        .filter(locale -> locale != null)
794                                        .collect(Collectors.toList());
795                } catch (Exception ignored) {
796                        return List.of();
797                }
798        }
799
800        /**
801         * Best-effort attempt to determine a client's URL prefix by examining request headers.
802         * <p>
803         * A URL prefix in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components.
804         * <p>
805         * Soklet is generally the "last hop" behind a load balancer/reverse proxy and does get accessed directly by clients.
806         * <p>
807         * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the
808         * request through headers like the following:
809         * <ul>
810         *   <li>{@code Host}</li>
811         *   <li>{@code Forwarded}</li>
812         *   <li>{@code Origin}</li>
813         *   <li>{@code X-Forwarded-Proto}</li>
814         *   <li>{@code X-Forwarded-Protocol}</li>
815         *   <li>{@code X-Url-Scheme}</li>
816         *   <li>{@code Front-End-Https}</li>
817         *   <li>{@code X-Forwarded-Ssl}</li>
818         *   <li>{@code X-Forwarded-Host}</li>
819         *   <li>{@code X-Forwarded-Port}</li>
820         * </ul>
821         * <p>
822         * This method may take these and other headers into account when determining URL prefix.
823         * <p>
824         * For example, the following would be legal URL prefixes returned from this method:
825         * <ul>
826         *   <li>{@code https://www.soklet.com}</li>
827         *   <li>{@code http://www.fake.com:1234}</li>
828         * </ul>
829         * <p>
830         * The following would NOT be legal URL prefixes:
831         * <ul>
832         *   <li>{@code www.soklet.com} (missing protocol) </li>
833         *   <li>{@code https://www.soklet.com/} (trailing slash)</li>
834         *   <li>{@code https://www.soklet.com/test} (trailing slash, path)</li>
835         *   <li>{@code https://www.soklet.com/test?abc=1234} (trailing slash, path, query)</li>
836         * </ul>
837         *
838         * @param headers HTTP request headers
839         * @return the URL prefix, or {@link Optional#empty()} if it could not be determined
840         */
841        @Nonnull
842        public static Optional<String> extractClientUrlPrefixFromHeaders(@Nonnull Map<String, Set<String>> headers) {
843                requireNonNull(headers);
844
845                // Host                   developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
846                // Forwarded              by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
847                // Origin                 null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
848                // X-Forwarded-Proto      https
849                // X-Forwarded-Protocol   https (Microsoft's alternate name)
850                // X-Url-Scheme           https (Microsoft's alternate name)
851                // Front-End-Https        on (Microsoft's alternate name)
852                // X-Forwarded-Ssl        on (Microsoft's alternate name)
853                // X-Forwarded-Host       id42.example-cdn.com
854                // X-Forwarded-Port       443
855
856                String protocol = null;
857                String host = null;
858                String portAsString = null;
859                Boolean portExplicit = false;
860
861                // Host: developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
862                Set<String> hostHeaders = headers.get("Host");
863
864                if (hostHeaders != null && !hostHeaders.isEmpty()) {
865                        HostPort hostPort = parseHostPort(hostHeaders.iterator().next()).orElse(null);
866
867                        if (hostPort != null) {
868                                host = hostPort.getHost();
869
870                                if (hostPort.getPort().isPresent()) {
871                                        portAsString = String.valueOf(hostPort.getPort().get());
872                                        portExplicit = true;
873                                }
874                        }
875                }
876
877                // Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https>
878                Set<String> forwardedHeaders = headers.get("Forwarded");
879                if (forwardedHeaders != null && forwardedHeaders.size() > 0) {
880                        String forwardedHeader = trimAggressivelyToNull(forwardedHeaders.stream().findFirst().get());
881
882                        // If there are multiple comma-separated components, pick the first one
883                        String[] forwardedHeaderComponents = forwardedHeader != null ? forwardedHeader.split(",") : new String[0];
884                        forwardedHeader = forwardedHeaderComponents.length > 0 ? trimAggressivelyToNull(forwardedHeaderComponents[0]) : null;
885
886                        if (forwardedHeader != null) {
887                                // Each field component might look like "by=<identifier>"
888                                String[] forwardedHeaderFieldComponents = forwardedHeader.split(";");
889                                for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) {
890                                        forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent);
891                                        if (forwardedHeaderFieldComponent == null)
892                                                continue;
893
894                                        // Break "by=<identifier>" into "by" and "<identifier>" pieces
895                                        String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */));
896                                        if (forwardedHeaderFieldNameAndValue.length != 2)
897                                                continue;
898
899                                        String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]);
900                                        String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]);
901                                        if (name == null || value == null)
902                                                continue;
903
904                                        if ("host".equalsIgnoreCase(name)) {
905                                                if (host == null) {
906                                                        HostPort hostPort = parseHostPort(value).orElse(null);
907
908                                                        if (hostPort != null) {
909                                                                host = hostPort.getHost();
910
911                                                                if (hostPort.getPort().isPresent()) {
912                                                                        portAsString = String.valueOf(hostPort.getPort().get());
913                                                                        portExplicit = true;
914                                                                }
915                                                        }
916                                                }
917                                        } else if ("proto".equalsIgnoreCase(name)) {
918                                                if (protocol == null)
919                                                        protocol = stripOptionalQuotes(value);
920                                        }
921                                }
922                        }
923                }
924
925                // Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> (IPv6 supported)
926                if (protocol == null || host == null || portAsString == null) {
927                        Set<String> originHeaders = headers.get("Origin");
928
929                        if (originHeaders != null && !originHeaders.isEmpty()) {
930                                String originHeader = trimAggressivelyToNull(originHeaders.iterator().next());
931                                try {
932                                        URI o = new URI(originHeader);
933                                        String sch = trimAggressivelyToNull(o.getScheme());
934                                        String h = o.getHost(); // may be bracketed already on some JDKs
935                                        int p = o.getPort(); // -1 if absent
936
937                                        if (sch != null)
938                                                protocol = sch;
939
940                                        if (h != null) {
941                                                boolean alreadyBracketed = h.startsWith("[") && h.endsWith("]");
942                                                boolean isIpv6Like = h.indexOf(':') >= 0; // contains colon(s)
943                                                host = (isIpv6Like && !alreadyBracketed) ? "[" + h + "]" : h;
944                                        }
945
946                                        if (p >= 0) {
947                                                portAsString = String.valueOf(p);
948                                                portExplicit = true;
949                                        }
950                                } catch (URISyntaxException ignored) {
951                                        // no-op
952                                }
953                        }
954                }
955
956                // X-Forwarded-Proto: https
957                if (protocol == null) {
958                        Set<String> xForwardedProtoHeaders = headers.get("X-Forwarded-Proto");
959                        if (xForwardedProtoHeaders != null && xForwardedProtoHeaders.size() > 0) {
960                                String xForwardedProtoHeader = trimAggressivelyToNull(xForwardedProtoHeaders.stream().findFirst().get());
961                                protocol = xForwardedProtoHeader;
962                        }
963                }
964
965                // X-Forwarded-Protocol: https (Microsoft's alternate name)
966                if (protocol == null) {
967                        Set<String> xForwardedProtocolHeaders = headers.get("X-Forwarded-Protocol");
968                        if (xForwardedProtocolHeaders != null && xForwardedProtocolHeaders.size() > 0) {
969                                String xForwardedProtocolHeader = trimAggressivelyToNull(xForwardedProtocolHeaders.stream().findFirst().get());
970                                protocol = xForwardedProtocolHeader;
971                        }
972                }
973
974                // X-Url-Scheme: https (Microsoft's alternate name)
975                if (protocol == null) {
976                        Set<String> xUrlSchemeHeaders = headers.get("X-Url-Scheme");
977                        if (xUrlSchemeHeaders != null && xUrlSchemeHeaders.size() > 0) {
978                                String xUrlSchemeHeader = trimAggressivelyToNull(xUrlSchemeHeaders.stream().findFirst().get());
979                                protocol = xUrlSchemeHeader;
980                        }
981                }
982
983                // Front-End-Https: on (Microsoft's alternate name)
984                if (protocol == null) {
985                        Set<String> frontEndHttpsHeaders = headers.get("Front-End-Https");
986                        if (frontEndHttpsHeaders != null && frontEndHttpsHeaders.size() > 0) {
987                                String frontEndHttpsHeader = trimAggressivelyToNull(frontEndHttpsHeaders.stream().findFirst().get());
988
989                                if (frontEndHttpsHeader != null)
990                                        protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http";
991                        }
992                }
993
994                // X-Forwarded-Ssl: on (Microsoft's alternate name)
995                if (protocol == null) {
996                        Set<String> xForwardedSslHeaders = headers.get("X-Forwarded-Ssl");
997                        if (xForwardedSslHeaders != null && xForwardedSslHeaders.size() > 0) {
998                                String xForwardedSslHeader = trimAggressivelyToNull(xForwardedSslHeaders.stream().findFirst().get());
999
1000                                if (xForwardedSslHeader != null)
1001                                        protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? "https" : "http";
1002                        }
1003                }
1004
1005                // X-Forwarded-Host: id42.example-cdn.com (or with port / IPv6)
1006                if (host == null) {
1007                        Set<String> xForwardedHostHeaders = headers.get("X-Forwarded-Host");
1008                        if (xForwardedHostHeaders != null && xForwardedHostHeaders.size() > 0) {
1009                                HostPort hostPort = parseHostPort(xForwardedHostHeaders.iterator().next()).orElse(null);
1010
1011                                if (hostPort != null) {
1012                                        host = hostPort.getHost();
1013
1014                                        if (hostPort.getPort().isPresent() && portAsString == null) {
1015                                                portAsString = String.valueOf(hostPort.getPort().get());
1016                                                portExplicit = true;
1017                                        }
1018                                }
1019                        }
1020                }
1021
1022                // X-Forwarded-Port: 443
1023                if (portAsString == null) {
1024                        Set<String> xForwardedPortHeaders = headers.get("X-Forwarded-Port");
1025                        if (xForwardedPortHeaders != null && xForwardedPortHeaders.size() > 0) {
1026                                String xForwardedPortHeader = trimAggressivelyToNull(xForwardedPortHeaders.stream().findFirst().get());
1027                                portAsString = xForwardedPortHeader;
1028
1029                                if (xForwardedPortHeader != null)
1030                                        portExplicit = true;
1031                        }
1032                }
1033
1034                Integer port = null;
1035
1036                if (portAsString != null) {
1037                        try {
1038                                port = Integer.parseInt(portAsString, 10);
1039                        } catch (Exception ignored) {
1040                                // Not an integer; ignore it
1041                        }
1042                }
1043
1044                if (protocol != null && host != null && port == null) {
1045                        return Optional.of(format("%s://%s", protocol, host));
1046                }
1047
1048                if (protocol != null && host != null && port != null) {
1049                        boolean usingDefaultPort =
1050                                        ("http".equalsIgnoreCase(protocol) && port.equals(80)) ||
1051                                                        ("https".equalsIgnoreCase(protocol) && port.equals(443));
1052
1053                        // Keep default ports if the client/proxy explicitly sent them
1054                        String clientUrlPrefix = (usingDefaultPort && !portExplicit)
1055                                        ? format("%s://%s", protocol, host)
1056                                        : format("%s://%s:%s", protocol, host, port);
1057
1058                        return Optional.of(clientUrlPrefix);
1059                }
1060
1061                return Optional.empty();
1062        }
1063
1064        /**
1065         * Extracts the media type (without parameters) from the first {@code Content-Type} header.
1066         * <p>
1067         * For example, {@code "text/html; charset=UTF-8"} → {@code "text/html"}.
1068         *
1069         * @param headers request/response headers (must be non-{@code null})
1070         * @return the media type if present; otherwise {@link Optional#empty()}
1071         * @see #extractContentTypeFromHeaderValue(String)
1072         */
1073        @Nonnull
1074        public static Optional<String> extractContentTypeFromHeaders(@Nonnull Map<String, Set<String>> headers) {
1075                requireNonNull(headers);
1076
1077                Set<String> contentTypeHeaderValues = headers.get("Content-Type");
1078
1079                if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0)
1080                        return Optional.empty();
1081
1082                return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get());
1083        }
1084
1085        /**
1086         * Extracts the media type (without parameters) from a {@code Content-Type} header value.
1087         * <p>
1088         * For example, {@code "application/json; charset=UTF-8"} → {@code "application/json"}.
1089         *
1090         * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank
1091         * @return the media type if present; otherwise {@link Optional#empty()}
1092         */
1093        @Nonnull
1094        public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) {
1095                contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue);
1096
1097                if (contentTypeHeaderValue == null)
1098                        return Optional.empty();
1099
1100                // Examples
1101                // Content-Type: text/html; charset=UTF-8
1102                // Content-Type: multipart/form-data; boundary=something
1103
1104                int indexOfSemicolon = contentTypeHeaderValue.indexOf(";");
1105
1106                // Simple case, e.g. "text/html"
1107                if (indexOfSemicolon == -1)
1108                        return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue));
1109
1110                // More complex case, e.g. "text/html; charset=UTF-8"
1111                return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon)));
1112        }
1113
1114        /**
1115         * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid.
1116         * <p>
1117         * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}.
1118         *
1119         * @param headers request/response headers (must be non-{@code null})
1120         * @return the charset declared by the header; otherwise {@link Optional#empty()}
1121         * @see #extractCharsetFromHeaderValue(String)
1122         */
1123        @Nonnull
1124        public static Optional<Charset> extractCharsetFromHeaders(@Nonnull Map<String, Set<String>> headers) {
1125                requireNonNull(headers);
1126
1127                Set<String> contentTypeHeaderValues = headers.get("Content-Type");
1128
1129                if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0)
1130                        return Optional.empty();
1131
1132                return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get());
1133        }
1134
1135        /**
1136         * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value.
1137         * <p>
1138         * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found,
1139         * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}.
1140         *
1141         * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank
1142         * @return the resolved charset if present and valid; otherwise {@link Optional#empty()}
1143         */
1144        @Nonnull
1145        public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) {
1146                contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue);
1147
1148                if (contentTypeHeaderValue == null)
1149                        return Optional.empty();
1150
1151                // Examples
1152                // Content-Type: text/html; charset=UTF-8
1153                // Content-Type: multipart/form-data; boundary=something
1154
1155                int indexOfSemicolon = contentTypeHeaderValue.indexOf(";");
1156
1157                // Simple case, e.g. "text/html"
1158                if (indexOfSemicolon == -1)
1159                        return Optional.empty();
1160
1161                // More complex case, e.g. "text/html; charset=UTF-8" or "multipart/form-data; charset=UTF-8; boundary=something"
1162                boolean finishedContentType = false;
1163                boolean finishedCharsetName = false;
1164                StringBuilder buffer = new StringBuilder();
1165                String charsetName = null;
1166
1167                for (int i = 0; i < contentTypeHeaderValue.length(); i++) {
1168                        char c = contentTypeHeaderValue.charAt(i);
1169
1170                        if (Character.isWhitespace(c))
1171                                continue;
1172
1173                        if (c == ';') {
1174                                // No content type yet?  This just be it...
1175                                if (!finishedContentType) {
1176                                        finishedContentType = true;
1177                                        buffer = new StringBuilder();
1178                                } else if (!finishedCharsetName) {
1179                                        if (buffer.indexOf("charset=") == 0) {
1180                                                charsetName = buffer.toString();
1181                                                finishedCharsetName = true;
1182                                                break;
1183                                        }
1184                                }
1185                        } else {
1186                                buffer.append(Character.toLowerCase(c));
1187                        }
1188                }
1189
1190                // Handle case where charset is the end of the string, e.g. "whatever;charset=UTF-8"
1191                if (!finishedCharsetName) {
1192                        String potentialCharset = trimAggressivelyToNull(buffer.toString());
1193                        if (potentialCharset != null && potentialCharset.startsWith("charset=")) {
1194                                finishedCharsetName = true;
1195                                charsetName = potentialCharset;
1196                        }
1197                }
1198
1199                if (finishedCharsetName) {
1200                        // e.g. charset=UTF-8 or charset="UTF-8" or charset='UTF-8'
1201                        String possibleCharsetName = trimAggressivelyToNull(charsetName.replace("charset=", ""));
1202
1203                        if (possibleCharsetName != null) {
1204                                // strip optional surrounding quotes
1205                                if ((possibleCharsetName.length() >= 2) &&
1206                                                ((possibleCharsetName.charAt(0) == '"' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '"') ||
1207                                                                (possibleCharsetName.charAt(0) == '\'' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '\''))) {
1208                                        possibleCharsetName = possibleCharsetName.substring(1, possibleCharsetName.length() - 1);
1209                                        possibleCharsetName = trimAggressivelyToNull(possibleCharsetName);
1210                                }
1211
1212                                if (possibleCharsetName != null) {
1213                                        try {
1214                                                return Optional.of(Charset.forName(possibleCharsetName));
1215                                        } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) {
1216                                                return Optional.empty();
1217                                        }
1218                                }
1219                        }
1220                }
1221
1222                return Optional.empty();
1223        }
1224
1225        /**
1226         * A "stronger" version of {@link String#trim()} which discards any kind of whitespace or invisible separator.
1227         * <p>
1228         * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time.
1229         * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F
1230         * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing.
1231         * <p>
1232         * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details.
1233         *
1234         * @param string the string to trim
1235         * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0}
1236         */
1237        @Nullable
1238        public static String trimAggressively(@Nullable String string) {
1239                if (string == null)
1240                        return null;
1241
1242                string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll("");
1243
1244                if (string.length() == 0)
1245                        return string;
1246
1247                string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll("");
1248
1249                return string;
1250        }
1251
1252        /**
1253         * Aggressively trims Unicode whitespace from the given string and returns {@code null} if the result is empty.
1254         * <p>
1255         * See {@link #trimAggressively(String)} for details on which code points are removed.
1256         *
1257         * @param string the input string; may be {@code null}
1258         * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty
1259         */
1260        @Nullable
1261        public static String trimAggressivelyToNull(@Nullable String string) {
1262                if (string == null)
1263                        return null;
1264
1265                string = trimAggressively(string);
1266                return string.length() == 0 ? null : string;
1267        }
1268
1269        /**
1270         * Aggressively trims Unicode whitespace from the given string and returns {@code ""} if the input is {@code null}.
1271         * <p>
1272         * See {@link #trimAggressively(String)} for details on which code points are removed.
1273         *
1274         * @param string the input string; may be {@code null}
1275         * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null}
1276         */
1277        @Nonnull
1278        public static String trimAggressivelyToEmpty(@Nullable String string) {
1279                if (string == null)
1280                        return "";
1281
1282                return trimAggressively(string);
1283        }
1284
1285        static void validateHeaderNameAndValue(@Nullable String name,
1286                                                                                                                                                                 @Nullable String value) {
1287                // First, validate name:
1288                name = trimAggressivelyToNull(name);
1289
1290                if (name == null)
1291                        throw new IllegalArgumentException("Header name is blank");
1292
1293                for (int i = 0; i < name.length(); i++) {
1294                        char c = name.charAt(i);
1295                        // RFC 9110 tchar: "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
1296                        if (!(c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '*' || c == '+' ||
1297                                        c == '-' || c == '.' || c == '^' || c == '_' || c == '`' || c == '|' || c == '~' ||
1298                                        Character.isLetterOrDigit(c))) {
1299                                throw new IllegalArgumentException(format("Illegal header name '%s'. Offending character: '%s'", name, printableChar(c)));
1300                        }
1301                }
1302
1303                // Then, validate value:
1304                if (value == null)
1305                        return;
1306
1307                for (int i = 0; i < value.length(); i++) {
1308                        char c = value.charAt(i);
1309                        if (c == '\r' || c == '\n' || c == 0x00 || (c >= 0x00 && c < 0x20 && c != '\t')) {
1310                                throw new IllegalArgumentException(format("Illegal header value '%s' for header name '%s'. Offending character: '%s'", value, name, printableChar(c)));
1311                        }
1312                }
1313
1314                // Percent-encoded control sequence checks
1315                Matcher m = HEADER_PERCENT_ENCODING_PATTERN.matcher(value);
1316
1317                while (m.find()) {
1318                        int b = Integer.parseInt(m.group(1), 16);
1319                        if (b == 0x0D || b == 0x0A || b == 0x00 || (b >= 0x00 && b < 0x20 && b != 0x09)) {
1320                                throw new IllegalArgumentException(format(
1321                                                "Illegal (percent-encoded) header value '%s' for header name '%s'. Offending octet: 0x%02X",
1322                                                value, name, b));
1323                        }
1324                }
1325        }
1326
1327        @Nonnull
1328        static String printableString(@Nonnull String input) {
1329                requireNonNull(input);
1330
1331                StringBuilder out = new StringBuilder(input.length() + 16);
1332
1333                for (int i = 0; i < input.length(); i++)
1334                        out.append(printableChar(input.charAt(i)));
1335
1336                return out.toString();
1337        }
1338
1339        @Nonnull
1340        static String printableChar(char c) {
1341                if (c == '\r') return "\\r";
1342                if (c == '\n') return "\\n";
1343                if (c == '\t') return "\\t";
1344                if (c == '\f') return "\\f";
1345                if (c == '\b') return "\\b";
1346                if (c == '\\') return "\\\\";
1347                if (c == '\'') return "\\'";
1348                if (c == '\"') return "\\\"";
1349                if (c == 0) return "\\0";
1350
1351                if (c < 0x20 || c == 0x7F)  // control chars
1352                        return String.format("\\u%04X", (int) c);
1353
1354                if (Character.isISOControl(c) || Character.getType(c) == Character.FORMAT)
1355                        return String.format("\\u%04X", (int) c);
1356
1357                return String.valueOf(c);
1358        }
1359
        /**
         * Lowercase names of headers whose values {@link #extractHeadersFromRawHeaderLines(List)} splits on commas into
         * separate values. A header whose lowercase name is not in this set keeps its value as a single entry.
         */
        @Nonnull
        private static final Set<String> COMMA_JOINABLE_HEADER_NAMES = Set.of(
                        // Common list-type headers (RFC 7230/9110)
                        "accept",
                        "accept-encoding",
                        "accept-language",
                        "cache-control",
                        "pragma",
                        "vary",
                        "connection",
                        "transfer-encoding",
                        "upgrade",
                        "allow",
                        "via",
                        "warning"
                        // intentionally NOT: set-cookie, authorization, cookie, content-disposition, location
        );
1377
1378        /**
1379         * Given a list of raw HTTP header lines, convert them into a normalized case-insensitive, order-preserving map which "inflates" comma-separated headers into distinct values where permitted according to RFC 7230/9110.
1380         * <p>
1381         * For example, given these raw header lines:
1382         * <pre>{@code List<String> lines = List.of(
1383         *   "Cache-Control: no-cache, no-store",
1384         *   "Set-Cookie: a=b; Path=/; HttpOnly",
1385         *   "Set-Cookie: c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/"
1386         * );}</pre>
1387         * The result of parsing would look like this:
1388         * <pre>{@code result.get("cache-control") -> [
1389         *   "no-cache",
1390         *   "no-store"
1391         * ]
1392         * result.get("set-cookie") -> [
1393         *   "a=b; Path=/; HttpOnly",
1394         *   "c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/"
1395         * ]}</pre>
1396         * <p>
1397         * Keys in the returned map are case-insensitive and are guaranteed to be in the same order as encountered in {@code rawHeaderLines}.
1398         * <p>
1399         * Values in the returned map are guaranteed to be in the same order as encountered in {@code rawHeaderLines}.
1400         *
1401         * @param rawHeaderLines the raw HTTP header lines to parse
1402         * @return a normalized mapping of header name keys to values
1403         */
1404        @Nonnull
1405        public static Map<String, Set<String>> extractHeadersFromRawHeaderLines(@Nonnull List<String> rawHeaderLines) {
1406                requireNonNull(rawHeaderLines);
1407
1408                // 1) Unfold obsolete folded lines (obs-fold): lines beginning with SP/HT are continuations
1409                List<String> lines = unfold(rawHeaderLines);
1410
1411                // 2) Parse into map
1412                Map<String, Set<String>> headers = new LinkedCaseInsensitiveMap<>();
1413
1414                for (String raw : lines) {
1415                        String line = trimAggressivelyToNull(raw);
1416
1417                        if (line == null)
1418                                continue;
1419
1420                        int idx = line.indexOf(':');
1421
1422                        if (idx <= 0)
1423                                continue; // skip malformed
1424
1425                        String key = trimAggressivelyToEmpty(line.substring(0, idx)); // keep original case for display
1426                        String keyLowercase = key.toLowerCase(Locale.ROOT);
1427                        String value = trimAggressivelyToNull(line.substring(idx + 1));
1428
1429                        if (value == null)
1430                                continue;
1431
1432                        Set<String> bucket = headers.computeIfAbsent(key, k -> new LinkedHashSet<>());
1433
1434                        if (COMMA_JOINABLE_HEADER_NAMES.contains(keyLowercase)) {
1435                                for (String part : splitCommaAware(value)) {
1436                                        String v = trimAggressivelyToNull(part);
1437                                        if (v != null)
1438                                                bucket.add(v);
1439                                }
1440                        } else {
1441                                bucket.add(value.trim());
1442                        }
1443                }
1444
1445                return headers;
1446        }
1447
1448        /**
1449         * Header parsing helper
1450         */
1451        @Nonnull
1452        private static List<String> unfold(@Nonnull List<String> raw) {
1453                requireNonNull(raw);
1454                if (raw.isEmpty()) return List.of();
1455
1456                List<String> out = new ArrayList<>(raw.size());
1457                StringBuilder cur = null;
1458                boolean curIsHeader = false;
1459
1460                for (String line : raw) {
1461                        if (line == null) continue;
1462
1463                        boolean isContinuation = !line.isEmpty() && (line.charAt(0) == ' ' || line.charAt(0) == '\t');
1464                        if (isContinuation) {
1465                                if (cur != null && curIsHeader) {
1466                                        cur.append(' ').append(line.trim());
1467                                } else {
1468                                        // Do not fold into a non-header; flush previous and start anew
1469                                        if (cur != null) out.add(cur.toString());
1470                                        cur = new StringBuilder(line);
1471                                        curIsHeader = line.indexOf(':') > 0; // almost certainly false for leading-space lines
1472                                }
1473                        } else {
1474                                if (cur != null) out.add(cur.toString());
1475                                cur = new StringBuilder(line);
1476                                curIsHeader = line.indexOf(':') > 0;
1477                        }
1478                }
1479                if (cur != null) out.add(cur.toString());
1480                return out;
1481        }
1482
1483        /**
1484         * Header parsing helper: split on commas that are not inside a quoted-string; supports \" escapes inside quotes.
1485         */
1486        @Nonnull
1487        private static List<String> splitCommaAware(@Nonnull String string) {
1488                requireNonNull(string);
1489
1490                List<String> out = new ArrayList<>(4);
1491                StringBuilder cur = new StringBuilder();
1492                boolean inQuotes = false;
1493                boolean escaped = false;
1494
1495                for (int i = 0; i < string.length(); i++) {
1496                        char c = string.charAt(i);
1497
1498                        if (escaped) {
1499                                // Preserve the escaped char as-is
1500                                cur.append(c);
1501                                escaped = false;
1502                        } else if (c == '\\') {
1503                                if (inQuotes) {
1504                                        // Preserve the backslash itself, then mark next char as escaped
1505                                        cur.append('\\');       // ← keep the backslash
1506                                        escaped = true;
1507                                } else {
1508                                        cur.append('\\');       // literal backslash outside quotes
1509                                }
1510                        } else if (c == '"') {
1511                                inQuotes = !inQuotes;
1512                                cur.append('"');
1513                        } else if (c == ',' && !inQuotes) {
1514                                out.add(cur.toString());
1515                                cur.setLength(0);
1516                        } else {
1517                                cur.append(c);
1518                        }
1519                }
1520                out.add(cur.toString());
1521                return out;
1522        }
1523
1524        /**
1525         * Remove a single pair of surrounding quotes if present.
1526         */
1527        @Nonnull
1528        private static String stripOptionalQuotes(@Nonnull String string) {
1529                requireNonNull(string);
1530
1531                if (string.length() >= 2) {
1532                        char first = string.charAt(0), last = string.charAt(string.length() - 1);
1533
1534                        if ((first == '"' && last == '"') || (first == '\'' && last == '\''))
1535                                return string.substring(1, string.length() - 1);
1536                }
1537
1538                return string;
1539        }
1540
1541        /**
1542         * Parse host[:port] with IPv6 support: "[v6](:port)?" or "host(:port)?".
1543         * Returns host (with brackets for v6) and port (nullable).
1544         */
1545        @ThreadSafe
1546        private static final class HostPort {
1547                @Nonnull
1548                private final String host;
1549                @Nullable
1550                private final Integer port;
1551
1552                HostPort(@Nonnull String host,
1553                                                 @Nullable Integer port) {
1554                        this.host = host;
1555                        this.port = port;
1556                }
1557
1558                @Nonnull
1559                public String getHost() {
1560                        return this.host;
1561                }
1562
1563                @Nonnull
1564                public Optional<Integer> getPort() {
1565                        return Optional.ofNullable(this.port);
1566                }
1567        }
1568
1569        @Nonnull
1570        private static Optional<HostPort> parseHostPort(@Nullable String input) {
1571                input = trimAggressivelyToNull(input);
1572
1573                if (input == null)
1574                        return Optional.empty();
1575
1576                input = stripOptionalQuotes(input);
1577
1578                if (input.startsWith("[")) {
1579                        int close = input.indexOf(']');
1580
1581                        if (close > 0) {
1582                                String core = input.substring(1, close); // IPv6 literal without brackets
1583                                String rest = input.substring(close + 1); // maybe ":port"
1584                                String host = "[" + core + "]";
1585                                Integer port = null;
1586
1587                                if (rest.startsWith(":")) {
1588                                        String ps = trimAggressivelyToNull(rest.substring(1));
1589                                        if (ps != null) {
1590                                                try {
1591                                                        port = Integer.parseInt(ps, 10);
1592                                                } catch (Exception ignored) {
1593                                                        // Nothing to do
1594                                                }
1595                                        }
1596                                }
1597
1598                                return Optional.of(new HostPort(host, port));
1599                        }
1600                }
1601
1602                int colon = input.indexOf(':');
1603
1604                if (colon > 0 && input.indexOf(':', colon + 1) == -1) {
1605                        // exactly one ':' -> host:port (IPv4/hostname)
1606                        String h = trimAggressivelyToNull(input.substring(0, colon));
1607                        String ps = trimAggressivelyToNull(input.substring(colon + 1));
1608                        Integer p = null;
1609
1610                        if (ps != null) {
1611                                try {
1612                                        p = Integer.parseInt(ps, 10);
1613                                } catch (Exception ignored) {
1614                                        // Nothing to do
1615                                }
1616                        }
1617                        if (h != null)
1618                                return Optional.of(new HostPort(h, p));
1619                }
1620
1621                // no port
1622                return Optional.of(new HostPort(input, null));
1623        }
1624
1625        @Nonnull
1626        private static String removeDotSegments(@Nonnull String path) {
1627                requireNonNull(path);
1628
1629                Deque<String> stack = new ArrayDeque<>();
1630
1631                for (String seg : path.split("/")) {
1632                        if (seg.isEmpty() || ".".equals(seg))
1633                                continue;
1634
1635                        if ("..".equals(seg)) {
1636                                if (!stack.isEmpty())
1637                                        stack.removeLast();
1638                        } else {
1639                                stack.addLast(seg);
1640                        }
1641                }
1642
1643                return "/" + String.join("/", stack);
1644        }
1645}