001/* 002 * Copyright 2022-2026 Revetware LLC. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.soklet; 018 019import com.soklet.exception.IllegalRequestException; 020import com.soklet.internal.spring.LinkedCaseInsensitiveMap; 021import org.jspecify.annotations.NonNull; 022import org.jspecify.annotations.Nullable; 023 024import javax.annotation.concurrent.ThreadSafe; 025import java.io.ByteArrayOutputStream; 026import java.lang.Thread.UncaughtExceptionHandler; 027import java.lang.invoke.MethodHandle; 028import java.lang.invoke.MethodHandles; 029import java.lang.invoke.MethodHandles.Lookup; 030import java.lang.invoke.MethodType; 031import java.net.URI; 032import java.net.URISyntaxException; 033import java.net.URLEncoder; 034import java.nio.charset.Charset; 035import java.nio.charset.IllegalCharsetNameException; 036import java.nio.charset.StandardCharsets; 037import java.nio.charset.UnsupportedCharsetException; 038import java.util.ArrayDeque; 039import java.util.ArrayList; 040import java.util.Arrays; 041import java.util.Collections; 042import java.util.Deque; 043import java.util.LinkedHashMap; 044import java.util.LinkedHashSet; 045import java.util.List; 046import java.util.Locale; 047import java.util.Locale.LanguageRange; 048import java.util.Map; 049import java.util.Map.Entry; 050import java.util.Optional; 051import java.util.Set; 052import java.util.concurrent.ExecutorService; 053import 
java.util.concurrent.Executors; 054import java.util.concurrent.ThreadFactory; 055import java.util.regex.Matcher; 056import java.util.regex.Pattern; 057import java.util.stream.Collectors; 058 059import static java.lang.String.format; 060import static java.util.Objects.requireNonNull; 061 062/** 063 * A non-instantiable collection of utility methods. 064 * 065 * @author <a href="https://www.revetkn.com">Mark Allen</a> 066 */ 067@ThreadSafe 068public final class Utilities { 069 @NonNull 070 private static final boolean VIRTUAL_THREADS_AVAILABLE; 071 @NonNull 072 private static final byte[] EMPTY_BYTE_ARRAY; 073 @NonNull 074 private static final Pattern HEAD_WHITESPACE_PATTERN; 075 @NonNull 076 private static final Pattern TAIL_WHITESPACE_PATTERN; 077 @NonNull 078 private static final Pattern HEADER_PERCENT_ENCODING_PATTERN; 079 080 static { 081 EMPTY_BYTE_ARRAY = new byte[0]; 082 083 boolean virtualThreadsAvailable = false; 084 085 try { 086 // Detect if Virtual Threads are usable by feature testing via reflection. 087 // Hat tip to https://github.com/javalin/javalin for this technique 088 Class.forName("java.lang.Thread$Builder$OfVirtual"); 089 virtualThreadsAvailable = true; 090 } catch (Exception ignored) { 091 // We don't care why this failed, but if we're here we know JVM does not support virtual threads 092 } 093 094 VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable; 095 096 // See https://www.regular-expressions.info/unicode.html 097 // \p{Z} or \p{Separator}: Unicode space-separator characters. 098 // 099 // First pattern matches those separator characters at the head of a string, second matches the same for tail. 100 // Useful for a "stronger" trim() function, which is almost always what we want in a web context 101 // with user-supplied input. 
102 HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+"); 103 TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$"); 104 105 HEADER_PERCENT_ENCODING_PATTERN = Pattern.compile("%([0-9A-Fa-f]{2})"); 106 } 107 108 private Utilities() { 109 // Non-instantiable 110 } 111 112 /** 113 * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)? 114 * 115 * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise 116 */ 117 @NonNull 118 static Boolean virtualThreadsAvailable() { 119 return VIRTUAL_THREADS_AVAILABLE; 120 } 121 122 /** 123 * Provides a virtual thread factory if supported by the runtime. 124 * <p> 125 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 126 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our {@link ThreadFactory} via {@link MethodHandle} references. 127 * <p> 128 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 129 * 130 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 131 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 132 * @return a virtual thread factory 133 * @throws IllegalStateException if the runtime environment does not support virtual threads 134 */ 135 @NonNull 136 static ThreadFactory createVirtualThreadFactory(@NonNull String threadNamePrefix, 137 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 138 requireNonNull(threadNamePrefix); 139 requireNonNull(uncaughtExceptionHandler); 140 141 if (!virtualThreadsAvailable()) 142 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 143 144 // Hat tip to https://github.com/javalin/javalin for this technique 145 Class<?> threadBuilderOfVirtualClass; 146 147 try { 148 threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual"); 149 } catch (ClassNotFoundException e) { 150 throw new IllegalStateException("Unable to load virtual thread builder class", e); 151 } 152 153 Lookup lookup = MethodHandles.publicLookup(); 154 155 MethodHandle methodHandleThreadOfVirtual; 156 MethodHandle methodHandleThreadBuilderOfVirtualName; 157 MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler; 158 MethodHandle methodHandleThreadBuilderOfVirtualFactory; 159 160 try { 161 methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass)); 162 methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class)); 163 methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class)); 164 methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class)); 165 } catch (NoSuchMethodException | IllegalAccessException e) { 166 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 167 } 168 169 try { 170 // Thread.ofVirtual() 171 Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke(); 172 // .name(threadNamePrefix, start) 173 methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1); 174 // .uncaughtExceptionHandler(uncaughtExceptionHandler) 175 
methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler); 176 // .factory(); 177 return (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder); 178 } catch (Throwable t) { 179 throw new IllegalStateException("Unable to create virtual thread factory", t); 180 } 181 } 182 183 /** 184 * Provides a virtual-thread-per-task executor service if supported by the runtime. 185 * <p> 186 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 187 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references. 188 * <p> 189 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 190 * <pre>{@code // This method is effectively equivalent to this code 191 * return Executors.newThreadPerTaskExecutor( 192 * Thread.ofVirtual() 193 * .name(threadNamePrefix) 194 * .uncaughtExceptionHandler(uncaughtExceptionHandler) 195 * .factory() 196 * );}</pre> 197 * 198 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 199 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 200 * @return a virtual-thread-per-task executor service 201 * @throws IllegalStateException if the runtime environment does not support virtual threads 202 */ 203 @NonNull 204 static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@NonNull String threadNamePrefix, 205 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 206 requireNonNull(threadNamePrefix); 207 requireNonNull(uncaughtExceptionHandler); 208 209 if (!virtualThreadsAvailable()) 210 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 211 212 ThreadFactory threadFactory = createVirtualThreadFactory(threadNamePrefix, uncaughtExceptionHandler); 213 214 Lookup lookup = MethodHandles.publicLookup(); 215 MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor; 216 217 try { 218 methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class)); 219 } catch (NoSuchMethodException | IllegalAccessException e) { 220 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 221 } 222 223 try { 224 // return Executors.newThreadPerTaskExecutor(threadFactory); 225 return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory); 226 } catch (Throwable t) { 227 throw new IllegalStateException("Unable to create virtual thread executor service", t); 228 } 229 } 230 231 /** 232 * Returns a shared zero-length {@code byte[]} instance. 233 * <p> 234 * Useful as a sentinel when you need a non-{@code null} byte array but have no content. 235 * 236 * @return a zero-length byte array (never {@code null}) 237 */ 238 @NonNull 239 static byte[] emptyByteArray() { 240 return EMPTY_BYTE_ARRAY; 241 } 242 243 /** 244 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 245 * <p> 246 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromQuery(String, QueryFormat, Charset)} if you need to specify a different charset). 247 * <p> 248 * Pairs missing a name are ignored. 249 * <p> 250 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
251 * 252 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 253 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 254 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 255 * @throws IllegalRequestException if the query string contains malformed percent-encoding 256 */ 257 @NonNull 258 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 259 @NonNull QueryFormat queryFormat) { 260 requireNonNull(query); 261 requireNonNull(queryFormat); 262 263 return extractQueryParametersFromQuery(query, queryFormat, StandardCharsets.UTF_8); 264 } 265 266 /** 267 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 268 * <p> 269 * Decodes percent-escapes using the specified charset. 270 * <p> 271 * Pairs missing a name are ignored. 272 * <p> 273 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 274 * 275 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 276 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 277 * @param charset the charset to use when decoding percent-escapes 278 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 279 * @throws IllegalRequestException if the query string contains malformed percent-encoding 280 */ 281 @NonNull 282 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 283 @NonNull QueryFormat queryFormat, 284 @NonNull Charset charset) { 285 requireNonNull(query); 286 requireNonNull(queryFormat); 287 requireNonNull(charset); 288 289 // For form parameters, body will look like "One=Two&Three=Four" ...a query string. 
290 String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain 291 return extractQueryParametersFromUrl(syntheticUrl, queryFormat, charset); 292 } 293 294 @NonNull 295 static Optional<Set<@NonNull String>> extractQueryParameterValuesFromQuery(@NonNull String query, 296 @NonNull String name, 297 @NonNull QueryFormat queryFormat, 298 @NonNull Charset charset) { 299 requireNonNull(query); 300 requireNonNull(name); 301 requireNonNull(queryFormat); 302 requireNonNull(charset); 303 304 query = trimAggressivelyToEmpty(query); 305 306 if (query.isEmpty()) 307 return Optional.empty(); 308 309 String singleValue = null; 310 Set<String> values = null; 311 boolean matched = false; 312 int pairStart = 0; 313 314 while (pairStart <= query.length()) { 315 int pairEnd = query.indexOf('&', pairStart); 316 if (pairEnd == -1) 317 pairEnd = query.length(); 318 319 if (pairEnd > pairStart) { 320 int separator = query.indexOf('=', pairStart); 321 if (separator == -1 || separator > pairEnd) 322 separator = pairEnd; 323 324 String rawName = trimAggressivelyToNull(query.substring(pairStart, separator)); 325 326 if (rawName != null) { 327 String decodedName = decodeQueryComponent(rawName, queryFormat, charset); 328 329 if (decodedName.equals(name)) { 330 String rawValue = separator < pairEnd ? 
trimAggressivelyToNull(query.substring(separator + 1, pairEnd)) : null; 331 332 if (rawValue == null) 333 rawValue = ""; 334 335 String value = decodeQueryComponent(rawValue, queryFormat, charset); 336 337 if (!matched) { 338 singleValue = value; 339 matched = true; 340 } else { 341 if (values == null) { 342 values = new LinkedHashSet<>(); 343 values.add(singleValue); 344 } 345 346 values.add(value); 347 } 348 } 349 } 350 } 351 352 if (pairEnd == query.length()) 353 break; 354 355 pairStart = pairEnd + 1; 356 } 357 358 if (!matched) 359 return Optional.empty(); 360 361 if (values == null) 362 return Optional.of(Set.of(singleValue)); 363 364 return Optional.of(Collections.unmodifiableSet(values)); 365 } 366 367 /** 368 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 369 * <p> 370 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromUrl(String, QueryFormat, Charset)} if you need to specify a different charset). 371 * <p> 372 * Pairs missing a name are ignored. 373 * <p> 374 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
375 * 376 * @param url a relative or absolute URL/URI string 377 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 378 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 379 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 380 */ 381 @NonNull 382 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 383 @NonNull QueryFormat queryFormat) { 384 requireNonNull(url); 385 requireNonNull(queryFormat); 386 387 return extractQueryParametersFromUrl(url, queryFormat, StandardCharsets.UTF_8); 388 } 389 390 /** 391 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 392 * <p> 393 * Decodes percent-escapes using the specified charset. 394 * <p> 395 * Pairs missing a name are ignored. 396 * <p> 397 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
398 * 399 * @param url a relative or absolute URL/URI string 400 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 401 * @param charset the charset to use when decoding percent-escapes 402 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 403 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 404 */ 405 @NonNull 406 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 407 @NonNull QueryFormat queryFormat, 408 @NonNull Charset charset) { 409 requireNonNull(url); 410 requireNonNull(queryFormat); 411 requireNonNull(charset); 412 413 URI uri; 414 415 try { 416 uri = new URI(url); 417 } catch (URISyntaxException e) { 418 throw new IllegalRequestException(format("Invalid URL '%s'", url), e); 419 } 420 421 String query = trimAggressivelyToNull(uri.getRawQuery()); 422 423 if (query == null) 424 return Map.of(); 425 426 Map<String, Set<String>> queryParameters = new LinkedHashMap<>(); 427 for (String pair : query.split("&")) { 428 if (pair.isEmpty()) 429 continue; 430 431 String[] nv = pair.split("=", 2); 432 String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null); 433 String rawValue = trimAggressivelyToNull(nv.length > 1 ? nv[1] : null); 434 435 if (rawName == null) 436 continue; 437 438 // Preserve empty values; it's what users probably expect 439 if (rawValue == null) 440 rawValue = ""; 441 442 String name = decodeQueryComponent(rawName, queryFormat, charset); 443 String value = decodeQueryComponent(rawValue, queryFormat, charset); 444 445 addStringValue(queryParameters, name, value); 446 } 447 448 freezeStringValueSets(queryParameters); 449 return queryParameters; 450 } 451 452 /** 453 * Decodes a single key or value using the given mode and charset. 
454 */ 455 @NonNull 456 private static String decodeQueryComponent(@NonNull String string, 457 @NonNull QueryFormat queryFormat, 458 @NonNull Charset charset) { 459 requireNonNull(string); 460 requireNonNull(queryFormat); 461 requireNonNull(charset); 462 463 if (string.isEmpty()) 464 return ""; 465 466 // Step 1: in form mode, '+' means space 467 String prepped = (queryFormat == QueryFormat.X_WWW_FORM_URLENCODED) ? string.replace('+', ' ') : string; 468 // Step 2: percent-decode bytes, then interpret bytes with the provided charset 469 return percentDecode(prepped, charset); 470 } 471 472 /** 473 * Percent-decodes a string into bytes, then constructs a String using the provided charset. 474 * One pass only: invalid %xy sequences trigger an exception. 475 */ 476 @NonNull 477 private static String percentDecode(@NonNull String s, @NonNull Charset charset) { 478 requireNonNull(s); 479 requireNonNull(charset); 480 481 if (s.isEmpty()) 482 return ""; 483 484 StringBuilder sb = new StringBuilder(s.length()); 485 ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 486 487 for (int i = 0; i < s.length(); ) { 488 char c = s.charAt(i); 489 490 if (c == '%') { 491 // Consume one or more consecutive %xx triplets into bytes 492 bytes.reset(); 493 int j = i; 494 495 while (j < s.length() && s.charAt(j) == '%') { 496 if (j + 2 >= s.length()) 497 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 498 499 int hi = hex(s.charAt(j + 1)); 500 int lo = hex(s.charAt(j + 2)); 501 if (hi < 0 || lo < 0) 502 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 503 504 bytes.write((hi << 4) | lo); 505 j += 3; 506 } 507 508 sb.append(new String(bytes.toByteArray(), charset)); 509 i = j; 510 continue; 511 } 512 513 // Non-'%' char: append it as-is. 514 // This preserves surrogate pairs naturally as the loop hits both chars. 
515 sb.append(c); 516 i++; 517 } 518 519 return sb.toString(); 520 } 521 522 static void validatePercentEncodingInUrlComponent(@NonNull String urlComponent) { 523 requireNonNull(urlComponent); 524 525 for (int i = 0; i < urlComponent.length(); i++) { 526 if (urlComponent.charAt(i) != '%') 527 continue; 528 529 if (i + 2 >= urlComponent.length()) 530 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 531 532 int hi = hex(urlComponent.charAt(i + 1)); 533 int lo = hex(urlComponent.charAt(i + 2)); 534 if (hi < 0 || lo < 0) 535 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 536 537 i += 2; 538 } 539 } 540 541 private static int hex(char c) { 542 if (c >= '0' && c <= '9') return c - '0'; 543 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 544 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 545 return -1; 546 } 547 548 /** 549 * Parses {@code Cookie} request headers into a map of cookie names to values. 550 * <p> 551 * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>. 552 * Values are parsed per the following liberal rules: 553 * <ul> 554 * <li>Components are split on {@code ';'} unless inside a quoted string.</li> 555 * <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li> 556 * <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li> 557 * </ul> 558 * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order. 
559 * 560 * @param headers request headers as a multimap of header name to values (must be non-{@code null}) 561 * @return a map of cookie name to distinct values; empty if no valid cookies are present 562 */ 563 @NonNull 564 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractCookiesFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 565 requireNonNull(headers); 566 567 // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive) 568 Map<String, Set<String>> cookies = new LinkedHashMap<>(); 569 570 for (Entry<String, Set<String>> entry : headers.entrySet()) { 571 String headerName = entry.getKey(); 572 if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim())) 573 continue; 574 575 Set<String> values = entry.getValue(); 576 if (values == null) continue; 577 578 for (String headerValue : values) { 579 headerValue = trimAggressivelyToNull(headerValue); 580 if (headerValue == null) continue; 581 582 // Split on ';' only when NOT inside a quoted string 583 List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue); 584 585 for (String cookieComponent : cookieComponents) { 586 cookieComponent = trimAggressivelyToNull(cookieComponent); 587 if (cookieComponent == null) continue; 588 589 String[] cookiePair = cookieComponent.split("=", 2); 590 String rawName = trimAggressivelyToNull(cookiePair[0]); 591 String rawValue = (cookiePair.length == 2 ? 
trimAggressivelyToNull(cookiePair[1]) : null); 592 593 if (rawName == null) continue; 594 595 // DO NOT decode the name; cookie names are case-sensitive and rarely encoded 596 String cookieName = rawName; 597 598 String cookieValue = null; 599 if (rawValue != null) { 600 // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space) 601 String unquoted = unquoteCookieValueIfNeeded(rawValue); 602 cookieValue = percentDecodeCookieValue(unquoted); 603 } 604 605 cookies.putIfAbsent(cookieName, Set.of()); 606 if (cookieValue != null) 607 addStringValue(cookies, cookieName, cookieValue); 608 } 609 } 610 } 611 612 freezeStringValueSets(cookies); 613 return cookies; 614 } 615 616 /** 617 * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially. 618 */ 619 @NonNull 620 private static String percentDecodeCookieValue(@NonNull String cookieValue) { 621 requireNonNull(cookieValue); 622 623 ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length()); 624 625 for (int i = 0; i < cookieValue.length(); ) { 626 char c = cookieValue.charAt(i); 627 if (c == '%') { 628 if (i + 2 >= cookieValue.length()) 629 throw new IllegalRequestException("Invalid percent-encoding in Cookie header"); 630 631 int hi = Character.digit(cookieValue.charAt(i + 1), 16); 632 int lo = Character.digit(cookieValue.charAt(i + 2), 16); 633 if (hi < 0 || lo < 0) 634 throw new IllegalRequestException("Invalid percent-encoding in Cookie header"); 635 636 out.write((hi << 4) + lo); 637 i += 3; 638 continue; 639 } 640 641 String rawCharacter; 642 643 if (Character.isHighSurrogate(c) && i + 1 < cookieValue.length() && Character.isLowSurrogate(cookieValue.charAt(i + 1))) { 644 rawCharacter = cookieValue.substring(i, i + 2); 645 i += 2; 646 } else { 647 rawCharacter = Character.toString(c); 648 i++; 649 } 650 651 byte[] encoded = rawCharacter.getBytes(StandardCharsets.UTF_8); 652 out.write(encoded, 0, encoded.length); 653 } 654 655 return 
out.toString(StandardCharsets.UTF_8); 656 } 657 658 /** 659 * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value. 660 * Supports backslash-escaped quotes within quoted strings. 661 */ 662 private static List<@NonNull String> splitCookieHeaderRespectingQuotes(@NonNull String headerValue) { 663 List<String> parts = new ArrayList<>(); 664 StringBuilder cur = new StringBuilder(headerValue.length()); 665 boolean inQuotes = false; 666 boolean escape = false; 667 668 for (int i = 0; i < headerValue.length(); i++) { 669 char c = headerValue.charAt(i); 670 671 if (escape) { 672 // keep escaped char literally (e.g., \" \; \\) 673 cur.append(c); 674 escape = false; 675 continue; 676 } 677 678 if (c == '\\') { 679 escape = true; 680 // keep the backslash for now; unquote step will handle unescaping 681 cur.append(c); 682 continue; 683 } 684 685 if (c == '"') { 686 inQuotes = !inQuotes; 687 cur.append(c); 688 continue; 689 } 690 691 if (c == ';' && !inQuotes) { 692 parts.add(cur.toString()); 693 cur.setLength(0); 694 continue; 695 } 696 697 cur.append(c); 698 } 699 700 if (cur.length() > 0) 701 parts.add(cur.toString()); 702 703 return parts; 704 } 705 706 /** 707 * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; . 708 * Otherwise returns the input as-is. 709 */ 710 @NonNull 711 private static String unquoteCookieValueIfNeeded(@NonNull String rawValue) { 712 requireNonNull(rawValue); 713 714 if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') { 715 // Strip the surrounding quotes 716 String inner = rawValue.substring(1, rawValue.length() - 1); 717 718 // Unescape \" \\ and \; (common patterns seen in the wild) 719 // Order matters: unescape backslash-escape sequences, then leave other chars intact. 
720 StringBuilder sb = new StringBuilder(inner.length()); 721 boolean escape = false; 722 723 for (int i = 0; i < inner.length(); i++) { 724 char c = inner.charAt(i); 725 if (escape) { 726 // Only special-case a few common escapes; otherwise keep the char 727 if (c == '"' || c == '\\' || c == ';') 728 sb.append(c); 729 else 730 sb.append(c); // unknown escape -> keep literally (liberal in what we accept) 731 732 escape = false; 733 } else if (c == '\\') { 734 escape = true; 735 } else { 736 sb.append(c); 737 } 738 } 739 740 // If string ended with a dangling backslash, keep it literally 741 if (escape) 742 sb.append('\\'); 743 744 return sb.toString(); 745 } 746 747 return rawValue; 748 } 749 750 /** 751 * Normalizes a URL or path into a canonical request path and optionally performs percent-decoding on the path. 752 * <p> 753 * For example, {@code "https://www.soklet.com/ab%20c?one=two"} would be normalized to {@code "/ab c"}. 754 * <p> 755 * The {@code OPTIONS *} special case returns {@code "*"}. 756 * <p> 757 * Behavior: 758 * <ul> 759 * <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li> 760 * <li>Ensures the result begins with {@code '/'}.</li> 761 * <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li> 762 * <li>Safely normalizes path traversals, e.g. path {@code '/a/../b'} would be normalized to {@code '/b'}</li> 763 * <li>Strips any query string.</li> 764 * <li>Applies aggressive trimming of Unicode whitespace.</li> 765 * <li>Rejects malformed percent-encoding when decoding is enabled.</li> 766 * </ul> 767 * 768 * @param url a URL or path to normalize 769 * @param performDecoding {@code true} if decoding should be performed on the path (e.g. 
replace {@code %20} with a space character), {@code false} otherwise 770 * @return the normalized path, {@code "/"} for empty input 771 */ 772 @NonNull 773 public static String extractPathFromUrl(@NonNull String url, 774 @NonNull Boolean performDecoding) { 775 requireNonNull(url); 776 777 url = trimAggressivelyToEmpty(url); 778 779 // Special case for OPTIONS * requests 780 if (url.equals("*")) 781 return "*"; 782 783 // Parse with java.net.URI to isolate raw path; then percent-decode only the path 784 try { 785 URI uri = new URI(url); 786 787 String rawPath = uri.getRawPath(); // null => "/" 788 789 if (rawPath == null || rawPath.isEmpty()) 790 rawPath = "/"; 791 792 if (!performDecoding) 793 return rawPath; 794 795 String decodedPath = percentDecode(rawPath, StandardCharsets.UTF_8); 796 797 // Sanitize path traversal (e.g. /a/../b -> /b) 798 decodedPath = removeDotSegments(decodedPath); 799 800 // Normalize trailing slashes like normalizedPathForUrl currently does 801 if (!decodedPath.startsWith("/")) 802 decodedPath = "/" + decodedPath; 803 804 if (!"/".equals(decodedPath)) 805 while (decodedPath.endsWith("/")) 806 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 807 808 return decodedPath; 809 } catch (URISyntaxException e) { 810 // If it's not an absolute URL, treat the whole string as a path and percent-decode 811 String path = url; 812 int q = path.indexOf('?'); 813 814 if (q != -1) 815 path = path.substring(0, q); 816 817 if (path.isEmpty()) 818 path = "/"; 819 820 if (!performDecoding) 821 return path; 822 823 String decodedPath = percentDecode(path, StandardCharsets.UTF_8); 824 825 // Sanitize path traversal (e.g. 
/a/../b -> /b) 826 decodedPath = removeDotSegments(decodedPath); 827 828 if (!decodedPath.startsWith("/")) 829 decodedPath = "/" + decodedPath; 830 831 if (!"/".equals(decodedPath)) 832 while (decodedPath.endsWith("/")) 833 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 834 835 return decodedPath; 836 } 837 } 838 839 /** 840 * Extracts the raw (un-decoded) query component from a URL. 841 * <p> 842 * For example, {@code "/path?a=b&c=d%20e"} would return {@code "a=b&c=d%20e"}. 843 * 844 * @param url a raw URL or path 845 * @return the raw query component, or {@link Optional#empty()} if none 846 */ 847 @NonNull 848 public static Optional<String> extractRawQueryFromUrl(@NonNull String url) { 849 requireNonNull(url); 850 851 url = trimAggressivelyToEmpty(url); 852 853 if ("*".equals(url)) 854 return Optional.empty(); 855 856 try { 857 URI uri = new URI(url); 858 return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery())); 859 } catch (URISyntaxException e) { 860 // Not a valid URI, try to extract query manually 861 int q = url.indexOf('?'); 862 if (q == -1) 863 return Optional.empty(); 864 865 String query = trimAggressivelyToNull(url.substring(q + 1)); 866 return Optional.ofNullable(query); 867 } 868 } 869 870 @NonNull 871 static Optional<String> extractRawQueryFromUrlStrict(@NonNull String url) { 872 requireNonNull(url); 873 874 url = trimAggressivelyToEmpty(url); 875 876 if ("*".equals(url)) 877 return Optional.empty(); 878 879 try { 880 URI uri = new URI(url); 881 return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery())); 882 } catch (URISyntaxException e) { 883 throw new IllegalRequestException(format("Invalid URL '%s'", url), e); 884 } 885 } 886 887 /** 888 * Encodes decoded query parameters into a raw query string. 889 * <p> 890 * For example, given {@code {a=[b], c=[d e]}} and {@link QueryFormat#RFC_3986_STRICT}, 891 * returns {@code "a=b&c=d%20e"}. 
892 * 893 * @param queryParameters the decoded query parameters 894 * @param queryFormat the encoding strategy 895 * @return the encoded query string, or the empty string if no parameters 896 */ 897 @NonNull 898 public static String encodeQueryParameters(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> queryParameters, 899 @NonNull QueryFormat queryFormat) { 900 requireNonNull(queryParameters); 901 requireNonNull(queryFormat); 902 903 if (queryParameters.isEmpty()) 904 return ""; 905 906 StringBuilder sb = new StringBuilder(); 907 boolean first = true; 908 909 for (Entry<String, Set<String>> entry : queryParameters.entrySet()) { 910 String encodedName = encodeQueryComponent(entry.getKey(), queryFormat); 911 912 for (String value : entry.getValue()) { 913 if (!first) 914 sb.append('&'); 915 916 sb.append(encodedName); 917 sb.append('='); 918 sb.append(encodeQueryComponent(value, queryFormat)); 919 920 first = false; 921 } 922 } 923 924 return sb.toString(); 925 } 926 927 @NonNull 928 static String encodeQueryComponent(@NonNull String queryComponent, 929 @NonNull QueryFormat queryFormat) { 930 requireNonNull(queryComponent); 931 requireNonNull(queryFormat); 932 933 String encoded = URLEncoder.encode(queryComponent, StandardCharsets.UTF_8); 934 935 if (queryFormat == QueryFormat.RFC_3986_STRICT) 936 encoded = encoded.replace("+", "%20"); 937 938 return encoded; 939 } 940 941 @NonNull 942 static String encodePath(@NonNull String path) { 943 requireNonNull(path); 944 945 if ("*".equals(path)) 946 return path; 947 948 // Encode each path segment individually, preserving '/' separators. 949 // RFC 3986 is used for path encoding (spaces as %20, not +). 950 return Arrays.stream(path.split("/", -1)) 951 .map(segment -> URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20")) 952 .collect(Collectors.joining("/")); 953 } 954 955 /** 956 * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s. 
957 * <p> 958 * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then converted to 959 * {@link Locale} instances that represent the client-supplied language tags. Wildcard ranges are ignored unless 960 * they include a language component (e.g. {@code en-*} becomes {@code en}). On parse failure, an empty list is 961 * returned. 962 * 963 * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null}) 964 * @return locales in descending preference order; empty if none could be resolved 965 */ 966 @NonNull 967 public static List<@NonNull Locale> extractLocalesFromAcceptLanguageHeaderValue(@NonNull String acceptLanguageHeaderValue) { 968 requireNonNull(acceptLanguageHeaderValue); 969 970 try { 971 List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue); 972 List<Locale> locales = new ArrayList<>(languageRanges.size()); 973 974 for (LanguageRange languageRange : languageRanges) { 975 if (!(languageRange.getWeight() > 0.0)) 976 continue; 977 978 String range = languageRange.getRange(); 979 String languageTag = range; 980 981 if (range.indexOf('*') != -1) { 982 int wildcardIndex = range.indexOf('*'); 983 984 if (wildcardIndex == 0) 985 continue; 986 987 int languageEndIndex = range.indexOf('-'); 988 989 if (languageEndIndex == -1 || languageEndIndex > wildcardIndex) 990 languageEndIndex = wildcardIndex; 991 992 languageTag = range.substring(0, languageEndIndex); 993 } 994 995 if (languageTag.isBlank()) 996 continue; 997 998 Locale locale = Locale.forLanguageTag(languageTag); 999 1000 if (!locale.getLanguage().isBlank() && !locales.contains(locale)) 1001 locales.add(locale); 1002 } 1003 1004 return Collections.unmodifiableList(locales); 1005 } catch (Exception ignored) { 1006 return List.of(); 1007 } 1008 } 1009 1010 @Nullable 1011 private static String firstHeaderValue(@Nullable Set<String> headerValues) { 1012 if (headerValues == null || headerValues.isEmpty()) 1013 return null; 1014 
1015 for (String value : headerValues) { 1016 String trimmed = trimAggressivelyToNull(value); 1017 if (trimmed == null) 1018 continue; 1019 1020 for (String part : splitCommaAware(trimmed)) { 1021 String candidate = trimAggressivelyToNull(part); 1022 if (candidate != null) 1023 return candidate; 1024 } 1025 } 1026 1027 return null; 1028 } 1029 1030 /** 1031 * Best-effort attempt to determine a client's effective origin by examining request headers. 1032 * <p> 1033 * An effective origin in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components. 1034 * <p> 1035 * Soklet is generally the "last hop" behind a load balancer/reverse proxy but may also be accessed directly by clients. 1036 * <p> 1037 * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the 1038 * request through headers like the following: 1039 * <ul> 1040 * <li>{@code Host}</li> 1041 * <li>{@code Forwarded}</li> 1042 * <li>{@code Origin}</li> 1043 * <li>{@code X-Forwarded-Proto}</li> 1044 * <li>{@code X-Forwarded-Protocol}</li> 1045 * <li>{@code X-Url-Scheme}</li> 1046 * <li>{@code Front-End-Https}</li> 1047 * <li>{@code X-Forwarded-Ssl}</li> 1048 * <li>{@code X-Forwarded-Host}</li> 1049 * <li>{@code X-Forwarded-Port}</li> 1050 * </ul> 1051 * <p> 1052 * This method may take these and other headers into account when determining an effective origin. 
	/**
	 * Best-effort attempt to determine a client's effective origin by examining request headers.
	 * <p>
	 * An effective origin in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components.
	 * <p>
	 * Soklet is generally the "last hop" behind a load balancer/reverse proxy but may also be accessed directly by clients.
	 * <p>
	 * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the
	 * request through headers like the following:
	 * <ul>
	 *   <li>{@code Host}</li>
	 *   <li>{@code Forwarded}</li>
	 *   <li>{@code Origin}</li>
	 *   <li>{@code X-Forwarded-Proto}</li>
	 *   <li>{@code X-Forwarded-Protocol}</li>
	 *   <li>{@code X-Url-Scheme}</li>
	 *   <li>{@code Front-End-Https}</li>
	 *   <li>{@code X-Forwarded-Ssl}</li>
	 *   <li>{@code X-Forwarded-Host}</li>
	 *   <li>{@code X-Forwarded-Port}</li>
	 * </ul>
	 * <p>
	 * This method may take these and other headers into account when determining an effective origin.
	 * <p>
	 * For example, the following would be legal effective origins returned from this method:
	 * <ul>
	 *   <li>{@code https://www.soklet.com}</li>
	 *   <li>{@code http://www.fake.com:1234}</li>
	 * </ul>
	 * <p>
	 * The following would NOT be legal effective origins:
	 * <ul>
	 *   <li>{@code www.soklet.com} (missing protocol)</li>
	 *   <li>{@code https://www.soklet.com/} (trailing slash)</li>
	 *   <li>{@code https://www.soklet.com/test} (includes a path)</li>
	 *   <li>{@code https://www.soklet.com/test?abc=1234} (includes a path and query)</li>
	 * </ul>
	 * <p>
	 * {@code Origin} is treated as a fallback signal only and will not override a conflicting {@code Host} or forwarded host value.
	 * <p>
	 * Forwarded headers are only used when permitted by {@link EffectiveOriginResolver.TrustPolicy}. When using
	 * {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}, you must provide a trusted proxy predicate or allowlist.
	 * If the remote address is missing or not trusted, forwarded headers are ignored.
	 * <p>
	 * Extraction order is: trusted forwarded headers → {@code Host} → (optional) {@code Origin} fallback.
	 * If {@link EffectiveOriginResolver#allowOriginFallback(Boolean)} is unset, {@code Origin} fallback is enabled only for
	 * {@link EffectiveOriginResolver.TrustPolicy#TRUST_ALL}.
	 * <p>
	 * A port value that is not an integer in the range 1-65535 is ignored and the origin is returned without a port.
	 *
	 * @param effectiveOriginResolver request headers and trust settings
	 * @return the effective origin, or {@link Optional#empty()} if it could not be determined
	 * @throws IllegalStateException if the policy is {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}
	 *                               and no trusted proxy predicate is configured
	 */
	@NonNull
	static Optional<String> extractEffectiveOrigin(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
		requireNonNull(effectiveOriginResolver);
		requireNonNull(effectiveOriginResolver.getHeaders());
		requireNonNull(effectiveOriginResolver.getTrustPolicy());

		// Fail fast on a misconfigured allowlist policy instead of silently ignoring forwarded headers
		if (effectiveOriginResolver.getTrustPolicy() == EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST
				&& effectiveOriginResolver.getTrustedProxyPredicate() == null) {
			throw new IllegalStateException(format("%s policy requires a trusted proxy predicate or allowlist.",
					EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST));
		}

		Map<String, Set<String>> headers = effectiveOriginResolver.getHeaders();
		boolean trustForwardedHeaders = shouldTrustForwardedHeaders(effectiveOriginResolver);
		// An explicit setting wins; otherwise Origin fallback is enabled only under TRUST_ALL
		boolean allowOriginFallback = effectiveOriginResolver.getAllowOriginFallback() != null
				? effectiveOriginResolver.getAllowOriginFallback()
				: effectiveOriginResolver.getTrustPolicy() == EffectiveOriginResolver.TrustPolicy.TRUST_ALL;

		// Host                   developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
		// Forwarded              by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
		// Origin                 null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
		// X-Forwarded-Proto      https
		// X-Forwarded-Protocol   https (Microsoft's alternate name)
		// X-Url-Scheme           https (Microsoft's alternate name)
		// Front-End-Https        on (Microsoft's alternate name)
		// X-Forwarded-Ssl        on (Microsoft's alternate name)
		// X-Forwarded-Host       id42.example-cdn.com
		// X-Forwarded-Port       443

		// Each piece is filled in by the first source (in the order below) that supplies it
		String protocol = null;
		String host = null;
		String portAsString = null;
		Boolean portExplicit = false;

		// Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https>
		if (trustForwardedHeaders) {
			Set<String> forwardedHeaders = headers.get("Forwarded");
			if (forwardedHeaders != null) {
				forwardedHeaderLoop:
				for (String forwardedHeader : forwardedHeaders) {
					String trimmed = trimAggressivelyToNull(forwardedHeader);
					if (trimmed == null)
						continue;

					// A single header value may carry several comma-separated entries (one per hop)
					for (String forwardedEntry : splitCommaAware(trimmed)) {
						String entry = trimAggressivelyToNull(forwardedEntry);
						if (entry == null)
							continue;

						String entryHost = null;
						String entryProtocol = null;
						String entryPortAsString = null;
						Boolean entryPortExplicit = false;

						// Each field component might look like "by=<identifier>"
						List<String> forwardedHeaderFieldComponents = splitSemicolonAware(entry);
						for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) {
							forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent);
							if (forwardedHeaderFieldComponent == null)
								continue;

							// Break "by=<identifier>" into "by" and "<identifier>" pieces
							String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */), 2);
							if (forwardedHeaderFieldNameAndValue.length != 2)
								continue;

							String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]);
							String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]);
							if (name == null || value == null)
								continue;

							// Only the first "host" and first "proto" within an entry are honored
							if ("host".equalsIgnoreCase(name)) {
								if (entryHost == null) {
									HostPort hostPort = parseHostPort(value).orElse(null);

									if (hostPort != null) {
										entryHost = hostPort.getHost();

										if (hostPort.getPort().isPresent()) {
											entryPortAsString = String.valueOf(hostPort.getPort().get());
											entryPortExplicit = true;
										}
									}
								}
							} else if ("proto".equalsIgnoreCase(name)) {
								if (entryProtocol == null)
									entryProtocol = stripOptionalQuotes(value);
							}
						}

						// The first entry that yields a host or a protocol wins; later entries and header values are not consulted
						if (entryHost != null || entryProtocol != null) {
							host = entryHost;
							protocol = entryProtocol;
							if (entryPortAsString != null) {
								portAsString = entryPortAsString;
								portExplicit = entryPortExplicit;
							}
							break forwardedHeaderLoop;
						}
					}
				}
			}
		}

		// X-Forwarded-Proto: https
		if (trustForwardedHeaders && protocol == null) {
			String xForwardedProtoHeader = firstHeaderValue(headers.get("X-Forwarded-Proto"));
			if (xForwardedProtoHeader != null)
				protocol = stripOptionalQuotes(xForwardedProtoHeader);
		}

		// X-Forwarded-Protocol: https (Microsoft's alternate name)
		if (trustForwardedHeaders && protocol == null) {
			String xForwardedProtocolHeader = firstHeaderValue(headers.get("X-Forwarded-Protocol"));
			if (xForwardedProtocolHeader != null)
				protocol = stripOptionalQuotes(xForwardedProtocolHeader);
		}

		// X-Url-Scheme: https (Microsoft's alternate name)
		if (trustForwardedHeaders && protocol == null) {
			String xUrlSchemeHeader = firstHeaderValue(headers.get("X-Url-Scheme"));
			if (xUrlSchemeHeader != null)
				protocol = stripOptionalQuotes(xUrlSchemeHeader);
		}

		// Front-End-Https: on (Microsoft's alternate name)
		// Any value other than "on" (case-insensitive) is taken to mean plain http
		if (trustForwardedHeaders && protocol == null) {
			String frontEndHttpsHeader = firstHeaderValue(headers.get("Front-End-Https"));
			if (frontEndHttpsHeader != null)
				protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http";
		}

		// X-Forwarded-Ssl: on (Microsoft's alternate name)
		// Any value other than "on" (case-insensitive) is taken to mean plain http
		if (trustForwardedHeaders && protocol == null) {
			String xForwardedSslHeader = firstHeaderValue(headers.get("X-Forwarded-Ssl"));
			if (xForwardedSslHeader != null)
				protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? "https" : "http";
		}

		// X-Forwarded-Host: id42.example-cdn.com (or with port / IPv6)
		if (trustForwardedHeaders && host == null) {
			String xForwardedHostHeader = firstHeaderValue(headers.get("X-Forwarded-Host"));
			if (xForwardedHostHeader != null) {
				HostPort hostPort = parseHostPort(xForwardedHostHeader).orElse(null);

				if (hostPort != null) {
					host = hostPort.getHost();

					// Do not override a port already supplied by the Forwarded header
					if (hostPort.getPort().isPresent() && portAsString == null) {
						portAsString = String.valueOf(hostPort.getPort().get());
						portExplicit = true;
					}
				}
			}
		}

		// X-Forwarded-Port: 443
		if (trustForwardedHeaders && portAsString == null) {
			String xForwardedPortHeader = firstHeaderValue(headers.get("X-Forwarded-Port"));
			if (xForwardedPortHeader != null) {
				portAsString = stripOptionalQuotes(xForwardedPortHeader);
				portExplicit = true;
			}
		}

		// Host: developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
		// Consulted regardless of trust policy, but only when no forwarded header supplied a host
		if (host == null) {
			String hostHeader = firstHeaderValue(headers.get("Host"));

			if (hostHeader != null) {
				HostPort hostPort = parseHostPort(hostHeader).orElse(null);

				if (hostPort != null) {
					host = hostPort.getHost();

					if (hostPort.getPort().isPresent() && portAsString == null) {
						portAsString = String.valueOf(hostPort.getPort().get());
						portExplicit = true;
					}
				}
			}
		}

		// Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> (IPv6 supported)
		// Use Origin only when host is missing or when it matches the Host-derived value.
		if (allowOriginFallback && (protocol == null || host == null || portAsString == null)) {
			String originHeader = firstHeaderValue(headers.get("Origin"));

			if (originHeader != null) {
				try {
					URI o = new URI(originHeader);
					String originProtocol = trimAggressivelyToNull(o.getScheme());
					String originHost = o.getHost(); // may be bracketed already on some JDKs
					int originPort = o.getPort(); // -1 if absent

					// Normalize IPv6 literals to bracketed form so they compare equal to the host resolved above
					if (originHost != null) {
						boolean alreadyBracketed = originHost.startsWith("[") && originHost.endsWith("]");
						boolean isIpv6Like = originHost.indexOf(':') >= 0; // contains colon(s)
						originHost = (isIpv6Like && !alreadyBracketed) ? "[" + originHost + "]" : originHost;
					}

					boolean hostMatchesOrigin = host != null && originHost != null && host.equalsIgnoreCase(originHost);

					if (host == null) {
						// No host from any other source: take whatever Origin provides
						if (originHost != null)
							host = originHost;
						if (originProtocol != null)
							protocol = originProtocol;
						if (originPort >= 0) {
							portAsString = String.valueOf(originPort);
							portExplicit = true;
						}
					} else if (hostMatchesOrigin) {
						// Same host: Origin may only fill in pieces that are still missing
						if (protocol == null && originProtocol != null)
							protocol = originProtocol;
						if (portAsString == null && originPort >= 0) {
							portAsString = String.valueOf(originPort);
							portExplicit = true;
						}
					}
					// A conflicting Origin host is ignored entirely
				} catch (URISyntaxException ignored) {
					// Unparseable Origin (e.g. the literal "null"): nothing to fall back to
				}
			}
		}

		Integer port = null;

		if (portAsString != null) {
			try {
				int parsedPort = Integer.parseInt(portAsString, 10);
				// Out-of-range values leave port unset
				if (parsedPort >= 1 && parsedPort <= 65535)
					port = parsedPort;
			} catch (Exception ignored) {
				// Not an integer; ignore it
			}
		}

		if (protocol != null && host != null && port == null) {
			return Optional.of(format("%s://%s", protocol, host));
		}

		if (protocol != null && host != null && port != null) {
			boolean usingDefaultPort =
					("http".equalsIgnoreCase(protocol) && port.equals(80)) ||
							("https".equalsIgnoreCase(protocol) && port.equals(443));

			// Keep default ports if the client/proxy explicitly sent them
			// NOTE(review): every assignment to portAsString above also sets portExplicit to true, so the
			// port-less branch below looks unreachable in this method - confirm whether that is intended
			String effectiveOrigin = (usingDefaultPort && !portExplicit)
					? format("%s://%s", protocol, host)
					: format("%s://%s:%s", protocol, host, port);

			return Optional.of(effectiveOrigin);
		}

		// Either protocol or host could not be determined
		return Optional.empty();
	}

	/**
	 * Decides whether forwarded headers may be consulted for the given resolver.
	 * <p>
	 * {@code TRUST_ALL} always trusts, {@code TRUST_NONE} never trusts; for any other policy the headers are trusted
	 * only if both a remote address and a trusted proxy predicate are present and the predicate accepts the address.
	 *
	 * @param effectiveOriginResolver request headers and trust settings
	 * @return {@code true} if forwarded headers should be honored
	 */
	private static boolean shouldTrustForwardedHeaders(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
		if (effectiveOriginResolver.getTrustPolicy() == EffectiveOriginResolver.TrustPolicy.TRUST_ALL)
			return true;

		if (effectiveOriginResolver.getTrustPolicy() == EffectiveOriginResolver.TrustPolicy.TRUST_NONE)
			return false;

		var remoteAddress = effectiveOriginResolver.getRemoteAddress();
		var trustedProxyPredicate = effectiveOriginResolver.getTrustedProxyPredicate();

		// Without a remote address or predicate there is nothing to verify against, so do not trust
		if (remoteAddress == null || trustedProxyPredicate == null)
			return false;

		return trustedProxyPredicate.test(remoteAddress);
	}
1359 * 1360 * @param headers request/response headers (must be non-{@code null}) 1361 * @return the media type if present; otherwise {@link Optional#empty()} 1362 * @see #extractContentTypeFromHeaderValue(String) 1363 */ 1364 @NonNull 1365 public static Optional<String> extractContentTypeFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1366 requireNonNull(headers); 1367 1368 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1369 1370 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1371 return Optional.empty(); 1372 1373 return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1374 } 1375 1376 /** 1377 * Extracts the media type (without parameters) from a {@code Content-Type} header value. 1378 * <p> 1379 * For example, {@code "application/json; charset=UTF-8"} → {@code "application/json"}. 1380 * 1381 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1382 * @return the media type if present; otherwise {@link Optional#empty()} 1383 */ 1384 @NonNull 1385 public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1386 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1387 1388 if (contentTypeHeaderValue == null) 1389 return Optional.empty(); 1390 1391 // Examples 1392 // Content-Type: text/html; charset=UTF-8 1393 // Content-Type: multipart/form-data; boundary=something 1394 1395 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1396 1397 // Simple case, e.g. "text/html" 1398 if (indexOfSemicolon == -1) 1399 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue)); 1400 1401 // More complex case, e.g. 
"text/html; charset=UTF-8" 1402 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon))); 1403 } 1404 1405 /** 1406 * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid. 1407 * <p> 1408 * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}. 1409 * 1410 * @param headers request/response headers (must be non-{@code null}) 1411 * @return the charset declared by the header; otherwise {@link Optional#empty()} 1412 * @see #extractCharsetFromHeaderValue(String) 1413 */ 1414 @NonNull 1415 public static Optional<Charset> extractCharsetFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1416 requireNonNull(headers); 1417 1418 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1419 1420 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1421 return Optional.empty(); 1422 1423 return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1424 } 1425 1426 /** 1427 * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value. 1428 * <p> 1429 * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found, 1430 * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}. 
1431 * 1432 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1433 * @return the resolved charset if present and valid; otherwise {@link Optional#empty()} 1434 */ 1435 @NonNull 1436 public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1437 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1438 1439 if (contentTypeHeaderValue == null) 1440 return Optional.empty(); 1441 1442 // Examples 1443 // Content-Type: text/html; charset=UTF-8 1444 // Content-Type: multipart/form-data; boundary=something 1445 1446 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1447 1448 // Simple case, e.g. "text/html" 1449 if (indexOfSemicolon == -1) 1450 return Optional.empty(); 1451 1452 // More complex case, e.g. "text/html; charset=UTF-8" or "multipart/form-data; charset=UTF-8; boundary=something" 1453 boolean finishedContentType = false; 1454 boolean finishedCharsetName = false; 1455 StringBuilder buffer = new StringBuilder(); 1456 String charsetName = null; 1457 1458 for (int i = 0; i < contentTypeHeaderValue.length(); i++) { 1459 char c = contentTypeHeaderValue.charAt(i); 1460 1461 if (Character.isWhitespace(c)) 1462 continue; 1463 1464 if (c == ';') { 1465 // No content type yet? This just be it... 1466 if (!finishedContentType) { 1467 finishedContentType = true; 1468 buffer = new StringBuilder(); 1469 } else if (!finishedCharsetName) { 1470 if (buffer.indexOf("charset=") == 0) { 1471 charsetName = buffer.toString(); 1472 finishedCharsetName = true; 1473 break; 1474 } 1475 } 1476 } else { 1477 buffer.append(Character.toLowerCase(c)); 1478 } 1479 } 1480 1481 // Handle case where charset is the end of the string, e.g. 
"whatever;charset=UTF-8" 1482 if (!finishedCharsetName) { 1483 String potentialCharset = trimAggressivelyToNull(buffer.toString()); 1484 if (potentialCharset != null && potentialCharset.startsWith("charset=")) { 1485 finishedCharsetName = true; 1486 charsetName = potentialCharset; 1487 } 1488 } 1489 1490 if (finishedCharsetName) { 1491 // e.g. charset=UTF-8 or charset="UTF-8" or charset='UTF-8' 1492 String possibleCharsetName = trimAggressivelyToNull(charsetName.replace("charset=", "")); 1493 1494 if (possibleCharsetName != null) { 1495 // strip optional surrounding quotes 1496 if ((possibleCharsetName.length() >= 2) && 1497 ((possibleCharsetName.charAt(0) == '"' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '"') || 1498 (possibleCharsetName.charAt(0) == '\'' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '\''))) { 1499 possibleCharsetName = possibleCharsetName.substring(1, possibleCharsetName.length() - 1); 1500 possibleCharsetName = trimAggressivelyToNull(possibleCharsetName); 1501 } 1502 1503 if (possibleCharsetName != null) { 1504 try { 1505 return Optional.of(Charset.forName(possibleCharsetName)); 1506 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) { 1507 return Optional.empty(); 1508 } 1509 } 1510 } 1511 } 1512 1513 return Optional.empty(); 1514 } 1515 1516 /** 1517 * A "stronger" version of {@link String#trim()} which discards leading and trailing Unicode space-separator characters ({@code \p{Z}}). 1518 * <p> 1519 * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time. 1520 * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F 1521 * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing. 1522 * <p> 1523 * Note that this does not remove other whitespace characters such as tabs, carriage returns, or line feeds. 
1524 * <p> 1525 * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details. 1526 * 1527 * @param string the string to trim 1528 * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0} 1529 */ 1530 @Nullable 1531 public static String trimAggressively(@Nullable String string) { 1532 if (string == null) 1533 return null; 1534 1535 string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1536 1537 if (string.length() == 0) 1538 return string; 1539 1540 string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1541 1542 return string; 1543 } 1544 1545 /** 1546 * Aggressively trims leading and trailing Unicode space-separator characters from the given string and returns {@code null} if the result is empty. 1547 * <p> 1548 * See {@link #trimAggressively(String)} for details on which code points are removed. 1549 * 1550 * @param string the input string; may be {@code null} 1551 * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty 1552 */ 1553 @Nullable 1554 public static String trimAggressivelyToNull(@Nullable String string) { 1555 if (string == null) 1556 return null; 1557 1558 string = trimAggressively(string); 1559 return string.length() == 0 ? null : string; 1560 } 1561 1562 /** 1563 * Aggressively trims leading and trailing Unicode space-separator characters from the given string and returns {@code ""} if the input is {@code null}. 1564 * <p> 1565 * See {@link #trimAggressively(String)} for details on which code points are removed. 
1566 * 1567 * @param string the input string; may be {@code null} 1568 * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null} 1569 */ 1570 @NonNull 1571 public static String trimAggressivelyToEmpty(@Nullable String string) { 1572 if (string == null) 1573 return ""; 1574 1575 return trimAggressively(string); 1576 } 1577 1578 static void validateHeaderNameAndValue(@Nullable String name, 1579 @Nullable String value) { 1580 // First, validate name: 1581 name = trimAggressivelyToNull(name); 1582 1583 if (name == null) 1584 throw new IllegalArgumentException("Header name is blank"); 1585 1586 for (int i = 0; i < name.length(); i++) { 1587 char c = name.charAt(i); 1588 // RFC 9110 tchar: "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 1589 if (c > 0x7F || !(c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '*' || c == '+' || 1590 c == '-' || c == '.' || c == '^' || c == '_' || c == '`' || c == '|' || c == '~' || 1591 Character.isLetterOrDigit(c))) { 1592 throw new IllegalArgumentException(format("Illegal header name '%s'. Offending character: '%s'", name, printableChar(c))); 1593 } 1594 } 1595 1596 // Then, validate value: 1597 if (value == null) 1598 return; 1599 1600 for (int i = 0; i < value.length(); i++) { 1601 char c = value.charAt(i); 1602 if (c == '\r' || c == '\n' || c == 0x00 || c > 0xFF || (c >= 0x00 && c < 0x20 && c != '\t')) { 1603 throw new IllegalArgumentException(format("Illegal header value '%s' for header name '%s'. 
Offending character: '%s'", value, name, printableChar(c))); 1604 } 1605 } 1606 1607 // Percent-encoded control sequence checks 1608 Matcher m = HEADER_PERCENT_ENCODING_PATTERN.matcher(value); 1609 1610 while (m.find()) { 1611 int b = Integer.parseInt(m.group(1), 16); 1612 if (b == 0x0D || b == 0x0A || b == 0x00 || (b >= 0x00 && b < 0x20 && b != 0x09)) { 1613 throw new IllegalArgumentException(format( 1614 "Illegal (percent-encoded) header value '%s' for header name '%s'. Offending octet: 0x%02X", 1615 value, name, b)); 1616 } 1617 } 1618 } 1619 1620 @NonNull 1621 static String printableString(@NonNull String input) { 1622 requireNonNull(input); 1623 1624 StringBuilder out = new StringBuilder(input.length() + 16); 1625 1626 for (int i = 0; i < input.length(); i++) 1627 out.append(printableChar(input.charAt(i))); 1628 1629 return out.toString(); 1630 } 1631 1632 @NonNull 1633 static String printableChar(char c) { 1634 if (c == '\r') return "\\r"; 1635 if (c == '\n') return "\\n"; 1636 if (c == '\t') return "\\t"; 1637 if (c == '\f') return "\\f"; 1638 if (c == '\b') return "\\b"; 1639 if (c == '\\') return "\\\\"; 1640 if (c == '\'') return "\\'"; 1641 if (c == '\"') return "\\\""; 1642 if (c == 0) return "\\0"; 1643 1644 if (c < 0x20 || c == 0x7F) // control chars 1645 return String.format("\\u%04X", (int) c); 1646 1647 if (Character.isISOControl(c) || Character.getType(c) == Character.FORMAT) 1648 return String.format("\\u%04X", (int) c); 1649 1650 return String.valueOf(c); 1651 } 1652 1653 @NonNull 1654 private static final Set<String> COMMA_JOINABLE_HEADER_NAMES = Set.of( 1655 // Common list-type headers (RFC 7230/9110) 1656 "accept", 1657 "accept-encoding", 1658 "accept-language", 1659 "cache-control", 1660 "pragma", 1661 "vary", 1662 "connection", 1663 "transfer-encoding", 1664 "upgrade", 1665 "allow", 1666 "via", 1667 "warning" 1668 // intentionally NOT: set-cookie, authorization, cookie, content-disposition, location 1669 ); 1670 1671 /** 1672 * Given a list 
of raw HTTP header lines, convert them into a normalized case-insensitive, order-preserving map which "inflates" comma-separated headers into distinct values where permitted according to RFC 7230/9110. 1673 * <p> 1674 * For example, given these raw header lines: 1675 * <pre>{@code List<String> lines = List.of( 1676 * "Cache-Control: no-cache, no-store", 1677 * "Set-Cookie: a=b; Path=/; HttpOnly", 1678 * "Set-Cookie: c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/" 1679 * );}</pre> 1680 * The result of parsing would look like this: 1681 * <pre>{@code result.get("cache-control") -> [ 1682 * "no-cache", 1683 * "no-store" 1684 * ] 1685 * result.get("set-cookie") -> [ 1686 * "a=b; Path=/; HttpOnly", 1687 * "c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/" 1688 * ]}</pre> 1689 * <p> 1690 * Keys in the returned map are case-insensitive and are guaranteed to be in the same order as encountered in {@code rawHeaderLines}. 1691 * <p> 1692 * Values in the returned map are guaranteed to be in the same order as encountered in {@code rawHeaderLines}. 
1693 * 1694 * @param rawHeaderLines the raw HTTP header lines to parse 1695 * @return a normalized mapping of header name keys to values 1696 */ 1697 @NonNull 1698 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractHeadersFromRawHeaderLines(@NonNull List<@NonNull String> rawHeaderLines) { 1699 requireNonNull(rawHeaderLines); 1700 1701 // 1) Unfold obsolete folded lines (obs-fold): lines beginning with SP/HT are continuations 1702 List<String> lines = unfold(rawHeaderLines); 1703 1704 // 2) Parse into map 1705 Map<String, Set<String>> headers = new LinkedCaseInsensitiveMap<>(); 1706 1707 for (String raw : lines) { 1708 String line = trimAggressivelyToNull(raw); 1709 1710 if (line == null) 1711 continue; 1712 1713 int idx = line.indexOf(':'); 1714 1715 if (idx <= 0) 1716 continue; // skip malformed 1717 1718 addParsedHeader(headers, line.substring(0, idx), line.substring(idx + 1)); 1719 } 1720 1721 freezeStringValueSets(headers); 1722 return headers; 1723 } 1724 1725 static void addParsedHeader(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1726 @Nullable String name, 1727 @Nullable String value) { 1728 requireNonNull(headers); 1729 1730 String key = trimAggressivelyToEmpty(name); // keep original case for display 1731 if (trimAggressivelyToNull(value) == null) 1732 return; 1733 1734 if (COMMA_JOINABLE_HEADER_NAMES.contains(key.toLowerCase(Locale.ROOT))) { 1735 for (String part : splitCommaAware(value)) { 1736 String v = trimAggressivelyToNull(part); 1737 if (v != null) 1738 addStringValue(headers, key, v); 1739 } 1740 } else { 1741 addStringValue(headers, key, value.trim()); 1742 } 1743 } 1744 1745 static void addParsedHeaderValues(@NonNull Set<@NonNull String> values, 1746 @Nullable String name, 1747 @Nullable String value) { 1748 requireNonNull(values); 1749 1750 String key = trimAggressivelyToEmpty(name); 1751 String keyLowercase = key.toLowerCase(Locale.ROOT); 1752 value = trimAggressivelyToNull(value); 1753 1754 if 
(value == null) 1755 return; 1756 1757 if (COMMA_JOINABLE_HEADER_NAMES.contains(keyLowercase)) { 1758 for (String part : splitCommaAware(value)) { 1759 String v = trimAggressivelyToNull(part); 1760 if (v != null) 1761 values.add(v); 1762 } 1763 } else { 1764 values.add(value.trim()); 1765 } 1766 } 1767 1768 static void freezeStringValueSets(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> valuesByName) { 1769 requireNonNull(valuesByName); 1770 1771 for (Entry<String, Set<String>> entry : valuesByName.entrySet()) { 1772 Set<String> values = entry.getValue(); 1773 1774 if (values == null || values.isEmpty()) { 1775 entry.setValue(Set.of()); 1776 } else if (values instanceof LinkedHashSet) { 1777 entry.setValue(Collections.unmodifiableSet(values)); 1778 } 1779 } 1780 } 1781 1782 private static void addStringValue(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> valuesByName, 1783 @NonNull String name, 1784 @NonNull String value) { 1785 requireNonNull(valuesByName); 1786 requireNonNull(name); 1787 requireNonNull(value); 1788 1789 Set<String> values = valuesByName.get(name); 1790 1791 if (values == null || values.isEmpty()) { 1792 valuesByName.put(name, Set.of(value)); 1793 return; 1794 } 1795 1796 if (values.contains(value)) 1797 return; 1798 1799 if (values instanceof LinkedHashSet) { 1800 values.add(value); 1801 return; 1802 } 1803 1804 Set<String> promotedValues = new LinkedHashSet<>(values); 1805 promotedValues.add(value); 1806 valuesByName.put(name, promotedValues); 1807 } 1808 1809 /** 1810 * Header parsing helper 1811 */ 1812 @NonNull 1813 private static List<String> unfold(@NonNull List<String> raw) { 1814 requireNonNull(raw); 1815 if (raw.isEmpty()) return List.of(); 1816 1817 List<String> out = new ArrayList<>(raw.size()); 1818 StringBuilder cur = null; 1819 boolean curIsHeader = false; 1820 1821 for (String line : raw) { 1822 if (line == null) continue; 1823 1824 boolean isContinuation = !line.isEmpty() && (line.charAt(0) == ' ' || 
line.charAt(0) == '\t'); 1825 if (isContinuation) { 1826 if (cur != null && curIsHeader) { 1827 cur.append(' ').append(line.trim()); 1828 } else { 1829 // Do not fold into a non-header; flush previous and start anew 1830 if (cur != null) out.add(cur.toString()); 1831 cur = new StringBuilder(line); 1832 curIsHeader = line.indexOf(':') > 0; // almost certainly false for leading-space lines 1833 } 1834 } else { 1835 if (cur != null) out.add(cur.toString()); 1836 cur = new StringBuilder(line); 1837 curIsHeader = line.indexOf(':') > 0; 1838 } 1839 } 1840 if (cur != null) out.add(cur.toString()); 1841 return out; 1842 } 1843 1844 /** 1845 * Header parsing helper: split on commas that are not inside a quoted-string; supports \" escapes inside quotes. 1846 */ 1847 @NonNull 1848 private static List<String> splitCommaAware(@NonNull String string) { 1849 requireNonNull(string); 1850 1851 List<String> out = new ArrayList<>(4); 1852 StringBuilder cur = new StringBuilder(); 1853 boolean inQuotes = false; 1854 boolean escaped = false; 1855 1856 for (int i = 0; i < string.length(); i++) { 1857 char c = string.charAt(i); 1858 1859 if (escaped) { 1860 // Preserve the escaped char as-is 1861 cur.append(c); 1862 escaped = false; 1863 } else if (c == '\\') { 1864 if (inQuotes) { 1865 // Preserve the backslash itself, then mark next char as escaped 1866 cur.append('\\'); // ← keep the backslash 1867 escaped = true; 1868 } else { 1869 cur.append('\\'); // literal backslash outside quotes 1870 } 1871 } else if (c == '"') { 1872 inQuotes = !inQuotes; 1873 cur.append('"'); 1874 } else if (c == ',' && !inQuotes) { 1875 out.add(cur.toString()); 1876 cur.setLength(0); 1877 } else { 1878 cur.append(c); 1879 } 1880 } 1881 out.add(cur.toString()); 1882 return out; 1883 } 1884 1885 /** 1886 * Header parsing helper: split on semicolons that are not inside a quoted-string; supports \" escapes inside quotes. 
1887 */ 1888 @NonNull 1889 private static List<String> splitSemicolonAware(@NonNull String string) { 1890 requireNonNull(string); 1891 1892 List<String> out = new ArrayList<>(4); 1893 StringBuilder cur = new StringBuilder(); 1894 boolean inQuotes = false; 1895 boolean escaped = false; 1896 1897 for (int i = 0; i < string.length(); i++) { 1898 char c = string.charAt(i); 1899 1900 if (escaped) { 1901 cur.append(c); 1902 escaped = false; 1903 } else if (c == '\\') { 1904 if (inQuotes) { 1905 cur.append('\\'); 1906 escaped = true; 1907 } else { 1908 cur.append('\\'); 1909 } 1910 } else if (c == '"') { 1911 inQuotes = !inQuotes; 1912 cur.append('"'); 1913 } else if (c == ';' && !inQuotes) { 1914 out.add(cur.toString()); 1915 cur.setLength(0); 1916 } else { 1917 cur.append(c); 1918 } 1919 } 1920 1921 out.add(cur.toString()); 1922 return out; 1923 } 1924 1925 /** 1926 * Remove a single pair of surrounding quotes if present. 1927 */ 1928 @NonNull 1929 private static String stripOptionalQuotes(@NonNull String string) { 1930 requireNonNull(string); 1931 1932 if (string.length() >= 2) { 1933 char first = string.charAt(0), last = string.charAt(string.length() - 1); 1934 1935 if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) 1936 return string.substring(1, string.length() - 1); 1937 } 1938 1939 return string; 1940 } 1941 1942 /** 1943 * Parse host[:port] with IPv6 support: "[v6](:port)?" or "host(:port)?". 1944 * Returns host (with brackets for v6) and port (nullable). 
1945 */ 1946 @ThreadSafe 1947 private static final class HostPort { 1948 @NonNull 1949 private final String host; 1950 @Nullable 1951 private final Integer port; 1952 1953 HostPort(@NonNull String host, 1954 @Nullable Integer port) { 1955 this.host = host; 1956 this.port = port; 1957 } 1958 1959 @NonNull 1960 public String getHost() { 1961 return this.host; 1962 } 1963 1964 @NonNull 1965 public Optional<Integer> getPort() { 1966 return Optional.ofNullable(this.port); 1967 } 1968 } 1969 1970 @NonNull 1971 private static Optional<HostPort> parseHostPort(@Nullable String input) { 1972 input = trimAggressivelyToNull(input); 1973 1974 if (input == null) 1975 return Optional.empty(); 1976 1977 input = stripOptionalQuotes(input); 1978 1979 if (input.startsWith("[")) { 1980 int close = input.indexOf(']'); 1981 1982 if (close > 0) { 1983 String core = input.substring(1, close); // IPv6 literal without brackets 1984 String rest = input.substring(close + 1); // maybe ":port" 1985 String host = "[" + core + "]"; 1986 Integer port = null; 1987 1988 if (rest.startsWith(":")) { 1989 String ps = trimAggressivelyToNull(rest.substring(1)); 1990 if (ps != null) { 1991 try { 1992 port = Integer.parseInt(ps, 10); 1993 } catch (Exception ignored) { 1994 // Nothing to do 1995 } 1996 } 1997 } 1998 1999 return Optional.of(new HostPort(host, port)); 2000 } 2001 } 2002 2003 int colon = input.indexOf(':'); 2004 2005 if (colon > 0 && input.indexOf(':', colon + 1) == -1) { 2006 // exactly one ':' -> host:port (IPv4/hostname) 2007 String h = trimAggressivelyToNull(input.substring(0, colon)); 2008 String ps = trimAggressivelyToNull(input.substring(colon + 1)); 2009 Integer p = null; 2010 2011 if (ps != null) { 2012 try { 2013 p = Integer.parseInt(ps, 10); 2014 } catch (Exception ignored) { 2015 // Nothing to do 2016 } 2017 } 2018 if (h != null) 2019 return Optional.of(new HostPort(h, p)); 2020 } 2021 2022 // no port 2023 return Optional.of(new HostPort(input, null)); 2024 } 2025 2026 @NonNull 
2027 private static String removeDotSegments(@NonNull String path) { 2028 requireNonNull(path); 2029 2030 Deque<String> stack = new ArrayDeque<>(); 2031 2032 for (String seg : path.split("/")) { 2033 if (seg.isEmpty() || ".".equals(seg)) 2034 continue; 2035 2036 if ("..".equals(seg)) { 2037 if (!stack.isEmpty()) 2038 stack.removeLast(); 2039 } else { 2040 stack.addLast(seg); 2041 } 2042 } 2043 2044 return "/" + String.join("/", stack); 2045 } 2046}