/*
 * Copyright 2022-2026 Revetware LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.soklet;

import com.soklet.exception.IllegalRequestException;
import com.soklet.internal.spring.LinkedCaseInsensitiveMap;
import org.jspecify.annotations.NonNull;
import org.jspecify.annotations.Nullable;

import javax.annotation.concurrent.NotThreadSafe;
import javax.annotation.concurrent.ThreadSafe;
import java.io.ByteArrayOutputStream;
import java.lang.Thread.UncaughtExceptionHandler;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.lang.invoke.MethodType;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Locale.LanguageRange;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

/**
 * A non-instantiable collection of utility methods.
 * <p>
 * All state held by this class is assigned once in the static initializer below.
 *
 * @author <a href="https://www.revetkn.com">Mark Allen</a>
 */
@ThreadSafe
public final class Utilities {
	// Result of the reflective virtual-thread feature test performed in the static initializer.
	// NOTE(review): a primitive boolean can never be null, so @NonNull adds no information here - consider dropping it.
	@NonNull
	private static final boolean VIRTUAL_THREADS_AVAILABLE;
	// Shared zero-length array handed out by emptyByteArray().
	// NOTE(review): for a type-use annotation this placement likely annotates the element type, not the array - confirm.
	@NonNull
	private static final byte[] EMPTY_BYTE_ARRAY;
	// Matches a run of Unicode separator characters (\p{Z}) at the start of a string.
	@NonNull
	private static final Pattern HEAD_WHITESPACE_PATTERN;
	// Matches a run of Unicode separator characters (\p{Z}) at the end of a string.
	@NonNull
	private static final Pattern TAIL_WHITESPACE_PATTERN;
	// Matches a single %HH percent-escape and captures its two hex digits.
	@NonNull
	private static final Pattern HEADER_PERCENT_ENCODING_PATTERN;

	static {
		EMPTY_BYTE_ARRAY = new byte[0];

		boolean virtualThreadsAvailable = false;

		try {
			// Detect if Virtual Threads are usable by feature testing via reflection.
			// Hat tip to https://github.com/javalin/javalin for this technique
			Class.forName("java.lang.Thread$Builder$OfVirtual");
			virtualThreadsAvailable = true;
		} catch (Exception ignored) {
			// We don't care why this failed, but if we're here we know JVM does not support virtual threads
		}

		VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable;

		// See https://www.regular-expressions.info/unicode.html
		// \p{Z} or \p{Separator}: Unicode space-separator characters.
		//
		// First pattern matches those separator characters at the head of a string, second matches the same for tail.
		// Useful for a "stronger" trim() function, which is almost always what we want in a web context
		// with user-supplied input.
106 HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+"); 107 TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$"); 108 109 HEADER_PERCENT_ENCODING_PATTERN = Pattern.compile("%([0-9A-Fa-f]{2})"); 110 } 111 112 private Utilities() { 113 // Non-instantiable 114 } 115 116 /** 117 * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)? 118 * 119 * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise 120 */ 121 @NonNull 122 static Boolean virtualThreadsAvailable() { 123 return VIRTUAL_THREADS_AVAILABLE; 124 } 125 126 /** 127 * Provides a virtual thread factory if supported by the runtime. 128 * <p> 129 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 130 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our {@link ThreadFactory} via {@link MethodHandle} references. 131 * <p> 132 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 133 * 134 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 135 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 136 * @return a virtual thread factory 137 * @throws IllegalStateException if the runtime environment does not support virtual threads 138 */ 139 @NonNull 140 static ThreadFactory createVirtualThreadFactory(@NonNull String threadNamePrefix, 141 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 142 requireNonNull(threadNamePrefix); 143 requireNonNull(uncaughtExceptionHandler); 144 145 if (!virtualThreadsAvailable()) 146 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 147 148 // Hat tip to https://github.com/javalin/javalin for this technique 149 Class<?> threadBuilderOfVirtualClass; 150 151 try { 152 threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual"); 153 } catch (ClassNotFoundException e) { 154 throw new IllegalStateException("Unable to load virtual thread builder class", e); 155 } 156 157 Lookup lookup = MethodHandles.publicLookup(); 158 159 MethodHandle methodHandleThreadOfVirtual; 160 MethodHandle methodHandleThreadBuilderOfVirtualName; 161 MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler; 162 MethodHandle methodHandleThreadBuilderOfVirtualFactory; 163 164 try { 165 methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass)); 166 methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class)); 167 methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class)); 168 methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class)); 169 } catch (NoSuchMethodException | IllegalAccessException e) { 170 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 171 } 172 173 try { 174 // Thread.ofVirtual() 175 Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke(); 176 // .name(threadNamePrefix, start) 177 methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1); 178 // .uncaughtExceptionHandler(uncaughtExceptionHandler) 179 
methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler); 180 // .factory(); 181 return (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder); 182 } catch (Throwable t) { 183 throw new IllegalStateException("Unable to create virtual thread factory", t); 184 } 185 } 186 187 /** 188 * Provides a virtual-thread-per-task executor service if supported by the runtime. 189 * <p> 190 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 191 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references. 192 * <p> 193 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 194 * <pre>{@code // This method is effectively equivalent to this code 195 * return Executors.newThreadPerTaskExecutor( 196 * Thread.ofVirtual() 197 * .name(threadNamePrefix) 198 * .uncaughtExceptionHandler(uncaughtExceptionHandler) 199 * .factory() 200 * );}</pre> 201 * 202 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 203 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 204 * @return a virtual-thread-per-task executor service 205 * @throws IllegalStateException if the runtime environment does not support virtual threads 206 */ 207 @NonNull 208 static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@NonNull String threadNamePrefix, 209 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 210 requireNonNull(threadNamePrefix); 211 requireNonNull(uncaughtExceptionHandler); 212 213 if (!virtualThreadsAvailable()) 214 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 215 216 ThreadFactory threadFactory = createVirtualThreadFactory(threadNamePrefix, uncaughtExceptionHandler); 217 218 Lookup lookup = MethodHandles.publicLookup(); 219 MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor; 220 221 try { 222 methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class)); 223 } catch (NoSuchMethodException | IllegalAccessException e) { 224 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 225 } 226 227 try { 228 // return Executors.newThreadPerTaskExecutor(threadFactory); 229 return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory); 230 } catch (Throwable t) { 231 throw new IllegalStateException("Unable to create virtual thread executor service", t); 232 } 233 } 234 235 /** 236 * Returns a shared zero-length {@code byte[]} instance. 237 * <p> 238 * Useful as a sentinel when you need a non-{@code null} byte array but have no content. 239 * 240 * @return a zero-length byte array (never {@code null}) 241 */ 242 @NonNull 243 static byte[] emptyByteArray() { 244 return EMPTY_BYTE_ARRAY; 245 } 246 247 /** 248 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 249 * <p> 250 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromQuery(String, QueryFormat, Charset)} if you need to specify a different charset). 251 * <p> 252 * Pairs missing a name are ignored. 253 * <p> 254 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
255 * 256 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 257 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 258 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 259 * @throws IllegalRequestException if the query string contains malformed percent-encoding 260 */ 261 @NonNull 262 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 263 @NonNull QueryFormat queryFormat) { 264 requireNonNull(query); 265 requireNonNull(queryFormat); 266 267 return extractQueryParametersFromQuery(query, queryFormat, StandardCharsets.UTF_8); 268 } 269 270 /** 271 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 272 * <p> 273 * Decodes percent-escapes using the specified charset. 274 * <p> 275 * Pairs missing a name are ignored. 276 * <p> 277 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 278 * 279 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 280 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 281 * @param charset the charset to use when decoding percent-escapes 282 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 283 * @throws IllegalRequestException if the query string contains malformed percent-encoding 284 */ 285 @NonNull 286 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 287 @NonNull QueryFormat queryFormat, 288 @NonNull Charset charset) { 289 requireNonNull(query); 290 requireNonNull(queryFormat); 291 requireNonNull(charset); 292 293 // For form parameters, body will look like "One=Two&Three=Four" ...a query string. 
294 String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain 295 return extractQueryParametersFromUrl(syntheticUrl, queryFormat, charset); 296 } 297 298 @NonNull 299 static Optional<Set<@NonNull String>> extractQueryParameterValuesFromQuery(@NonNull String query, 300 @NonNull String name, 301 @NonNull QueryFormat queryFormat, 302 @NonNull Charset charset) { 303 requireNonNull(query); 304 requireNonNull(name); 305 requireNonNull(queryFormat); 306 requireNonNull(charset); 307 308 query = trimAggressivelyToEmpty(query); 309 310 if (query.isEmpty()) 311 return Optional.empty(); 312 313 String singleValue = null; 314 Set<String> values = null; 315 boolean matched = false; 316 int pairStart = 0; 317 318 while (pairStart <= query.length()) { 319 int pairEnd = query.indexOf('&', pairStart); 320 if (pairEnd == -1) 321 pairEnd = query.length(); 322 323 if (pairEnd > pairStart) { 324 int separator = query.indexOf('=', pairStart); 325 if (separator == -1 || separator > pairEnd) 326 separator = pairEnd; 327 328 String rawName = trimAggressivelyToNull(query.substring(pairStart, separator)); 329 330 if (rawName != null) { 331 String decodedName = decodeQueryComponent(rawName, queryFormat, charset); 332 333 if (decodedName.equals(name)) { 334 String rawValue = separator < pairEnd ? 
trimAggressivelyToNull(query.substring(separator + 1, pairEnd)) : null; 335 336 if (rawValue == null) 337 rawValue = ""; 338 339 String value = decodeQueryComponent(rawValue, queryFormat, charset); 340 341 if (!matched) { 342 singleValue = value; 343 matched = true; 344 } else { 345 if (values == null) { 346 values = new LinkedHashSet<>(); 347 values.add(singleValue); 348 } 349 350 values.add(value); 351 } 352 } 353 } 354 } 355 356 if (pairEnd == query.length()) 357 break; 358 359 pairStart = pairEnd + 1; 360 } 361 362 if (!matched) 363 return Optional.empty(); 364 365 if (values == null) 366 return Optional.of(Set.of(singleValue)); 367 368 return Optional.of(Collections.unmodifiableSet(values)); 369 } 370 371 /** 372 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 373 * <p> 374 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromUrl(String, QueryFormat, Charset)} if you need to specify a different charset). 375 * <p> 376 * Pairs missing a name are ignored. 377 * <p> 378 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
379 * 380 * @param url a relative or absolute URL/URI string 381 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 382 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 383 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 384 */ 385 @NonNull 386 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 387 @NonNull QueryFormat queryFormat) { 388 requireNonNull(url); 389 requireNonNull(queryFormat); 390 391 return extractQueryParametersFromUrl(url, queryFormat, StandardCharsets.UTF_8); 392 } 393 394 /** 395 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 396 * <p> 397 * Decodes percent-escapes using the specified charset. 398 * <p> 399 * Pairs missing a name are ignored. 400 * <p> 401 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
402 * 403 * @param url a relative or absolute URL/URI string 404 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 405 * @param charset the charset to use when decoding percent-escapes 406 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 407 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 408 */ 409 @NonNull 410 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 411 @NonNull QueryFormat queryFormat, 412 @NonNull Charset charset) { 413 requireNonNull(url); 414 requireNonNull(queryFormat); 415 requireNonNull(charset); 416 417 URI uri; 418 419 try { 420 uri = new URI(url); 421 } catch (URISyntaxException e) { 422 throw new IllegalRequestException(format("Invalid URL '%s'", url), e); 423 } 424 425 String query = trimAggressivelyToNull(uri.getRawQuery()); 426 427 if (query == null) 428 return Map.of(); 429 430 Map<String, Set<String>> queryParameters = new LinkedHashMap<>(); 431 for (String pair : query.split("&")) { 432 if (pair.isEmpty()) 433 continue; 434 435 String[] nv = pair.split("=", 2); 436 String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null); 437 String rawValue = trimAggressivelyToNull(nv.length > 1 ? nv[1] : null); 438 439 if (rawName == null) 440 continue; 441 442 // Preserve empty values; it's what users probably expect 443 if (rawValue == null) 444 rawValue = ""; 445 446 String name = decodeQueryComponent(rawName, queryFormat, charset); 447 String value = decodeQueryComponent(rawValue, queryFormat, charset); 448 449 addStringValue(queryParameters, name, value); 450 } 451 452 freezeStringValueSets(queryParameters); 453 return queryParameters; 454 } 455 456 /** 457 * Decodes a single key or value using the given mode and charset. 
458 */ 459 @NonNull 460 private static String decodeQueryComponent(@NonNull String string, 461 @NonNull QueryFormat queryFormat, 462 @NonNull Charset charset) { 463 requireNonNull(string); 464 requireNonNull(queryFormat); 465 requireNonNull(charset); 466 467 if (string.isEmpty()) 468 return ""; 469 470 // Step 1: in form mode, '+' means space 471 String prepped = (queryFormat == QueryFormat.X_WWW_FORM_URLENCODED) ? string.replace('+', ' ') : string; 472 // Step 2: percent-decode bytes, then interpret bytes with the provided charset 473 return percentDecode(prepped, charset); 474 } 475 476 /** 477 * Percent-decodes a string into bytes, then constructs a String using the provided charset. 478 * One pass only: invalid %xy sequences trigger an exception. 479 */ 480 @NonNull 481 private static String percentDecode(@NonNull String s, @NonNull Charset charset) { 482 requireNonNull(s); 483 requireNonNull(charset); 484 485 if (s.isEmpty()) 486 return ""; 487 488 StringBuilder sb = new StringBuilder(s.length()); 489 ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 490 491 for (int i = 0; i < s.length(); ) { 492 char c = s.charAt(i); 493 494 if (c == '%') { 495 // Consume one or more consecutive %xx triplets into bytes 496 bytes.reset(); 497 int j = i; 498 499 while (j < s.length() && s.charAt(j) == '%') { 500 if (j + 2 >= s.length()) 501 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 502 503 int hi = hex(s.charAt(j + 1)); 504 int lo = hex(s.charAt(j + 2)); 505 if (hi < 0 || lo < 0) 506 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 507 508 bytes.write((hi << 4) | lo); 509 j += 3; 510 } 511 512 sb.append(new String(bytes.toByteArray(), charset)); 513 i = j; 514 continue; 515 } 516 517 // Non-'%' char: append it as-is. 518 // This preserves surrogate pairs naturally as the loop hits both chars. 
519 sb.append(c); 520 i++; 521 } 522 523 return sb.toString(); 524 } 525 526 static void validatePercentEncodingInUrlComponent(@NonNull String urlComponent) { 527 requireNonNull(urlComponent); 528 529 for (int i = 0; i < urlComponent.length(); i++) { 530 if (urlComponent.charAt(i) != '%') 531 continue; 532 533 if (i + 2 >= urlComponent.length()) 534 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 535 536 int hi = hex(urlComponent.charAt(i + 1)); 537 int lo = hex(urlComponent.charAt(i + 2)); 538 if (hi < 0 || lo < 0) 539 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 540 541 i += 2; 542 } 543 } 544 545 private static int hex(char c) { 546 if (c >= '0' && c <= '9') return c - '0'; 547 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 548 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 549 return -1; 550 } 551 552 /** 553 * Parses {@code Cookie} request headers into a map of cookie names to values. 554 * <p> 555 * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>. 556 * Values are parsed per the following liberal rules: 557 * <ul> 558 * <li>Components are split on {@code ';'} unless inside a quoted string.</li> 559 * <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li> 560 * <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li> 561 * </ul> 562 * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order. 
563 * 564 * @param headers request headers as a multimap of header name to values (must be non-{@code null}) 565 * @return a map of cookie name to distinct values; empty if no valid cookies are present 566 */ 567 @NonNull 568 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractCookiesFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 569 requireNonNull(headers); 570 571 // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive) 572 Map<String, Set<String>> cookies = new LinkedHashMap<>(); 573 574 for (Entry<String, Set<String>> entry : headers.entrySet()) { 575 String headerName = entry.getKey(); 576 if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim())) 577 continue; 578 579 Set<String> values = entry.getValue(); 580 if (values == null) continue; 581 582 for (String headerValue : values) { 583 headerValue = trimAggressivelyToNull(headerValue); 584 if (headerValue == null) continue; 585 586 // Split on ';' only when NOT inside a quoted string 587 List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue); 588 589 for (String cookieComponent : cookieComponents) { 590 cookieComponent = trimAggressivelyToNull(cookieComponent); 591 if (cookieComponent == null) continue; 592 593 String[] cookiePair = cookieComponent.split("=", 2); 594 String rawName = trimAggressivelyToNull(cookiePair[0]); 595 String rawValue = (cookiePair.length == 2 ? 
trimAggressivelyToNull(cookiePair[1]) : null); 596 597 if (rawName == null) continue; 598 599 // DO NOT decode the name; cookie names are case-sensitive and rarely encoded 600 String cookieName = rawName; 601 602 String cookieValue = null; 603 if (rawValue != null) { 604 // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space) 605 String unquoted = unquoteCookieValueIfNeeded(rawValue); 606 cookieValue = percentDecodeCookieValue(unquoted); 607 } 608 609 cookies.putIfAbsent(cookieName, Set.of()); 610 if (cookieValue != null) 611 addStringValue(cookies, cookieName, cookieValue); 612 } 613 } 614 } 615 616 freezeStringValueSets(cookies); 617 return cookies; 618 } 619 620 /** 621 * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially. 622 */ 623 @NonNull 624 private static String percentDecodeCookieValue(@NonNull String cookieValue) { 625 requireNonNull(cookieValue); 626 627 ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length()); 628 629 for (int i = 0; i < cookieValue.length(); ) { 630 char c = cookieValue.charAt(i); 631 if (c == '%') { 632 if (i + 2 >= cookieValue.length()) 633 throw new IllegalRequestException("Invalid percent-encoding in Cookie header"); 634 635 int hi = Character.digit(cookieValue.charAt(i + 1), 16); 636 int lo = Character.digit(cookieValue.charAt(i + 2), 16); 637 if (hi < 0 || lo < 0) 638 throw new IllegalRequestException("Invalid percent-encoding in Cookie header"); 639 640 out.write((hi << 4) + lo); 641 i += 3; 642 continue; 643 } 644 645 String rawCharacter; 646 647 if (Character.isHighSurrogate(c) && i + 1 < cookieValue.length() && Character.isLowSurrogate(cookieValue.charAt(i + 1))) { 648 rawCharacter = cookieValue.substring(i, i + 2); 649 i += 2; 650 } else { 651 rawCharacter = Character.toString(c); 652 i++; 653 } 654 655 byte[] encoded = rawCharacter.getBytes(StandardCharsets.UTF_8); 656 out.write(encoded, 0, encoded.length); 657 } 658 659 return 
out.toString(StandardCharsets.UTF_8); 660 } 661 662 /** 663 * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value. 664 * Supports backslash-escaped quotes within quoted strings. 665 */ 666 private static List<@NonNull String> splitCookieHeaderRespectingQuotes(@NonNull String headerValue) { 667 List<String> parts = new ArrayList<>(); 668 StringBuilder cur = new StringBuilder(headerValue.length()); 669 boolean inQuotes = false; 670 boolean escape = false; 671 672 for (int i = 0; i < headerValue.length(); i++) { 673 char c = headerValue.charAt(i); 674 675 if (escape) { 676 // keep escaped char literally (e.g., \" \; \\) 677 cur.append(c); 678 escape = false; 679 continue; 680 } 681 682 if (c == '\\') { 683 escape = true; 684 // keep the backslash for now; unquote step will handle unescaping 685 cur.append(c); 686 continue; 687 } 688 689 if (c == '"') { 690 inQuotes = !inQuotes; 691 cur.append(c); 692 continue; 693 } 694 695 if (c == ';' && !inQuotes) { 696 parts.add(cur.toString()); 697 cur.setLength(0); 698 continue; 699 } 700 701 cur.append(c); 702 } 703 704 if (cur.length() > 0) 705 parts.add(cur.toString()); 706 707 return parts; 708 } 709 710 /** 711 * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; . 712 * Otherwise returns the input as-is. 713 */ 714 @NonNull 715 private static String unquoteCookieValueIfNeeded(@NonNull String rawValue) { 716 requireNonNull(rawValue); 717 718 if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') { 719 // Strip the surrounding quotes 720 String inner = rawValue.substring(1, rawValue.length() - 1); 721 722 // Unescape \" \\ and \; (common patterns seen in the wild) 723 // Order matters: unescape backslash-escape sequences, then leave other chars intact. 
724 StringBuilder sb = new StringBuilder(inner.length()); 725 boolean escape = false; 726 727 for (int i = 0; i < inner.length(); i++) { 728 char c = inner.charAt(i); 729 if (escape) { 730 // Only special-case a few common escapes; otherwise keep the char 731 if (c == '"' || c == '\\' || c == ';') 732 sb.append(c); 733 else 734 sb.append(c); // unknown escape -> keep literally (liberal in what we accept) 735 736 escape = false; 737 } else if (c == '\\') { 738 escape = true; 739 } else { 740 sb.append(c); 741 } 742 } 743 744 // If string ended with a dangling backslash, keep it literally 745 if (escape) 746 sb.append('\\'); 747 748 return sb.toString(); 749 } 750 751 return rawValue; 752 } 753 754 /** 755 * Normalizes a URL or path into a canonical request path and optionally performs percent-decoding on the path. 756 * <p> 757 * For example, {@code "https://www.soklet.com/ab%20c?one=two"} would be normalized to {@code "/ab c"}. 758 * <p> 759 * The {@code OPTIONS *} special case returns {@code "*"}. 760 * <p> 761 * Behavior: 762 * <ul> 763 * <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li> 764 * <li>Ensures the result begins with {@code '/'}.</li> 765 * <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li> 766 * <li>Safely normalizes path traversals, e.g. path {@code '/a/../b'} would be normalized to {@code '/b'}</li> 767 * <li>Strips any query string.</li> 768 * <li>Applies aggressive trimming of Unicode whitespace.</li> 769 * <li>Rejects malformed percent-encoding when decoding is enabled.</li> 770 * </ul> 771 * 772 * @param url a URL or path to normalize 773 * @param performDecoding {@code true} if decoding should be performed on the path (e.g. 
replace {@code %20} with a space character), {@code false} otherwise 774 * @return the normalized path, {@code "/"} for empty input 775 */ 776 @NonNull 777 public static String extractPathFromUrl(@NonNull String url, 778 @NonNull Boolean performDecoding) { 779 requireNonNull(url); 780 781 url = trimAggressivelyToEmpty(url); 782 783 // Special case for OPTIONS * requests 784 if (url.equals("*")) 785 return "*"; 786 787 // Parse with java.net.URI to isolate raw path; then percent-decode only the path 788 try { 789 URI uri = new URI(url); 790 791 String rawPath = uri.getRawPath(); // null => "/" 792 793 if (rawPath == null || rawPath.isEmpty()) 794 rawPath = "/"; 795 796 if (!performDecoding) 797 return rawPath; 798 799 String decodedPath = percentDecode(rawPath, StandardCharsets.UTF_8); 800 801 // Sanitize path traversal (e.g. /a/../b -> /b) 802 decodedPath = removeDotSegments(decodedPath); 803 804 // Normalize trailing slashes like normalizedPathForUrl currently does 805 if (!decodedPath.startsWith("/")) 806 decodedPath = "/" + decodedPath; 807 808 if (!"/".equals(decodedPath)) 809 while (decodedPath.endsWith("/")) 810 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 811 812 return decodedPath; 813 } catch (URISyntaxException e) { 814 // If it's not an absolute URL, treat the whole string as a path and percent-decode 815 String path = url; 816 int q = path.indexOf('?'); 817 818 if (q != -1) 819 path = path.substring(0, q); 820 821 if (path.isEmpty()) 822 path = "/"; 823 824 if (!performDecoding) 825 return path; 826 827 String decodedPath = percentDecode(path, StandardCharsets.UTF_8); 828 829 // Sanitize path traversal (e.g. 
/a/../b -> /b) 830 decodedPath = removeDotSegments(decodedPath); 831 832 if (!decodedPath.startsWith("/")) 833 decodedPath = "/" + decodedPath; 834 835 if (!"/".equals(decodedPath)) 836 while (decodedPath.endsWith("/")) 837 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 838 839 return decodedPath; 840 } 841 } 842 843 /** 844 * Extracts the raw (un-decoded) query component from a URL. 845 * <p> 846 * For example, {@code "/path?a=b&c=d%20e"} would return {@code "a=b&c=d%20e"}. 847 * 848 * @param url a raw URL or path 849 * @return the raw query component, or {@link Optional#empty()} if none 850 */ 851 @NonNull 852 public static Optional<String> extractRawQueryFromUrl(@NonNull String url) { 853 requireNonNull(url); 854 855 url = trimAggressivelyToEmpty(url); 856 857 if ("*".equals(url)) 858 return Optional.empty(); 859 860 try { 861 URI uri = new URI(url); 862 return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery())); 863 } catch (URISyntaxException e) { 864 // Not a valid URI, try to extract query manually 865 int q = url.indexOf('?'); 866 if (q == -1) 867 return Optional.empty(); 868 869 String query = trimAggressivelyToNull(url.substring(q + 1)); 870 return Optional.ofNullable(query); 871 } 872 } 873 874 @NonNull 875 static Optional<String> extractRawQueryFromUrlStrict(@NonNull String url) { 876 requireNonNull(url); 877 878 url = trimAggressivelyToEmpty(url); 879 880 if ("*".equals(url)) 881 return Optional.empty(); 882 883 try { 884 URI uri = new URI(url); 885 return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery())); 886 } catch (URISyntaxException e) { 887 throw new IllegalRequestException(format("Invalid URL '%s'", url), e); 888 } 889 } 890 891 /** 892 * Encodes decoded query parameters into a raw query string. 893 * <p> 894 * For example, given {@code {a=[b], c=[d e]}} and {@link QueryFormat#RFC_3986_STRICT}, 895 * returns {@code "a=b&c=d%20e"}. 
896 * 897 * @param queryParameters the decoded query parameters 898 * @param queryFormat the encoding strategy 899 * @return the encoded query string, or the empty string if no parameters 900 */ 901 @NonNull 902 public static String encodeQueryParameters(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> queryParameters, 903 @NonNull QueryFormat queryFormat) { 904 requireNonNull(queryParameters); 905 requireNonNull(queryFormat); 906 907 if (queryParameters.isEmpty()) 908 return ""; 909 910 StringBuilder sb = new StringBuilder(); 911 boolean first = true; 912 913 for (Entry<String, Set<String>> entry : queryParameters.entrySet()) { 914 String encodedName = encodeQueryComponent(entry.getKey(), queryFormat); 915 916 for (String value : entry.getValue()) { 917 if (!first) 918 sb.append('&'); 919 920 sb.append(encodedName); 921 sb.append('='); 922 sb.append(encodeQueryComponent(value, queryFormat)); 923 924 first = false; 925 } 926 } 927 928 return sb.toString(); 929 } 930 931 @NonNull 932 static String encodeQueryComponent(@NonNull String queryComponent, 933 @NonNull QueryFormat queryFormat) { 934 requireNonNull(queryComponent); 935 requireNonNull(queryFormat); 936 937 String encoded = URLEncoder.encode(queryComponent, StandardCharsets.UTF_8); 938 939 if (queryFormat == QueryFormat.RFC_3986_STRICT) 940 encoded = encoded.replace("+", "%20"); 941 942 return encoded; 943 } 944 945 @NonNull 946 static String encodePath(@NonNull String path) { 947 requireNonNull(path); 948 949 if ("*".equals(path)) 950 return path; 951 952 // Encode each path segment individually, preserving '/' separators. 953 // RFC 3986 is used for path encoding (spaces as %20, not +). 954 return Arrays.stream(path.split("/", -1)) 955 .map(segment -> URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20")) 956 .collect(Collectors.joining("/")); 957 } 958 959 /** 960 * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s. 
961 * <p> 962 * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then converted to 963 * {@link Locale} instances that represent the client-supplied language tags. Wildcard ranges are ignored unless 964 * they include a language component (e.g. {@code en-*} becomes {@code en}). On parse failure, an empty list is 965 * returned. 966 * 967 * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null}) 968 * @return locales in descending preference order; empty if none could be resolved 969 */ 970 @NonNull 971 public static List<@NonNull Locale> extractLocalesFromAcceptLanguageHeaderValue(@NonNull String acceptLanguageHeaderValue) { 972 requireNonNull(acceptLanguageHeaderValue); 973 974 try { 975 List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue); 976 List<Locale> locales = new ArrayList<>(languageRanges.size()); 977 978 for (LanguageRange languageRange : languageRanges) { 979 if (!(languageRange.getWeight() > 0.0)) 980 continue; 981 982 String range = languageRange.getRange(); 983 String languageTag = range; 984 985 if (range.indexOf('*') != -1) { 986 int wildcardIndex = range.indexOf('*'); 987 988 if (wildcardIndex == 0) 989 continue; 990 991 int languageEndIndex = range.indexOf('-'); 992 993 if (languageEndIndex == -1 || languageEndIndex > wildcardIndex) 994 languageEndIndex = wildcardIndex; 995 996 languageTag = range.substring(0, languageEndIndex); 997 } 998 999 if (languageTag.isBlank()) 1000 continue; 1001 1002 Locale locale = Locale.forLanguageTag(languageTag); 1003 1004 if (!locale.getLanguage().isBlank() && !locales.contains(locale)) 1005 locales.add(locale); 1006 } 1007 1008 return Collections.unmodifiableList(locales); 1009 } catch (Exception ignored) { 1010 return List.of(); 1011 } 1012 } 1013 1014 @Nullable 1015 private static String firstHeaderValue(@Nullable Set<String> headerValues) { 1016 if (headerValues == null || headerValues.isEmpty()) 1017 return null; 
1018 1019 for (String value : headerValues) { 1020 String trimmed = trimAggressivelyToNull(value); 1021 if (trimmed == null) 1022 continue; 1023 1024 for (String part : splitCommaAware(trimmed)) { 1025 String candidate = trimAggressivelyToNull(part); 1026 if (candidate != null) 1027 return candidate; 1028 } 1029 } 1030 1031 return null; 1032 } 1033 1034 /** 1035 * Best-effort attempt to determine a client's effective origin by examining request headers. 1036 * <p> 1037 * An effective origin in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components. 1038 * <p> 1039 * Soklet is generally the "last hop" behind a load balancer/reverse proxy but may also be accessed directly by clients. 1040 * <p> 1041 * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the 1042 * request through headers like the following: 1043 * <ul> 1044 * <li>{@code Host}</li> 1045 * <li>{@code Forwarded}</li> 1046 * <li>{@code Origin}</li> 1047 * <li>{@code X-Forwarded-Proto}</li> 1048 * <li>{@code X-Forwarded-Protocol}</li> 1049 * <li>{@code X-Url-Scheme}</li> 1050 * <li>{@code Front-End-Https}</li> 1051 * <li>{@code X-Forwarded-Ssl}</li> 1052 * <li>{@code X-Forwarded-Host}</li> 1053 * <li>{@code X-Forwarded-Port}</li> 1054 * </ul> 1055 * <p> 1056 * This method may take these and other headers into account when determining an effective origin. 
* <p>
   * For example, the following would be legal effective origins returned from this method:
   * <ul>
   *   <li>{@code https://www.soklet.com}</li>
   *   <li>{@code http://www.fake.com:1234}</li>
   * </ul>
   * <p>
   * The following would NOT be legal effective origins:
   * <ul>
   *   <li>{@code www.soklet.com} (missing protocol) </li>
   *   <li>{@code https://www.soklet.com/} (trailing slash)</li>
   *   <li>{@code https://www.soklet.com/test} (trailing slash, path)</li>
   *   <li>{@code https://www.soklet.com/test?abc=1234} (trailing slash, path, query)</li>
   * </ul>
   * <p>
   * {@code Origin} is treated as a fallback signal only and will not override a conflicting {@code Host} or forwarded host value.
   * <p>
   * Forwarded headers are only used when permitted by {@link EffectiveOriginResolver.TrustPolicy}. When using
   * {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}, you must provide a trusted proxy predicate or allowlist.
   * If the remote address is missing or not trusted, forwarded headers are ignored.
   * <p>
   * Extraction order is: trusted forwarded headers → {@code Host} → (optional) {@code Origin} fallback.
   * If {@link EffectiveOriginResolver#allowOriginFallback(Boolean)} is unset, {@code Origin} fallback is enabled only for
   * {@link EffectiveOriginResolver.TrustPolicy#TRUST_ALL}.
   * <p>
   * A port is only included in the result if it parses as an integer in the range 1-65535; a default port
   * (80 for {@code http}, 443 for {@code https}) is kept only when a header supplied it explicitly.
   *
   * @param effectiveOriginResolver request headers and trust settings
   * @return the effective origin, or {@link Optional#empty()} if it could not be determined
   * @throws IllegalStateException if the policy is {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}
   *                               but no trusted proxy predicate or allowlist was configured
   */
  @NonNull
  public static Optional<String> extractEffectiveOrigin(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
    requireNonNull(effectiveOriginResolver);
    requireNonNull(effectiveOriginResolver.headers);
    requireNonNull(effectiveOriginResolver.trustPolicy);

    // Fail fast on a misconfigured allowlist policy rather than silently ignoring forwarded headers
    if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST
        && effectiveOriginResolver.trustedProxyPredicate == null) {
      throw new IllegalStateException(format("%s policy requires a trusted proxy predicate or allowlist.",
          EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST));
    }

    Map<String, Set<String>> headers = effectiveOriginResolver.headers;
    boolean trustForwardedHeaders = shouldTrustForwardedHeaders(effectiveOriginResolver);
    // An explicit setting wins; when unset, Origin fallback is on only for TRUST_ALL
    boolean allowOriginFallback = effectiveOriginResolver.allowOriginFallback != null
        ? effectiveOriginResolver.allowOriginFallback
        : effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_ALL;

    // Host                   developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
    // Forwarded              by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
    // Origin                 null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
    // X-Forwarded-Proto      https
    // X-Forwarded-Protocol   https (Microsoft's alternate name)
    // X-Url-Scheme           https (Microsoft's alternate name)
    // Front-End-Https        on (Microsoft's alternate name)
    // X-Forwarded-Ssl        on (Microsoft's alternate name)
    // X-Forwarded-Host       id42.example-cdn.com
    // X-Forwarded-Port       443

    // Each piece below is filled by the first source that supplies it; later sources only fill what is still null.
    // portExplicit records that some header actually carried a port (see the default-port handling at the end).
    String protocol = null;
    String host = null;
    String portAsString = null;
    Boolean portExplicit = false;

    // Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https>
    if (trustForwardedHeaders) {
      Set<String> forwardedHeaders = headers.get("Forwarded");
      if (forwardedHeaders != null) {
        forwardedHeaderLoop:
        for (String forwardedHeader : forwardedHeaders) {
          String trimmed = trimAggressivelyToNull(forwardedHeader);
          if (trimmed == null)
            continue;

          for (String forwardedEntry : splitCommaAware(trimmed)) {
            String entry = trimAggressivelyToNull(forwardedEntry);
            if (entry == null)
              continue;

            // Values collected from this single comma-separated entry; the first occurrence of a field wins
            String entryHost = null;
            String entryProtocol = null;
            String entryPortAsString = null;
            Boolean entryPortExplicit = false;

            // Each field component might look like "by=<identifier>"
            List<String> forwardedHeaderFieldComponents = splitSemicolonAware(entry);
            for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) {
              forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent);
              if (forwardedHeaderFieldComponent == null)
                continue;

              // Break "by=<identifier>" into "by" and "<identifier>" pieces
              String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */), 2);
              if (forwardedHeaderFieldNameAndValue.length != 2)
                continue;

              String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]);
              String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]);
              if (name == null || value == null)
                continue;

              if ("host".equalsIgnoreCase(name)) {
                if (entryHost == null) {
                  // NOTE(review): unlike "proto" below, the value is not passed through stripOptionalQuotes here -
                  // confirm parseHostPort copes with a quoted value such as host="example.com:8080"
                  HostPort hostPort = parseHostPort(value).orElse(null);

                  if (hostPort != null) {
                    entryHost = hostPort.getHost();

                    if (hostPort.getPort().isPresent()) {
                      entryPortAsString = String.valueOf(hostPort.getPort().get());
                      entryPortExplicit = true;
                    }
                  }
                }
              } else if ("proto".equalsIgnoreCase(name)) {
                if (entryProtocol == null)
                  entryProtocol = stripOptionalQuotes(value);
              }
            }

            // The first entry (across all Forwarded values) that yields a host or a protocol is used and
            // ends the scan of the Forwarded header entirely
            if (entryHost != null || entryProtocol != null) {
              host = entryHost;
              protocol = entryProtocol;
              if (entryPortAsString != null) {
                portAsString = entryPortAsString;
                portExplicit = entryPortExplicit;
              }
              break forwardedHeaderLoop;
            }
          }
        }
      }
    }

    // X-Forwarded-Proto: https
    if (trustForwardedHeaders && protocol == null) {
      String xForwardedProtoHeader = firstHeaderValue(headers.get("X-Forwarded-Proto"));
      if (xForwardedProtoHeader != null)
        protocol = stripOptionalQuotes(xForwardedProtoHeader);
    }

    // X-Forwarded-Protocol: https (Microsoft's alternate name)
    if (trustForwardedHeaders && protocol == null) {
      String xForwardedProtocolHeader = firstHeaderValue(headers.get("X-Forwarded-Protocol"));
      if (xForwardedProtocolHeader != null)
        protocol = stripOptionalQuotes(xForwardedProtocolHeader);
    }

    // X-Url-Scheme: https (Microsoft's alternate name)
    if (trustForwardedHeaders && protocol == null) {
      String xUrlSchemeHeader = firstHeaderValue(headers.get("X-Url-Scheme"));
      if (xUrlSchemeHeader != null)
        protocol = stripOptionalQuotes(xUrlSchemeHeader);
    }

    // Front-End-Https: on (Microsoft's alternate name)
    // Any value other than "on" (case-insensitive) is read as plain http
    if (trustForwardedHeaders && protocol == null) {
      String frontEndHttpsHeader = firstHeaderValue(headers.get("Front-End-Https"));
      if (frontEndHttpsHeader != null)
        protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http";
    }

    // X-Forwarded-Ssl: on (Microsoft's alternate name)
    // Any value other than "on" (case-insensitive) is read as plain http
    if (trustForwardedHeaders && protocol == null) {
      String xForwardedSslHeader = firstHeaderValue(headers.get("X-Forwarded-Ssl"));
      if (xForwardedSslHeader != null)
        protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? "https" : "http";
    }

    // X-Forwarded-Host: id42.example-cdn.com (or with port / IPv6)
    if (trustForwardedHeaders && host == null) {
      String xForwardedHostHeader = firstHeaderValue(headers.get("X-Forwarded-Host"));
      if (xForwardedHostHeader != null) {
        HostPort hostPort = parseHostPort(xForwardedHostHeader).orElse(null);

        if (hostPort != null) {
          host = hostPort.getHost();

          if (hostPort.getPort().isPresent() && portAsString == null) {
            portAsString = String.valueOf(hostPort.getPort().get());
            portExplicit = true;
          }
        }
      }
    }

    // X-Forwarded-Port: 443
    // Stored unparsed; a value that is not a valid port is discarded when the port is parsed below
    if (trustForwardedHeaders && portAsString == null) {
      String xForwardedPortHeader = firstHeaderValue(headers.get("X-Forwarded-Port"));
      if (xForwardedPortHeader != null) {
        portAsString = stripOptionalQuotes(xForwardedPortHeader);
        portExplicit = true;
      }
    }

    // Host: developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
    // Consulted regardless of trust policy, but only when no forwarded header supplied a host
    if (host == null) {
      String hostHeader = firstHeaderValue(headers.get("Host"));

      if (hostHeader != null) {
        HostPort hostPort = parseHostPort(hostHeader).orElse(null);

        if (hostPort != null) {
          host = hostPort.getHost();

          if (hostPort.getPort().isPresent() && portAsString == null) {
            portAsString = String.valueOf(hostPort.getPort().get());
            portExplicit = true;
          }
        }
      }
    }

    // Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> (IPv6 supported)
    // Use Origin only when host is missing or when it matches the Host-derived value.
    if (allowOriginFallback && (protocol == null || host == null || portAsString == null)) {
      String originHeader = firstHeaderValue(headers.get("Origin"));

      if (originHeader != null) {
        try {
          URI o = new URI(originHeader);
          String originProtocol = trimAggressivelyToNull(o.getScheme());
          String originHost = o.getHost(); // may be bracketed already on some JDKs
          int originPort = o.getPort(); // -1 if absent

          // Ensure an IPv6 literal is bracketed so it can be followed by ":<port>" in the result
          if (originHost != null) {
            boolean alreadyBracketed = originHost.startsWith("[") && originHost.endsWith("]");
            boolean isIpv6Like = originHost.indexOf(':') >= 0; // contains colon(s)
            originHost = (isIpv6Like && !alreadyBracketed) ? "[" + originHost + "]" : originHost;
          }

          boolean hostMatchesOrigin = host != null && originHost != null && host.equalsIgnoreCase(originHost);

          if (host == null) {
            // No host from any other header: Origin supplies whatever it has (host, scheme, port)
            if (originHost != null)
              host = originHost;
            if (originProtocol != null)
              protocol = originProtocol;
            if (originPort >= 0) {
              portAsString = String.valueOf(originPort);
              portExplicit = true;
            }
          } else if (hostMatchesOrigin) {
            // Same host: Origin may only fill in a scheme/port that is still missing
            if (protocol == null && originProtocol != null)
              protocol = originProtocol;
            if (portAsString == null && originPort >= 0) {
              portAsString = String.valueOf(originPort);
              portExplicit = true;
            }
          }
        } catch (URISyntaxException ignored) {
          // no-op
        }
      }
    }

    Integer port = null;

    if (portAsString != null) {
      try {
        int parsedPort = Integer.parseInt(portAsString, 10);
        if (parsedPort >= 1 && parsedPort <= 65535)
          port = parsedPort;
      } catch (Exception ignored) {
        // Not an integer; ignore it
      }
    }

    // No usable port: the origin is just scheme + host
    if (protocol != null && host != null && port == null) {
      return Optional.of(format("%s://%s", protocol, host));
    }

    if (protocol != null && host != null && port != null) {
      boolean usingDefaultPort =
          ("http".equalsIgnoreCase(protocol) && port.equals(80)) ||
              ("https".equalsIgnoreCase(protocol) && port.equals(443));

      // Keep default ports if the client/proxy explicitly sent them
      String effectiveOrigin = (usingDefaultPort && !portExplicit)
          ? format("%s://%s", protocol, host)
          : format("%s://%s:%s", protocol, host, port);

      return Optional.of(effectiveOrigin);
    }

    // Scheme or host could not be determined
    return Optional.empty();
  }

  /**
   * Decides whether forwarded headers may be used for the given resolver.
   * <p>
   * {@code TRUST_ALL} always trusts and {@code TRUST_NONE} never does. Otherwise the configured predicate is
   * applied to the remote address; if either is missing, the headers are not trusted.
   *
   * @param effectiveOriginResolver the resolver carrying trust policy, remote address and predicate
   * @return {@code true} if forwarded headers should be honored
   */
  private static boolean shouldTrustForwardedHeaders(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
    if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_ALL)
      return true;

    if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_NONE)
      return false;

    if (effectiveOriginResolver.remoteAddress == null || effectiveOriginResolver.trustedProxyPredicate == null)
      return false;

    return effectiveOriginResolver.trustedProxyPredicate.test(effectiveOriginResolver.remoteAddress);
  }

  /**
   * Builder for {@link #extractEffectiveOrigin(EffectiveOriginResolver)}.
   * <p>
   * Packages the inputs needed to reconstruct a client origin (scheme + host + optional port) from request headers.
   * The resulting value never includes a path or query component.
   * <p>
   * Forwarded headers can be spoofed if Soklet is reachable directly. Choose a {@link TrustPolicy} that matches your
   * deployment and, for {@link TrustPolicy#TRUST_PROXY_ALLOWLIST}, provide a trusted proxy predicate or allowlist.
   * If the remote address is missing or not trusted, forwarded headers are ignored.
1365 * <p> 1366 * Extraction order is: trusted forwarded headers → {@code Host} → (optional) {@code Origin} fallback. {@code Origin} 1367 * never overrides a conflicting host value; it only fills missing scheme/port or supplies host when absent. 1368 * <p> 1369 * Defaults: if {@link #allowOriginFallback(Boolean)} is left unset, {@code Origin} fallback is enabled only for 1370 * {@link TrustPolicy#TRUST_ALL}; otherwise it is disabled. 1371 */ 1372 @NotThreadSafe 1373 public static final class EffectiveOriginResolver { 1374 @NonNull 1375 private final Map<@NonNull String, @NonNull Set<@NonNull String>> headers; 1376 @NonNull 1377 private final TrustPolicy trustPolicy; 1378 @Nullable 1379 private InetSocketAddress remoteAddress; 1380 @Nullable 1381 private Predicate<InetSocketAddress> trustedProxyPredicate; 1382 @Nullable 1383 private Boolean allowOriginFallback; 1384 1385 /** 1386 * Acquires a builder seeded with raw request headers and a trust policy. 1387 * 1388 * @param headers HTTP request headers 1389 * @param trustPolicy how forwarded headers should be trusted 1390 * @return the builder 1391 */ 1392 @NonNull 1393 public static EffectiveOriginResolver withHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1394 @NonNull TrustPolicy trustPolicy) { 1395 requireNonNull(headers); 1396 requireNonNull(trustPolicy); 1397 return new EffectiveOriginResolver(headers, trustPolicy); 1398 } 1399 1400 /** 1401 * Acquires a builder seeded with a {@link Request} and a trust policy. 
1402 * 1403 * @param request the current request 1404 * @param trustPolicy how forwarded headers should be trusted 1405 * @return the builder 1406 */ 1407 @NonNull 1408 public static EffectiveOriginResolver withRequest(@NonNull Request request, 1409 @NonNull TrustPolicy trustPolicy) { 1410 requireNonNull(request); 1411 EffectiveOriginResolver resolver = withHeaders(request.getHeaders(), trustPolicy); 1412 resolver.remoteAddress = request.getRemoteAddress().orElse(null); 1413 return resolver; 1414 } 1415 1416 private EffectiveOriginResolver(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1417 @NonNull TrustPolicy trustPolicy) { 1418 this.headers = new LinkedCaseInsensitiveMap<>(headers); 1419 this.trustPolicy = trustPolicy; 1420 } 1421 1422 /** 1423 * The remote address of the client connection. 1424 * 1425 * @param remoteAddress the remote address, or {@code null} if unavailable 1426 * @return this builder 1427 */ 1428 @NonNull 1429 public EffectiveOriginResolver remoteAddress(@Nullable InetSocketAddress remoteAddress) { 1430 this.remoteAddress = remoteAddress; 1431 return this; 1432 } 1433 1434 /** 1435 * Predicate used when {@link TrustPolicy#TRUST_PROXY_ALLOWLIST} is in effect. 1436 * 1437 * @param trustedProxyPredicate predicate that returns {@code true} for trusted proxies 1438 * @return this builder 1439 */ 1440 @NonNull 1441 public EffectiveOriginResolver trustedProxyPredicate(@Nullable Predicate<InetSocketAddress> trustedProxyPredicate) { 1442 this.trustedProxyPredicate = trustedProxyPredicate; 1443 return this; 1444 } 1445 1446 /** 1447 * Allows specifying an IP allowlist for trusted proxies. 
1448 * 1449 * @param trustedProxyAddresses IP addresses of trusted proxies 1450 * @return this builder 1451 */ 1452 @NonNull 1453 public EffectiveOriginResolver trustedProxyAddresses(@NonNull Set<@NonNull InetAddress> trustedProxyAddresses) { 1454 requireNonNull(trustedProxyAddresses); 1455 Set<InetAddress> normalizedAddresses = Set.copyOf(trustedProxyAddresses); 1456 this.trustedProxyPredicate = remoteAddress -> { 1457 if (remoteAddress == null) 1458 return false; 1459 1460 InetAddress address = remoteAddress.getAddress(); 1461 return address != null && normalizedAddresses.contains(address); 1462 }; 1463 return this; 1464 } 1465 1466 /** 1467 * Controls whether {@code Origin} is used as a fallback signal when determining the client URL prefix. 1468 * 1469 * @param allowOriginFallback {@code true} to allow {@code Origin} fallback, {@code false} to disable it 1470 * @return this builder 1471 */ 1472 @NonNull 1473 public EffectiveOriginResolver allowOriginFallback(@Nullable Boolean allowOriginFallback) { 1474 this.allowOriginFallback = allowOriginFallback; 1475 return this; 1476 } 1477 1478 /** 1479 * Forwarded header trust policy. 1480 */ 1481 public enum TrustPolicy { 1482 /** 1483 * Trust forwarded headers from any source. 1484 */ 1485 TRUST_ALL, 1486 1487 /** 1488 * Trust forwarded headers only from proxies in a configured allowlist. 1489 */ 1490 TRUST_PROXY_ALLOWLIST, 1491 1492 /** 1493 * Ignore forwarded headers entirely. 1494 */ 1495 TRUST_NONE 1496 } 1497 } 1498 1499 /** 1500 * Extracts the media type (without parameters) from the first {@code Content-Type} header. 1501 * <p> 1502 * For example, {@code "text/html; charset=UTF-8"} → {@code "text/html"}. 
1503 * 1504 * @param headers request/response headers (must be non-{@code null}) 1505 * @return the media type if present; otherwise {@link Optional#empty()} 1506 * @see #extractContentTypeFromHeaderValue(String) 1507 */ 1508 @NonNull 1509 public static Optional<String> extractContentTypeFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1510 requireNonNull(headers); 1511 1512 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1513 1514 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1515 return Optional.empty(); 1516 1517 return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1518 } 1519 1520 /** 1521 * Extracts the media type (without parameters) from a {@code Content-Type} header value. 1522 * <p> 1523 * For example, {@code "application/json; charset=UTF-8"} → {@code "application/json"}. 1524 * 1525 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1526 * @return the media type if present; otherwise {@link Optional#empty()} 1527 */ 1528 @NonNull 1529 public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1530 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1531 1532 if (contentTypeHeaderValue == null) 1533 return Optional.empty(); 1534 1535 // Examples 1536 // Content-Type: text/html; charset=UTF-8 1537 // Content-Type: multipart/form-data; boundary=something 1538 1539 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1540 1541 // Simple case, e.g. "text/html" 1542 if (indexOfSemicolon == -1) 1543 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue)); 1544 1545 // More complex case, e.g. 
"text/html; charset=UTF-8" 1546 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon))); 1547 } 1548 1549 /** 1550 * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid. 1551 * <p> 1552 * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}. 1553 * 1554 * @param headers request/response headers (must be non-{@code null}) 1555 * @return the charset declared by the header; otherwise {@link Optional#empty()} 1556 * @see #extractCharsetFromHeaderValue(String) 1557 */ 1558 @NonNull 1559 public static Optional<Charset> extractCharsetFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1560 requireNonNull(headers); 1561 1562 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1563 1564 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1565 return Optional.empty(); 1566 1567 return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1568 } 1569 1570 /** 1571 * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value. 1572 * <p> 1573 * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found, 1574 * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}. 
1575 * 1576 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1577 * @return the resolved charset if present and valid; otherwise {@link Optional#empty()} 1578 */ 1579 @NonNull 1580 public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1581 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1582 1583 if (contentTypeHeaderValue == null) 1584 return Optional.empty(); 1585 1586 // Examples 1587 // Content-Type: text/html; charset=UTF-8 1588 // Content-Type: multipart/form-data; boundary=something 1589 1590 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1591 1592 // Simple case, e.g. "text/html" 1593 if (indexOfSemicolon == -1) 1594 return Optional.empty(); 1595 1596 // More complex case, e.g. "text/html; charset=UTF-8" or "multipart/form-data; charset=UTF-8; boundary=something" 1597 boolean finishedContentType = false; 1598 boolean finishedCharsetName = false; 1599 StringBuilder buffer = new StringBuilder(); 1600 String charsetName = null; 1601 1602 for (int i = 0; i < contentTypeHeaderValue.length(); i++) { 1603 char c = contentTypeHeaderValue.charAt(i); 1604 1605 if (Character.isWhitespace(c)) 1606 continue; 1607 1608 if (c == ';') { 1609 // No content type yet? This just be it... 1610 if (!finishedContentType) { 1611 finishedContentType = true; 1612 buffer = new StringBuilder(); 1613 } else if (!finishedCharsetName) { 1614 if (buffer.indexOf("charset=") == 0) { 1615 charsetName = buffer.toString(); 1616 finishedCharsetName = true; 1617 break; 1618 } 1619 } 1620 } else { 1621 buffer.append(Character.toLowerCase(c)); 1622 } 1623 } 1624 1625 // Handle case where charset is the end of the string, e.g. 
"whatever;charset=UTF-8" 1626 if (!finishedCharsetName) { 1627 String potentialCharset = trimAggressivelyToNull(buffer.toString()); 1628 if (potentialCharset != null && potentialCharset.startsWith("charset=")) { 1629 finishedCharsetName = true; 1630 charsetName = potentialCharset; 1631 } 1632 } 1633 1634 if (finishedCharsetName) { 1635 // e.g. charset=UTF-8 or charset="UTF-8" or charset='UTF-8' 1636 String possibleCharsetName = trimAggressivelyToNull(charsetName.replace("charset=", "")); 1637 1638 if (possibleCharsetName != null) { 1639 // strip optional surrounding quotes 1640 if ((possibleCharsetName.length() >= 2) && 1641 ((possibleCharsetName.charAt(0) == '"' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '"') || 1642 (possibleCharsetName.charAt(0) == '\'' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '\''))) { 1643 possibleCharsetName = possibleCharsetName.substring(1, possibleCharsetName.length() - 1); 1644 possibleCharsetName = trimAggressivelyToNull(possibleCharsetName); 1645 } 1646 1647 if (possibleCharsetName != null) { 1648 try { 1649 return Optional.of(Charset.forName(possibleCharsetName)); 1650 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) { 1651 return Optional.empty(); 1652 } 1653 } 1654 } 1655 } 1656 1657 return Optional.empty(); 1658 } 1659 1660 /** 1661 * A "stronger" version of {@link String#trim()} which discards leading and trailing Unicode space-separator characters ({@code \p{Z}}). 1662 * <p> 1663 * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time. 1664 * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F 1665 * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing. 1666 * <p> 1667 * Note that this does not remove other whitespace characters such as tabs, carriage returns, or line feeds. 
1668 * <p> 1669 * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details. 1670 * 1671 * @param string the string to trim 1672 * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0} 1673 */ 1674 @Nullable 1675 public static String trimAggressively(@Nullable String string) { 1676 if (string == null) 1677 return null; 1678 1679 string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1680 1681 if (string.length() == 0) 1682 return string; 1683 1684 string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1685 1686 return string; 1687 } 1688 1689 /** 1690 * Aggressively trims leading and trailing Unicode space-separator characters from the given string and returns {@code null} if the result is empty. 1691 * <p> 1692 * See {@link #trimAggressively(String)} for details on which code points are removed. 1693 * 1694 * @param string the input string; may be {@code null} 1695 * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty 1696 */ 1697 @Nullable 1698 public static String trimAggressivelyToNull(@Nullable String string) { 1699 if (string == null) 1700 return null; 1701 1702 string = trimAggressively(string); 1703 return string.length() == 0 ? null : string; 1704 } 1705 1706 /** 1707 * Aggressively trims leading and trailing Unicode space-separator characters from the given string and returns {@code ""} if the input is {@code null}. 1708 * <p> 1709 * See {@link #trimAggressively(String)} for details on which code points are removed. 
1710 * 1711 * @param string the input string; may be {@code null} 1712 * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null} 1713 */ 1714 @NonNull 1715 public static String trimAggressivelyToEmpty(@Nullable String string) { 1716 if (string == null) 1717 return ""; 1718 1719 return trimAggressively(string); 1720 } 1721 1722 static void validateHeaderNameAndValue(@Nullable String name, 1723 @Nullable String value) { 1724 // First, validate name: 1725 name = trimAggressivelyToNull(name); 1726 1727 if (name == null) 1728 throw new IllegalArgumentException("Header name is blank"); 1729 1730 for (int i = 0; i < name.length(); i++) { 1731 char c = name.charAt(i); 1732 // RFC 9110 tchar: "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 1733 if (c > 0x7F || !(c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '*' || c == '+' || 1734 c == '-' || c == '.' || c == '^' || c == '_' || c == '`' || c == '|' || c == '~' || 1735 Character.isLetterOrDigit(c))) { 1736 throw new IllegalArgumentException(format("Illegal header name '%s'. Offending character: '%s'", name, printableChar(c))); 1737 } 1738 } 1739 1740 // Then, validate value: 1741 if (value == null) 1742 return; 1743 1744 for (int i = 0; i < value.length(); i++) { 1745 char c = value.charAt(i); 1746 if (c == '\r' || c == '\n' || c == 0x00 || c > 0xFF || (c >= 0x00 && c < 0x20 && c != '\t')) { 1747 throw new IllegalArgumentException(format("Illegal header value '%s' for header name '%s'. 
Offending character: '%s'", value, name, printableChar(c))); 1748 } 1749 } 1750 1751 // Percent-encoded control sequence checks 1752 Matcher m = HEADER_PERCENT_ENCODING_PATTERN.matcher(value); 1753 1754 while (m.find()) { 1755 int b = Integer.parseInt(m.group(1), 16); 1756 if (b == 0x0D || b == 0x0A || b == 0x00 || (b >= 0x00 && b < 0x20 && b != 0x09)) { 1757 throw new IllegalArgumentException(format( 1758 "Illegal (percent-encoded) header value '%s' for header name '%s'. Offending octet: 0x%02X", 1759 value, name, b)); 1760 } 1761 } 1762 } 1763 1764 @NonNull 1765 static String printableString(@NonNull String input) { 1766 requireNonNull(input); 1767 1768 StringBuilder out = new StringBuilder(input.length() + 16); 1769 1770 for (int i = 0; i < input.length(); i++) 1771 out.append(printableChar(input.charAt(i))); 1772 1773 return out.toString(); 1774 } 1775 1776 @NonNull 1777 static String printableChar(char c) { 1778 if (c == '\r') return "\\r"; 1779 if (c == '\n') return "\\n"; 1780 if (c == '\t') return "\\t"; 1781 if (c == '\f') return "\\f"; 1782 if (c == '\b') return "\\b"; 1783 if (c == '\\') return "\\\\"; 1784 if (c == '\'') return "\\'"; 1785 if (c == '\"') return "\\\""; 1786 if (c == 0) return "\\0"; 1787 1788 if (c < 0x20 || c == 0x7F) // control chars 1789 return String.format("\\u%04X", (int) c); 1790 1791 if (Character.isISOControl(c) || Character.getType(c) == Character.FORMAT) 1792 return String.format("\\u%04X", (int) c); 1793 1794 return String.valueOf(c); 1795 } 1796 1797 @NonNull 1798 private static final Set<String> COMMA_JOINABLE_HEADER_NAMES = Set.of( 1799 // Common list-type headers (RFC 7230/9110) 1800 "accept", 1801 "accept-encoding", 1802 "accept-language", 1803 "cache-control", 1804 "pragma", 1805 "vary", 1806 "connection", 1807 "transfer-encoding", 1808 "upgrade", 1809 "allow", 1810 "via", 1811 "warning" 1812 // intentionally NOT: set-cookie, authorization, cookie, content-disposition, location 1813 ); 1814 1815 /** 1816 * Given a list 
of raw HTTP header lines, convert them into a normalized case-insensitive, order-preserving map which "inflates" comma-separated headers into distinct values where permitted according to RFC 7230/9110.
   * <p>
   * For example, given these raw header lines:
   * <pre>{@code List<String> lines = List.of(
   *   "Cache-Control: no-cache, no-store",
   *   "Set-Cookie: a=b; Path=/; HttpOnly",
   *   "Set-Cookie: c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/"
   * );}</pre>
   * The result of parsing would look like this:
   * <pre>{@code result.get("cache-control") -> [
   *   "no-cache",
   *   "no-store"
   * ]
   * result.get("set-cookie") -> [
   *   "a=b; Path=/; HttpOnly",
   *   "c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/"
   * ]}</pre>
   * <p>
   * Keys in the returned map are case-insensitive and are guaranteed to be in the same order as encountered in {@code rawHeaderLines}.
   * <p>
   * Values in the returned map are guaranteed to be in the same order as encountered in {@code rawHeaderLines}.
1837 * 1838 * @param rawHeaderLines the raw HTTP header lines to parse 1839 * @return a normalized mapping of header name keys to values 1840 */ 1841 @NonNull 1842 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractHeadersFromRawHeaderLines(@NonNull List<@NonNull String> rawHeaderLines) { 1843 requireNonNull(rawHeaderLines); 1844 1845 // 1) Unfold obsolete folded lines (obs-fold): lines beginning with SP/HT are continuations 1846 List<String> lines = unfold(rawHeaderLines); 1847 1848 // 2) Parse into map 1849 Map<String, Set<String>> headers = new LinkedCaseInsensitiveMap<>(); 1850 1851 for (String raw : lines) { 1852 String line = trimAggressivelyToNull(raw); 1853 1854 if (line == null) 1855 continue; 1856 1857 int idx = line.indexOf(':'); 1858 1859 if (idx <= 0) 1860 continue; // skip malformed 1861 1862 addParsedHeader(headers, line.substring(0, idx), line.substring(idx + 1)); 1863 } 1864 1865 freezeStringValueSets(headers); 1866 return headers; 1867 } 1868 1869 static void addParsedHeader(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1870 @Nullable String name, 1871 @Nullable String value) { 1872 requireNonNull(headers); 1873 1874 String key = trimAggressivelyToEmpty(name); // keep original case for display 1875 if (trimAggressivelyToNull(value) == null) 1876 return; 1877 1878 if (COMMA_JOINABLE_HEADER_NAMES.contains(key.toLowerCase(Locale.ROOT))) { 1879 for (String part : splitCommaAware(value)) { 1880 String v = trimAggressivelyToNull(part); 1881 if (v != null) 1882 addStringValue(headers, key, v); 1883 } 1884 } else { 1885 addStringValue(headers, key, value.trim()); 1886 } 1887 } 1888 1889 static void addParsedHeaderValues(@NonNull Set<@NonNull String> values, 1890 @Nullable String name, 1891 @Nullable String value) { 1892 requireNonNull(values); 1893 1894 String key = trimAggressivelyToEmpty(name); 1895 String keyLowercase = key.toLowerCase(Locale.ROOT); 1896 value = trimAggressivelyToNull(value); 1897 1898 if 
(value == null) 1899 return; 1900 1901 if (COMMA_JOINABLE_HEADER_NAMES.contains(keyLowercase)) { 1902 for (String part : splitCommaAware(value)) { 1903 String v = trimAggressivelyToNull(part); 1904 if (v != null) 1905 values.add(v); 1906 } 1907 } else { 1908 values.add(value.trim()); 1909 } 1910 } 1911 1912 static void freezeStringValueSets(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> valuesByName) { 1913 requireNonNull(valuesByName); 1914 1915 for (Entry<String, Set<String>> entry : valuesByName.entrySet()) { 1916 Set<String> values = entry.getValue(); 1917 1918 if (values == null || values.isEmpty()) { 1919 entry.setValue(Set.of()); 1920 } else if (values instanceof LinkedHashSet) { 1921 entry.setValue(Collections.unmodifiableSet(values)); 1922 } 1923 } 1924 } 1925 1926 private static void addStringValue(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> valuesByName, 1927 @NonNull String name, 1928 @NonNull String value) { 1929 requireNonNull(valuesByName); 1930 requireNonNull(name); 1931 requireNonNull(value); 1932 1933 Set<String> values = valuesByName.get(name); 1934 1935 if (values == null || values.isEmpty()) { 1936 valuesByName.put(name, Set.of(value)); 1937 return; 1938 } 1939 1940 if (values.contains(value)) 1941 return; 1942 1943 if (values instanceof LinkedHashSet) { 1944 values.add(value); 1945 return; 1946 } 1947 1948 Set<String> promotedValues = new LinkedHashSet<>(values); 1949 promotedValues.add(value); 1950 valuesByName.put(name, promotedValues); 1951 } 1952 1953 /** 1954 * Header parsing helper 1955 */ 1956 @NonNull 1957 private static List<String> unfold(@NonNull List<String> raw) { 1958 requireNonNull(raw); 1959 if (raw.isEmpty()) return List.of(); 1960 1961 List<String> out = new ArrayList<>(raw.size()); 1962 StringBuilder cur = null; 1963 boolean curIsHeader = false; 1964 1965 for (String line : raw) { 1966 if (line == null) continue; 1967 1968 boolean isContinuation = !line.isEmpty() && (line.charAt(0) == ' ' || 
line.charAt(0) == '\t'); 1969 if (isContinuation) { 1970 if (cur != null && curIsHeader) { 1971 cur.append(' ').append(line.trim()); 1972 } else { 1973 // Do not fold into a non-header; flush previous and start anew 1974 if (cur != null) out.add(cur.toString()); 1975 cur = new StringBuilder(line); 1976 curIsHeader = line.indexOf(':') > 0; // almost certainly false for leading-space lines 1977 } 1978 } else { 1979 if (cur != null) out.add(cur.toString()); 1980 cur = new StringBuilder(line); 1981 curIsHeader = line.indexOf(':') > 0; 1982 } 1983 } 1984 if (cur != null) out.add(cur.toString()); 1985 return out; 1986 } 1987 1988 /** 1989 * Header parsing helper: split on commas that are not inside a quoted-string; supports \" escapes inside quotes. 1990 */ 1991 @NonNull 1992 private static List<String> splitCommaAware(@NonNull String string) { 1993 requireNonNull(string); 1994 1995 List<String> out = new ArrayList<>(4); 1996 StringBuilder cur = new StringBuilder(); 1997 boolean inQuotes = false; 1998 boolean escaped = false; 1999 2000 for (int i = 0; i < string.length(); i++) { 2001 char c = string.charAt(i); 2002 2003 if (escaped) { 2004 // Preserve the escaped char as-is 2005 cur.append(c); 2006 escaped = false; 2007 } else if (c == '\\') { 2008 if (inQuotes) { 2009 // Preserve the backslash itself, then mark next char as escaped 2010 cur.append('\\'); // ← keep the backslash 2011 escaped = true; 2012 } else { 2013 cur.append('\\'); // literal backslash outside quotes 2014 } 2015 } else if (c == '"') { 2016 inQuotes = !inQuotes; 2017 cur.append('"'); 2018 } else if (c == ',' && !inQuotes) { 2019 out.add(cur.toString()); 2020 cur.setLength(0); 2021 } else { 2022 cur.append(c); 2023 } 2024 } 2025 out.add(cur.toString()); 2026 return out; 2027 } 2028 2029 /** 2030 * Header parsing helper: split on semicolons that are not inside a quoted-string; supports \" escapes inside quotes. 
2031 */ 2032 @NonNull 2033 private static List<String> splitSemicolonAware(@NonNull String string) { 2034 requireNonNull(string); 2035 2036 List<String> out = new ArrayList<>(4); 2037 StringBuilder cur = new StringBuilder(); 2038 boolean inQuotes = false; 2039 boolean escaped = false; 2040 2041 for (int i = 0; i < string.length(); i++) { 2042 char c = string.charAt(i); 2043 2044 if (escaped) { 2045 cur.append(c); 2046 escaped = false; 2047 } else if (c == '\\') { 2048 if (inQuotes) { 2049 cur.append('\\'); 2050 escaped = true; 2051 } else { 2052 cur.append('\\'); 2053 } 2054 } else if (c == '"') { 2055 inQuotes = !inQuotes; 2056 cur.append('"'); 2057 } else if (c == ';' && !inQuotes) { 2058 out.add(cur.toString()); 2059 cur.setLength(0); 2060 } else { 2061 cur.append(c); 2062 } 2063 } 2064 2065 out.add(cur.toString()); 2066 return out; 2067 } 2068 2069 /** 2070 * Remove a single pair of surrounding quotes if present. 2071 */ 2072 @NonNull 2073 private static String stripOptionalQuotes(@NonNull String string) { 2074 requireNonNull(string); 2075 2076 if (string.length() >= 2) { 2077 char first = string.charAt(0), last = string.charAt(string.length() - 1); 2078 2079 if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) 2080 return string.substring(1, string.length() - 1); 2081 } 2082 2083 return string; 2084 } 2085 2086 /** 2087 * Parse host[:port] with IPv6 support: "[v6](:port)?" or "host(:port)?". 2088 * Returns host (with brackets for v6) and port (nullable). 
2089 */ 2090 @ThreadSafe 2091 private static final class HostPort { 2092 @NonNull 2093 private final String host; 2094 @Nullable 2095 private final Integer port; 2096 2097 HostPort(@NonNull String host, 2098 @Nullable Integer port) { 2099 this.host = host; 2100 this.port = port; 2101 } 2102 2103 @NonNull 2104 public String getHost() { 2105 return this.host; 2106 } 2107 2108 @NonNull 2109 public Optional<Integer> getPort() { 2110 return Optional.ofNullable(this.port); 2111 } 2112 } 2113 2114 @NonNull 2115 private static Optional<HostPort> parseHostPort(@Nullable String input) { 2116 input = trimAggressivelyToNull(input); 2117 2118 if (input == null) 2119 return Optional.empty(); 2120 2121 input = stripOptionalQuotes(input); 2122 2123 if (input.startsWith("[")) { 2124 int close = input.indexOf(']'); 2125 2126 if (close > 0) { 2127 String core = input.substring(1, close); // IPv6 literal without brackets 2128 String rest = input.substring(close + 1); // maybe ":port" 2129 String host = "[" + core + "]"; 2130 Integer port = null; 2131 2132 if (rest.startsWith(":")) { 2133 String ps = trimAggressivelyToNull(rest.substring(1)); 2134 if (ps != null) { 2135 try { 2136 port = Integer.parseInt(ps, 10); 2137 } catch (Exception ignored) { 2138 // Nothing to do 2139 } 2140 } 2141 } 2142 2143 return Optional.of(new HostPort(host, port)); 2144 } 2145 } 2146 2147 int colon = input.indexOf(':'); 2148 2149 if (colon > 0 && input.indexOf(':', colon + 1) == -1) { 2150 // exactly one ':' -> host:port (IPv4/hostname) 2151 String h = trimAggressivelyToNull(input.substring(0, colon)); 2152 String ps = trimAggressivelyToNull(input.substring(colon + 1)); 2153 Integer p = null; 2154 2155 if (ps != null) { 2156 try { 2157 p = Integer.parseInt(ps, 10); 2158 } catch (Exception ignored) { 2159 // Nothing to do 2160 } 2161 } 2162 if (h != null) 2163 return Optional.of(new HostPort(h, p)); 2164 } 2165 2166 // no port 2167 return Optional.of(new HostPort(input, null)); 2168 } 2169 2170 @NonNull 
2171 private static String removeDotSegments(@NonNull String path) { 2172 requireNonNull(path); 2173 2174 Deque<String> stack = new ArrayDeque<>(); 2175 2176 for (String seg : path.split("/")) { 2177 if (seg.isEmpty() || ".".equals(seg)) 2178 continue; 2179 2180 if ("..".equals(seg)) { 2181 if (!stack.isEmpty()) 2182 stack.removeLast(); 2183 } else { 2184 stack.addLast(seg); 2185 } 2186 } 2187 2188 return "/" + String.join("/", stack); 2189 } 2190}