001/* 002 * Copyright 2022-2026 Revetware LLC. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.soklet; 018 019import com.soklet.exception.IllegalRequestException; 020import com.soklet.internal.spring.LinkedCaseInsensitiveMap; 021import org.jspecify.annotations.NonNull; 022import org.jspecify.annotations.Nullable; 023 024import javax.annotation.concurrent.NotThreadSafe; 025import javax.annotation.concurrent.ThreadSafe; 026import java.io.ByteArrayOutputStream; 027import java.lang.Thread.UncaughtExceptionHandler; 028import java.lang.invoke.MethodHandle; 029import java.lang.invoke.MethodHandles; 030import java.lang.invoke.MethodHandles.Lookup; 031import java.lang.invoke.MethodType; 032import java.net.InetAddress; 033import java.net.InetSocketAddress; 034import java.net.URI; 035import java.net.URISyntaxException; 036import java.net.URLEncoder; 037import java.nio.charset.Charset; 038import java.nio.charset.IllegalCharsetNameException; 039import java.nio.charset.StandardCharsets; 040import java.nio.charset.UnsupportedCharsetException; 041import java.util.ArrayDeque; 042import java.util.ArrayList; 043import java.util.Arrays; 044import java.util.Collections; 045import java.util.Deque; 046import java.util.LinkedHashMap; 047import java.util.LinkedHashSet; 048import java.util.List; 049import java.util.Locale; 050import java.util.Locale.LanguageRange; 051import java.util.Map; 052import java.util.Map.Entry; 053import 
java.util.Optional; 054import java.util.Set; 055import java.util.concurrent.ExecutorService; 056import java.util.concurrent.Executors; 057import java.util.concurrent.ThreadFactory; 058import java.util.function.Predicate; 059import java.util.regex.Matcher; 060import java.util.regex.Pattern; 061import java.util.stream.Collectors; 062 063import static java.lang.String.format; 064import static java.util.Objects.requireNonNull; 065 066/** 067 * A non-instantiable collection of utility methods. 068 * 069 * @author <a href="https://www.revetkn.com">Mark Allen</a> 070 */ 071@ThreadSafe 072public final class Utilities { 073 @NonNull 074 private static final boolean VIRTUAL_THREADS_AVAILABLE; 075 @NonNull 076 private static final byte[] EMPTY_BYTE_ARRAY; 077 @NonNull 078 private static final Pattern HEAD_WHITESPACE_PATTERN; 079 @NonNull 080 private static final Pattern TAIL_WHITESPACE_PATTERN; 081 @NonNull 082 private static final Pattern HEADER_PERCENT_ENCODING_PATTERN; 083 084 static { 085 EMPTY_BYTE_ARRAY = new byte[0]; 086 087 boolean virtualThreadsAvailable = false; 088 089 try { 090 // Detect if Virtual Threads are usable by feature testing via reflection. 091 // Hat tip to https://github.com/javalin/javalin for this technique 092 Class.forName("java.lang.Thread$Builder$OfVirtual"); 093 virtualThreadsAvailable = true; 094 } catch (Exception ignored) { 095 // We don't care why this failed, but if we're here we know JVM does not support virtual threads 096 } 097 098 VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable; 099 100 // See https://www.regular-expressions.info/unicode.html 101 // \p{Z} or \p{Separator}: any kind of whitespace or invisible separator. 102 // 103 // First pattern matches all whitespace at the head of a string, second matches the same for tail. 104 // Useful for a "stronger" trim() function, which is almost always what we want in a web context 105 // with user-supplied input. 
106 HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+"); 107 TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$"); 108 109 HEADER_PERCENT_ENCODING_PATTERN = Pattern.compile("%([0-9A-Fa-f]{2})"); 110 } 111 112 private Utilities() { 113 // Non-instantiable 114 } 115 116 /** 117 * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)? 118 * 119 * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise 120 */ 121 @NonNull 122 static Boolean virtualThreadsAvailable() { 123 return VIRTUAL_THREADS_AVAILABLE; 124 } 125 126 /** 127 * Provides a virtual thread factory if supported by the runtime. 128 * <p> 129 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 130 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our {@link ThreadFactory} via {@link MethodHandle} references. 131 * <p> 132 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 133 * 134 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 135 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 136 * @return a virtual thread factory 137 * @throws IllegalStateException if the runtime environment does not support virtual threads 138 */ 139 @NonNull 140 static ThreadFactory createVirtualThreadFactory(@NonNull String threadNamePrefix, 141 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 142 requireNonNull(threadNamePrefix); 143 requireNonNull(uncaughtExceptionHandler); 144 145 if (!virtualThreadsAvailable()) 146 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 147 148 // Hat tip to https://github.com/javalin/javalin for this technique 149 Class<?> threadBuilderOfVirtualClass; 150 151 try { 152 threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual"); 153 } catch (ClassNotFoundException e) { 154 throw new IllegalStateException("Unable to load virtual thread builder class", e); 155 } 156 157 Lookup lookup = MethodHandles.publicLookup(); 158 159 MethodHandle methodHandleThreadOfVirtual; 160 MethodHandle methodHandleThreadBuilderOfVirtualName; 161 MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler; 162 MethodHandle methodHandleThreadBuilderOfVirtualFactory; 163 164 try { 165 methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass)); 166 methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class)); 167 methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class)); 168 methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class)); 169 } catch (NoSuchMethodException | IllegalAccessException e) { 170 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 171 } 172 173 try { 174 // Thread.ofVirtual() 175 Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke(); 176 // .name(threadNamePrefix, start) 177 methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1); 178 // .uncaughtExceptionHandler(uncaughtExceptionHandler) 179 
methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler); 180 // .factory(); 181 return (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder); 182 } catch (Throwable t) { 183 throw new IllegalStateException("Unable to create virtual thread factory", t); 184 } 185 } 186 187 /** 188 * Provides a virtual-thread-per-task executor service if supported by the runtime. 189 * <p> 190 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 191 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references. 192 * <p> 193 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 194 * <pre>{@code // This method is effectively equivalent to this code 195 * return Executors.newThreadPerTaskExecutor( 196 * Thread.ofVirtual() 197 * .name(threadNamePrefix) 198 * .uncaughtExceptionHandler(uncaughtExceptionHandler) 199 * .factory() 200 * );}</pre> 201 * 202 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 203 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 204 * @return a virtual-thread-per-task executor service 205 * @throws IllegalStateException if the runtime environment does not support virtual threads 206 */ 207 @NonNull 208 static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@NonNull String threadNamePrefix, 209 @NonNull UncaughtExceptionHandler uncaughtExceptionHandler) { 210 requireNonNull(threadNamePrefix); 211 requireNonNull(uncaughtExceptionHandler); 212 213 if (!virtualThreadsAvailable()) 214 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 215 216 ThreadFactory threadFactory = createVirtualThreadFactory(threadNamePrefix, uncaughtExceptionHandler); 217 218 Lookup lookup = MethodHandles.publicLookup(); 219 MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor; 220 221 try { 222 methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class)); 223 } catch (NoSuchMethodException | IllegalAccessException e) { 224 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 225 } 226 227 try { 228 // return Executors.newThreadPerTaskExecutor(threadFactory); 229 return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory); 230 } catch (Throwable t) { 231 throw new IllegalStateException("Unable to create virtual thread executor service", t); 232 } 233 } 234 235 /** 236 * Returns a shared zero-length {@code byte[]} instance. 237 * <p> 238 * Useful as a sentinel when you need a non-{@code null} byte array but have no content. 239 * 240 * @return a zero-length byte array (never {@code null}) 241 */ 242 @NonNull 243 static byte[] emptyByteArray() { 244 return EMPTY_BYTE_ARRAY; 245 } 246 247 /** 248 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 249 * <p> 250 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromQuery(String, QueryFormat, Charset)} if you need to specify a different charset). 251 * <p> 252 * Pairs missing a name are ignored. 253 * <p> 254 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
255 * 256 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 257 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 258 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 259 * @throws IllegalRequestException if the query string contains malformed percent-encoding 260 */ 261 @NonNull 262 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 263 @NonNull QueryFormat queryFormat) { 264 requireNonNull(query); 265 requireNonNull(queryFormat); 266 267 return extractQueryParametersFromQuery(query, queryFormat, StandardCharsets.UTF_8); 268 } 269 270 /** 271 * Parses a query string such as {@code "a=1&b=2&c=%20"} into a multimap of names to values. 272 * <p> 273 * Decodes percent-escapes using the specified charset. 274 * <p> 275 * Pairs missing a name are ignored. 276 * <p> 277 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 278 * 279 * @param query a raw query string such as {@code "a=1&b=2&c=%20"} 280 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 281 * @param charset the charset to use when decoding percent-escapes 282 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 283 * @throws IllegalRequestException if the query string contains malformed percent-encoding 284 */ 285 @NonNull 286 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromQuery(@NonNull String query, 287 @NonNull QueryFormat queryFormat, 288 @NonNull Charset charset) { 289 requireNonNull(query); 290 requireNonNull(queryFormat); 291 requireNonNull(charset); 292 293 // For form parameters, body will look like "One=Two&Three=Four" ...a query string. 
294 String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain 295 return extractQueryParametersFromUrl(syntheticUrl, queryFormat, charset); 296 } 297 298 /** 299 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 300 * <p> 301 * Decodes percent-escapes using UTF-8, which is usually what you want (see {@link #extractQueryParametersFromUrl(String, QueryFormat, Charset)} if you need to specify a different charset). 302 * <p> 303 * Pairs missing a name are ignored. 304 * <p> 305 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 306 * 307 * @param url a relative or absolute URL/URI string 308 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 309 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 310 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 311 */ 312 @NonNull 313 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 314 @NonNull QueryFormat queryFormat) { 315 requireNonNull(url); 316 requireNonNull(queryFormat); 317 318 return extractQueryParametersFromUrl(url, queryFormat, StandardCharsets.UTF_8); 319 } 320 321 /** 322 * Parses query strings from relative or absolute URLs such as {@code "/example?a=a=1&b=2&c=%20"} or {@code "https://www.soklet.com/example?a=1&b=2&c=%20"} into a multimap of names to values. 323 * <p> 324 * Decodes percent-escapes using the specified charset. 325 * <p> 326 * Pairs missing a name are ignored. 327 * <p> 328 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 
329 * 330 * @param url a relative or absolute URL/URI string 331 * @param queryFormat how to decode: {@code application/x-www-form-urlencoded} or "strict" RFC 3986 332 * @param charset the charset to use when decoding percent-escapes 333 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 334 * @throws IllegalRequestException if the URL or query contains malformed percent-encoding 335 */ 336 @NonNull 337 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractQueryParametersFromUrl(@NonNull String url, 338 @NonNull QueryFormat queryFormat, 339 @NonNull Charset charset) { 340 requireNonNull(url); 341 requireNonNull(queryFormat); 342 requireNonNull(charset); 343 344 URI uri; 345 346 try { 347 uri = new URI(url); 348 } catch (URISyntaxException e) { 349 throw new IllegalRequestException(format("Invalid URL '%s'", url), e); 350 } 351 352 String query = trimAggressivelyToNull(uri.getRawQuery()); 353 354 if (query == null) 355 return Map.of(); 356 357 Map<String, Set<String>> queryParameters = new LinkedHashMap<>(); 358 for (String pair : query.split("&")) { 359 if (pair.isEmpty()) 360 continue; 361 362 String[] nv = pair.split("=", 2); 363 String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null); 364 String rawValue = trimAggressivelyToNull(nv.length > 1 ? nv[1] : null); 365 366 if (rawName == null) 367 continue; 368 369 // Preserve empty values; it's what users probably expect 370 if (rawValue == null) 371 rawValue = ""; 372 373 String name = decodeQueryComponent(rawName, queryFormat, charset); 374 String value = decodeQueryComponent(rawValue, queryFormat, charset); 375 376 queryParameters.computeIfAbsent(name, k -> new LinkedHashSet<>()).add(value); 377 } 378 379 return queryParameters; 380 } 381 382 /** 383 * Decodes a single key or value using the given mode and charset. 
384 */ 385 @NonNull 386 private static String decodeQueryComponent(@NonNull String string, 387 @NonNull QueryFormat queryFormat, 388 @NonNull Charset charset) { 389 requireNonNull(string); 390 requireNonNull(queryFormat); 391 requireNonNull(charset); 392 393 if (string.isEmpty()) 394 return ""; 395 396 // Step 1: in form mode, '+' means space 397 String prepped = (queryFormat == QueryFormat.X_WWW_FORM_URLENCODED) ? string.replace('+', ' ') : string; 398 // Step 2: percent-decode bytes, then interpret bytes with the provided charset 399 return percentDecode(prepped, charset); 400 } 401 402 /** 403 * Percent-decodes a string into bytes, then constructs a String using the provided charset. 404 * One pass only: invalid %xy sequences trigger an exception. 405 */ 406 @NonNull 407 private static String percentDecode(@NonNull String s, @NonNull Charset charset) { 408 requireNonNull(s); 409 requireNonNull(charset); 410 411 if (s.isEmpty()) 412 return ""; 413 414 StringBuilder sb = new StringBuilder(s.length()); 415 ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 416 417 for (int i = 0; i < s.length(); ) { 418 char c = s.charAt(i); 419 420 if (c == '%') { 421 // Consume one or more consecutive %xx triplets into bytes 422 bytes.reset(); 423 int j = i; 424 425 while (j < s.length() && s.charAt(j) == '%') { 426 if (j + 2 >= s.length()) 427 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 428 429 int hi = hex(s.charAt(j + 1)); 430 int lo = hex(s.charAt(j + 2)); 431 if (hi < 0 || lo < 0) 432 throw new IllegalRequestException("Invalid percent-encoding in URL component"); 433 434 bytes.write((hi << 4) | lo); 435 j += 3; 436 } 437 438 sb.append(new String(bytes.toByteArray(), charset)); 439 i = j; 440 continue; 441 } 442 443 // Non-'%' char: append it as-is. 444 // This preserves surrogate pairs naturally as the loop hits both chars. 
445 sb.append(c); 446 i++; 447 } 448 449 return sb.toString(); 450 } 451 452 private static int hex(char c) { 453 if (c >= '0' && c <= '9') return c - '0'; 454 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 455 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 456 return -1; 457 } 458 459 /** 460 * Parses {@code Cookie} request headers into a map of cookie names to values. 461 * <p> 462 * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>. 463 * Values are parsed per the following liberal rules: 464 * <ul> 465 * <li>Components are split on {@code ';'} unless inside a quoted string.</li> 466 * <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li> 467 * <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li> 468 * </ul> 469 * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order. 470 * 471 * @param headers request headers as a multimap of header name to values (must be non-{@code null}) 472 * @return a map of cookie name to distinct values; empty if no valid cookies are present 473 */ 474 @NonNull 475 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractCookiesFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 476 requireNonNull(headers); 477 478 // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive) 479 Map<String, Set<String>> cookies = new LinkedHashMap<>(); 480 481 for (Entry<String, Set<String>> entry : headers.entrySet()) { 482 String headerName = entry.getKey(); 483 if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim())) 484 continue; 485 486 Set<String> values = entry.getValue(); 487 if (values == null) continue; 488 489 for (String headerValue : values) { 490 headerValue = trimAggressivelyToNull(headerValue); 491 if (headerValue == null) continue; 492 493 // 
Split on ';' only when NOT inside a quoted string 494 List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue); 495 496 for (String cookieComponent : cookieComponents) { 497 cookieComponent = trimAggressivelyToNull(cookieComponent); 498 if (cookieComponent == null) continue; 499 500 String[] cookiePair = cookieComponent.split("=", 2); 501 String rawName = trimAggressivelyToNull(cookiePair[0]); 502 String rawValue = (cookiePair.length == 2 ? trimAggressivelyToNull(cookiePair[1]) : null); 503 504 if (rawName == null) continue; 505 506 // DO NOT decode the name; cookie names are case-sensitive and rarely encoded 507 String cookieName = rawName; 508 509 String cookieValue = null; 510 if (rawValue != null) { 511 // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space) 512 String unquoted = unquoteCookieValueIfNeeded(rawValue); 513 cookieValue = percentDecodeCookieValue(unquoted); 514 } 515 516 cookies.computeIfAbsent(cookieName, key -> new LinkedHashSet<>()); 517 if (cookieValue != null) 518 cookies.get(cookieName).add(cookieValue); 519 } 520 } 521 } 522 523 return cookies; 524 } 525 526 /** 527 * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially. 
528 */ 529 @NonNull 530 private static String percentDecodeCookieValue(@NonNull String cookieValue) { 531 requireNonNull(cookieValue); 532 533 ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length()); 534 535 for (int i = 0; i < cookieValue.length(); ) { 536 char c = cookieValue.charAt(i); 537 if (c == '%' && i + 2 < cookieValue.length()) { 538 int hi = Character.digit(cookieValue.charAt(i + 1), 16); 539 int lo = Character.digit(cookieValue.charAt(i + 2), 16); 540 if (hi >= 0 && lo >= 0) { 541 out.write((hi << 4) + lo); 542 i += 3; 543 continue; 544 } 545 } 546 547 out.write((byte) c); 548 i++; 549 } 550 551 return out.toString(StandardCharsets.UTF_8); 552 } 553 554 /** 555 * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value. 556 * Supports backslash-escaped quotes within quoted strings. 557 */ 558 private static List<@NonNull String> splitCookieHeaderRespectingQuotes(@NonNull String headerValue) { 559 List<String> parts = new ArrayList<>(); 560 StringBuilder cur = new StringBuilder(headerValue.length()); 561 boolean inQuotes = false; 562 boolean escape = false; 563 564 for (int i = 0; i < headerValue.length(); i++) { 565 char c = headerValue.charAt(i); 566 567 if (escape) { 568 // keep escaped char literally (e.g., \" \; \\) 569 cur.append(c); 570 escape = false; 571 continue; 572 } 573 574 if (c == '\\') { 575 escape = true; 576 // keep the backslash for now; unquote step will handle unescaping 577 cur.append(c); 578 continue; 579 } 580 581 if (c == '"') { 582 inQuotes = !inQuotes; 583 cur.append(c); 584 continue; 585 } 586 587 if (c == ';' && !inQuotes) { 588 parts.add(cur.toString()); 589 cur.setLength(0); 590 continue; 591 } 592 593 cur.append(c); 594 } 595 596 if (cur.length() > 0) 597 parts.add(cur.toString()); 598 599 return parts; 600 } 601 602 /** 603 * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; . 604 * Otherwise returns the input as-is. 
605 */ 606 @NonNull 607 private static String unquoteCookieValueIfNeeded(@NonNull String rawValue) { 608 requireNonNull(rawValue); 609 610 if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') { 611 // Strip the surrounding quotes 612 String inner = rawValue.substring(1, rawValue.length() - 1); 613 614 // Unescape \" \\ and \; (common patterns seen in the wild) 615 // Order matters: unescape backslash-escape sequences, then leave other chars intact. 616 StringBuilder sb = new StringBuilder(inner.length()); 617 boolean escape = false; 618 619 for (int i = 0; i < inner.length(); i++) { 620 char c = inner.charAt(i); 621 if (escape) { 622 // Only special-case a few common escapes; otherwise keep the char 623 if (c == '"' || c == '\\' || c == ';') 624 sb.append(c); 625 else 626 sb.append(c); // unknown escape -> keep literally (liberal in what we accept) 627 628 escape = false; 629 } else if (c == '\\') { 630 escape = true; 631 } else { 632 sb.append(c); 633 } 634 } 635 636 // If string ended with a dangling backslash, keep it literally 637 if (escape) 638 sb.append('\\'); 639 640 return sb.toString(); 641 } 642 643 return rawValue; 644 } 645 646 /** 647 * Normalizes a URL or path into a canonical request path and optionally performs percent-decoding on the path. 648 * <p> 649 * For example, {@code "https://www.soklet.com/ab%20c?one=two"} would be normalized to {@code "/ab c"}. 650 * <p> 651 * The {@code OPTIONS *} special case returns {@code "*"}. 652 * <p> 653 * Behavior: 654 * <ul> 655 * <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li> 656 * <li>Ensures the result begins with {@code '/'}.</li> 657 * <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li> 658 * <li>Safely normalizes path traversals, e.g. 
path {@code '/a/../b'} would be normalized to {@code '/b'}</li> 659 * <li>Strips any query string.</li> 660 * <li>Applies aggressive trimming of Unicode whitespace.</li> 661 * <li>Rejects malformed percent-encoding when decoding is enabled.</li> 662 * </ul> 663 * 664 * @param url a URL or path to normalize 665 * @param performDecoding {@code true} if decoding should be performed on the path (e.g. replace {@code %20} with a space character), {@code false} otherwise 666 * @return the normalized path, {@code "/"} for empty input 667 */ 668 @NonNull 669 public static String extractPathFromUrl(@NonNull String url, 670 @NonNull Boolean performDecoding) { 671 requireNonNull(url); 672 673 url = trimAggressivelyToEmpty(url); 674 675 // Special case for OPTIONS * requests 676 if (url.equals("*")) 677 return "*"; 678 679 // Parse with java.net.URI to isolate raw path; then percent-decode only the path 680 try { 681 URI uri = new URI(url); 682 683 String rawPath = uri.getRawPath(); // null => "/" 684 685 if (rawPath == null || rawPath.isEmpty()) 686 rawPath = "/"; 687 688 if (!performDecoding) 689 return rawPath; 690 691 String decodedPath = percentDecode(rawPath, StandardCharsets.UTF_8); 692 693 // Sanitize path traversal (e.g. 
/a/../b -> /b) 694 decodedPath = removeDotSegments(decodedPath); 695 696 // Normalize trailing slashes like normalizedPathForUrl currently does 697 if (!decodedPath.startsWith("/")) 698 decodedPath = "/" + decodedPath; 699 700 if (!"/".equals(decodedPath)) 701 while (decodedPath.endsWith("/")) 702 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 703 704 return decodedPath; 705 } catch (URISyntaxException e) { 706 // If it's not an absolute URL, treat the whole string as a path and percent-decode 707 String path = url; 708 int q = path.indexOf('?'); 709 710 if (q != -1) 711 path = path.substring(0, q); 712 713 if (path.isEmpty()) 714 path = "/"; 715 716 if (!performDecoding) 717 return path; 718 719 String decodedPath = percentDecode(path, StandardCharsets.UTF_8); 720 721 // Sanitize path traversal (e.g. /a/../b -> /b) 722 decodedPath = removeDotSegments(decodedPath); 723 724 if (!decodedPath.startsWith("/")) 725 decodedPath = "/" + decodedPath; 726 727 if (!"/".equals(decodedPath)) 728 while (decodedPath.endsWith("/")) 729 decodedPath = decodedPath.substring(0, decodedPath.length() - 1); 730 731 return decodedPath; 732 } 733 } 734 735 /** 736 * Extracts the raw (un-decoded) query component from a URL. 737 * <p> 738 * For example, {@code "/path?a=b&c=d%20e"} would return {@code "a=b&c=d%20e"}. 
739 * 740 * @param url a raw URL or path 741 * @return the raw query component, or {@link Optional#empty()} if none 742 */ 743 @NonNull 744 public static Optional<String> extractRawQueryFromUrl(@NonNull String url) { 745 requireNonNull(url); 746 747 url = trimAggressivelyToEmpty(url); 748 749 if ("*".equals(url)) 750 return Optional.empty(); 751 752 try { 753 URI uri = new URI(url); 754 return Optional.ofNullable(trimAggressivelyToNull(uri.getRawQuery())); 755 } catch (URISyntaxException e) { 756 // Not a valid URI, try to extract query manually 757 int q = url.indexOf('?'); 758 if (q == -1) 759 return Optional.empty(); 760 761 String query = trimAggressivelyToNull(url.substring(q + 1)); 762 return Optional.ofNullable(query); 763 } 764 } 765 766 /** 767 * Encodes decoded query parameters into a raw query string. 768 * <p> 769 * For example, given {@code {a=[b], c=[d e]}} and {@link QueryFormat#RFC_3986_STRICT}, 770 * returns {@code "a=b&c=d%20e"}. 771 * 772 * @param queryParameters the decoded query parameters 773 * @param queryFormat the encoding strategy 774 * @return the encoded query string, or the empty string if no parameters 775 */ 776 @NonNull 777 public static String encodeQueryParameters(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> queryParameters, 778 @NonNull QueryFormat queryFormat) { 779 requireNonNull(queryParameters); 780 requireNonNull(queryFormat); 781 782 if (queryParameters.isEmpty()) 783 return ""; 784 785 StringBuilder sb = new StringBuilder(); 786 boolean first = true; 787 788 for (Entry<String, Set<String>> entry : queryParameters.entrySet()) { 789 String encodedName = encodeQueryComponent(entry.getKey(), queryFormat); 790 791 for (String value : entry.getValue()) { 792 if (!first) 793 sb.append('&'); 794 795 sb.append(encodedName); 796 sb.append('='); 797 sb.append(encodeQueryComponent(value, queryFormat)); 798 799 first = false; 800 } 801 } 802 803 return sb.toString(); 804 } 805 806 @NonNull 807 static String 
encodeQueryComponent(@NonNull String queryComponent, 808 @NonNull QueryFormat queryFormat) { 809 requireNonNull(queryComponent); 810 requireNonNull(queryFormat); 811 812 String encoded = URLEncoder.encode(queryComponent, StandardCharsets.UTF_8); 813 814 if (queryFormat == QueryFormat.RFC_3986_STRICT) 815 encoded = encoded.replace("+", "%20"); 816 817 return encoded; 818 } 819 820 @NonNull 821 static String encodePath(@NonNull String path) { 822 requireNonNull(path); 823 824 if ("*".equals(path)) 825 return path; 826 827 // Encode each path segment individually, preserving '/' separators. 828 // RFC 3986 is used for path encoding (spaces as %20, not +). 829 return Arrays.stream(path.split("/", -1)) 830 .map(segment -> URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20")) 831 .collect(Collectors.joining("/")); 832 } 833 834 /** 835 * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s. 836 * <p> 837 * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then converted to 838 * {@link Locale} instances that represent the client-supplied language tags. Wildcard ranges are ignored unless 839 * they include a language component (e.g. {@code en-*} becomes {@code en}). On parse failure, an empty list is 840 * returned. 
841 * 842 * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null}) 843 * @return locales in descending preference order; empty if none could be resolved 844 */ 845 @NonNull 846 public static List<@NonNull Locale> extractLocalesFromAcceptLanguageHeaderValue(@NonNull String acceptLanguageHeaderValue) { 847 requireNonNull(acceptLanguageHeaderValue); 848 849 try { 850 List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue); 851 List<Locale> locales = new ArrayList<>(languageRanges.size()); 852 853 for (LanguageRange languageRange : languageRanges) { 854 if (!(languageRange.getWeight() > 0.0)) 855 continue; 856 857 String range = languageRange.getRange(); 858 String languageTag = range; 859 860 if (range.indexOf('*') != -1) { 861 int wildcardIndex = range.indexOf('*'); 862 863 if (wildcardIndex == 0) 864 continue; 865 866 int languageEndIndex = range.indexOf('-'); 867 868 if (languageEndIndex == -1 || languageEndIndex > wildcardIndex) 869 languageEndIndex = wildcardIndex; 870 871 languageTag = range.substring(0, languageEndIndex); 872 } 873 874 if (languageTag.isBlank()) 875 continue; 876 877 Locale locale = Locale.forLanguageTag(languageTag); 878 879 if (!locale.getLanguage().isBlank() && !locales.contains(locale)) 880 locales.add(locale); 881 } 882 883 return Collections.unmodifiableList(locales); 884 } catch (Exception ignored) { 885 return List.of(); 886 } 887 } 888 889 @Nullable 890 private static String firstHeaderValue(@Nullable Set<String> headerValues) { 891 if (headerValues == null || headerValues.isEmpty()) 892 return null; 893 894 for (String value : headerValues) { 895 String trimmed = trimAggressivelyToNull(value); 896 if (trimmed == null) 897 continue; 898 899 for (String part : splitCommaAware(trimmed)) { 900 String candidate = trimAggressivelyToNull(part); 901 if (candidate != null) 902 return candidate; 903 } 904 } 905 906 return null; 907 } 908 909 /** 910 * Best-effort attempt to determine a 
/**
 * Best-effort attempt to determine a client's effective origin by examining request headers.
 * <p>
 * An effective origin in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components.
 * <p>
 * Soklet is generally the "last hop" behind a load balancer/reverse proxy but may also be accessed directly by clients.
 * <p>
 * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the
 * request through headers like the following:
 * <ul>
 *   <li>{@code Host}</li>
 *   <li>{@code Forwarded}</li>
 *   <li>{@code Origin}</li>
 *   <li>{@code X-Forwarded-Proto}</li>
 *   <li>{@code X-Forwarded-Protocol}</li>
 *   <li>{@code X-Url-Scheme}</li>
 *   <li>{@code Front-End-Https}</li>
 *   <li>{@code X-Forwarded-Ssl}</li>
 *   <li>{@code X-Forwarded-Host}</li>
 *   <li>{@code X-Forwarded-Port}</li>
 * </ul>
 * <p>
 * This method may take these and other headers into account when determining an effective origin.
 * <p>
 * For example, the following would be legal effective origins returned from this method:
 * <ul>
 *   <li>{@code https://www.soklet.com}</li>
 *   <li>{@code http://www.fake.com:1234}</li>
 * </ul>
 * <p>
 * The following would NOT be legal effective origins:
 * <ul>
 *   <li>{@code www.soklet.com} (missing protocol) </li>
 *   <li>{@code https://www.soklet.com/} (trailing slash)</li>
 *   <li>{@code https://www.soklet.com/test} (path)</li>
 *   <li>{@code https://www.soklet.com/test?abc=1234} (path, query)</li>
 * </ul>
 * <p>
 * {@code Origin} is treated as a fallback signal only and will not override a conflicting {@code Host} or forwarded host value.
 * <p>
 * Forwarded headers are only used when permitted by {@link EffectiveOriginResolver.TrustPolicy}. When using
 * {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}, you must provide a trusted proxy predicate or allowlist.
 * If the remote address is missing or not trusted, forwarded headers are ignored.
 * <p>
 * Extraction order is: trusted forwarded headers → {@code Host} → (optional) {@code Origin} fallback.
 * If {@link EffectiveOriginResolver#allowOriginFallback(Boolean)} is unset, {@code Origin} fallback is enabled only for
 * {@link EffectiveOriginResolver.TrustPolicy#TRUST_ALL}.
 * <p>
 * A port is only included in the result when it parses as an integer in the range 1-65535; a default port
 * (80 for {@code http}, 443 for {@code https}) is kept only when a header supplied it explicitly.
 *
 * @param effectiveOriginResolver request headers and trust settings
 * @return the effective origin, or {@link Optional#empty()} if it could not be determined
 * @throws IllegalStateException if the policy is {@link EffectiveOriginResolver.TrustPolicy#TRUST_PROXY_ALLOWLIST}
 *                               and no trusted proxy predicate or allowlist was configured
 */
@NonNull
public static Optional<String> extractEffectiveOrigin(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
  requireNonNull(effectiveOriginResolver);
  requireNonNull(effectiveOriginResolver.headers);
  requireNonNull(effectiveOriginResolver.trustPolicy);

  // Fail loudly on misconfiguration rather than silently ignoring forwarded headers
  if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST
      && effectiveOriginResolver.trustedProxyPredicate == null) {
    throw new IllegalStateException(format("%s policy requires a trusted proxy predicate or allowlist.",
        EffectiveOriginResolver.TrustPolicy.TRUST_PROXY_ALLOWLIST));
  }

  Map<String, Set<String>> headers = effectiveOriginResolver.headers;
  boolean trustForwardedHeaders = shouldTrustForwardedHeaders(effectiveOriginResolver);
  // An explicit setting wins; when unset, Origin fallback is on only for TRUST_ALL
  boolean allowOriginFallback = effectiveOriginResolver.allowOriginFallback != null
      ? effectiveOriginResolver.allowOriginFallback
      : effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_ALL;

  // Host                   developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
  // Forwarded              by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89)
  // Origin                 null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port>
  // X-Forwarded-Proto      https
  // X-Forwarded-Protocol   https (Microsoft's alternate name)
  // X-Url-Scheme           https (Microsoft's alternate name)
  // Front-End-Https        on (Microsoft's alternate name)
  // X-Forwarded-Ssl        on (Microsoft's alternate name)
  // X-Forwarded-Host       id42.example-cdn.com
  // X-Forwarded-Port       443

  // Each of these is filled by the first source below that provides it; later sources only fill what is still null
  String protocol = null;
  String host = null;
  String portAsString = null;
  // Tracks whether the port came from a header (as opposed to never being supplied)
  Boolean portExplicit = false;

  // Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https>
  if (trustForwardedHeaders) {
    Set<String> forwardedHeaders = headers.get("Forwarded");
    if (forwardedHeaders != null) {
      forwardedHeaderLoop:
      for (String forwardedHeader : forwardedHeaders) {
        String trimmed = trimAggressivelyToNull(forwardedHeader);
        if (trimmed == null)
          continue;

        for (String forwardedEntry : splitCommaAware(trimmed)) {
          String entry = trimAggressivelyToNull(forwardedEntry);
          if (entry == null)
            continue;

          String entryHost = null;
          String entryProtocol = null;
          String entryPortAsString = null;
          Boolean entryPortExplicit = false;

          // Each field component might look like "by=<identifier>"
          List<String> forwardedHeaderFieldComponents = splitSemicolonAware(entry);
          for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) {
            forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent);
            if (forwardedHeaderFieldComponent == null)
              continue;

            // Break "by=<identifier>" into "by" and "<identifier>" pieces
            String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */), 2);
            if (forwardedHeaderFieldNameAndValue.length != 2)
              continue;

            String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]);
            String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]);
            if (name == null || value == null)
              continue;

            // Only "host" and "proto" are consumed; within one entry the first occurrence of each wins
            if ("host".equalsIgnoreCase(name)) {
              if (entryHost == null) {
                HostPort hostPort = parseHostPort(value).orElse(null);

                if (hostPort != null) {
                  entryHost = hostPort.getHost();

                  if (hostPort.getPort().isPresent()) {
                    entryPortAsString = String.valueOf(hostPort.getPort().get());
                    entryPortExplicit = true;
                  }
                }
              }
            } else if ("proto".equalsIgnoreCase(name)) {
              if (entryProtocol == null)
                entryProtocol = stripOptionalQuotes(value);
            }
          }

          // The first entry that yields a host or a protocol is used and all remaining entries/headers are skipped
          if (entryHost != null || entryProtocol != null) {
            host = entryHost;
            protocol = entryProtocol;
            if (entryPortAsString != null) {
              portAsString = entryPortAsString;
              portExplicit = entryPortExplicit;
            }
            break forwardedHeaderLoop;
          }
        }
      }
    }
  }

  // X-Forwarded-Proto: https
  if (trustForwardedHeaders && protocol == null) {
    String xForwardedProtoHeader = firstHeaderValue(headers.get("X-Forwarded-Proto"));
    if (xForwardedProtoHeader != null)
      protocol = stripOptionalQuotes(xForwardedProtoHeader);
  }

  // X-Forwarded-Protocol: https (Microsoft's alternate name)
  if (trustForwardedHeaders && protocol == null) {
    String xForwardedProtocolHeader = firstHeaderValue(headers.get("X-Forwarded-Protocol"));
    if (xForwardedProtocolHeader != null)
      protocol = stripOptionalQuotes(xForwardedProtocolHeader);
  }

  // X-Url-Scheme: https (Microsoft's alternate name)
  if (trustForwardedHeaders && protocol == null) {
    String xUrlSchemeHeader = firstHeaderValue(headers.get("X-Url-Scheme"));
    if (xUrlSchemeHeader != null)
      protocol = stripOptionalQuotes(xUrlSchemeHeader);
  }

  // Front-End-Https: on (Microsoft's alternate name)
  // Any value other than "on" is taken to mean plain http
  if (trustForwardedHeaders && protocol == null) {
    String frontEndHttpsHeader = firstHeaderValue(headers.get("Front-End-Https"));
    if (frontEndHttpsHeader != null)
      protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http";
  }

  // X-Forwarded-Ssl: on (Microsoft's alternate name)
  // Any value other than "on" is taken to mean plain http
  if (trustForwardedHeaders && protocol == null) {
    String xForwardedSslHeader = firstHeaderValue(headers.get("X-Forwarded-Ssl"));
    if (xForwardedSslHeader != null)
      protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? "https" : "http";
  }

  // X-Forwarded-Host: id42.example-cdn.com (or with port / IPv6)
  if (trustForwardedHeaders && host == null) {
    String xForwardedHostHeader = firstHeaderValue(headers.get("X-Forwarded-Host"));
    if (xForwardedHostHeader != null) {
      HostPort hostPort = parseHostPort(xForwardedHostHeader).orElse(null);

      if (hostPort != null) {
        host = hostPort.getHost();

        if (hostPort.getPort().isPresent() && portAsString == null) {
          portAsString = String.valueOf(hostPort.getPort().get());
          portExplicit = true;
        }
      }
    }
  }

  // X-Forwarded-Port: 443
  if (trustForwardedHeaders && portAsString == null) {
    String xForwardedPortHeader = firstHeaderValue(headers.get("X-Forwarded-Port"));
    if (xForwardedPortHeader != null) {
      portAsString = stripOptionalQuotes(xForwardedPortHeader);
      portExplicit = true;
    }
  }

  // Host: developer.mozilla.org OR developer.mozilla.org:443 OR [2001:db8::1]:8443
  // Consulted regardless of trust policy, but only if no forwarded header already supplied a host
  if (host == null) {
    String hostHeader = firstHeaderValue(headers.get("Host"));

    if (hostHeader != null) {
      HostPort hostPort = parseHostPort(hostHeader).orElse(null);

      if (hostPort != null) {
        host = hostPort.getHost();

        if (hostPort.getPort().isPresent() && portAsString == null) {
          portAsString = String.valueOf(hostPort.getPort().get());
          portExplicit = true;
        }
      }
    }
  }

  // Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> (IPv6 supported)
  // Use Origin only when host is missing or when it matches the Host-derived value.
  if (allowOriginFallback && (protocol == null || host == null || portAsString == null)) {
    String originHeader = firstHeaderValue(headers.get("Origin"));

    if (originHeader != null) {
      try {
        URI o = new URI(originHeader);
        String originProtocol = trimAggressivelyToNull(o.getScheme());
        String originHost = o.getHost(); // may be bracketed already on some JDKs
        int originPort = o.getPort(); // -1 if absent

        // Normalize IPv6 literals to bracketed form so they can be compared with, and used like, a Host value
        if (originHost != null) {
          boolean alreadyBracketed = originHost.startsWith("[") && originHost.endsWith("]");
          boolean isIpv6Like = originHost.indexOf(':') >= 0; // contains colon(s)
          originHost = (isIpv6Like && !alreadyBracketed) ? "[" + originHost + "]" : originHost;
        }

        boolean hostMatchesOrigin = host != null && originHost != null && host.equalsIgnoreCase(originHost);

        if (host == null) {
          // No host from any other source: take whatever Origin provides (it overwrites protocol/port here)
          if (originHost != null)
            host = originHost;
          if (originProtocol != null)
            protocol = originProtocol;
          if (originPort >= 0) {
            portAsString = String.valueOf(originPort);
            portExplicit = true;
          }
        } else if (hostMatchesOrigin) {
          // Same host: Origin may only fill in a protocol/port that is still missing
          if (protocol == null && originProtocol != null)
            protocol = originProtocol;
          if (portAsString == null && originPort >= 0) {
            portAsString = String.valueOf(originPort);
            portExplicit = true;
          }
        }
      } catch (URISyntaxException ignored) {
        // no-op
      }
    }
  }

  Integer port = null;

  if (portAsString != null) {
    try {
      int parsedPort = Integer.parseInt(portAsString, 10);
      // Out-of-range values are discarded, leaving the origin without a port
      if (parsedPort >= 1 && parsedPort <= 65535)
        port = parsedPort;
    } catch (Exception ignored) {
      // Not an integer; ignore it
    }
  }

  if (protocol != null && host != null && port == null) {
    return Optional.of(format("%s://%s", protocol, host));
  }

  if (protocol != null && host != null && port != null) {
    boolean usingDefaultPort =
        ("http".equalsIgnoreCase(protocol) && port.equals(80)) ||
            ("https".equalsIgnoreCase(protocol) && port.equals(443));

    // Keep default ports if the client/proxy explicitly sent them
    String effectiveOrigin = (usingDefaultPort && !portExplicit)
        ? format("%s://%s", protocol, host)
        : format("%s://%s:%s", protocol, host, port);

    return Optional.of(effectiveOrigin);
  }

  // Either the protocol or the host could not be determined
  return Optional.empty();
}

/**
 * Decides whether forwarded headers may be consulted for the given resolver.
 * <p>
 * {@code TRUST_ALL} always trusts and {@code TRUST_NONE} never does. For any other policy, the configured trusted
 * proxy predicate is applied to the remote address; if either is missing, the headers are not trusted.
 *
 * @param effectiveOriginResolver the resolver holding the trust policy, remote address and predicate
 * @return {@code true} if forwarded headers should be used
 */
private static boolean shouldTrustForwardedHeaders(@NonNull EffectiveOriginResolver effectiveOriginResolver) {
  if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_ALL)
    return true;

  if (effectiveOriginResolver.trustPolicy == EffectiveOriginResolver.TrustPolicy.TRUST_NONE)
    return false;

  if (effectiveOriginResolver.remoteAddress == null || effectiveOriginResolver.trustedProxyPredicate == null)
    return false;

  return effectiveOriginResolver.trustedProxyPredicate.test(effectiveOriginResolver.remoteAddress);
}
{@code Origin} 1242 * never overrides a conflicting host value; it only fills missing scheme/port or supplies host when absent. 1243 * <p> 1244 * Defaults: if {@link #allowOriginFallback(Boolean)} is left unset, {@code Origin} fallback is enabled only for 1245 * {@link TrustPolicy#TRUST_ALL}; otherwise it is disabled. 1246 */ 1247 @NotThreadSafe 1248 public static final class EffectiveOriginResolver { 1249 @NonNull 1250 private final Map<@NonNull String, @NonNull Set<@NonNull String>> headers; 1251 @NonNull 1252 private final TrustPolicy trustPolicy; 1253 @Nullable 1254 private InetSocketAddress remoteAddress; 1255 @Nullable 1256 private Predicate<InetSocketAddress> trustedProxyPredicate; 1257 @Nullable 1258 private Boolean allowOriginFallback; 1259 1260 /** 1261 * Acquires a builder seeded with raw request headers and a trust policy. 1262 * 1263 * @param headers HTTP request headers 1264 * @param trustPolicy how forwarded headers should be trusted 1265 * @return the builder 1266 */ 1267 @NonNull 1268 public static EffectiveOriginResolver withHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1269 @NonNull TrustPolicy trustPolicy) { 1270 requireNonNull(headers); 1271 requireNonNull(trustPolicy); 1272 return new EffectiveOriginResolver(headers, trustPolicy); 1273 } 1274 1275 /** 1276 * Acquires a builder seeded with a {@link Request} and a trust policy. 
1277 * 1278 * @param request the current request 1279 * @param trustPolicy how forwarded headers should be trusted 1280 * @return the builder 1281 */ 1282 @NonNull 1283 public static EffectiveOriginResolver withRequest(@NonNull Request request, 1284 @NonNull TrustPolicy trustPolicy) { 1285 requireNonNull(request); 1286 EffectiveOriginResolver resolver = withHeaders(request.getHeaders(), trustPolicy); 1287 resolver.remoteAddress = request.getRemoteAddress().orElse(null); 1288 return resolver; 1289 } 1290 1291 private EffectiveOriginResolver(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers, 1292 @NonNull TrustPolicy trustPolicy) { 1293 this.headers = new LinkedCaseInsensitiveMap<>(headers); 1294 this.trustPolicy = trustPolicy; 1295 } 1296 1297 /** 1298 * The remote address of the client connection. 1299 * 1300 * @param remoteAddress the remote address, or {@code null} if unavailable 1301 * @return this builder 1302 */ 1303 @NonNull 1304 public EffectiveOriginResolver remoteAddress(@Nullable InetSocketAddress remoteAddress) { 1305 this.remoteAddress = remoteAddress; 1306 return this; 1307 } 1308 1309 /** 1310 * Predicate used when {@link TrustPolicy#TRUST_PROXY_ALLOWLIST} is in effect. 1311 * 1312 * @param trustedProxyPredicate predicate that returns {@code true} for trusted proxies 1313 * @return this builder 1314 */ 1315 @NonNull 1316 public EffectiveOriginResolver trustedProxyPredicate(@Nullable Predicate<InetSocketAddress> trustedProxyPredicate) { 1317 this.trustedProxyPredicate = trustedProxyPredicate; 1318 return this; 1319 } 1320 1321 /** 1322 * Allows specifying an IP allowlist for trusted proxies. 
1323 * 1324 * @param trustedProxyAddresses IP addresses of trusted proxies 1325 * @return this builder 1326 */ 1327 @NonNull 1328 public EffectiveOriginResolver trustedProxyAddresses(@NonNull Set<@NonNull InetAddress> trustedProxyAddresses) { 1329 requireNonNull(trustedProxyAddresses); 1330 Set<InetAddress> normalizedAddresses = Set.copyOf(trustedProxyAddresses); 1331 this.trustedProxyPredicate = remoteAddress -> { 1332 if (remoteAddress == null) 1333 return false; 1334 1335 InetAddress address = remoteAddress.getAddress(); 1336 return address != null && normalizedAddresses.contains(address); 1337 }; 1338 return this; 1339 } 1340 1341 /** 1342 * Controls whether {@code Origin} is used as a fallback signal when determining the client URL prefix. 1343 * 1344 * @param allowOriginFallback {@code true} to allow {@code Origin} fallback, {@code false} to disable it 1345 * @return this builder 1346 */ 1347 @NonNull 1348 public EffectiveOriginResolver allowOriginFallback(@Nullable Boolean allowOriginFallback) { 1349 this.allowOriginFallback = allowOriginFallback; 1350 return this; 1351 } 1352 1353 /** 1354 * Forwarded header trust policy. 1355 */ 1356 public enum TrustPolicy { 1357 /** 1358 * Trust forwarded headers from any source. 1359 */ 1360 TRUST_ALL, 1361 1362 /** 1363 * Trust forwarded headers only from proxies in a configured allowlist. 1364 */ 1365 TRUST_PROXY_ALLOWLIST, 1366 1367 /** 1368 * Ignore forwarded headers entirely. 1369 */ 1370 TRUST_NONE 1371 } 1372 } 1373 1374 /** 1375 * Extracts the media type (without parameters) from the first {@code Content-Type} header. 1376 * <p> 1377 * For example, {@code "text/html; charset=UTF-8"} → {@code "text/html"}. 
1378 * 1379 * @param headers request/response headers (must be non-{@code null}) 1380 * @return the media type if present; otherwise {@link Optional#empty()} 1381 * @see #extractContentTypeFromHeaderValue(String) 1382 */ 1383 @NonNull 1384 public static Optional<String> extractContentTypeFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1385 requireNonNull(headers); 1386 1387 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1388 1389 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1390 return Optional.empty(); 1391 1392 return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1393 } 1394 1395 /** 1396 * Extracts the media type (without parameters) from a {@code Content-Type} header value. 1397 * <p> 1398 * For example, {@code "application/json; charset=UTF-8"} → {@code "application/json"}. 1399 * 1400 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1401 * @return the media type if present; otherwise {@link Optional#empty()} 1402 */ 1403 @NonNull 1404 public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1405 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1406 1407 if (contentTypeHeaderValue == null) 1408 return Optional.empty(); 1409 1410 // Examples 1411 // Content-Type: text/html; charset=UTF-8 1412 // Content-Type: multipart/form-data; boundary=something 1413 1414 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1415 1416 // Simple case, e.g. "text/html" 1417 if (indexOfSemicolon == -1) 1418 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue)); 1419 1420 // More complex case, e.g. 
"text/html; charset=UTF-8" 1421 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon))); 1422 } 1423 1424 /** 1425 * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid. 1426 * <p> 1427 * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}. 1428 * 1429 * @param headers request/response headers (must be non-{@code null}) 1430 * @return the charset declared by the header; otherwise {@link Optional#empty()} 1431 * @see #extractCharsetFromHeaderValue(String) 1432 */ 1433 @NonNull 1434 public static Optional<Charset> extractCharsetFromHeaders(@NonNull Map<@NonNull String, @NonNull Set<@NonNull String>> headers) { 1435 requireNonNull(headers); 1436 1437 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 1438 1439 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 1440 return Optional.empty(); 1441 1442 return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 1443 } 1444 1445 /** 1446 * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value. 1447 * <p> 1448 * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found, 1449 * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}. 
1450 * 1451 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 1452 * @return the resolved charset if present and valid; otherwise {@link Optional#empty()} 1453 */ 1454 @NonNull 1455 public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) { 1456 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 1457 1458 if (contentTypeHeaderValue == null) 1459 return Optional.empty(); 1460 1461 // Examples 1462 // Content-Type: text/html; charset=UTF-8 1463 // Content-Type: multipart/form-data; boundary=something 1464 1465 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 1466 1467 // Simple case, e.g. "text/html" 1468 if (indexOfSemicolon == -1) 1469 return Optional.empty(); 1470 1471 // More complex case, e.g. "text/html; charset=UTF-8" or "multipart/form-data; charset=UTF-8; boundary=something" 1472 boolean finishedContentType = false; 1473 boolean finishedCharsetName = false; 1474 StringBuilder buffer = new StringBuilder(); 1475 String charsetName = null; 1476 1477 for (int i = 0; i < contentTypeHeaderValue.length(); i++) { 1478 char c = contentTypeHeaderValue.charAt(i); 1479 1480 if (Character.isWhitespace(c)) 1481 continue; 1482 1483 if (c == ';') { 1484 // No content type yet? This just be it... 1485 if (!finishedContentType) { 1486 finishedContentType = true; 1487 buffer = new StringBuilder(); 1488 } else if (!finishedCharsetName) { 1489 if (buffer.indexOf("charset=") == 0) { 1490 charsetName = buffer.toString(); 1491 finishedCharsetName = true; 1492 break; 1493 } 1494 } 1495 } else { 1496 buffer.append(Character.toLowerCase(c)); 1497 } 1498 } 1499 1500 // Handle case where charset is the end of the string, e.g. 
"whatever;charset=UTF-8" 1501 if (!finishedCharsetName) { 1502 String potentialCharset = trimAggressivelyToNull(buffer.toString()); 1503 if (potentialCharset != null && potentialCharset.startsWith("charset=")) { 1504 finishedCharsetName = true; 1505 charsetName = potentialCharset; 1506 } 1507 } 1508 1509 if (finishedCharsetName) { 1510 // e.g. charset=UTF-8 or charset="UTF-8" or charset='UTF-8' 1511 String possibleCharsetName = trimAggressivelyToNull(charsetName.replace("charset=", "")); 1512 1513 if (possibleCharsetName != null) { 1514 // strip optional surrounding quotes 1515 if ((possibleCharsetName.length() >= 2) && 1516 ((possibleCharsetName.charAt(0) == '"' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '"') || 1517 (possibleCharsetName.charAt(0) == '\'' && possibleCharsetName.charAt(possibleCharsetName.length() - 1) == '\''))) { 1518 possibleCharsetName = possibleCharsetName.substring(1, possibleCharsetName.length() - 1); 1519 possibleCharsetName = trimAggressivelyToNull(possibleCharsetName); 1520 } 1521 1522 if (possibleCharsetName != null) { 1523 try { 1524 return Optional.of(Charset.forName(possibleCharsetName)); 1525 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) { 1526 return Optional.empty(); 1527 } 1528 } 1529 } 1530 } 1531 1532 return Optional.empty(); 1533 } 1534 1535 /** 1536 * A "stronger" version of {@link String#trim()} which discards any kind of whitespace or invisible separator. 1537 * <p> 1538 * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time. 1539 * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F 1540 * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing. 1541 * <p> 1542 * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details. 
1543 * 1544 * @param string the string to trim 1545 * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0} 1546 */ 1547 @Nullable 1548 public static String trimAggressively(@Nullable String string) { 1549 if (string == null) 1550 return null; 1551 1552 string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1553 1554 if (string.length() == 0) 1555 return string; 1556 1557 string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 1558 1559 return string; 1560 } 1561 1562 /** 1563 * Aggressively trims Unicode whitespace from the given string and returns {@code null} if the result is empty. 1564 * <p> 1565 * See {@link #trimAggressively(String)} for details on which code points are removed. 1566 * 1567 * @param string the input string; may be {@code null} 1568 * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty 1569 */ 1570 @Nullable 1571 public static String trimAggressivelyToNull(@Nullable String string) { 1572 if (string == null) 1573 return null; 1574 1575 string = trimAggressively(string); 1576 return string.length() == 0 ? null : string; 1577 } 1578 1579 /** 1580 * Aggressively trims Unicode whitespace from the given string and returns {@code ""} if the input is {@code null}. 1581 * <p> 1582 * See {@link #trimAggressively(String)} for details on which code points are removed. 
1583 * 1584 * @param string the input string; may be {@code null} 1585 * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null} 1586 */ 1587 @NonNull 1588 public static String trimAggressivelyToEmpty(@Nullable String string) { 1589 if (string == null) 1590 return ""; 1591 1592 return trimAggressively(string); 1593 } 1594 1595 static void validateHeaderNameAndValue(@Nullable String name, 1596 @Nullable String value) { 1597 // First, validate name: 1598 name = trimAggressivelyToNull(name); 1599 1600 if (name == null) 1601 throw new IllegalArgumentException("Header name is blank"); 1602 1603 for (int i = 0; i < name.length(); i++) { 1604 char c = name.charAt(i); 1605 // RFC 9110 tchar: "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 1606 if (c > 0x7F || !(c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '*' || c == '+' || 1607 c == '-' || c == '.' || c == '^' || c == '_' || c == '`' || c == '|' || c == '~' || 1608 Character.isLetterOrDigit(c))) { 1609 throw new IllegalArgumentException(format("Illegal header name '%s'. Offending character: '%s'", name, printableChar(c))); 1610 } 1611 } 1612 1613 // Then, validate value: 1614 if (value == null) 1615 return; 1616 1617 for (int i = 0; i < value.length(); i++) { 1618 char c = value.charAt(i); 1619 if (c == '\r' || c == '\n' || c == 0x00 || c > 0xFF || (c >= 0x00 && c < 0x20 && c != '\t')) { 1620 throw new IllegalArgumentException(format("Illegal header value '%s' for header name '%s'. 
Offending character: '%s'", value, name, printableChar(c))); 1621 } 1622 } 1623 1624 // Percent-encoded control sequence checks 1625 Matcher m = HEADER_PERCENT_ENCODING_PATTERN.matcher(value); 1626 1627 while (m.find()) { 1628 int b = Integer.parseInt(m.group(1), 16); 1629 if (b == 0x0D || b == 0x0A || b == 0x00 || (b >= 0x00 && b < 0x20 && b != 0x09)) { 1630 throw new IllegalArgumentException(format( 1631 "Illegal (percent-encoded) header value '%s' for header name '%s'. Offending octet: 0x%02X", 1632 value, name, b)); 1633 } 1634 } 1635 } 1636 1637 @NonNull 1638 static String printableString(@NonNull String input) { 1639 requireNonNull(input); 1640 1641 StringBuilder out = new StringBuilder(input.length() + 16); 1642 1643 for (int i = 0; i < input.length(); i++) 1644 out.append(printableChar(input.charAt(i))); 1645 1646 return out.toString(); 1647 } 1648 1649 @NonNull 1650 static String printableChar(char c) { 1651 if (c == '\r') return "\\r"; 1652 if (c == '\n') return "\\n"; 1653 if (c == '\t') return "\\t"; 1654 if (c == '\f') return "\\f"; 1655 if (c == '\b') return "\\b"; 1656 if (c == '\\') return "\\\\"; 1657 if (c == '\'') return "\\'"; 1658 if (c == '\"') return "\\\""; 1659 if (c == 0) return "\\0"; 1660 1661 if (c < 0x20 || c == 0x7F) // control chars 1662 return String.format("\\u%04X", (int) c); 1663 1664 if (Character.isISOControl(c) || Character.getType(c) == Character.FORMAT) 1665 return String.format("\\u%04X", (int) c); 1666 1667 return String.valueOf(c); 1668 } 1669 1670 @NonNull 1671 private static final Set<String> COMMA_JOINABLE_HEADER_NAMES = Set.of( 1672 // Common list-type headers (RFC 7230/9110) 1673 "accept", 1674 "accept-encoding", 1675 "accept-language", 1676 "cache-control", 1677 "pragma", 1678 "vary", 1679 "connection", 1680 "transfer-encoding", 1681 "upgrade", 1682 "allow", 1683 "via", 1684 "warning" 1685 // intentionally NOT: set-cookie, authorization, cookie, content-disposition, location 1686 ); 1687 1688 /** 1689 * Given a list 
of raw HTTP header lines, convert them into a normalized case-insensitive, order-preserving map which "inflates" comma-separated headers into distinct values where permitted according to RFC 7230/9110. 1690 * <p> 1691 * For example, given these raw header lines: 1692 * <pre>{@code List<String> lines = List.of( 1693 * "Cache-Control: no-cache, no-store", 1694 * "Set-Cookie: a=b; Path=/; HttpOnly", 1695 * "Set-Cookie: c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/" 1696 * );}</pre> 1697 * The result of parsing would look like this: 1698 * <pre>{@code result.get("cache-control") -> [ 1699 * "no-cache", 1700 * "no-store" 1701 * ] 1702 * result.get("set-cookie") -> [ 1703 * "a=b; Path=/; HttpOnly", 1704 * "c=d; Expires=Wed, 21 Oct 2015 07:28:00 GMT; Path=/" 1705 * ]}</pre> 1706 * <p> 1707 * Keys in the returned map are case-insensitive and are guaranteed to be in the same order as encountered in {@code rawHeaderLines}. 1708 * <p> 1709 * Values in the returned map are guaranteed to be in the same order as encountered in {@code rawHeaderLines}. 
1710 * 1711 * @param rawHeaderLines the raw HTTP header lines to parse 1712 * @return a normalized mapping of header name keys to values 1713 */ 1714 @NonNull 1715 public static Map<@NonNull String, @NonNull Set<@NonNull String>> extractHeadersFromRawHeaderLines(@NonNull List<@NonNull String> rawHeaderLines) { 1716 requireNonNull(rawHeaderLines); 1717 1718 // 1) Unfold obsolete folded lines (obs-fold): lines beginning with SP/HT are continuations 1719 List<String> lines = unfold(rawHeaderLines); 1720 1721 // 2) Parse into map 1722 Map<String, Set<String>> headers = new LinkedCaseInsensitiveMap<>(); 1723 1724 for (String raw : lines) { 1725 String line = trimAggressivelyToNull(raw); 1726 1727 if (line == null) 1728 continue; 1729 1730 int idx = line.indexOf(':'); 1731 1732 if (idx <= 0) 1733 continue; // skip malformed 1734 1735 String key = trimAggressivelyToEmpty(line.substring(0, idx)); // keep original case for display 1736 String keyLowercase = key.toLowerCase(Locale.ROOT); 1737 String value = trimAggressivelyToNull(line.substring(idx + 1)); 1738 1739 if (value == null) 1740 continue; 1741 1742 Set<String> bucket = headers.computeIfAbsent(key, k -> new LinkedHashSet<>()); 1743 1744 if (COMMA_JOINABLE_HEADER_NAMES.contains(keyLowercase)) { 1745 for (String part : splitCommaAware(value)) { 1746 String v = trimAggressivelyToNull(part); 1747 if (v != null) 1748 bucket.add(v); 1749 } 1750 } else { 1751 bucket.add(value.trim()); 1752 } 1753 } 1754 1755 return headers; 1756 } 1757 1758 /** 1759 * Header parsing helper 1760 */ 1761 @NonNull 1762 private static List<String> unfold(@NonNull List<String> raw) { 1763 requireNonNull(raw); 1764 if (raw.isEmpty()) return List.of(); 1765 1766 List<String> out = new ArrayList<>(raw.size()); 1767 StringBuilder cur = null; 1768 boolean curIsHeader = false; 1769 1770 for (String line : raw) { 1771 if (line == null) continue; 1772 1773 boolean isContinuation = !line.isEmpty() && (line.charAt(0) == ' ' || line.charAt(0) == '\t'); 
1774 if (isContinuation) { 1775 if (cur != null && curIsHeader) { 1776 cur.append(' ').append(line.trim()); 1777 } else { 1778 // Do not fold into a non-header; flush previous and start anew 1779 if (cur != null) out.add(cur.toString()); 1780 cur = new StringBuilder(line); 1781 curIsHeader = line.indexOf(':') > 0; // almost certainly false for leading-space lines 1782 } 1783 } else { 1784 if (cur != null) out.add(cur.toString()); 1785 cur = new StringBuilder(line); 1786 curIsHeader = line.indexOf(':') > 0; 1787 } 1788 } 1789 if (cur != null) out.add(cur.toString()); 1790 return out; 1791 } 1792 1793 /** 1794 * Header parsing helper: split on commas that are not inside a quoted-string; supports \" escapes inside quotes. 1795 */ 1796 @NonNull 1797 private static List<String> splitCommaAware(@NonNull String string) { 1798 requireNonNull(string); 1799 1800 List<String> out = new ArrayList<>(4); 1801 StringBuilder cur = new StringBuilder(); 1802 boolean inQuotes = false; 1803 boolean escaped = false; 1804 1805 for (int i = 0; i < string.length(); i++) { 1806 char c = string.charAt(i); 1807 1808 if (escaped) { 1809 // Preserve the escaped char as-is 1810 cur.append(c); 1811 escaped = false; 1812 } else if (c == '\\') { 1813 if (inQuotes) { 1814 // Preserve the backslash itself, then mark next char as escaped 1815 cur.append('\\'); // ← keep the backslash 1816 escaped = true; 1817 } else { 1818 cur.append('\\'); // literal backslash outside quotes 1819 } 1820 } else if (c == '"') { 1821 inQuotes = !inQuotes; 1822 cur.append('"'); 1823 } else if (c == ',' && !inQuotes) { 1824 out.add(cur.toString()); 1825 cur.setLength(0); 1826 } else { 1827 cur.append(c); 1828 } 1829 } 1830 out.add(cur.toString()); 1831 return out; 1832 } 1833 1834 /** 1835 * Header parsing helper: split on semicolons that are not inside a quoted-string; supports \" escapes inside quotes. 
1836 */ 1837 @NonNull 1838 private static List<String> splitSemicolonAware(@NonNull String string) { 1839 requireNonNull(string); 1840 1841 List<String> out = new ArrayList<>(4); 1842 StringBuilder cur = new StringBuilder(); 1843 boolean inQuotes = false; 1844 boolean escaped = false; 1845 1846 for (int i = 0; i < string.length(); i++) { 1847 char c = string.charAt(i); 1848 1849 if (escaped) { 1850 cur.append(c); 1851 escaped = false; 1852 } else if (c == '\\') { 1853 if (inQuotes) { 1854 cur.append('\\'); 1855 escaped = true; 1856 } else { 1857 cur.append('\\'); 1858 } 1859 } else if (c == '"') { 1860 inQuotes = !inQuotes; 1861 cur.append('"'); 1862 } else if (c == ';' && !inQuotes) { 1863 out.add(cur.toString()); 1864 cur.setLength(0); 1865 } else { 1866 cur.append(c); 1867 } 1868 } 1869 1870 out.add(cur.toString()); 1871 return out; 1872 } 1873 1874 /** 1875 * Remove a single pair of surrounding quotes if present. 1876 */ 1877 @NonNull 1878 private static String stripOptionalQuotes(@NonNull String string) { 1879 requireNonNull(string); 1880 1881 if (string.length() >= 2) { 1882 char first = string.charAt(0), last = string.charAt(string.length() - 1); 1883 1884 if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) 1885 return string.substring(1, string.length() - 1); 1886 } 1887 1888 return string; 1889 } 1890 1891 /** 1892 * Parse host[:port] with IPv6 support: "[v6](:port)?" or "host(:port)?". 1893 * Returns host (with brackets for v6) and port (nullable). 
1894 */ 1895 @ThreadSafe 1896 private static final class HostPort { 1897 @NonNull 1898 private final String host; 1899 @Nullable 1900 private final Integer port; 1901 1902 HostPort(@NonNull String host, 1903 @Nullable Integer port) { 1904 this.host = host; 1905 this.port = port; 1906 } 1907 1908 @NonNull 1909 public String getHost() { 1910 return this.host; 1911 } 1912 1913 @NonNull 1914 public Optional<Integer> getPort() { 1915 return Optional.ofNullable(this.port); 1916 } 1917 } 1918 1919 @NonNull 1920 private static Optional<HostPort> parseHostPort(@Nullable String input) { 1921 input = trimAggressivelyToNull(input); 1922 1923 if (input == null) 1924 return Optional.empty(); 1925 1926 input = stripOptionalQuotes(input); 1927 1928 if (input.startsWith("[")) { 1929 int close = input.indexOf(']'); 1930 1931 if (close > 0) { 1932 String core = input.substring(1, close); // IPv6 literal without brackets 1933 String rest = input.substring(close + 1); // maybe ":port" 1934 String host = "[" + core + "]"; 1935 Integer port = null; 1936 1937 if (rest.startsWith(":")) { 1938 String ps = trimAggressivelyToNull(rest.substring(1)); 1939 if (ps != null) { 1940 try { 1941 port = Integer.parseInt(ps, 10); 1942 } catch (Exception ignored) { 1943 // Nothing to do 1944 } 1945 } 1946 } 1947 1948 return Optional.of(new HostPort(host, port)); 1949 } 1950 } 1951 1952 int colon = input.indexOf(':'); 1953 1954 if (colon > 0 && input.indexOf(':', colon + 1) == -1) { 1955 // exactly one ':' -> host:port (IPv4/hostname) 1956 String h = trimAggressivelyToNull(input.substring(0, colon)); 1957 String ps = trimAggressivelyToNull(input.substring(colon + 1)); 1958 Integer p = null; 1959 1960 if (ps != null) { 1961 try { 1962 p = Integer.parseInt(ps, 10); 1963 } catch (Exception ignored) { 1964 // Nothing to do 1965 } 1966 } 1967 if (h != null) 1968 return Optional.of(new HostPort(h, p)); 1969 } 1970 1971 // no port 1972 return Optional.of(new HostPort(input, null)); 1973 } 1974 1975 @NonNull 
1976 private static String removeDotSegments(@NonNull String path) { 1977 requireNonNull(path); 1978 1979 Deque<String> stack = new ArrayDeque<>(); 1980 1981 for (String seg : path.split("/")) { 1982 if (seg.isEmpty() || ".".equals(seg)) 1983 continue; 1984 1985 if ("..".equals(seg)) { 1986 if (!stack.isEmpty()) 1987 stack.removeLast(); 1988 } else { 1989 stack.addLast(seg); 1990 } 1991 } 1992 1993 return "/" + String.join("/", stack); 1994 } 1995}