001/* 002 * Copyright 2022-2025 Revetware LLC. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.soklet; 018 019import javax.annotation.Nonnull; 020import javax.annotation.Nullable; 021import javax.annotation.concurrent.ThreadSafe; 022import java.io.ByteArrayOutputStream; 023import java.lang.Thread.UncaughtExceptionHandler; 024import java.lang.invoke.MethodHandle; 025import java.lang.invoke.MethodHandles; 026import java.lang.invoke.MethodHandles.Lookup; 027import java.lang.invoke.MethodType; 028import java.net.MalformedURLException; 029import java.net.URI; 030import java.net.URISyntaxException; 031import java.net.URL; 032import java.net.URLDecoder; 033import java.nio.charset.Charset; 034import java.nio.charset.IllegalCharsetNameException; 035import java.nio.charset.StandardCharsets; 036import java.nio.charset.UnsupportedCharsetException; 037import java.util.ArrayList; 038import java.util.Collections; 039import java.util.LinkedHashMap; 040import java.util.LinkedHashSet; 041import java.util.List; 042import java.util.Locale; 043import java.util.Locale.LanguageRange; 044import java.util.Map; 045import java.util.Map.Entry; 046import java.util.Optional; 047import java.util.Set; 048import java.util.concurrent.ExecutorService; 049import java.util.concurrent.Executors; 050import java.util.concurrent.ThreadFactory; 051import java.util.regex.Pattern; 052import java.util.stream.Collectors; 053 054import static 
java.lang.String.format; 055import static java.util.Objects.requireNonNull; 056 057/** 058 * A non-instantiable collection of utility methods. 059 * 060 * @author <a href="https://www.revetkn.com">Mark Allen</a> 061 */ 062@ThreadSafe 063public final class Utilities { 064 @Nonnull 065 private static final boolean VIRTUAL_THREADS_AVAILABLE; 066 @Nonnull 067 private static final byte[] EMPTY_BYTE_ARRAY; 068 @Nonnull 069 private static final Map<String, Locale> LOCALES_BY_LANGUAGE_RANGE_RANGE; 070 @Nonnull 071 private static final Pattern HEAD_WHITESPACE_PATTERN; 072 @Nonnull 073 private static final Pattern TAIL_WHITESPACE_PATTERN; 074 075 static { 076 EMPTY_BYTE_ARRAY = new byte[0]; 077 078 Locale[] locales = Locale.getAvailableLocales(); 079 Map<String, Locale> localesByLanguageRangeRange = new LinkedHashMap<>(locales.length); 080 081 for (Locale locale : locales) { 082 LanguageRange languageRange = new LanguageRange(locale.toLanguageTag()); 083 localesByLanguageRangeRange.put(languageRange.getRange(), locale); 084 } 085 086 LOCALES_BY_LANGUAGE_RANGE_RANGE = Collections.unmodifiableMap(localesByLanguageRangeRange); 087 088 boolean virtualThreadsAvailable = false; 089 090 try { 091 // Detect if Virtual Threads are usable by feature testing via reflection. 092 // Hat tip to https://github.com/javalin/javalin for this technique 093 Class.forName("java.lang.Thread$Builder$OfVirtual"); 094 virtualThreadsAvailable = true; 095 } catch (Exception ignored) { 096 // We don't care why this failed, but if we're here we know JVM does not support virtual threads 097 } 098 099 VIRTUAL_THREADS_AVAILABLE = virtualThreadsAvailable; 100 101 // See https://www.regular-expressions.info/unicode.html 102 // \p{Z} or \p{Separator}: any kind of whitespace or invisible separator. 103 // 104 // First pattern matches all whitespace at the head of a string, second matches the same for tail. 
105 // Useful for a "stronger" trim() function, which is almost always what we want in a web context 106 // with user-supplied input. 107 HEAD_WHITESPACE_PATTERN = Pattern.compile("^(\\p{Z})+"); 108 TAIL_WHITESPACE_PATTERN = Pattern.compile("(\\p{Z})+$"); 109 } 110 111 private Utilities() { 112 // Non-instantiable 113 } 114 115 /** 116 * Does the platform runtime support virtual threads (either Java 19 and 20 w/preview enabled or Java 21+)? 117 * 118 * @return {@code true} if the runtime supports virtual threads, {@code false} otherwise 119 */ 120 @Nonnull 121 public static Boolean virtualThreadsAvailable() { 122 return VIRTUAL_THREADS_AVAILABLE; 123 } 124 125 /** 126 * Provides a virtual-thread-per-task executor service if supported by the runtime. 127 * <p> 128 * In order to support Soklet users who are not yet ready to enable virtual threads (those <strong>not</strong> running either Java 19 and 20 w/preview enabled or Java 21+), 129 * we compile Soklet with a source level < 19 and avoid any hard references to virtual threads by dynamically creating our executor service via {@link MethodHandle} references. 
130 * <p> 131 * <strong>You should not call this method if {@link Utilities#virtualThreadsAvailable()} is {@code false}.</strong> 132 * <pre>{@code // This method is effectively equivalent to this code 133 * return Executors.newThreadPerTaskExecutor( 134 * Thread.ofVirtual() 135 * .name(threadNamePrefix) 136 * .uncaughtExceptionHandler(uncaughtExceptionHandler) 137 * .factory() 138 * );}</pre> 139 * 140 * @param threadNamePrefix thread name prefix for the virtual thread factory builder 141 * @param uncaughtExceptionHandler uncaught exception handler for the virtual thread factory builder 142 * @return a virtual-thread-per-task executor service 143 * @throws IllegalStateException if the runtime environment does not support virtual threads 144 */ 145 @Nonnull 146 public static ExecutorService createVirtualThreadsNewThreadPerTaskExecutor(@Nonnull String threadNamePrefix, 147 @Nonnull UncaughtExceptionHandler uncaughtExceptionHandler) { 148 requireNonNull(threadNamePrefix); 149 requireNonNull(uncaughtExceptionHandler); 150 151 if (!virtualThreadsAvailable()) 152 throw new IllegalStateException("Virtual threads are not available. 
Please confirm you are using Java 19-20 with the '--enable-preview' javac parameter specified or Java 21+"); 153 154 // Hat tip to https://github.com/javalin/javalin for this technique 155 Class<?> threadBuilderOfVirtualClass; 156 157 try { 158 threadBuilderOfVirtualClass = Class.forName("java.lang.Thread$Builder$OfVirtual"); 159 } catch (ClassNotFoundException e) { 160 throw new IllegalStateException("Unable to load virtual thread builder class", e); 161 } 162 163 Lookup lookup = MethodHandles.publicLookup(); 164 165 MethodHandle methodHandleThreadOfVirtual; 166 MethodHandle methodHandleThreadBuilderOfVirtualName; 167 MethodHandle methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler; 168 MethodHandle methodHandleThreadBuilderOfVirtualFactory; 169 MethodHandle methodHandleExecutorsNewThreadPerTaskExecutor; 170 171 try { 172 methodHandleThreadOfVirtual = lookup.findStatic(Thread.class, "ofVirtual", MethodType.methodType(threadBuilderOfVirtualClass)); 173 methodHandleThreadBuilderOfVirtualName = lookup.findVirtual(threadBuilderOfVirtualClass, "name", MethodType.methodType(threadBuilderOfVirtualClass, String.class, long.class)); 174 methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler = lookup.findVirtual(threadBuilderOfVirtualClass, "uncaughtExceptionHandler", MethodType.methodType(threadBuilderOfVirtualClass, UncaughtExceptionHandler.class)); 175 methodHandleThreadBuilderOfVirtualFactory = lookup.findVirtual(threadBuilderOfVirtualClass, "factory", MethodType.methodType(ThreadFactory.class)); 176 methodHandleExecutorsNewThreadPerTaskExecutor = lookup.findStatic(Executors.class, "newThreadPerTaskExecutor", MethodType.methodType(ExecutorService.class, ThreadFactory.class)); 177 } catch (NoSuchMethodException | IllegalAccessException e) { 178 throw new IllegalStateException("Unable to load method handle for virtual thread factory", e); 179 } 180 181 try { 182 // Thread.ofVirtual() 183 Object virtualThreadBuilder = methodHandleThreadOfVirtual.invoke(); 184 // 
.name(threadNamePrefix, start) 185 methodHandleThreadBuilderOfVirtualName.invoke(virtualThreadBuilder, threadNamePrefix, 1); 186 // .uncaughtExceptionHandler(uncaughtExceptionHandler) 187 methodHandleThreadBuilderOfVirtualUncaughtExceptionHandler.invoke(virtualThreadBuilder, uncaughtExceptionHandler); 188 // .factory(); 189 ThreadFactory threadFactory = (ThreadFactory) methodHandleThreadBuilderOfVirtualFactory.invoke(virtualThreadBuilder); 190 191 // return Executors.newThreadPerTaskExecutor(threadFactory); 192 return (ExecutorService) methodHandleExecutorsNewThreadPerTaskExecutor.invoke(threadFactory); 193 } catch (Throwable t) { 194 throw new IllegalStateException("Unable to create virtual thread executor service", t); 195 } 196 } 197 198 /** 199 * Returns a shared zero-length {@code byte[]} instance. 200 * <p> 201 * Useful as a sentinel when you need a non-{@code null} byte array but have no content. 202 * 203 * @return a zero-length byte array (never {@code null}) 204 */ 205 @Nonnull 206 public static byte[] emptyByteArray() { 207 return EMPTY_BYTE_ARRAY; 208 } 209 210 /** 211 * Parses an {@code application/x-www-form-urlencoded} query string into a multimap of names to values. 212 * <p> 213 * Decodes percent-escapes and {@code '+'} as space using UTF-8. Pairs missing a name or value are ignored. 214 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 215 * 216 * @param query a raw query string such as {@code "a=1&b=2&b=3"} (must be non-{@code null}) 217 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none 218 * @see #extractQueryParametersFromUrl(String) 219 */ 220 @Nonnull 221 public static Map<String, Set<String>> extractQueryParametersFromQuery(@Nonnull String query) { 222 requireNonNull(query); 223 224 // For form parameters, body will look like "One=Two&Three=Four" ...a query string. 
225 String syntheticUrl = format("https://soklet.invalid?%s", query); // avoid referencing real domain 226 return extractQueryParametersFromUrl(syntheticUrl); 227 } 228 229 /** 230 * Extracts query parameters from a URL (or URI string) into a multimap of names to values. 231 * <p> 232 * If the input is not a valid {@link URI}, an empty map is returned. The raw query is split on {@code '&'} into 233 * name/value pairs, values are split on the first {@code '='}, and both name and value are UTF-8 decoded 234 * (percent-escapes and {@code '+'} → space). Blank pairs and pairs missing either name or value are ignored. 235 * Multiple occurrences of the same name are collected into a {@link Set} in insertion order (duplicates are de-duplicated). 236 * 237 * @param url an absolute or relative URL/URI string (must be non-{@code null}) 238 * @return a map of parameter names to their distinct values, preserving first-seen name order; empty if none/invalid 239 */ 240 @Nonnull 241 public static Map<String, Set<String>> extractQueryParametersFromUrl(@Nonnull String url) { 242 requireNonNull(url); 243 244 URI uri; 245 246 try { 247 uri = new URI(url); 248 } catch (URISyntaxException e) { 249 return Map.of(); 250 } 251 252 String query = trimAggressivelyToNull(uri.getRawQuery()); 253 254 if (query == null) 255 return Map.of(); 256 257 Map<String, Set<String>> queryParameters = new LinkedHashMap<>(); 258 for (String pair : query.split("&")) { 259 if (pair.isEmpty()) 260 continue; 261 262 String[] nv = pair.split("=", 2); 263 String rawName = trimAggressivelyToNull(nv.length > 0 ? nv[0] : null); 264 String rawValue = trimAggressivelyToNull(nv.length > 1 ? 
nv[1] : null); 265 266 if (rawName == null || rawValue == null) 267 continue; 268 269 String name = URLDecoder.decode(rawName, StandardCharsets.UTF_8); 270 String value = URLDecoder.decode(rawValue, StandardCharsets.UTF_8); 271 272 queryParameters.computeIfAbsent(name, k -> new LinkedHashSet<>()).add(value); 273 } 274 275 return queryParameters; 276 } 277 278 /** 279 * Parses {@code Cookie} request headers into a map of cookie names to values. 280 * <p> 281 * Header name matching is case-insensitive ({@code "Cookie"} vs {@code "cookie"}), but <em>cookie names are case-sensitive</em>. 282 * Values are parsed per the following liberal rules: 283 * <ul> 284 * <li>Components are split on {@code ';'} unless inside a quoted string.</li> 285 * <li>Quoted values have surrounding quotes removed and common backslash escapes unescaped.</li> 286 * <li>Percent-escapes are decoded as UTF-8. {@code '+'} is <strong>not</strong> treated specially.</li> 287 * </ul> 288 * Multiple occurrences of the same cookie name are collected into a {@link Set} in insertion order. 
289 * 290 * @param headers request headers as a multimap of header name to values (must be non-{@code null}) 291 * @return a map of cookie name to distinct values; empty if no valid cookies are present 292 */ 293 @Nonnull 294 public static Map<String, Set<String>> extractCookiesFromHeaders(@Nonnull Map<String, Set<String>> headers) { 295 requireNonNull(headers); 296 297 // Cookie *names* must be case-sensitive; keep LinkedHashMap (NOT case-insensitive) 298 Map<String, Set<String>> cookies = new LinkedHashMap<>(); 299 300 for (Entry<String, Set<String>> entry : headers.entrySet()) { 301 String headerName = entry.getKey(); 302 if (headerName == null || !"cookie".equalsIgnoreCase(headerName.trim())) 303 continue; 304 305 Set<String> values = entry.getValue(); 306 if (values == null) continue; 307 308 for (String headerValue : values) { 309 headerValue = trimAggressivelyToNull(headerValue); 310 if (headerValue == null) continue; 311 312 // Split on ';' only when NOT inside a quoted string 313 List<String> cookieComponents = splitCookieHeaderRespectingQuotes(headerValue); 314 315 for (String cookieComponent : cookieComponents) { 316 cookieComponent = trimAggressivelyToNull(cookieComponent); 317 if (cookieComponent == null) continue; 318 319 String[] cookiePair = cookieComponent.split("=", 2); 320 String rawName = trimAggressivelyToNull(cookiePair[0]); 321 String rawValue = (cookiePair.length == 2 ? 
trimAggressivelyToNull(cookiePair[1]) : null); 322 323 if (rawName == null) continue; 324 325 // DO NOT decode the name; cookie names are case-sensitive and rarely encoded 326 String cookieName = rawName; 327 328 String cookieValue = null; 329 if (rawValue != null) { 330 // If it's quoted, unquote+unescape first, then percent-decode (still no '+' -> space) 331 String unquoted = unquoteCookieValueIfNeeded(rawValue); 332 cookieValue = percentDecodeCookieValue(unquoted); 333 } 334 335 cookies.computeIfAbsent(cookieName, key -> new LinkedHashSet<>()); 336 if (cookieValue != null) 337 cookies.get(cookieName).add(cookieValue); 338 } 339 } 340 } 341 342 return cookies; 343 } 344 345 /** 346 * Percent-decodes %HH to bytes->UTF-8. Does NOT treat '+' specially. 347 */ 348 @Nonnull 349 private static String percentDecodeCookieValue(@Nonnull String cookieValue) { 350 requireNonNull(cookieValue); 351 352 ByteArrayOutputStream out = new ByteArrayOutputStream(cookieValue.length()); 353 354 for (int i = 0; i < cookieValue.length(); ) { 355 char c = cookieValue.charAt(i); 356 if (c == '%' && i + 2 < cookieValue.length()) { 357 int hi = Character.digit(cookieValue.charAt(i + 1), 16); 358 int lo = Character.digit(cookieValue.charAt(i + 2), 16); 359 if (hi >= 0 && lo >= 0) { 360 out.write((hi << 4) + lo); 361 i += 3; 362 continue; 363 } 364 } 365 366 out.write((byte) c); 367 i++; 368 } 369 370 return out.toString(StandardCharsets.UTF_8); 371 } 372 373 /** 374 * Splits a Cookie header string into components on ';' but ONLY when not inside a quoted value. 375 * Supports backslash-escaped quotes within quoted strings. 
376 */ 377 private static List<String> splitCookieHeaderRespectingQuotes(@Nonnull String headerValue) { 378 List<String> parts = new ArrayList<>(); 379 StringBuilder cur = new StringBuilder(headerValue.length()); 380 boolean inQuotes = false; 381 boolean escape = false; 382 383 for (int i = 0; i < headerValue.length(); i++) { 384 char c = headerValue.charAt(i); 385 386 if (escape) { 387 // keep escaped char literally (e.g., \" \; \\) 388 cur.append(c); 389 escape = false; 390 continue; 391 } 392 393 if (c == '\\') { 394 escape = true; 395 // keep the backslash for now; unquote step will handle unescaping 396 cur.append(c); 397 continue; 398 } 399 400 if (c == '"') { 401 inQuotes = !inQuotes; 402 cur.append(c); 403 continue; 404 } 405 406 if (c == ';' && !inQuotes) { 407 parts.add(cur.toString()); 408 cur.setLength(0); 409 continue; 410 } 411 412 cur.append(c); 413 } 414 415 if (cur.length() > 0) 416 parts.add(cur.toString()); 417 418 return parts; 419 } 420 421 /** 422 * If the cookie value is a quoted-string, remove surrounding quotes and unescape \" \\ and \; . 423 * Otherwise returns the input as-is. 424 */ 425 @Nonnull 426 private static String unquoteCookieValueIfNeeded(@Nonnull String rawValue) { 427 requireNonNull(rawValue); 428 429 if (rawValue.length() >= 2 && rawValue.charAt(0) == '"' && rawValue.charAt(rawValue.length() - 1) == '"') { 430 // Strip the surrounding quotes 431 String inner = rawValue.substring(1, rawValue.length() - 1); 432 433 // Unescape \" \\ and \; (common patterns seen in the wild) 434 // Order matters: unescape backslash-escape sequences, then leave other chars intact. 
435 StringBuilder sb = new StringBuilder(inner.length()); 436 boolean escape = false; 437 438 for (int i = 0; i < inner.length(); i++) { 439 char c = inner.charAt(i); 440 if (escape) { 441 // Only special-case a few common escapes; otherwise keep the char 442 if (c == '"' || c == '\\' || c == ';') 443 sb.append(c); 444 else 445 sb.append(c); // unknown escape -> keep literally (liberal in what we accept) 446 447 escape = false; 448 } else if (c == '\\') { 449 escape = true; 450 } else { 451 sb.append(c); 452 } 453 } 454 455 // If string ended with a dangling backslash, keep it literally 456 if (escape) 457 sb.append('\\'); 458 459 return sb.toString(); 460 } 461 462 return rawValue; 463 } 464 465 /** 466 * Normalizes a URL or path into a canonical request path. 467 * <p> 468 * Behavior: 469 * <ul> 470 * <li>If input starts with {@code http://} or {@code https://}, the path portion is extracted.</li> 471 * <li>Ensures the result begins with {@code '/'}.</li> 472 * <li>Removes any trailing {@code '/'} (except for the root path {@code '/'}).</li> 473 * <li>Strips any query string.</li> 474 * <li>Applies aggressive trimming of Unicode whitespace.</li> 475 * </ul> 476 * 477 * @param url a URL or path to normalize (must be non-{@code null}) 478 * @return the normalized path (never {@code null}); {@code "/"} for empty input 479 */ 480 @Nonnull 481 public static String normalizedPathForUrl(@Nonnull String url) { 482 requireNonNull(url); 483 484 url = trimAggressively(url); 485 486 if (url.length() == 0) 487 return "/"; 488 489 if (url.startsWith("http://") || url.startsWith("https://")) { 490 try { 491 URL absoluteUrl = new URL(url); 492 url = absoluteUrl.getPath(); 493 } catch (MalformedURLException e) { 494 throw new RuntimeException(format("Malformed URL: %s", url), e); 495 } 496 } 497 498 if (!url.startsWith("/")) 499 url = format("/%s", url); 500 501 if ("/".equals(url)) 502 return url; 503 504 while (url.endsWith("/")) 505 url = url.substring(0, url.length() - 1); 
506 507 int queryIndex = url.indexOf("?"); 508 509 if (queryIndex != -1) 510 url = url.substring(0, queryIndex); 511 512 return url; 513 } 514 515 /** 516 * Parses an {@code Accept-Language} header value into a best-effort ordered list of {@link Locale}s. 517 * <p> 518 * Quality weights are honored by {@link Locale.LanguageRange#parse(String)}; results are then mapped to available 519 * JVM locales. Unknown or unavailable language ranges are skipped. On parse failure, an empty list is returned. 520 * 521 * @param acceptLanguageHeaderValue the raw header value (must be non-{@code null}) 522 * @return locales in descending preference order; empty if none could be resolved 523 */ 524 @Nonnull 525 public static List<Locale> localesFromAcceptLanguageHeaderValue(@Nonnull String acceptLanguageHeaderValue) { 526 requireNonNull(acceptLanguageHeaderValue); 527 528 try { 529 List<LanguageRange> languageRanges = LanguageRange.parse(acceptLanguageHeaderValue); 530 531 return languageRanges.stream() 532 .map(languageRange -> LOCALES_BY_LANGUAGE_RANGE_RANGE.get(languageRange.getRange())) 533 .filter(locale -> locale != null) 534 .collect(Collectors.toList()); 535 } catch (Exception ignored) { 536 return List.of(); 537 } 538 } 539 540 /** 541 * Best-effort attempt to determine a client's URL prefix by examining request headers. 542 * <p> 543 * A URL prefix in this context is defined as {@code <scheme>://host<:optional port>}, but no path or query components. 544 * <p> 545 * Soklet is generally the "last hop" behind a load balancer/reverse proxy and does get accessed directly by clients. 
546 * <p> 547 * Normally a load balancer/reverse proxy/other upstream proxies will provide information about the true source of the 548 * request through headers like the following: 549 * <ul> 550 * <li>{@code Host}</li> 551 * <li>{@code Forwarded}</li> 552 * <li>{@code Origin}</li> 553 * <li>{@code X-Forwarded-Proto}</li> 554 * <li>{@code X-Forwarded-Protocol}</li> 555 * <li>{@code X-Url-Scheme}</li> 556 * <li>{@code Front-End-Https}</li> 557 * <li>{@code X-Forwarded-Ssl}</li> 558 * <li>{@code X-Forwarded-Host}</li> 559 * <li>{@code X-Forwarded-Port}</li> 560 * </ul> 561 * <p> 562 * This method may take these and other headers into account when determining URL prefix. 563 * <p> 564 * For example, the following would be legal URL prefixes returned from this method: 565 * <ul> 566 * <li>{@code https://www.soklet.com}</li> 567 * <li>{@code http://www.fake.com:1234}</li> 568 * </ul> 569 * <p> 570 * The following would NOT be legal URL prefixes: 571 * <ul> 572 * <li>{@code www.soklet.com} (missing protocol) </li> 573 * <li>{@code https://www.soklet.com/} (trailing slash)</li> 574 * <li>{@code https://www.soklet.com/test} (trailing slash, path)</li> 575 * <li>{@code https://www.soklet.com/test?abc=1234} (trailing slash, path, query)</li> 576 * </ul> 577 * 578 * @param headers HTTP request headers 579 * @return the URL prefix, or {@link Optional#empty()} if it could not be determined 580 */ 581 @Nonnull 582 public static Optional<String> extractClientUrlPrefixFromHeaders(@Nonnull Map<String, Set<String>> headers) { 583 requireNonNull(headers); 584 585 // Host developer.mozilla.org OR developer.mozilla.org:443 586 // Forwarded by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. 
for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89) 587 // Origin null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> 588 // X-Forwarded-Proto https 589 // X-Forwarded-Protocol https (Microsoft's alternate name) 590 // X-Url-Scheme https (Microsoft's alternate name) 591 // Front-End-Https on (Microsoft's alternate name) 592 // X-Forwarded-Ssl on (Microsoft's alternate name) 593 // X-Forwarded-Host id42.example-cdn.com 594 // X-Forwarded-Port 443 595 596 String protocol = null; 597 String host = null; 598 String portAsString = null; 599 600 // Host: developer.mozilla.org OR developer.mozilla.org:443 601 Set<String> hostHeaders = headers.get("Host"); 602 603 if (hostHeaders != null && hostHeaders.size() > 0) { 604 String hostHeader = trimAggressivelyToNull(hostHeaders.stream().findFirst().get()); 605 606 if (hostHeader != null) { 607 if (hostHeader.contains(":")) { 608 String[] hostHeaderComponents = hostHeader.split(":"); 609 if (hostHeaderComponents.length == 2) { 610 host = trimAggressivelyToNull(hostHeaderComponents[0]); 611 portAsString = trimAggressivelyToNull(hostHeaderComponents[1]); 612 } 613 } else { 614 host = hostHeader; 615 } 616 } 617 } 618 619 // Forwarded: by=<identifier>;for=<identifier>;host=<host>;proto=<http|https> (can be repeated if comma-separated, e.g. 
for=12.34.56.78;host=example.com;proto=https, for=23.45.67.89) 620 Set<String> forwardedHeaders = headers.get("Forwarded"); 621 622 if (forwardedHeaders != null && forwardedHeaders.size() > 0) { 623 String forwardedHeader = trimAggressivelyToNull(forwardedHeaders.stream().findFirst().get()); 624 625 // If there are multiple comma-separated components, pick the first one 626 String[] forwardedHeaderComponents = forwardedHeader.split(","); 627 forwardedHeader = trimAggressivelyToNull(forwardedHeaderComponents[0]); 628 629 if (forwardedHeader != null) { 630 // Each field component might look like "by=<identifier>" 631 String[] forwardedHeaderFieldComponents = forwardedHeader.split(";"); 632 633 for (String forwardedHeaderFieldComponent : forwardedHeaderFieldComponents) { 634 forwardedHeaderFieldComponent = trimAggressivelyToNull(forwardedHeaderFieldComponent); 635 636 if (forwardedHeaderFieldComponent == null) 637 continue; 638 639 // Break "by=<identifier>" into "by" and "<identifier>" pieces 640 String[] forwardedHeaderFieldNameAndValue = forwardedHeaderFieldComponent.split(Pattern.quote("=" /* escape special Regex char */)); 641 if (forwardedHeaderFieldNameAndValue.length != 2) 642 continue; 643 644 // e.g. "by" 645 String name = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[0]); 646 // e.g. "<identifier>" 647 String value = trimAggressivelyToNull(forwardedHeaderFieldNameAndValue[1]); 648 649 if (name == null || value == null) 650 continue; 651 652 // We only care about the "Host" and "Proto" components here. 
653 if ("host".equalsIgnoreCase(name)) { 654 if (host == null) 655 host = value; 656 } else if ("proto".equalsIgnoreCase(name)) { 657 if (protocol == null) 658 protocol = value; 659 } 660 } 661 } 662 } 663 664 // Origin: null OR <scheme>://<hostname> OR <scheme>://<hostname>:<port> 665 if (protocol == null || host == null || portAsString == null) { 666 Set<String> originHeaders = headers.get("Origin"); 667 668 if (originHeaders != null && originHeaders.size() > 0) { 669 String originHeader = trimAggressivelyToNull(originHeaders.stream().findFirst().get()); 670 String[] originHeaderComponents = originHeader.split("://"); 671 672 if (originHeaderComponents.length == 2) { 673 protocol = trimAggressivelyToNull(originHeaderComponents[0]); 674 String originHostAndMaybePort = trimAggressivelyToNull(originHeaderComponents[1]); 675 676 if (originHostAndMaybePort != null) { 677 if (originHostAndMaybePort.contains(":")) { 678 String[] originHostAndPortComponents = originHostAndMaybePort.split(":"); 679 680 if (originHostAndPortComponents.length == 2) { 681 host = trimAggressivelyToNull(originHostAndPortComponents[0]); 682 portAsString = trimAggressivelyToNull(originHostAndPortComponents[1]); 683 } 684 } else { 685 host = originHostAndMaybePort; 686 } 687 } 688 } 689 } 690 } 691 692 // X-Forwarded-Proto: https 693 if (protocol == null) { 694 Set<String> xForwardedProtoHeaders = headers.get("X-Forwarded-Proto"); 695 if (xForwardedProtoHeaders != null && xForwardedProtoHeaders.size() > 0) { 696 String xForwardedProtoHeader = trimAggressivelyToNull(xForwardedProtoHeaders.stream().findFirst().get()); 697 protocol = xForwardedProtoHeader; 698 } 699 } 700 701 // X-Forwarded-Protocol: https (Microsoft's alternate name) 702 if (protocol == null) { 703 Set<String> xForwardedProtocolHeaders = headers.get("X-Forwarded-Protocol"); 704 if (xForwardedProtocolHeaders != null && xForwardedProtocolHeaders.size() > 0) { 705 String xForwardedProtocolHeader = 
trimAggressivelyToNull(xForwardedProtocolHeaders.stream().findFirst().get()); 706 protocol = xForwardedProtocolHeader; 707 } 708 } 709 710 // X-Url-Scheme: https (Microsoft's alternate name) 711 if (protocol == null) { 712 Set<String> xUrlSchemeHeaders = headers.get("X-Url-Scheme"); 713 if (xUrlSchemeHeaders != null && xUrlSchemeHeaders.size() > 0) { 714 String xUrlSchemeHeader = trimAggressivelyToNull(xUrlSchemeHeaders.stream().findFirst().get()); 715 protocol = xUrlSchemeHeader; 716 } 717 } 718 719 // Front-End-Https: on (Microsoft's alternate name) 720 if (protocol == null) { 721 Set<String> frontEndHttpsHeaders = headers.get("Front-End-Https"); 722 if (frontEndHttpsHeaders != null && frontEndHttpsHeaders.size() > 0) { 723 String frontEndHttpsHeader = trimAggressivelyToNull(frontEndHttpsHeaders.stream().findFirst().get()); 724 725 if (frontEndHttpsHeader != null) 726 protocol = "on".equalsIgnoreCase(frontEndHttpsHeader) ? "https" : "http"; 727 } 728 } 729 730 // X-Forwarded-Ssl: on (Microsoft's alternate name) 731 if (protocol == null) { 732 Set<String> xForwardedSslHeaders = headers.get("X-Forwarded-Ssl"); 733 if (xForwardedSslHeaders != null && xForwardedSslHeaders.size() > 0) { 734 String xForwardedSslHeader = trimAggressivelyToNull(xForwardedSslHeaders.stream().findFirst().get()); 735 736 if (xForwardedSslHeader != null) 737 protocol = "on".equalsIgnoreCase(xForwardedSslHeader) ? 
"https" : "http"; 738 } 739 } 740 741 // X-Forwarded-Host: id42.example-cdn.com 742 if (host == null) { 743 Set<String> xForwardedHostHeaders = headers.get("X-Forwarded-Host"); 744 if (xForwardedHostHeaders != null && xForwardedHostHeaders.size() > 0) { 745 String xForwardedHostHeader = trimAggressivelyToNull(xForwardedHostHeaders.stream().findFirst().get()); 746 host = xForwardedHostHeader; 747 } 748 } 749 750 // X-Forwarded-Port: 443 751 if (portAsString == null) { 752 Set<String> xForwardedPortHeaders = headers.get("X-Forwarded-Port"); 753 if (xForwardedPortHeaders != null && xForwardedPortHeaders.size() > 0) { 754 String xForwardedPortHeader = trimAggressivelyToNull(xForwardedPortHeaders.stream().findFirst().get()); 755 portAsString = xForwardedPortHeader; 756 } 757 } 758 759 Integer port = null; 760 761 if (portAsString != null) { 762 try { 763 port = Integer.parseInt(portAsString, 10); 764 } catch (Exception ignored) { 765 // Not an integer; ignore it 766 } 767 } 768 769 if (protocol != null && host != null && port == null) 770 return Optional.of(format("%s://%s", protocol, host)); 771 772 if (protocol != null && host != null && port != null) { 773 boolean usingDefaultPort = ("http".equalsIgnoreCase(protocol) && port.equals(80)) 774 || ("https".equalsIgnoreCase(protocol) && port.equals(443)); 775 776 // Only include the port number if it's nonstandard for the protocol 777 String clientUrlPrefix = usingDefaultPort 778 ? format("%s://%s", protocol, host) 779 : format("%s://%s:%s", protocol, host, port); 780 781 return Optional.of(clientUrlPrefix); 782 } 783 784 return Optional.empty(); 785 } 786 787 /** 788 * Extracts the media type (without parameters) from the first {@code Content-Type} header. 789 * <p> 790 * For example, {@code "text/html; charset=utf-8"} → {@code "text/html"}. 
791 * 792 * @param headers request/response headers (must be non-{@code null}) 793 * @return the media type if present; otherwise {@link Optional#empty()} 794 * @see #extractContentTypeFromHeaderValue(String) 795 */ 796 @Nonnull 797 public static Optional<String> extractContentTypeFromHeaders(@Nonnull Map<String, Set<String>> headers) { 798 requireNonNull(headers); 799 800 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 801 802 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 803 return Optional.empty(); 804 805 return extractContentTypeFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 806 } 807 808 /** 809 * Extracts the media type (without parameters) from a {@code Content-Type} header value. 810 * <p> 811 * For example, {@code "application/json; charset=utf-8"} → {@code "application/json"}. 812 * 813 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 814 * @return the media type if present; otherwise {@link Optional#empty()} 815 */ 816 @Nonnull 817 public static Optional<String> extractContentTypeFromHeaderValue(@Nullable String contentTypeHeaderValue) { 818 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 819 820 if (contentTypeHeaderValue == null) 821 return Optional.empty(); 822 823 // Examples 824 // Content-Type: text/html; charset=utf-8 825 // Content-Type: multipart/form-data; boundary=something 826 827 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 828 829 // Simple case, e.g. "text/html" 830 if (indexOfSemicolon == -1) 831 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue)); 832 833 // More complex case, e.g. "text/html; charset=utf-8" 834 return Optional.ofNullable(trimAggressivelyToNull(contentTypeHeaderValue.substring(0, indexOfSemicolon))); 835 } 836 837 /** 838 * Extracts the {@link Charset} from the first {@code Content-Type} header, if present and valid. 
839 * <p> 840 * Tolerates additional parameters and arbitrary whitespace. Invalid or unknown charset tokens yield {@link Optional#empty()}. 841 * 842 * @param headers request/response headers (must be non-{@code null}) 843 * @return the charset declared by the header; otherwise {@link Optional#empty()} 844 * @see #extractCharsetFromHeaderValue(String) 845 */ 846 @Nonnull 847 public static Optional<Charset> extractCharsetFromHeaders(@Nonnull Map<String, Set<String>> headers) { 848 requireNonNull(headers); 849 850 Set<String> contentTypeHeaderValues = headers.get("Content-Type"); 851 852 if (contentTypeHeaderValues == null || contentTypeHeaderValues.size() == 0) 853 return Optional.empty(); 854 855 return extractCharsetFromHeaderValue(contentTypeHeaderValues.stream().findFirst().get()); 856 } 857 858 /** 859 * Extracts the {@code charset=...} parameter from a {@code Content-Type} header value. 860 * <p> 861 * Parsing is forgiving: parameters may appear in any order and with arbitrary spacing. If a charset is found, 862 * it is validated via {@link Charset#forName(String)}; invalid names result in {@link Optional#empty()}. 863 * 864 * @param contentTypeHeaderValue the raw header value; may be {@code null} or blank 865 * @return the resolved charset if present and valid; otherwise {@link Optional#empty()} 866 */ 867 @Nonnull 868 public static Optional<Charset> extractCharsetFromHeaderValue(@Nullable String contentTypeHeaderValue) { 869 contentTypeHeaderValue = trimAggressivelyToNull(contentTypeHeaderValue); 870 871 if (contentTypeHeaderValue == null) 872 return Optional.empty(); 873 874 // Examples 875 // Content-Type: text/html; charset=utf-8 876 // Content-Type: multipart/form-data; boundary=something 877 878 int indexOfSemicolon = contentTypeHeaderValue.indexOf(";"); 879 880 // Simple case, e.g. "text/html" 881 if (indexOfSemicolon == -1) 882 return Optional.empty(); 883 884 // More complex case, e.g. 
"text/html; charset=utf-8" or "multipart/form-data; charset=utf-8; boundary=something" 885 boolean finishedContentType = false; 886 boolean finishedCharsetName = false; 887 StringBuilder buffer = new StringBuilder(); 888 String charsetName = null; 889 890 for (int i = 0; i < contentTypeHeaderValue.length(); i++) { 891 char c = contentTypeHeaderValue.charAt(i); 892 893 if (Character.isWhitespace(c)) 894 continue; 895 896 if (c == ';') { 897 // No content type yet? This just be it... 898 if (!finishedContentType) { 899 finishedContentType = true; 900 buffer = new StringBuilder(); 901 } else if (!finishedCharsetName) { 902 if (buffer.indexOf("charset=") == 0) { 903 charsetName = buffer.toString(); 904 finishedCharsetName = true; 905 break; 906 } 907 } 908 } else { 909 buffer.append(Character.toLowerCase(c)); 910 } 911 } 912 913 // Handle case where charset is the end of the string, e.g. "whatever;charset=utf-8" 914 if (!finishedCharsetName) { 915 String potentialCharset = trimAggressivelyToNull(buffer.toString()); 916 if (potentialCharset != null && potentialCharset.startsWith("charset=")) { 917 finishedCharsetName = true; 918 charsetName = potentialCharset; 919 } 920 } 921 922 if (finishedCharsetName) { 923 // e.g. "charset=utf-8" -> "utf-8" 924 charsetName = trimAggressivelyToNull(charsetName.replace("charset=", "")); 925 926 if (charsetName != null) { 927 try { 928 return Optional.of(Charset.forName(charsetName)); 929 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) { 930 return Optional.empty(); 931 } 932 } 933 } 934 935 return Optional.empty(); 936 } 937 938 /** 939 * A "stronger" version of {@link String#trim()} which discards any kind of whitespace or invisible separator. 940 * <p> 941 * In a web environment with user-supplied inputs, this is the behavior we want the vast majority of the time. 
942 * For example, users copy-paste URLs from Microsoft Word or Outlook and it's easy to accidentally include a {@code U+202F 943 * "Narrow No-Break Space (NNBSP)"} character at the end, which might break parsing. 944 * <p> 945 * See <a href="https://www.compart.com/en/unicode/U+202F">https://www.compart.com/en/unicode/U+202F</a> for details. 946 * 947 * @param string the string to trim 948 * @return the trimmed string, or {@code null} if the input string is {@code null} or the trimmed representation is of length {@code 0} 949 */ 950 @Nullable 951 public static String trimAggressively(@Nullable String string) { 952 if (string == null) 953 return null; 954 955 string = HEAD_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 956 957 if (string.length() == 0) 958 return string; 959 960 string = TAIL_WHITESPACE_PATTERN.matcher(string).replaceAll(""); 961 962 return string; 963 } 964 965 /** 966 * Aggressively trims Unicode whitespace from the given string and returns {@code null} if the result is empty. 967 * <p> 968 * See {@link #trimAggressively(String)} for details on which code points are removed. 969 * 970 * @param string the input string; may be {@code null} 971 * @return a trimmed, non-empty string; or {@code null} if input was {@code null} or trimmed to empty 972 */ 973 @Nullable 974 public static String trimAggressivelyToNull(@Nullable String string) { 975 if (string == null) 976 return null; 977 978 string = trimAggressively(string); 979 return string.length() == 0 ? null : string; 980 } 981 982 /** 983 * Aggressively trims Unicode whitespace from the given string and returns {@code ""} if the input is {@code null}. 984 * <p> 985 * See {@link #trimAggressively(String)} for details on which code points are removed. 
*
   * @param string the input string; may be {@code null}
   * @return a trimmed string (never {@code null}); {@code ""} if input was {@code null}
   */
  @Nonnull
  public static String trimAggressivelyToEmpty(@Nullable String string) {
    // A null input maps straight to the empty string; everything else is delegated to the trimmer
    return string == null ? "" : trimAggressively(string);
  }
}