001/* 002 * Copyright 2022-2026 Revetware LLC. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.soklet; 018 019import org.jspecify.annotations.NonNull; 020 021import javax.annotation.concurrent.NotThreadSafe; 022import javax.annotation.concurrent.ThreadSafe; 023import java.nio.charset.StandardCharsets; 024import java.util.ArrayList; 025import java.util.Collections; 026import java.util.List; 027import java.util.Objects; 028 029import static java.lang.String.format; 030import static java.util.Objects.requireNonNull; 031 032/** 033 * Represents a single HTTP header field value that includes a name and may include semicolon-delimited parameters - encoding rules per RFC specifications are strictly enforced. 034 * <p> 035 * Many HTTP header field values are of the form: 036 * <pre> 037 * name *( OWS ";" OWS parameter ) 038 * </pre> 039 * where each {@code parameter} is a {@code name=value} pair. 040 * <p> 041 * This class provides a small builder that makes it easy to construct parameterized header values 042 * using the formal HTTP grammar terms: 043 * <ul> 044 * <li>{@link Builder#tokenParameter(String, String)} adds a parameter whose value is a {@code token} (RFC 9110).</li> 045 * <li>{@link Builder#quotedParameter(String, String)} adds a parameter whose value is a {@code quoted-string} (RFC 9110).</li> 046 * <li>{@link Builder#rfc8187Parameter(String, String)} adds an <em>extended parameter</em> ({@code name*=}) 047 * whose value is an {@code ext-value} encoded per RFC 8187 (UTF-8, percent-encoded).</li> 048 * </ul> 049 * <p> 050 * Example {@code Content-Disposition} header value: 051 * <pre>{@code 052 * String contentDisposition = ParameterizedHeaderValue.withName("attachment") 053 * .quotedParameter("filename", "resume.pdf") 054 * .rfc8187Parameter("filename", "résumé.pdf") 055 * .stringValue(); 056 * 057 * // contentDisposition => 058 * // attachment; filename="resume.pdf"; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf 059 * }</pre> 060 * <p> 061 * The {@code name} must be ISO-8859-1 and must not contain the {@code ';'} parameter delimiter. 062 * <p> 063 * This class is immutable and thread-safe. The {@link Builder} is not thread-safe. 064 * 065 * @author <a href="https://www.revetkn.com">Mark Allen</a> 066 */ 067@ThreadSafe 068public final class ParameterizedHeaderValue { 069 @NonNull 070 private final String name; 071 @NonNull 072 private final List<@NonNull Parameter> parameters; 073 074 @NonNull 075 public static Builder withName(@NonNull String value) { 076 requireNonNull(value); 077 return new Builder(value); 078 } 079 080 private ParameterizedHeaderValue(@NonNull Builder builder) { 081 requireNonNull(builder); 082 this.name = requireNonNull(builder.name); 083 this.parameters = Collections.unmodifiableList(new ArrayList<>(builder.parameters)); 084 } 085 086 /** 087 * Returns the name (non-parameter) portion of this header value. 088 */ 089 @NonNull 090 public String getName() { 091 return this.name; 092 } 093 094 /** 095 * Returns the HTTP <em>wire format</em> string for this header field value: the name followed by any 096 * semicolon-delimited parameters. 097 * <p> 098 * This is the official string form of {@link ParameterizedHeaderValue}. No guarantees are made about {@link #toString()}. 099 * 100 * @return the wire-format header field value 101 */ 102 @NonNull 103 public String getStringValue() { 104 return render(this.name, this.parameters); 105 } 106 107 /** 108 * Returns the parameters (including their types and unencoded values) that make up this header value. 109 * <p> 110 * The returned list is immutable. 111 */ 112 @NonNull 113 public List<@NonNull Parameter> getParameters() { 114 return this.parameters; 115 } 116 117 /** 118 * Returns a debug representation of this instance and its internal state. 119 * <p> 120 * No wire-format or stability guarantees are made about this output; use {@link #getStringValue()} for the 121 * wire-format header field value. 122 */ 123 @Override 124 @NonNull 125 public String toString() { 126 return format("ParameterizedHeaderValue{name=%s, parameters=%s}", getName(), getParameters()); 127 } 128 129 @Override 130 public boolean equals(Object other) { 131 if (this == other) 132 return true; 133 if (!(other instanceof ParameterizedHeaderValue)) 134 return false; 135 136 ParameterizedHeaderValue that = (ParameterizedHeaderValue) other; 137 return this.name.equals(that.name) 138 && this.parameters.equals(that.parameters); 139 } 140 141 @Override 142 public int hashCode() { 143 return Objects.hash(this.name, this.parameters); 144 } 145 146 @NonNull 147 private static String render(@NonNull String value, 148 @NonNull List<@NonNull Parameter> parameters) { 149 requireNonNull(value); 150 requireNonNull(parameters); 151 152 if (parameters.isEmpty()) 153 return value; 154 155 StringBuilder sb = new StringBuilder(value.length() + (parameters.size() * 16)); 156 sb.append(value); 157 158 for (Parameter parameter : parameters) 159 sb.append("; ").append(parameter.getEncodedFragment()); 160 161 return sb.toString(); 162 } 163 164 /** 165 * What type of header-value parameter this is: {@link #TOKEN}, {@link #QUOTED}, or {@link #RFC_8187}. 166 */ 167 public enum ParameterType { 168 /** 169 * {@code name=value} where value is an HTTP token (RFC 9110). 170 */ 171 TOKEN, 172 /** 173 * {@code name="value"} where value is an HTTP quoted-string (RFC 9110). 174 */ 175 QUOTED, 176 /** 177 * {@code name*=UTF-8''...} where value is an RFC 8187 ext-value. 178 */ 179 RFC_8187 180 } 181 182 /** 183 * A single header-value parameter: given a header value like {@code attachment; filename="resume.pdf"; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf}, there are two {@code filename} parameter name-value pairs. 184 */ 185 @ThreadSafe 186 public static final class Parameter { 187 @NonNull 188 private final ParameterType parameterType; 189 @NonNull 190 private final String name; 191 @NonNull 192 private final String value; // unencoded/original value (for debugging/state) 193 @NonNull 194 private final String encodedFragment; // already encoded "name=value" or "name*=ext-value" 195 196 private Parameter(@NonNull ParameterType parameterType, 197 @NonNull String name, 198 @NonNull String value, 199 @NonNull String encodedFragment) { 200 this.parameterType = requireNonNull(parameterType); 201 this.name = requireNonNull(name); 202 this.value = requireNonNull(value); 203 this.encodedFragment = requireNonNull(encodedFragment); 204 } 205 206 /** 207 * Gets the type of this parameter. 208 * 209 * @return the parameter type 210 */ 211 @NonNull 212 public ParameterType getParameterType() { 213 return parameterType; 214 } 215 216 /** 217 * Gets the parameter name. 218 * 219 * @return the parameter name 220 */ 221 @NonNull 222 public String getName() { 223 return name; 224 } 225 226 /** 227 * Gets the unencoded/original parameter value. 228 * 229 * @return the parameter value 230 */ 231 @NonNull 232 public String getValue() { 233 return value; 234 } 235 236 /** 237 * Gets the encoded fragment in wire format (e.g. {@code name=value} or {@code name*=...}). 238 * 239 * @return the encoded parameter fragment 240 */ 241 @NonNull 242 String getEncodedFragment() { 243 return encodedFragment; 244 } 245 246 @Override 247 @NonNull 248 public String toString() { 249 return "Parameter{" 250 + "parameterType=" + parameterType 251 + ", name=" + name 252 + ", value=" + value 253 + ", encodedFragment=" + encodedFragment 254 + '}'; 255 } 256 257 @Override 258 public boolean equals(Object other) { 259 if (this == other) 260 return true; 261 if (!(other instanceof Parameter)) 262 return false; 263 264 Parameter that = (Parameter) other; 265 return this.parameterType == that.parameterType 266 && this.name.equals(that.name) 267 && this.value.equals(that.value) 268 && this.encodedFragment.equals(that.encodedFragment); 269 } 270 271 @Override 272 public int hashCode() { 273 return Objects.hash(parameterType, name, value, encodedFragment); 274 } 275 } 276 277 /** 278 * Builder used to construct instances of {@link ParameterizedHeaderValue} via {@link ParameterizedHeaderValue#withName(String)}. 279 * <p> 280 * This class is intended for use by a single thread. 281 * 282 * @author <a href="https://www.revetkn.com">Mark Allen</a> 283 */ 284 @NotThreadSafe 285 public static final class Builder { 286 @NonNull 287 private final String name; 288 @NonNull 289 private final List<@NonNull Parameter> parameters; 290 291 private Builder(@NonNull String name) { 292 requireNonNull(name); 293 this.name = sanitizeValue(name); 294 this.parameters = new ArrayList<>(); 295 } 296 297 /** 298 * Adds a parameter whose value is encoded as an HTTP {@code token} (RFC 9110). 299 * <p> 300 * Both the parameter name and value must be valid {@code token}s. 301 * This method fails fast if {@code name} or {@code value} are invalid. 302 * 303 * @param name parameter name (token) 304 * @param value parameter value (token) 305 * @return this builder 306 * @throws IllegalArgumentException if {@code name} or {@code value} are not valid tokens or contain control chars 307 */ 308 @NonNull 309 public Builder tokenParameter(@NonNull String name, 310 @NonNull String value) { 311 requireNonNull(name); 312 requireNonNull(value); 313 314 String n = sanitizeParameterName(name); 315 String v = sanitizeTokenValue(value); 316 317 String encoded = n + "=" + v; 318 this.parameters.add(new Parameter(ParameterType.TOKEN, n, v, encoded)); 319 return this; 320 } 321 322 /** 323 * Adds a parameter whose value is encoded as an HTTP {@code quoted-string} (RFC 9110). 324 * <p> 325 * The parameter name must be a valid {@code token}. The value must be ASCII and must not contain control 326 * characters. Double-quotes and backslashes are escaped as required for {@code quoted-string}. 327 * <p> 328 * This method fails fast if illegal data is provided. For non-ASCII values, use {@link #rfc8187Parameter(String, String)}. 329 * 330 * @param name parameter name (token) 331 * @param value parameter value (quoted-string content) 332 * @return this builder 333 * @throws IllegalArgumentException if {@code name} is not a valid token or {@code value} is non-ASCII or contains control chars 334 */ 335 @NonNull 336 public Builder quotedParameter(@NonNull String name, 337 @NonNull String value) { 338 requireNonNull(name); 339 requireNonNull(value); 340 341 String n = sanitizeParameterName(name); 342 String v = sanitizeQuotedValue(value); // fail-fast (ASCII, no CTLs) 343 344 String encodedValue = encodeQuotedString(v); 345 String encoded = n + "=" + encodedValue; 346 347 this.parameters.add(new Parameter(ParameterType.QUOTED, n, v, encoded)); 348 return this; 349 } 350 351 /** 352 * Adds an <em>extended parameter</em> (denoted by the {@code *} suffix on the parameter name) whose value is 353 * encoded as an RFC 8187 {@code ext-value} (UTF-8, percent-encoded). 354 * <p> 355 * This produces a fragment of the form: 356 * <pre> 357 * name*=UTF-8''percent-encoded-value 358 * </pre> 359 * where the percent-encoded bytes are the UTF-8 encoding of {@code value}. 360 * 361 * @param name parameter name (token). The {@code *} is appended automatically. 362 * @param value parameter value to encode as an RFC 8187 {@code ext-value} 363 * @return this builder 364 * @throws IllegalArgumentException if {@code name} is not a valid token or {@code value} contains control chars 365 */ 366 @NonNull 367 public Builder rfc8187Parameter(@NonNull String name, 368 @NonNull String value) { 369 requireNonNull(name); 370 requireNonNull(value); 371 372 String n = sanitizeParameterName(name); 373 if (n.indexOf('*') != -1) 374 throw new IllegalArgumentException("RFC 8187 parameter name must not contain '*': " + n); 375 String v = sanitizeRfc8187Value(value); 376 377 String extValue = encodeRfc8187ExtValue(v); 378 String encoded = n + "*=" + extValue; 379 380 this.parameters.add(new Parameter(ParameterType.RFC_8187, n, v, encoded)); 381 return this; 382 } 383 384 /** 385 * Builds an immutable {@link ParameterizedHeaderValue}. 386 */ 387 @NonNull 388 public ParameterizedHeaderValue build() { 389 return new ParameterizedHeaderValue(this); 390 } 391 392 /** 393 * Returns the HTTP wire-format string for this header field value. 394 * <p> 395 * This is equivalent to {@code build().getStringValue()} but avoids creating an intermediate {@link ParameterizedHeaderValue}. 396 * 397 * @return the wire-format header field value 398 */ 399 @NonNull 400 public String stringValue() { 401 return ParameterizedHeaderValue.render(this.name, this.parameters); 402 } 403 404 /* --------------------------- internals --------------------------- */ 405 406 @NonNull 407 private static String sanitizeValue(@NonNull String string) { 408 requireNonNull(string); 409 410 // We don't attempt to fully validate "primary-value" because its grammar is header-specific. 411 // We do enforce a security baseline: no control characters. 412 assertNoControlCharacters(string, "value"); 413 414 String trimmed = string.trim(); 415 416 if (trimmed.isEmpty()) 417 throw new IllegalArgumentException("Value must not be empty"); 418 419 for (int i = 0; i < trimmed.length(); i++) { 420 char ch = trimmed.charAt(i); 421 if (ch > 0xFF) 422 throw new IllegalArgumentException("Non-Latin-1 character not permitted in value"); 423 if (ch == ';') 424 throw new IllegalArgumentException("Value must not contain ';' parameter delimiters"); 425 } 426 427 return trimmed; 428 } 429 430 @NonNull 431 private static String sanitizeParameterName(@NonNull String name) { 432 requireNonNull(name); 433 434 assertNoControlCharacters(name, "name"); 435 436 String trimmed = name.trim(); 437 438 if (trimmed.isEmpty()) 439 throw new IllegalArgumentException("Parameter name must not be empty"); 440 441 if (!isToken(trimmed)) 442 throw new IllegalArgumentException("Invalid parameter name token: " + trimmed); 443 444 return trimmed; 445 } 446 447 @NonNull 448 private static String sanitizeTokenValue(@NonNull String value) { 449 requireNonNull(value); 450 451 assertNoControlCharacters(value, "value"); 452 453 String trimmed = value.trim(); 454 455 if (trimmed.isEmpty()) 456 throw new IllegalArgumentException("Token value must not be empty"); 457 458 if (!isToken(trimmed)) 459 throw new IllegalArgumentException("Invalid token value: " + trimmed); 460 461 return trimmed; 462 } 463 464 @NonNull 465 private static String sanitizeQuotedValue(@NonNull String value) { 466 requireNonNull(value); 467 468 // Fail-fast: quoted-string values should be ASCII and must not include CTLs. 469 assertNoControlCharacters(value, "value"); 470 471 for (int i = 0; i < value.length(); i++) { 472 char ch = value.charAt(i); 473 474 // Disallow non-ASCII. 475 if (ch > 0x7E) 476 throw new IllegalArgumentException("Non-ASCII character not permitted in quoted-string; use rfc8187() instead"); 477 478 // We also disallow other ASCII control characters already handled by assertNoControlCharacters(). 479 // Here we allow SP (0x20) through '~' (0x7E). 480 if (ch < 0x20) 481 throw new IllegalArgumentException("Control character not permitted in quoted-string; use rfc8187() if needed"); 482 } 483 484 return value; 485 } 486 487 @NonNull 488 private static String sanitizeRfc8187Value(@NonNull String value) { 489 // RFC 8187 values are encoded as UTF-8 bytes + percent-encoding. 490 // We still fail-fast on ASCII control characters to prevent header injection. 491 assertNoControlCharacters(value, "value"); 492 return value; 493 } 494 495 @NonNull 496 private static String encodeQuotedString(@NonNull String value) { 497 // RFC 9110 quoted-string: DQUOTE *( qdtext / quoted-pair ) DQUOTE 498 // We implement a safe subset: escape backslash and double quote. 499 String escaped = value 500 .replace("\\", "\\\\") 501 .replace("\"", "\\\""); 502 503 return "\"" + escaped + "\""; 504 } 505 506 @NonNull 507 private static String encodeRfc8187ExtValue(@NonNull String value) { 508 requireNonNull(value); 509 510 // RFC 8187 ext-value: charset "'" [ language ] "'" value-chars 511 // We always use UTF-8 and omit language (empty). 512 byte[] bytes = value.getBytes(StandardCharsets.UTF_8); 513 514 StringBuilder sb = new StringBuilder("UTF-8''".length() + bytes.length * 3); 515 sb.append("UTF-8''"); 516 517 for (byte b : bytes) { 518 int c = b & 0xFF; 519 520 if (isAttrChar(c)) { 521 sb.append((char) c); 522 } else { 523 sb.append('%'); 524 sb.append(HEX[(c >>> 4) & 0x0F]); 525 sb.append(HEX[c & 0x0F]); 526 } 527 } 528 529 return sb.toString(); 530 } 531 532 private static void assertNoControlCharacters(@NonNull String string, 533 @NonNull String fieldName) { 534 requireNonNull(string); 535 requireNonNull(fieldName); 536 537 for (int i = 0; i < string.length(); i++) { 538 char ch = string.charAt(i); 539 // Disallow ASCII CTLs (including CR/LF) and DEL. 540 if (ch <= 0x1F || ch == 0x7F) 541 throw new IllegalArgumentException("Control character not permitted in " + fieldName + " (index " + i + ")"); 542 } 543 } 544 545 /** 546 * Returns {@code true} if {@code string} is an HTTP {@code token} per RFC 9110. 547 */ 548 private static boolean isToken(@NonNull String string) { 549 requireNonNull(string); 550 551 if (string.isEmpty()) 552 return false; 553 554 for (int i = 0; i < string.length(); i++) { 555 char ch = string.charAt(i); 556 557 if (!isTchar(ch)) 558 return false; 559 } 560 561 return true; 562 } 563 564 /** 565 * RFC 9110 tchar: 566 * "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 567 */ 568 private static boolean isTchar(char ch) { 569 if (ch >= '0' && ch <= '9') return true; 570 if (ch >= 'A' && ch <= 'Z') return true; 571 if (ch >= 'a' && ch <= 'z') return true; 572 573 switch (ch) { 574 case '!': 575 case '#': 576 case '$': 577 case '%': 578 case '&': 579 case '\'': 580 case '*': 581 case '+': 582 case '-': 583 case '.': 584 case '^': 585 case '_': 586 case '`': 587 case '|': 588 case '~': 589 return true; 590 591 default: 592 return false; 593 } 594 } 595 596 /** 597 * RFC 8187 attr-char: 598 * ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" 599 */ 600 private static boolean isAttrChar(int c) { 601 // ALPHA 602 if (c >= 'A' && c <= 'Z') return true; 603 if (c >= 'a' && c <= 'z') return true; 604 // DIGIT 605 if (c >= '0' && c <= '9') return true; 606 607 return c == '!' || c == '#' || c == '$' || c == '&' || c == '+' 608 || c == '-' || c == '.' || c == '^' || c == '_' || c == '`' 609 || c == '|' || c == '~'; 610 } 611 612 private static final char[] HEX = "0123456789ABCDEF".toCharArray(); 613 } 614}