001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.ByteArrayOutputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.OutputStream; 023import java.io.UnsupportedEncodingException; 024import java.nio.charset.Charset; 025 026import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException; 027import org.apache.commons.io.Charsets; 028import org.apache.commons.io.IOUtils; 029import org.apache.commons.io.build.AbstractOrigin; 030import org.apache.commons.io.build.AbstractStreamBuilder; 031import org.apache.commons.io.output.NullOutputStream; 032 033/** 034 * Low-level API for processing file uploads. 035 * 036 * <p> 037 * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 038 * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage. 039 * </p> 040 * <p> 041 * The format of the stream is defined in the following way: 042 * </p> 043 * <pre> 044 * multipart-body := preamble 1*encapsulation close-delimiter epilogue<br> 045 * encapsulation := delimiter body CRLF<br> 046 * delimiter := "--" boundary CRLF<br> 047 * close-delimiter := "--" boundary "--"<br> 048 * preamble := <ignore><br> 049 * epilogue := <ignore><br> 050 * body := header-part CRLF body-part<br> 051 * header-part := 1*header CRLF<br> 052 * header := header-name ":" header-value<br> 053 * header-name := <printable ASCII characters except ":"><br> 054 * header-value := <any ASCII characters except CR & LF><br> 055 * body-data := <arbitrary data><br> 056 * </pre> 057 * 058 * <p> 059 * Note that body-data can contain another mulipart entity. There is limited support for single pass processing of such nested streams. The nested stream is 060 * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}). 061 * </p> 062 * <p> 063 * Here is an example of usage of this class: 064 * </p> 065 * 066 * <pre> 067 * try { 068 * MultipartInput multipartStream = MultipartInput.builder() 069 * .setBoundary(boundary) 070 * .setInputStream(input) 071 * .get(); 072 * boolean nextPart = multipartStream.skipPreamble(); 073 * OutputStream output; 074 * while (nextPart) { 075 * String header = multipartStream.readHeaders(); 076 * // process headers 077 * // create some output stream 078 * multipartStream.readBodyData(output); 079 * nextPart = multipartStream.readBoundary(); 080 * } 081 * } catch (MultipartInput.MalformedStreamException e) { 082 * // the stream failed to follow required syntax 083 * } catch (IOException e) { 084 * // a read or write error occurred 085 * } 086 * </pre> 087 */ 088public final class MultipartInput { 089 090 /** 091 * Builds a new {@link MultipartInput} instance. 092 * <p> 093 * For example: 094 * </p> 095 * 096 * <pre>{@code 097 * MultipartInput factory = MultipartInput.builder().setPath(path).setBufferSize(DEFAULT_THRESHOLD).get(); 098 * } 099 * </pre> 100 */ 101 public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> { 102 103 /** 104 * Boundary. 105 */ 106 private byte[] boundary; 107 108 /** 109 * Progress notifier. 110 */ 111 private ProgressNotifier progressNotifier; 112 113 /** The per part size limit for headers. 114 */ 115 private int partHeaderSizeMax = DEFAULT_PART_HEADER_SIZE_MAX; 116 117 /** 118 * Constructs a new instance. 119 */ 120 public Builder() { 121 setBufferSizeDefault(DEFAULT_BUFSIZE); 122 } 123 124 /** 125 * Constructs a new instance. 126 * <p> 127 * This builder uses the InputStream, buffer size, boundary and progress notifier aspects. 128 * </p> 129 * <p> 130 * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an 131 * {@link UnsupportedOperationException}. 132 * </p> 133 * 134 * @return a new instance. 135 * @throws IOException if an I/O error occurs. 136 * @throws UnsupportedOperationException if the origin cannot provide a Path. 137 * @see AbstractOrigin#getReader(Charset) 138 */ 139 @Override 140 public MultipartInput get() throws IOException { 141 return new MultipartInput(getInputStream(), boundary, getBufferSize(), getPartHeaderSizeMax(), progressNotifier); 142 } 143 144 /** Returns the per part size limit for headers. 145 * @return The maximum size of the headers in bytes. 146 * @since 2.0.0-M4 147 */ 148 public int getPartHeaderSizeMax() { 149 return partHeaderSizeMax; 150 } 151 152 /** 153 * Sets the boundary. 154 * 155 * @param boundary the boundary. 156 * @return {@code this} instance. 157 */ 158 public Builder setBoundary(final byte[] boundary) { 159 this.boundary = boundary; 160 return this; 161 } 162 163 /** Sets the per part size limit for headers. 164 * @param partHeaderSizeMax The maximum size of the headers in bytes. 165 * @return This builder. 166 * @since 2.0.0-M4 167 */ 168 public Builder setPartHeaderSizeMax(final int partHeaderSizeMax) { 169 this.partHeaderSizeMax = partHeaderSizeMax; 170 return this; 171 } 172 173 /** 174 * Sets the progress notifier. 175 * 176 * @param progressNotifier progress notifier. 177 * @return {@code this} instance. 178 */ 179 public Builder setProgressNotifier(final ProgressNotifier progressNotifier) { 180 this.progressNotifier = progressNotifier; 181 return this; 182 } 183 184 } 185 186 /** 187 * Signals an attempt to set an invalid boundary token. 188 */ 189 public static class FileUploadBoundaryException extends FileUploadException { 190 191 /** 192 * The UID to use when serializing this instance. 193 */ 194 private static final long serialVersionUID = 2; 195 196 /** 197 * Constructs an instance with the specified detail message. 198 * 199 * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method) 200 */ 201 public FileUploadBoundaryException(final String message) { 202 super(message); 203 } 204 205 } 206 207 /** 208 * An {@link InputStream} for reading an items contents. 209 */ 210 public class ItemInputStream extends InputStream { 211 212 /** 213 * Offset when converting negative bytes to integers. 214 */ 215 private static final int BYTE_POSITIVE_OFFSET = 256; 216 217 /** 218 * The number of bytes, which have been read so far. 219 */ 220 private long total; 221 222 /** 223 * The number of bytes, which must be hold, because they might be a part of the boundary. 224 */ 225 private int pad; 226 227 /** 228 * The current offset in the buffer. 229 */ 230 private int pos; 231 232 /** 233 * Whether the stream is already closed. 234 */ 235 private boolean closed; 236 237 /** 238 * Creates a new instance. 239 */ 240 ItemInputStream() { 241 findSeparator(); 242 } 243 244 /** 245 * Returns the number of bytes, which are currently available, without blocking. 246 * 247 * @throws IOException An I/O error occurs. 248 * @return Number of bytes in the buffer. 249 */ 250 @Override 251 public int available() throws IOException { 252 if (pos == -1) { 253 return tail - head - pad; 254 } 255 return pos - head; 256 } 257 258 private void checkOpen() throws ItemSkippedException { 259 if (closed) { 260 throw new FileItemInput.ItemSkippedException("checkOpen()"); 261 } 262 } 263 264 /** 265 * Closes the input stream. 266 * 267 * @throws IOException An I/O error occurred. 268 */ 269 @Override 270 public void close() throws IOException { 271 close(false); 272 } 273 274 /** 275 * Closes the input stream. 276 * 277 * @param closeUnderlying Whether to close the underlying stream (hard close) 278 * @throws IOException An I/O error occurred. 279 */ 280 public void close(final boolean closeUnderlying) throws IOException { 281 if (closed) { 282 return; 283 } 284 if (closeUnderlying) { 285 closed = true; 286 input.close(); 287 } else { 288 for (;;) { 289 var avail = available(); 290 if (avail == 0) { 291 avail = makeAvailable(); 292 if (avail == 0) { 293 break; 294 } 295 } 296 if (skip(avail) != avail) { 297 // TODO What to do? 298 } 299 } 300 } 301 closed = true; 302 } 303 304 /** 305 * Called for finding the separator. 306 */ 307 private void findSeparator() { 308 pos = MultipartInput.this.findSeparator(); 309 if (pos == -1) { 310 if (tail - head > keepRegion) { 311 pad = keepRegion; 312 } else { 313 pad = tail - head; 314 } 315 } 316 } 317 318 /** 319 * Gets the number of bytes, which have been read by the stream. 320 * 321 * @return Number of bytes, which have been read so far. 322 */ 323 public long getBytesRead() { 324 return total; 325 } 326 327 /** 328 * Tests whether this instance is closed. 329 * 330 * @return whether this instance is closed. 331 */ 332 public boolean isClosed() { 333 return closed; 334 } 335 336 /** 337 * Attempts to read more data. 338 * 339 * @return Number of available bytes 340 * @throws IOException An I/O error occurred. 341 */ 342 private int makeAvailable() throws IOException { 343 if (pos != -1) { 344 return 0; 345 } 346 347 // Move the data to the beginning of the buffer. 348 total += tail - head - pad; 349 System.arraycopy(buffer, tail - pad, buffer, 0, pad); 350 351 // Refill buffer with new data. 352 head = 0; 353 tail = pad; 354 355 for (;;) { 356 final var bytesRead = input.read(buffer, tail, bufSize - tail); 357 if (bytesRead == -1) { 358 // The last pad amount is left in the buffer. 359 // Boundary can't be in there so signal an error 360 // condition. 361 final var msg = "Stream ended unexpectedly"; 362 throw new MalformedStreamException(msg); 363 } 364 if (notifier != null) { 365 notifier.noteBytesRead(bytesRead); 366 } 367 tail += bytesRead; 368 369 findSeparator(); 370 final var av = available(); 371 372 if (av > 0 || pos != -1) { 373 return av; 374 } 375 } 376 } 377 378 /** 379 * Reads the next byte in the stream. 380 * 381 * @return The next byte in the stream, as a non-negative integer, or -1 for EOF. 382 * @throws IOException An I/O error occurred. 383 */ 384 @Override 385 public int read() throws IOException { 386 checkOpen(); 387 if (available() == 0 && makeAvailable() == 0) { 388 return -1; 389 } 390 ++total; 391 final int b = buffer[head++]; 392 if (b >= 0) { 393 return b; 394 } 395 return b + BYTE_POSITIVE_OFFSET; 396 } 397 398 /** 399 * Reads bytes into the given buffer. 400 * 401 * @param b The destination buffer, where to write to. 402 * @param off Offset of the first byte in the buffer. 403 * @param len Maximum number of bytes to read. 404 * @return Number of bytes, which have been actually read, or -1 for EOF. 405 * @throws IOException An I/O error occurred. 406 */ 407 @Override 408 public int read(final byte[] b, final int off, final int len) throws IOException { 409 checkOpen(); 410 if (len == 0) { 411 return 0; 412 } 413 var res = available(); 414 if (res == 0) { 415 res = makeAvailable(); 416 if (res == 0) { 417 return -1; 418 } 419 } 420 res = Math.min(res, len); 421 System.arraycopy(buffer, head, b, off, res); 422 head += res; 423 total += res; 424 return res; 425 } 426 427 /** 428 * Skips the given number of bytes. 429 * 430 * @param bytes Number of bytes to skip. 431 * @return The number of bytes, which have actually been skipped. 432 * @throws IOException An I/O error occurred. 433 */ 434 @Override 435 public long skip(final long bytes) throws IOException { 436 checkOpen(); 437 var available = available(); 438 if (available == 0) { 439 available = makeAvailable(); 440 if (available == 0) { 441 return 0; 442 } 443 } 444 // Fix "Implicit narrowing conversion in compound assignment" 445 // https://github.com/apache/commons-fileupload/security/code-scanning/118 446 // Math.min always returns an int because available is an int. 447 final var res = Math.toIntExact(Math.min(available, bytes)); 448 head += res; 449 return res; 450 } 451 452 } 453 454 /** 455 * Signals that the input stream fails to follow the required syntax. 456 */ 457 public static class MalformedStreamException extends FileUploadException { 458 459 /** 460 * The UID to use when serializing this instance. 461 */ 462 private static final long serialVersionUID = 2; 463 464 /** 465 * Constructs an {@code MalformedStreamException} with the specified detail message. 466 * 467 * @param message The detail message. 468 */ 469 public MalformedStreamException(final String message) { 470 super(message); 471 } 472 473 /** 474 * Constructs an {@code MalformedStreamException} with the specified detail message. 475 * 476 * @param message The detail message. 477 * @param cause The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the 478 * cause is nonexistent or unknown.) 479 */ 480 public MalformedStreamException(final String message, final Throwable cause) { 481 super(message, cause); 482 } 483 484 } 485 486 /** 487 * Internal class, which is used to invoke the {@link ProgressListener}. 488 */ 489 public static class ProgressNotifier { 490 491 /** 492 * The listener to invoke. 493 */ 494 private final ProgressListener progressListener; 495 496 /** 497 * Number of expected bytes, if known, or -1. 498 */ 499 private final long contentLength; 500 501 /** 502 * Number of bytes, which have been read so far. 503 */ 504 private long bytesRead; 505 506 /** 507 * Number of items, which have been read so far. 508 */ 509 private int items; 510 511 /** 512 * Creates a new instance with the given listener and content length. 513 * 514 * @param progressListener The listener to invoke. 515 * @param contentLength The expected content length. 516 */ 517 public ProgressNotifier(final ProgressListener progressListener, final long contentLength) { 518 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP; 519 this.contentLength = contentLength; 520 } 521 522 /** 523 * Called to indicate that bytes have been read. 524 * 525 * @param byteCount Number of bytes, which have been read. 526 */ 527 void noteBytesRead(final int byteCount) { 528 // 529 // Indicates, that the given number of bytes have been read from the input stream. 530 // 531 bytesRead += byteCount; 532 notifyListener(); 533 } 534 535 /** 536 * Called to indicate, that a new file item has been detected. 537 */ 538 public void noteItem() { 539 ++items; 540 notifyListener(); 541 } 542 543 /** 544 * Called for notifying the listener. 545 */ 546 private void notifyListener() { 547 progressListener.update(bytesRead, contentLength, items); 548 } 549 550 } 551 552 /** 553 * The Carriage Return ASCII character value. 554 */ 555 public static final byte CR = 0x0D; 556 557 /** 558 * The Line Feed ASCII character value. 559 */ 560 public static final byte LF = 0x0A; 561 562 /** 563 * The dash (-) ASCII character value. 564 */ 565 public static final byte DASH = 0x2D; 566 567 /** 568 * The default length of the buffer used for processing a request. 569 */ 570 static final int DEFAULT_BUFSIZE = 4096; 571 572 /** 573 * Default per part header size limit in bytes. 574 * @since 2.0.0-M4 575 */ 576 public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512; 577 578 /** 579 * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}). 580 */ 581 static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF }; 582 583 /** 584 * A byte sequence that that follows a delimiter that will be followed by an encapsulation ({@code CRLF}). 585 */ 586 static final byte[] FIELD_SEPARATOR = { CR, LF }; 587 588 /** 589 * A byte sequence that that follows a delimiter of the last encapsulation in the stream ({@code --}). 590 */ 591 static final byte[] STREAM_TERMINATOR = { DASH, DASH }; 592 593 /** 594 * A byte sequence that precedes a boundary ({@code CRLF--}). 595 */ 596 static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH }; 597 598 /** 599 * Compares {@code count} first bytes in the arrays {@code a} and {@code b}. 600 * 601 * @param a The first array to compare. 602 * @param b The second array to compare. 603 * @param count How many bytes should be compared. 604 * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal. 605 */ 606 static boolean arrayEquals(final byte[] a, final byte[] b, final int count) { 607 for (var i = 0; i < count; i++) { 608 if (a[i] != b[i]) { 609 return false; 610 } 611 } 612 return true; 613 } 614 615 /** 616 * Constructs a new {@link Builder}. 617 * 618 * @return a new {@link Builder}. 619 */ 620 public static Builder builder() { 621 return new Builder(); 622 } 623 624 /** 625 * The input stream from which data is read. 626 */ 627 private final InputStream input; 628 629 /** 630 * The length of the boundary token plus the leading {@code CRLF--}. 631 */ 632 private int boundaryLength; 633 634 /** 635 * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably. 636 */ 637 private final int keepRegion; 638 639 /** 640 * The byte sequence that partitions the stream. 641 */ 642 private final byte[] boundary; 643 644 /** 645 * The table for Knuth-Morris-Pratt search algorithm. 646 */ 647 private final int[] boundaryTable; 648 649 /** 650 * The length of the buffer used for processing the request. 651 */ 652 private final int bufSize; 653 654 /** 655 * The buffer used for processing the request. 656 */ 657 private final byte[] buffer; 658 659 /** 660 * The index of first valid character in the buffer. <br> 661 * 0 <= head < bufSize 662 */ 663 private int head; 664 665 /** 666 * The index of last valid character in the buffer + 1. <br> 667 * 0 <= tail <= bufSize 668 */ 669 private int tail; 670 671 /** 672 * The content encoding to use when reading headers. 673 */ 674 private Charset headerCharset; 675 676 /** 677 * The progress notifier, if any, or null. 678 */ 679 private final ProgressNotifier notifier; 680 681 /** 682 * The maximum size of the headers in bytes. 683 */ 684 private final int partHeaderSizeMax; 685 686 /** 687 * Constructs a {@code MultipartInput} with a custom size buffer. 688 * <p> 689 * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of 690 * data. Too small a buffer size setting will degrade performance. 691 * </p> 692 * 693 * @param input The {@code InputStream} to serve as a data source. 694 * @param boundary The token used for dividing the stream into {@code encapsulations}. 695 * @param bufferSize The size of the buffer to be used, in bytes. 696 * @param notifier The notifier, which is used for calling the progress listener, if any. 697 * @throws IllegalArgumentException If the buffer size is too small. 698 */ 699 private MultipartInput(final InputStream input, final byte[] boundary, final int bufferSize, final int partHeaderSizeMax, final ProgressNotifier notifier) { 700 if (boundary == null) { 701 throw new IllegalArgumentException("boundary may not be null"); 702 } 703 // We prepend CR/LF to the boundary to chop trailing CR/LF from 704 // body-data tokens. 705 this.boundaryLength = boundary.length + BOUNDARY_PREFIX.length; 706 if (bufferSize < this.boundaryLength + 1) { 707 throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small"); 708 } 709 710 this.input = input; 711 this.bufSize = Math.max(bufferSize, boundaryLength * 2); 712 this.buffer = new byte[this.bufSize]; 713 this.notifier = notifier; 714 this.partHeaderSizeMax = partHeaderSizeMax; 715 716 this.boundary = new byte[this.boundaryLength]; 717 this.boundaryTable = new int[this.boundaryLength + 1]; 718 this.keepRegion = this.boundary.length; 719 720 System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length); 721 System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length); 722 computeBoundaryTable(); 723 724 head = 0; 725 tail = 0; 726 } 727 728 /** 729 * Computes the table used for Knuth-Morris-Pratt search algorithm. 730 */ 731 private void computeBoundaryTable() { 732 var position = 2; 733 var candidate = 0; 734 735 boundaryTable[0] = -1; 736 boundaryTable[1] = 0; 737 738 while (position <= boundaryLength) { 739 if (boundary[position - 1] == boundary[candidate]) { 740 boundaryTable[position] = candidate + 1; 741 candidate++; 742 position++; 743 } else if (candidate > 0) { 744 candidate = boundaryTable[candidate]; 745 } else { 746 boundaryTable[position] = 0; 747 position++; 748 } 749 } 750 } 751 752 /** 753 * Reads {@code body-data} from the current {@code encapsulation} and discards it. 754 * <p> 755 * Use this method to skip encapsulations you don't need or don't understand. 756 * </p> 757 * 758 * @return The amount of data discarded. 759 * @throws MalformedStreamException if the stream ends unexpectedly. 760 * @throws IOException if an i/o error occurs. 761 */ 762 public long discardBodyData() throws MalformedStreamException, IOException { 763 return readBodyData(NullOutputStream.INSTANCE); 764 } 765 766 /** 767 * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}. 768 * 769 * @param value The value to find. 770 * @param pos The starting position for searching. 771 * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found. 772 */ 773 protected int findByte(final byte value, final int pos) { 774 for (var i = pos; i < tail; i++) { 775 if (buffer[i] == value) { 776 return i; 777 } 778 } 779 780 return -1; 781 } 782 783 /** 784 * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}. 785 * 786 * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found. 787 */ 788 protected int findSeparator() { 789 var bufferPos = this.head; 790 var tablePos = 0; 791 while (bufferPos < this.tail) { 792 while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) { 793 tablePos = boundaryTable[tablePos]; 794 } 795 bufferPos++; 796 tablePos++; 797 if (tablePos == boundaryLength) { 798 return bufferPos - boundaryLength; 799 } 800 } 801 return -1; 802 } 803 804 /** 805 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is 806 * used. 807 * 808 * @return The encoding used to read part headers. 809 */ 810 public Charset getHeaderCharset() { 811 return headerCharset; 812 } 813 814 /** Returns the per part size limit for headers. 815 * 816 * @return The maximum size of the headers in bytes. 817 * @since 2.0.0-M4 818 */ 819 public int getPartHeaderSizeMax() { 820 return partHeaderSizeMax; 821 } 822 823 /** 824 * Creates a new {@link ItemInputStream}. 825 * 826 * @return A new instance of {@link ItemInputStream}. 827 */ 828 public ItemInputStream newInputStream() { 829 return new ItemInputStream(); 830 } 831 832 /** 833 * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}. 834 * <p> 835 * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}). 836 * </p> 837 * 838 * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}. 839 * @return the amount of data written. 840 * @throws MalformedStreamException if the stream ends unexpectedly. 841 * @throws IOException if an i/o error occurs. 842 */ 843 public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException { 844 try (var inputStream = newInputStream()) { 845 return IOUtils.copyLarge(inputStream, output); 846 } 847 } 848 849 /** 850 * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream. 851 * 852 * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise. 853 * @throws FileUploadSizeException if the bytes read from the stream exceeded the size limits 854 * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax. 855 */ 856 public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException { 857 final var marker = new byte[2]; 858 final boolean nextChunk; 859 head += boundaryLength; 860 try { 861 marker[0] = readByte(); 862 if (marker[0] == LF) { 863 // Work around IE5 Mac bug with input type=image. 864 // Because the boundary delimiter, not including the trailing 865 // CRLF, must not appear within any file (RFC 2046, section 866 // 5.1.1), we know the missing CR is due to a buggy browser 867 // rather than a file containing something similar to a 868 // boundary. 869 return true; 870 } 871 872 marker[1] = readByte(); 873 if (arrayEquals(marker, STREAM_TERMINATOR, 2)) { 874 nextChunk = false; 875 } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) { 876 nextChunk = true; 877 } else { 878 throw new MalformedStreamException("Unexpected characters follow a boundary"); 879 } 880 } catch (final FileUploadSizeException e) { 881 throw e; 882 } catch (final IOException e) { 883 throw new MalformedStreamException("Stream ended unexpectedly", e); 884 } 885 return nextChunk; 886 } 887 888 /** 889 * Reads a byte from the {@code buffer}, and refills it as necessary. 890 * 891 * @return The next byte from the input stream. 892 * @throws IOException if there is no more data available. 893 */ 894 public byte readByte() throws IOException { 895 // Buffer depleted ? 896 if (head == tail) { 897 head = 0; 898 // Refill. 899 tail = input.read(buffer, head, bufSize); 900 if (tail == -1) { 901 // No more data available. 902 throw new IOException("No more data is available"); 903 } 904 if (notifier != null) { 905 notifier.noteBytesRead(tail); 906 } 907 } 908 return buffer[head++]; 909 } 910 911 /** 912 * Reads the {@code header-part} of the current {@code encapsulation}. 913 * <p> 914 * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application. 915 * </p> 916 * <p> 917 * <strong>TODO</strong> allow limiting maximum header size to protect against abuse. 918 * </p> 919 * 920 * @return The {@code header-part} of the current encapsulation. 921 * @throws FileUploadSizeException if the bytes read from the stream exceeded the size limits. 922 * @throws MalformedStreamException if the stream ends unexpectedly. 923 */ 924 public String readHeaders() throws FileUploadSizeException, MalformedStreamException { 925 var i = 0; 926 byte b; 927 // to support multi-byte characters 928 final var baos = new ByteArrayOutputStream(); 929 var size = 0; 930 while (i < HEADER_SEPARATOR.length) { 931 try { 932 b = readByte(); 933 } catch (final FileUploadSizeException e) { 934 // wraps a FileUploadSizeException, re-throw as it will be unwrapped later 935 throw e; 936 } catch (final IOException e) { 937 throw new MalformedStreamException("Stream ended unexpectedly", e); 938 } 939 final int phsm = getPartHeaderSizeMax(); 940 if (phsm != -1 && ++size > phsm) { 941 throw new FileUploadSizeException( 942 String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size); 943 } 944 if (b == HEADER_SEPARATOR[i]) { 945 i++; 946 } else { 947 i = 0; 948 } 949 baos.write(b); 950 } 951 try { 952 return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name()); 953 } catch (final UnsupportedEncodingException e) { 954 // not possible 955 throw new IllegalStateException(e); 956 } 957 } 958 959 /** 960 * Changes the boundary token used for partitioning the stream. 961 * <p> 962 * This method allows single pass processing of nested multipart streams. 963 * </p> 964 * <p> 965 * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream. 966 * </p> 967 * <p> 968 * Restoring the parent stream boundary token after processing of a nested stream is left to the application. 969 * </p> 970 * 971 * @param boundary The boundary to be used for parsing of the nested stream. 972 * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed. 973 */ 974 public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException { 975 if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) { 976 throw new FileUploadBoundaryException("The length of a boundary token cannot be changed"); 977 } 978 System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length); 979 computeBoundaryTable(); 980 } 981 982 /** 983 * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding 984 * is used. 985 * 986 * @param headerCharset The encoding used to read part headers. 987 */ 988 public void setHeaderCharset(final Charset headerCharset) { 989 this.headerCharset = headerCharset; 990 } 991 992 /** 993 * Finds the beginning of the first {@code encapsulation}. 994 * 995 * @return {@code true} if an {@code encapsulation} was found in the stream. 996 * @throws IOException if an i/o error occurs. 997 */ 998 public boolean skipPreamble() throws IOException { 999 // First delimiter may be not preceded with a CRLF. 1000 System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2); 1001 boundaryLength = boundary.length - 2; 1002 computeBoundaryTable(); 1003 try { 1004 // Discard all data up to the delimiter. 1005 discardBodyData(); 1006 1007 // Read boundary - if succeeded, the stream contains an 1008 // encapsulation. 1009 return readBoundary(); 1010 } catch (final MalformedStreamException e) { 1011 return false; 1012 } finally { 1013 // Restore delimiter. 1014 System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2); 1015 boundaryLength = boundary.length; 1016 boundary[0] = CR; 1017 boundary[1] = LF; 1018 computeBoundaryTable(); 1019 } 1020 } 1021 1022}