001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.common.collect.Streams; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.MustBeClosed; 030import java.io.BufferedReader; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.Reader; 034import java.io.StringReader; 035import java.io.UncheckedIOException; 036import java.io.Writer; 037import java.nio.charset.Charset; 038import java.util.Iterator; 039import java.util.List; 040import java.util.function.Consumer; 041import java.util.stream.Stream; 042import javax.annotation.CheckForNull; 043import org.checkerframework.checker.nullness.qual.Nullable; 044 045/** 046 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 047 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 048 * it is an immutable <i>supplier</i> of {@code Reader} instances. 049 * 050 * <p>{@code CharSource} provides two kinds of methods: 051 * 052 * <ul> 053 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 054 * instance each time they are called. The caller is responsible for ensuring that the 055 * returned reader is closed. 056 * <li><b>Convenience methods:</b> These are implementations of common operations that are 057 * typically implemented by opening a reader using one of the methods in the first category, 058 * doing something and finally closing the reader that was opened. 059 * </ul> 060 * 061 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 062 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 063 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 064 * be an empty line at the end if the contents are terminated with a line separator. 065 * 066 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 067 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 068 * 069 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources 070 * that provide readers that are: 071 * 072 * <ul> 073 * <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either 074 * block indefinitely or fail if the source creates an infinite reader. 075 * <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the 076 * source as they are read from it. A source that provides such readers will not be reusable, 077 * and operations that read from the stream (including {@link #length()}, in some 078 * implementations) will prevent further operations from completing as expected. 079 * </ul> 080 * 081 * @since 14.0 082 * @author Colin Decker 083 */ 084@GwtIncompatible 085@ElementTypesAreNonnullByDefault 086public abstract class CharSource { 087 088 /** Constructor for use by subclasses. */ 089 protected CharSource() {} 090 091 /** 092 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 093 * as bytes using the given {@link Charset}. 094 * 095 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 096 * the default implementation of this method will ensure that the original {@code CharSource} is 097 * returned, rather than round-trip encoding. Subclasses that override this method should behave 098 * the same way. 099 * 100 * @since 20.0 101 */ 102 @Beta 103 public ByteSource asByteSource(Charset charset) { 104 return new AsByteSource(charset); 105 } 106 107 /** 108 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 109 * reader each time it is called. 110 * 111 * <p>The caller is responsible for ensuring that the returned reader is closed. 112 * 113 * @throws IOException if an I/O error occurs while opening the reader 114 */ 115 public abstract Reader openStream() throws IOException; 116 117 /** 118 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 119 * independent reader each time it is called. 120 * 121 * <p>The caller is responsible for ensuring that the returned reader is closed. 122 * 123 * @throws IOException if an I/O error occurs while of opening the reader 124 */ 125 public BufferedReader openBufferedStream() throws IOException { 126 Reader reader = openStream(); 127 return (reader instanceof BufferedReader) 128 ? (BufferedReader) reader 129 : new BufferedReader(reader); 130 } 131 132 /** 133 * Opens a new {@link Stream} for reading text one line at a time from this source. This method 134 * returns a new, independent stream each time it is called. 135 * 136 * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an 137 * I/O error occurs while the stream is reading from the source or when the stream is closed, an 138 * {@link UncheckedIOException} is thrown. 139 * 140 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 141 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 142 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 143 * it does. 144 * 145 * <p>The caller is responsible for ensuring that the returned stream is closed. For example: 146 * 147 * <pre>{@code 148 * try (Stream<String> lines = source.lines()) { 149 * lines.map(...) 150 * .filter(...) 151 * .forEach(...); 152 * } 153 * }</pre> 154 * 155 * @throws IOException if an I/O error occurs while opening the stream 156 * @since 22.0 157 */ 158 @Beta 159 @MustBeClosed 160 public Stream<String> lines() throws IOException { 161 BufferedReader reader = openBufferedStream(); 162 return reader 163 .lines() 164 .onClose( 165 () -> { 166 try { 167 reader.close(); 168 } catch (IOException e) { 169 throw new UncheckedIOException(e); 170 } 171 }); 172 } 173 174 /** 175 * Returns the size of this source in chars, if the size can be easily determined without actually 176 * opening the data stream. 177 * 178 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 179 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 180 * that this method will return a different number of chars than would be returned by reading all 181 * of the chars. 182 * 183 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 184 * return a different number of chars if the contents are changed. 185 * 186 * @since 19.0 187 */ 188 @Beta 189 public Optional<Long> lengthIfKnown() { 190 return Optional.absent(); 191 } 192 193 /** 194 * Returns the length of this source in chars, even if doing so requires opening and traversing an 195 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 196 * 197 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 198 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 199 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 200 * were skipped. 201 * 202 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 203 * implementation, it is <i>possible</i> that this method will return a different number of chars 204 * than would be returned by reading all of the chars. 205 * 206 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 207 * number of chars if the contents are changed. 208 * 209 * @throws IOException if an I/O error occurs while reading the length of this source 210 * @since 19.0 211 */ 212 @Beta 213 public long length() throws IOException { 214 Optional<Long> lengthIfKnown = lengthIfKnown(); 215 if (lengthIfKnown.isPresent()) { 216 return lengthIfKnown.get(); 217 } 218 219 Closer closer = Closer.create(); 220 try { 221 Reader reader = closer.register(openStream()); 222 return countBySkipping(reader); 223 } catch (Throwable e) { 224 throw closer.rethrow(e); 225 } finally { 226 closer.close(); 227 } 228 } 229 230 private long countBySkipping(Reader reader) throws IOException { 231 long count = 0; 232 long read; 233 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 234 count += read; 235 } 236 return count; 237 } 238 239 /** 240 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 241 * Does not close {@code appendable} if it is {@code Closeable}. 242 * 243 * @return the number of characters copied 244 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 245 * appendable} 246 */ 247 @CanIgnoreReturnValue 248 public long copyTo(Appendable appendable) throws IOException { 249 checkNotNull(appendable); 250 251 Closer closer = Closer.create(); 252 try { 253 Reader reader = closer.register(openStream()); 254 return CharStreams.copy(reader, appendable); 255 } catch (Throwable e) { 256 throw closer.rethrow(e); 257 } finally { 258 closer.close(); 259 } 260 } 261 262 /** 263 * Copies the contents of this source to the given sink. 264 * 265 * @return the number of characters copied 266 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 267 * sink} 268 */ 269 @CanIgnoreReturnValue 270 public long copyTo(CharSink sink) throws IOException { 271 checkNotNull(sink); 272 273 Closer closer = Closer.create(); 274 try { 275 Reader reader = closer.register(openStream()); 276 Writer writer = closer.register(sink.openStream()); 277 return CharStreams.copy(reader, writer); 278 } catch (Throwable e) { 279 throw closer.rethrow(e); 280 } finally { 281 closer.close(); 282 } 283 } 284 285 /** 286 * Reads the contents of this source as a string. 287 * 288 * @throws IOException if an I/O error occurs while reading from this source 289 */ 290 public String read() throws IOException { 291 Closer closer = Closer.create(); 292 try { 293 Reader reader = closer.register(openStream()); 294 return CharStreams.toString(reader); 295 } catch (Throwable e) { 296 throw closer.rethrow(e); 297 } finally { 298 closer.close(); 299 } 300 } 301 302 /** 303 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 304 * 305 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 306 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 307 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 308 * it does. 309 * 310 * @throws IOException if an I/O error occurs while reading from this source 311 */ 312 @CheckForNull 313 public String readFirstLine() throws IOException { 314 Closer closer = Closer.create(); 315 try { 316 BufferedReader reader = closer.register(openBufferedStream()); 317 return reader.readLine(); 318 } catch (Throwable e) { 319 throw closer.rethrow(e); 320 } finally { 321 closer.close(); 322 } 323 } 324 325 /** 326 * Reads all the lines of this source as a list of strings. The returned list will be empty if 327 * this source is empty. 328 * 329 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 330 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 331 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 332 * it does. 333 * 334 * @throws IOException if an I/O error occurs while reading from this source 335 */ 336 public ImmutableList<String> readLines() throws IOException { 337 Closer closer = Closer.create(); 338 try { 339 BufferedReader reader = closer.register(openBufferedStream()); 340 List<String> result = Lists.newArrayList(); 341 String line; 342 while ((line = reader.readLine()) != null) { 343 result.add(line); 344 } 345 return ImmutableList.copyOf(result); 346 } catch (Throwable e) { 347 throw closer.rethrow(e); 348 } finally { 349 closer.close(); 350 } 351 } 352 353 /** 354 * Reads lines of text from this source, processing each line as it is read using the given {@link 355 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 356 * {@code false} and returns the result produced by the processor. 357 * 358 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 359 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 360 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 361 * it does. 362 * 363 * @throws IOException if an I/O error occurs while reading from this source or if {@code 364 * processor} throws an {@code IOException} 365 * @since 16.0 366 */ 367 @Beta 368 @CanIgnoreReturnValue // some processors won't return a useful result 369 @ParametricNullness 370 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 371 checkNotNull(processor); 372 373 Closer closer = Closer.create(); 374 try { 375 Reader reader = closer.register(openStream()); 376 return CharStreams.readLines(reader, processor); 377 } catch (Throwable e) { 378 throw closer.rethrow(e); 379 } finally { 380 closer.close(); 381 } 382 } 383 384 /** 385 * Reads all lines of text from this source, running the given {@code action} for each line as it 386 * is read. 387 * 388 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 389 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 390 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 391 * it does. 392 * 393 * @throws IOException if an I/O error occurs while reading from this source or if {@code action} 394 * throws an {@code UncheckedIOException} 395 * @since 22.0 396 */ 397 @Beta 398 public void forEachLine(Consumer<? super String> action) throws IOException { 399 try (Stream<String> lines = lines()) { 400 // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure 401 lines.forEachOrdered(action); 402 } catch (UncheckedIOException e) { 403 throw e.getCause(); 404 } 405 } 406 407 /** 408 * Returns whether the source has zero chars. The default implementation first checks {@link 409 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 410 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 411 * 412 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 413 * chars are actually available for reading. This means that a source may return {@code true} from 414 * {@code isEmpty()} despite having readable content. 415 * 416 * @throws IOException if an I/O error occurs 417 * @since 15.0 418 */ 419 public boolean isEmpty() throws IOException { 420 Optional<Long> lengthIfKnown = lengthIfKnown(); 421 if (lengthIfKnown.isPresent()) { 422 return lengthIfKnown.get() == 0L; 423 } 424 Closer closer = Closer.create(); 425 try { 426 Reader reader = closer.register(openStream()); 427 return reader.read() == -1; 428 } catch (Throwable e) { 429 throw closer.rethrow(e); 430 } finally { 431 closer.close(); 432 } 433 } 434 435 /** 436 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 437 * the source will contain the concatenated data from the streams of the underlying sources. 438 * 439 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 440 * close the open underlying stream. 441 * 442 * @param sources the sources to concatenate 443 * @return a {@code CharSource} containing the concatenated data 444 * @since 15.0 445 */ 446 public static CharSource concat(Iterable<? extends CharSource> sources) { 447 return new ConcatenatedCharSource(sources); 448 } 449 450 /** 451 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 452 * the source will contain the concatenated data from the streams of the underlying sources. 453 * 454 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 455 * close the open underlying stream. 456 * 457 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 458 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 459 * eagerly fetches data for each source when iterated (rather than producing sources that only 460 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 461 * possible. 462 * 463 * @param sources the sources to concatenate 464 * @return a {@code CharSource} containing the concatenated data 465 * @throws NullPointerException if any of {@code sources} is {@code null} 466 * @since 15.0 467 */ 468 public static CharSource concat(Iterator<? extends CharSource> sources) { 469 return concat(ImmutableList.copyOf(sources)); 470 } 471 472 /** 473 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 474 * the source will contain the concatenated data from the streams of the underlying sources. 475 * 476 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 477 * close the open underlying stream. 478 * 479 * @param sources the sources to concatenate 480 * @return a {@code CharSource} containing the concatenated data 481 * @throws NullPointerException if any of {@code sources} is {@code null} 482 * @since 15.0 483 */ 484 public static CharSource concat(CharSource... sources) { 485 return concat(ImmutableList.copyOf(sources)); 486 } 487 488 /** 489 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 490 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 491 * the {@code charSequence} is mutated while it is being read, so don't do that. 492 * 493 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 494 */ 495 public static CharSource wrap(CharSequence charSequence) { 496 return charSequence instanceof String 497 ? new StringCharSource((String) charSequence) 498 : new CharSequenceCharSource(charSequence); 499 } 500 501 /** 502 * Returns an immutable {@link CharSource} that contains no characters. 503 * 504 * @since 15.0 505 */ 506 public static CharSource empty() { 507 return EmptyCharSource.INSTANCE; 508 } 509 510 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 511 private final class AsByteSource extends ByteSource { 512 513 final Charset charset; 514 515 AsByteSource(Charset charset) { 516 this.charset = checkNotNull(charset); 517 } 518 519 @Override 520 public CharSource asCharSource(Charset charset) { 521 if (charset.equals(this.charset)) { 522 return CharSource.this; 523 } 524 return super.asCharSource(charset); 525 } 526 527 @Override 528 public InputStream openStream() throws IOException { 529 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 530 } 531 532 @Override 533 public String toString() { 534 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 535 } 536 } 537 538 private static class CharSequenceCharSource extends CharSource { 539 540 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 541 542 protected final CharSequence seq; 543 544 protected CharSequenceCharSource(CharSequence seq) { 545 this.seq = checkNotNull(seq); 546 } 547 548 @Override 549 public Reader openStream() { 550 return new CharSequenceReader(seq); 551 } 552 553 @Override 554 public String read() { 555 return seq.toString(); 556 } 557 558 @Override 559 public boolean isEmpty() { 560 return seq.length() == 0; 561 } 562 563 @Override 564 public long length() { 565 return seq.length(); 566 } 567 568 @Override 569 public Optional<Long> lengthIfKnown() { 570 return Optional.of((long) seq.length()); 571 } 572 573 /** 574 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 575 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 576 */ 577 private Iterator<String> linesIterator() { 578 return new AbstractIterator<String>() { 579 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 580 581 @Override 582 @CheckForNull 583 protected String computeNext() { 584 if (lines.hasNext()) { 585 String next = lines.next(); 586 // skip last line if it's empty 587 if (lines.hasNext() || !next.isEmpty()) { 588 return next; 589 } 590 } 591 return endOfData(); 592 } 593 }; 594 } 595 596 @Override 597 public Stream<String> lines() { 598 return Streams.stream(linesIterator()); 599 } 600 601 @Override 602 @CheckForNull 603 public String readFirstLine() { 604 Iterator<String> lines = linesIterator(); 605 return lines.hasNext() ? lines.next() : null; 606 } 607 608 @Override 609 public ImmutableList<String> readLines() { 610 return ImmutableList.copyOf(linesIterator()); 611 } 612 613 @Override 614 @ParametricNullness 615 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 616 Iterator<String> lines = linesIterator(); 617 while (lines.hasNext()) { 618 if (!processor.processLine(lines.next())) { 619 break; 620 } 621 } 622 return processor.getResult(); 623 } 624 625 @Override 626 public String toString() { 627 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 628 } 629 } 630 631 /** 632 * Subclass specialized for string instances. 633 * 634 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 635 * 636 * <ul> 637 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 638 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 639 * one with {@link CharSequence#charAt(int)}. 640 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 641 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 642 * can't change, and it is faster because many writers and appendables are optimized for 643 * appending string instances. 644 * </ul> 645 */ 646 private static class StringCharSource extends CharSequenceCharSource { 647 protected StringCharSource(String seq) { 648 super(seq); 649 } 650 651 @Override 652 public Reader openStream() { 653 return new StringReader((String) seq); 654 } 655 656 @Override 657 public long copyTo(Appendable appendable) throws IOException { 658 appendable.append(seq); 659 return seq.length(); 660 } 661 662 @Override 663 public long copyTo(CharSink sink) throws IOException { 664 checkNotNull(sink); 665 Closer closer = Closer.create(); 666 try { 667 Writer writer = closer.register(sink.openStream()); 668 writer.write((String) seq); 669 return seq.length(); 670 } catch (Throwable e) { 671 throw closer.rethrow(e); 672 } finally { 673 closer.close(); 674 } 675 } 676 } 677 678 private static final class EmptyCharSource extends StringCharSource { 679 680 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 681 682 private EmptyCharSource() { 683 super(""); 684 } 685 686 @Override 687 public String toString() { 688 return "CharSource.empty()"; 689 } 690 } 691 692 private static final class ConcatenatedCharSource extends CharSource { 693 694 private final Iterable<? extends CharSource> sources; 695 696 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 697 this.sources = checkNotNull(sources); 698 } 699 700 @Override 701 public Reader openStream() throws IOException { 702 return new MultiReader(sources.iterator()); 703 } 704 705 @Override 706 public boolean isEmpty() throws IOException { 707 for (CharSource source : sources) { 708 if (!source.isEmpty()) { 709 return false; 710 } 711 } 712 return true; 713 } 714 715 @Override 716 public Optional<Long> lengthIfKnown() { 717 long result = 0L; 718 for (CharSource source : sources) { 719 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 720 if (!lengthIfKnown.isPresent()) { 721 return Optional.absent(); 722 } 723 result += lengthIfKnown.get(); 724 } 725 return Optional.of(result); 726 } 727 728 @Override 729 public long length() throws IOException { 730 long result = 0L; 731 for (CharSource source : sources) { 732 result += source.length(); 733 } 734 return result; 735 } 736 737 @Override 738 public String toString() { 739 return "CharSource.concat(" + sources + ")"; 740 } 741 } 742}