001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.common.collect.Streams;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.MustBeClosed;
030import java.io.BufferedReader;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.Reader;
034import java.io.StringReader;
035import java.io.UncheckedIOException;
036import java.io.Writer;
037import java.nio.charset.Charset;
038import java.util.Iterator;
039import java.util.List;
040import java.util.function.Consumer;
041import java.util.stream.Stream;
042import javax.annotation.CheckForNull;
043import org.checkerframework.checker.nullness.qual.Nullable;
044
045/**
046 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
047 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
048 * it is an immutable <i>supplier</i> of {@code Reader} instances.
049 *
050 * <p>{@code CharSource} provides two kinds of methods:
051 *
052 * <ul>
053 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
054 *       instance each time they are called. The caller is responsible for ensuring that the
055 *       returned reader is closed.
056 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
057 *       typically implemented by opening a reader using one of the methods in the first category,
058 *       doing something and finally closing the reader that was opened.
059 * </ul>
060 *
061 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
062 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
063 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
064 * be an empty line at the end if the contents are terminated with a line separator.
065 *
066 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
067 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
068 *
069 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources
070 * that provide readers that are:
071 *
072 * <ul>
073 *   <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either
074 *       block indefinitely or fail if the source creates an infinite reader.
 *   <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the
 *       source as it is read from. A source that provides such readers will not be reusable,
077 *       and operations that read from the stream (including {@link #length()}, in some
078 *       implementations) will prevent further operations from completing as expected.
079 * </ul>
080 *
081 * @since 14.0
082 * @author Colin Decker
083 */
084@GwtIncompatible
085@ElementTypesAreNonnullByDefault
086public abstract class CharSource {
087
  /**
   * Constructor for use by subclasses. It takes no arguments and its body is empty, so it performs
   * no work of its own.
   */
  protected CharSource() {}
090
091  /**
092   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
093   * as bytes using the given {@link Charset}.
094   *
095   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
096   * the default implementation of this method will ensure that the original {@code CharSource} is
097   * returned, rather than round-trip encoding. Subclasses that override this method should behave
098   * the same way.
099   *
100   * @since 20.0
101   */
102  @Beta
103  public ByteSource asByteSource(Charset charset) {
104    return new AsByteSource(charset);
105  }
106
  /**
   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
   * reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @return a newly opened reader over the characters of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
116
117  /**
118   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
119   * independent reader each time it is called.
120   *
121   * <p>The caller is responsible for ensuring that the returned reader is closed.
122   *
123   * @throws IOException if an I/O error occurs while of opening the reader
124   */
125  public BufferedReader openBufferedStream() throws IOException {
126    Reader reader = openStream();
127    return (reader instanceof BufferedReader)
128        ? (BufferedReader) reader
129        : new BufferedReader(reader);
130  }
131
132  /**
133   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
134   * returns a new, independent stream each time it is called.
135   *
136   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
137   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
138   * {@link UncheckedIOException} is thrown.
139   *
140   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
141   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
142   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
143   * it does.
144   *
145   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
146   *
147   * <pre>{@code
148   * try (Stream<String> lines = source.lines()) {
149   *   lines.map(...)
150   *      .filter(...)
151   *      .forEach(...);
152   * }
153   * }</pre>
154   *
155   * @throws IOException if an I/O error occurs while opening the stream
156   * @since 22.0
157   */
158  @Beta
159  @MustBeClosed
160  public Stream<String> lines() throws IOException {
161    BufferedReader reader = openBufferedStream();
162    return reader
163        .lines()
164        .onClose(
165            () -> {
166              try {
167                reader.close();
168              } catch (IOException e) {
169                throw new UncheckedIOException(e);
170              }
171            });
172  }
173
174  /**
175   * Returns the size of this source in chars, if the size can be easily determined without actually
176   * opening the data stream.
177   *
178   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
179   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
180   * that this method will return a different number of chars than would be returned by reading all
181   * of the chars.
182   *
183   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
184   * return a different number of chars if the contents are changed.
185   *
186   * @since 19.0
187   */
188  @Beta
189  public Optional<Long> lengthIfKnown() {
190    return Optional.absent();
191  }
192
193  /**
194   * Returns the length of this source in chars, even if doing so requires opening and traversing an
195   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
196   *
197   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
198   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
199   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
200   * were skipped.
201   *
202   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
203   * implementation, it is <i>possible</i> that this method will return a different number of chars
204   * than would be returned by reading all of the chars.
205   *
206   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
207   * number of chars if the contents are changed.
208   *
209   * @throws IOException if an I/O error occurs while reading the length of this source
210   * @since 19.0
211   */
212  @Beta
213  public long length() throws IOException {
214    Optional<Long> lengthIfKnown = lengthIfKnown();
215    if (lengthIfKnown.isPresent()) {
216      return lengthIfKnown.get();
217    }
218
219    Closer closer = Closer.create();
220    try {
221      Reader reader = closer.register(openStream());
222      return countBySkipping(reader);
223    } catch (Throwable e) {
224      throw closer.rethrow(e);
225    } finally {
226      closer.close();
227    }
228  }
229
230  private long countBySkipping(Reader reader) throws IOException {
231    long count = 0;
232    long read;
233    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
234      count += read;
235    }
236    return count;
237  }
238
239  /**
240   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
241   * Does not close {@code appendable} if it is {@code Closeable}.
242   *
243   * @return the number of characters copied
244   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
245   *     appendable}
246   */
247  @CanIgnoreReturnValue
248  public long copyTo(Appendable appendable) throws IOException {
249    checkNotNull(appendable);
250
251    Closer closer = Closer.create();
252    try {
253      Reader reader = closer.register(openStream());
254      return CharStreams.copy(reader, appendable);
255    } catch (Throwable e) {
256      throw closer.rethrow(e);
257    } finally {
258      closer.close();
259    }
260  }
261
262  /**
263   * Copies the contents of this source to the given sink.
264   *
265   * @return the number of characters copied
266   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
267   *     sink}
268   */
269  @CanIgnoreReturnValue
270  public long copyTo(CharSink sink) throws IOException {
271    checkNotNull(sink);
272
273    Closer closer = Closer.create();
274    try {
275      Reader reader = closer.register(openStream());
276      Writer writer = closer.register(sink.openStream());
277      return CharStreams.copy(reader, writer);
278    } catch (Throwable e) {
279      throw closer.rethrow(e);
280    } finally {
281      closer.close();
282    }
283  }
284
285  /**
286   * Reads the contents of this source as a string.
287   *
288   * @throws IOException if an I/O error occurs while reading from this source
289   */
290  public String read() throws IOException {
291    Closer closer = Closer.create();
292    try {
293      Reader reader = closer.register(openStream());
294      return CharStreams.toString(reader);
295    } catch (Throwable e) {
296      throw closer.rethrow(e);
297    } finally {
298      closer.close();
299    }
300  }
301
302  /**
303   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
304   *
305   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
306   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
307   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
308   * it does.
309   *
310   * @throws IOException if an I/O error occurs while reading from this source
311   */
312  @CheckForNull
313  public String readFirstLine() throws IOException {
314    Closer closer = Closer.create();
315    try {
316      BufferedReader reader = closer.register(openBufferedStream());
317      return reader.readLine();
318    } catch (Throwable e) {
319      throw closer.rethrow(e);
320    } finally {
321      closer.close();
322    }
323  }
324
325  /**
326   * Reads all the lines of this source as a list of strings. The returned list will be empty if
327   * this source is empty.
328   *
329   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
330   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
331   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
332   * it does.
333   *
334   * @throws IOException if an I/O error occurs while reading from this source
335   */
336  public ImmutableList<String> readLines() throws IOException {
337    Closer closer = Closer.create();
338    try {
339      BufferedReader reader = closer.register(openBufferedStream());
340      List<String> result = Lists.newArrayList();
341      String line;
342      while ((line = reader.readLine()) != null) {
343        result.add(line);
344      }
345      return ImmutableList.copyOf(result);
346    } catch (Throwable e) {
347      throw closer.rethrow(e);
348    } finally {
349      closer.close();
350    }
351  }
352
353  /**
354   * Reads lines of text from this source, processing each line as it is read using the given {@link
355   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
356   * {@code false} and returns the result produced by the processor.
357   *
358   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
359   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
360   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
361   * it does.
362   *
363   * @throws IOException if an I/O error occurs while reading from this source or if {@code
364   *     processor} throws an {@code IOException}
365   * @since 16.0
366   */
367  @Beta
368  @CanIgnoreReturnValue // some processors won't return a useful result
369  @ParametricNullness
370  public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
371    checkNotNull(processor);
372
373    Closer closer = Closer.create();
374    try {
375      Reader reader = closer.register(openStream());
376      return CharStreams.readLines(reader, processor);
377    } catch (Throwable e) {
378      throw closer.rethrow(e);
379    } finally {
380      closer.close();
381    }
382  }
383
384  /**
385   * Reads all lines of text from this source, running the given {@code action} for each line as it
386   * is read.
387   *
388   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
389   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
390   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
391   * it does.
392   *
393   * @throws IOException if an I/O error occurs while reading from this source or if {@code action}
394   *     throws an {@code UncheckedIOException}
395   * @since 22.0
396   */
397  @Beta
398  public void forEachLine(Consumer<? super String> action) throws IOException {
399    try (Stream<String> lines = lines()) {
400      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
401      lines.forEachOrdered(action);
402    } catch (UncheckedIOException e) {
403      throw e.getCause();
404    }
405  }
406
407  /**
408   * Returns whether the source has zero chars. The default implementation first checks {@link
409   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
410   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
411   *
412   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
413   * chars are actually available for reading. This means that a source may return {@code true} from
414   * {@code isEmpty()} despite having readable content.
415   *
416   * @throws IOException if an I/O error occurs
417   * @since 15.0
418   */
419  public boolean isEmpty() throws IOException {
420    Optional<Long> lengthIfKnown = lengthIfKnown();
421    if (lengthIfKnown.isPresent()) {
422      return lengthIfKnown.get() == 0L;
423    }
424    Closer closer = Closer.create();
425    try {
426      Reader reader = closer.register(openStream());
427      return reader.read() == -1;
428    } catch (Throwable e) {
429      throw closer.rethrow(e);
430    } finally {
431      closer.close();
432    }
433  }
434
  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>The given {@code Iterable} is not copied by this method; the returned source keeps a
   * reference to it.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(Iterable<? extends CharSource> sources) {
    return new ConcatenatedCharSource(sources);
  }
449
450  /**
451   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
452   * the source will contain the concatenated data from the streams of the underlying sources.
453   *
454   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
455   * close the open underlying stream.
456   *
457   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
458   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
459   * eagerly fetches data for each source when iterated (rather than producing sources that only
460   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
461   * possible.
462   *
463   * @param sources the sources to concatenate
464   * @return a {@code CharSource} containing the concatenated data
465   * @throws NullPointerException if any of {@code sources} is {@code null}
466   * @since 15.0
467   */
468  public static CharSource concat(Iterator<? extends CharSource> sources) {
469    return concat(ImmutableList.copyOf(sources));
470  }
471
472  /**
473   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
474   * the source will contain the concatenated data from the streams of the underlying sources.
475   *
476   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
477   * close the open underlying stream.
478   *
479   * @param sources the sources to concatenate
480   * @return a {@code CharSource} containing the concatenated data
481   * @throws NullPointerException if any of {@code sources} is {@code null}
482   * @since 15.0
483   */
484  public static CharSource concat(CharSource... sources) {
485    return concat(ImmutableList.copyOf(sources));
486  }
487
488  /**
489   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
490   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
491   * the {@code charSequence} is mutated while it is being read, so don't do that.
492   *
493   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
494   */
495  public static CharSource wrap(CharSequence charSequence) {
496    return charSequence instanceof String
497        ? new StringCharSource((String) charSequence)
498        : new CharSequenceCharSource(charSequence);
499  }
500
  /**
   * Returns an immutable {@link CharSource} that contains no characters.
   *
   * <p>Every call returns the same shared {@code EmptyCharSource.INSTANCE}.
   *
   * @return the shared empty source
   * @since 15.0
   */
  public static CharSource empty() {
    return EmptyCharSource.INSTANCE;
  }
509
510  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
511  private final class AsByteSource extends ByteSource {
512
513    final Charset charset;
514
515    AsByteSource(Charset charset) {
516      this.charset = checkNotNull(charset);
517    }
518
519    @Override
520    public CharSource asCharSource(Charset charset) {
521      if (charset.equals(this.charset)) {
522        return CharSource.this;
523      }
524      return super.asCharSource(charset);
525    }
526
527    @Override
528    public InputStream openStream() throws IOException {
529      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
530    }
531
532    @Override
533    public String toString() {
534      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
535    }
536  }
537
538  private static class CharSequenceCharSource extends CharSource {
539
540    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
541
542    protected final CharSequence seq;
543
544    protected CharSequenceCharSource(CharSequence seq) {
545      this.seq = checkNotNull(seq);
546    }
547
548    @Override
549    public Reader openStream() {
550      return new CharSequenceReader(seq);
551    }
552
553    @Override
554    public String read() {
555      return seq.toString();
556    }
557
558    @Override
559    public boolean isEmpty() {
560      return seq.length() == 0;
561    }
562
563    @Override
564    public long length() {
565      return seq.length();
566    }
567
568    @Override
569    public Optional<Long> lengthIfKnown() {
570      return Optional.of((long) seq.length());
571    }
572
573    /**
574     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
575     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
576     */
577    private Iterator<String> linesIterator() {
578      return new AbstractIterator<String>() {
579        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
580
581        @Override
582        @CheckForNull
583        protected String computeNext() {
584          if (lines.hasNext()) {
585            String next = lines.next();
586            // skip last line if it's empty
587            if (lines.hasNext() || !next.isEmpty()) {
588              return next;
589            }
590          }
591          return endOfData();
592        }
593      };
594    }
595
596    @Override
597    public Stream<String> lines() {
598      return Streams.stream(linesIterator());
599    }
600
601    @Override
602    @CheckForNull
603    public String readFirstLine() {
604      Iterator<String> lines = linesIterator();
605      return lines.hasNext() ? lines.next() : null;
606    }
607
608    @Override
609    public ImmutableList<String> readLines() {
610      return ImmutableList.copyOf(linesIterator());
611    }
612
613    @Override
614    @ParametricNullness
615    public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
616      Iterator<String> lines = linesIterator();
617      while (lines.hasNext()) {
618        if (!processor.processLine(lines.next())) {
619          break;
620        }
621      }
622      return processor.getResult();
623    }
624
625    @Override
626    public String toString() {
627      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
628    }
629  }
630
631  /**
632   * Subclass specialized for string instances.
633   *
634   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
635   *
636   * <ul>
637   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
638   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
639   *       one with {@link CharSequence#charAt(int)}.
640   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
641   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
642   *       can't change, and it is faster because many writers and appendables are optimized for
643   *       appending string instances.
644   * </ul>
645   */
646  private static class StringCharSource extends CharSequenceCharSource {
647    protected StringCharSource(String seq) {
648      super(seq);
649    }
650
651    @Override
652    public Reader openStream() {
653      return new StringReader((String) seq);
654    }
655
656    @Override
657    public long copyTo(Appendable appendable) throws IOException {
658      appendable.append(seq);
659      return seq.length();
660    }
661
662    @Override
663    public long copyTo(CharSink sink) throws IOException {
664      checkNotNull(sink);
665      Closer closer = Closer.create();
666      try {
667        Writer writer = closer.register(sink.openStream());
668        writer.write((String) seq);
669        return seq.length();
670      } catch (Throwable e) {
671        throw closer.rethrow(e);
672      } finally {
673        closer.close();
674      }
675    }
676  }
677
  /**
   * The source with no characters: a {@code StringCharSource} over the empty string, exposed
   * through the single shared {@link #INSTANCE}.
   */
  private static final class EmptyCharSource extends StringCharSource {

    /** The only instance; the constructor is private. */
    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }
691
692  private static final class ConcatenatedCharSource extends CharSource {
693
694    private final Iterable<? extends CharSource> sources;
695
696    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
697      this.sources = checkNotNull(sources);
698    }
699
700    @Override
701    public Reader openStream() throws IOException {
702      return new MultiReader(sources.iterator());
703    }
704
705    @Override
706    public boolean isEmpty() throws IOException {
707      for (CharSource source : sources) {
708        if (!source.isEmpty()) {
709          return false;
710        }
711      }
712      return true;
713    }
714
715    @Override
716    public Optional<Long> lengthIfKnown() {
717      long result = 0L;
718      for (CharSource source : sources) {
719        Optional<Long> lengthIfKnown = source.lengthIfKnown();
720        if (!lengthIfKnown.isPresent()) {
721          return Optional.absent();
722        }
723        result += lengthIfKnown.get();
724      }
725      return Optional.of(result);
726    }
727
728    @Override
729    public long length() throws IOException {
730      long result = 0L;
731      for (CharSource source : sources) {
732        result += source.length();
733      }
734      return result;
735    }
736
737    @Override
738    public String toString() {
739      return "CharSource.concat(" + sources + ")";
740    }
741  }
742}