001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.Objects; 031import com.google.errorprone.annotations.concurrent.LazyInit; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import javax.annotation.CheckForNull; 039 040/** 041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 042 * strings. This class includes several constants for encoding schemes specified by <a 043 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 044 * 045 * <pre>{@code 046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII)) 047 * }</pre> 048 * 049 * <p>returns the string {@code "MZXW6==="}, and 050 * 051 * <pre>{@code 052 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 053 * }</pre> 054 * 055 * <p>...returns the ASCII bytes of the string {@code "foo"}. 056 * 057 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 058 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 059 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 060 * behavior: 061 * 062 * <pre>{@code 063 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 064 * }</pre> 065 * 066 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 067 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 068 * 069 * <pre>{@code 070 * // Do NOT do this 071 * BaseEncoding hex = BaseEncoding.base16(); 072 * hex.lowerCase(); // does nothing! 073 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 074 * }</pre> 075 * 076 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 077 * x}, but the reverse does not necessarily hold. 078 * 079 * <table> 080 * <caption>Encodings</caption> 081 * <tr> 082 * <th>Encoding 083 * <th>Alphabet 084 * <th>{@code char:byte} ratio 085 * <th>Default padding 086 * <th>Comments 087 * <tr> 088 * <td>{@link #base16()} 089 * <td>0-9 A-F 090 * <td>2.00 091 * <td>N/A 092 * <td>Traditional hexadecimal. Defaults to upper case. 093 * <tr> 094 * <td>{@link #base32()} 095 * <td>A-Z 2-7 096 * <td>1.60 097 * <td>= 098 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 099 * <tr> 100 * <td>{@link #base32Hex()} 101 * <td>0-9 A-V 102 * <td>1.60 103 * <td>= 104 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 105 * <tr> 106 * <td>{@link #base64()} 107 * <td>A-Z a-z 0-9 + / 108 * <td>1.33 109 * <td>= 110 * <td> 111 * <tr> 112 * <td>{@link #base64Url()} 113 * <td>A-Z a-z 0-9 - _ 114 * <td>1.33 115 * <td>= 116 * <td>Safe to use as filenames, or to pass in URLs without escaping 117 * </table> 118 * 119 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 120 * 121 * @author Louis Wasserman 122 * @since 14.0 123 */ 124@GwtCompatible(emulated = true) 125@ElementTypesAreNonnullByDefault 126public abstract class BaseEncoding { 127 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 128 129 BaseEncoding() {} 130 131 /** 132 * Exception indicating invalid base-encoded input encountered while decoding. 133 * 134 * @author Louis Wasserman 135 * @since 15.0 136 */ 137 public static final class DecodingException extends IOException { 138 DecodingException(String message) { 139 super(message); 140 } 141 142 DecodingException(Throwable cause) { 143 super(cause); 144 } 145 } 146 147 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 148 public String encode(byte[] bytes) { 149 return encode(bytes, 0, bytes.length); 150 } 151 152 /** 153 * Encodes the specified range of the specified byte array, and returns the encoded {@code 154 * String}. 155 */ 156 public final String encode(byte[] bytes, int off, int len) { 157 checkPositionIndexes(off, off + len, bytes.length); 158 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 159 try { 160 encodeTo(result, bytes, off, len); 161 } catch (IOException impossible) { 162 throw new AssertionError(impossible); 163 } 164 return result.toString(); 165 } 166 167 /** 168 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 169 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 170 * Writer}. 171 */ 172 @GwtIncompatible // Writer,OutputStream 173 public abstract OutputStream encodingStream(Writer writer); 174 175 /** 176 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 177 */ 178 @GwtIncompatible // ByteSink,CharSink 179 public final ByteSink encodingSink(CharSink encodedSink) { 180 checkNotNull(encodedSink); 181 return new ByteSink() { 182 @Override 183 public OutputStream openStream() throws IOException { 184 return encodingStream(encodedSink.openStream()); 185 } 186 }; 187 } 188 189 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 190 191 private static byte[] extract(byte[] result, int length) { 192 if (length == result.length) { 193 return result; 194 } 195 byte[] trunc = new byte[length]; 196 System.arraycopy(result, 0, trunc, 0, length); 197 return trunc; 198 } 199 200 /** 201 * Determines whether the specified character sequence is a valid encoded string according to this 202 * encoding. 203 * 204 * @since 20.0 205 */ 206 public abstract boolean canDecode(CharSequence chars); 207 208 /** 209 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 210 * inverse operation to {@link #encode(byte[])}. 211 * 212 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 213 * encoding. 214 */ 215 public final byte[] decode(CharSequence chars) { 216 try { 217 return decodeChecked(chars); 218 } catch (DecodingException badInput) { 219 throw new IllegalArgumentException(badInput); 220 } 221 } 222 223 /** 224 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 225 * inverse operation to {@link #encode(byte[])}. 226 * 227 * @throws DecodingException if the input is not a valid encoded string according to this 228 * encoding. 229 */ 230 final byte[] decodeChecked(CharSequence chars) 231 throws DecodingException { 232 chars = trimTrailingPadding(chars); 233 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 234 int len = decodeTo(tmp, chars); 235 return extract(tmp, len); 236 } 237 238 /** 239 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 240 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 241 */ 242 @GwtIncompatible // Reader,InputStream 243 public abstract InputStream decodingStream(Reader reader); 244 245 /** 246 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 247 * CharSource}. 248 */ 249 @GwtIncompatible // ByteSource,CharSource 250 public final ByteSource decodingSource(CharSource encodedSource) { 251 checkNotNull(encodedSource); 252 return new ByteSource() { 253 @Override 254 public InputStream openStream() throws IOException { 255 return decodingStream(encodedSource.openStream()); 256 } 257 }; 258 } 259 260 // Implementations for encoding/decoding 261 262 abstract int maxEncodedSize(int bytes); 263 264 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 265 266 abstract int maxDecodedSize(int chars); 267 268 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 269 270 CharSequence trimTrailingPadding(CharSequence chars) { 271 return checkNotNull(chars); 272 } 273 274 // Modified encoding generators 275 276 /** 277 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 278 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 279 * section 3.2</a>, Padding of Encoded Data. 280 */ 281 public abstract BaseEncoding omitPadding(); 282 283 /** 284 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 285 * for padding. 286 * 287 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 288 * separator 289 */ 290 public abstract BaseEncoding withPadChar(char padChar); 291 292 /** 293 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 294 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 295 * are skipped over in decoding. 296 * 297 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 298 * string, or if {@code n <= 0} 299 * @throws UnsupportedOperationException if this encoding already uses a separator 300 */ 301 public abstract BaseEncoding withSeparator(String separator, int n); 302 303 /** 304 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 305 * uppercase letters. Padding and separator characters remain in their original case. 306 * 307 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 308 * lower-case characters 309 */ 310 public abstract BaseEncoding upperCase(); 311 312 /** 313 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 314 * lowercase letters. Padding and separator characters remain in their original case. 315 * 316 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 317 * lower-case characters 318 */ 319 public abstract BaseEncoding lowerCase(); 320 321 private static final BaseEncoding BASE64 = 322 new Base64Encoding( 323 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 324 325 /** 326 * The "base64" base encoding specified by <a 327 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 328 * (This is the same as the base 64 encoding from <a 329 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 330 * 331 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 332 * omitted} or {@linkplain #withPadChar(char) replaced}. 333 * 334 * <p>No line feeds are added by default, as per <a 335 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 336 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 337 */ 338 public static BaseEncoding base64() { 339 return BASE64; 340 } 341 342 private static final BaseEncoding BASE64_URL = 343 new Base64Encoding( 344 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 345 346 /** 347 * The "base64url" encoding specified by <a 348 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 349 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 350 * is the same as the base 64 encoding with URL and filename safe alphabet from <a 351 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 352 * 353 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 354 * omitted} or {@linkplain #withPadChar(char) replaced}. 355 * 356 * <p>No line feeds are added by default, as per <a 357 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 358 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 359 */ 360 public static BaseEncoding base64Url() { 361 return BASE64_URL; 362 } 363 364 private static final BaseEncoding BASE32 = 365 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 366 367 /** 368 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 369 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a 370 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 371 * 372 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 373 * omitted} or {@linkplain #withPadChar(char) replaced}. 374 * 375 * <p>No line feeds are added by default, as per <a 376 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 377 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 378 */ 379 public static BaseEncoding base32() { 380 return BASE32; 381 } 382 383 private static final BaseEncoding BASE32_HEX = 384 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 385 386 /** 387 * The "base32hex" encoding specified by <a 388 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 389 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 390 * 391 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 392 * omitted} or {@linkplain #withPadChar(char) replaced}. 393 * 394 * <p>No line feeds are added by default, as per <a 395 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 396 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 397 */ 398 public static BaseEncoding base32Hex() { 399 return BASE32_HEX; 400 } 401 402 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 403 404 /** 405 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 406 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a 407 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 408 * "hexadecimal" format. 409 * 410 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 411 * have no effect. 412 * 413 * <p>No line feeds are added by default, as per <a 414 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 415 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 416 */ 417 public static BaseEncoding base16() { 418 return BASE16; 419 } 420 421 private static final class Alphabet { 422 private final String name; 423 // this is meant to be immutable -- don't modify it! 424 private final char[] chars; 425 final int mask; 426 final int bitsPerChar; 427 final int charsPerChunk; 428 final int bytesPerChunk; 429 private final byte[] decodabet; 430 private final boolean[] validPadding; 431 432 Alphabet(String name, char[] chars) { 433 this.name = checkNotNull(name); 434 this.chars = checkNotNull(chars); 435 try { 436 this.bitsPerChar = log2(chars.length, UNNECESSARY); 437 } catch (ArithmeticException e) { 438 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 439 } 440 441 /* 442 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 443 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 444 */ 445 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 446 try { 447 this.charsPerChunk = 8 / gcd; 448 this.bytesPerChunk = bitsPerChar / gcd; 449 } catch (ArithmeticException e) { 450 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 451 } 452 453 this.mask = chars.length - 1; 454 455 byte[] decodabet = new byte[Ascii.MAX + 1]; 456 Arrays.fill(decodabet, (byte) -1); 457 for (int i = 0; i < chars.length; i++) { 458 char c = chars[i]; 459 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 460 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 461 decodabet[c] = (byte) i; 462 } 463 this.decodabet = decodabet; 464 465 boolean[] validPadding = new boolean[charsPerChunk]; 466 for (int i = 0; i < bytesPerChunk; i++) { 467 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 468 } 469 this.validPadding = validPadding; 470 } 471 472 char encode(int bits) { 473 return chars[bits]; 474 } 475 476 boolean isValidPaddingStartPosition(int index) { 477 return validPadding[index % charsPerChunk]; 478 } 479 480 boolean canDecode(char ch) { 481 return ch <= Ascii.MAX && decodabet[ch] != -1; 482 } 483 484 int decode(char ch) throws DecodingException { 485 if (ch > Ascii.MAX) { 486 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 487 } 488 int result = decodabet[ch]; 489 if (result == -1) { 490 if (ch <= 0x20 || ch == Ascii.MAX) { 491 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 492 } else { 493 throw new DecodingException("Unrecognized character: " + ch); 494 } 495 } 496 return result; 497 } 498 499 private boolean hasLowerCase() { 500 for (char c : chars) { 501 if (Ascii.isLowerCase(c)) { 502 return true; 503 } 504 } 505 return false; 506 } 507 508 private boolean hasUpperCase() { 509 for (char c : chars) { 510 if (Ascii.isUpperCase(c)) { 511 return true; 512 } 513 } 514 return false; 515 } 516 517 Alphabet upperCase() { 518 if (!hasLowerCase()) { 519 return this; 520 } 521 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 522 char[] upperCased = new char[chars.length]; 523 for (int i = 0; i < chars.length; i++) { 524 upperCased[i] = Ascii.toUpperCase(chars[i]); 525 } 526 return new Alphabet(name + ".upperCase()", upperCased); 527 } 528 529 Alphabet lowerCase() { 530 if (!hasUpperCase()) { 531 return this; 532 } 533 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 534 char[] lowerCased = new char[chars.length]; 535 for (int i = 0; i < chars.length; i++) { 536 lowerCased[i] = Ascii.toLowerCase(chars[i]); 537 } 538 return new Alphabet(name + ".lowerCase()", lowerCased); 539 } 540 541 public boolean matches(char c) { 542 return c < decodabet.length && decodabet[c] != -1; 543 } 544 545 @Override 546 public String toString() { 547 return name; 548 } 549 550 @Override 551 public boolean equals(@CheckForNull Object other) { 552 if (other instanceof Alphabet) { 553 Alphabet that = (Alphabet) other; 554 return Arrays.equals(this.chars, that.chars); 555 } 556 return false; 557 } 558 559 @Override 560 public int hashCode() { 561 return Arrays.hashCode(chars); 562 } 563 } 564 565 static class StandardBaseEncoding extends BaseEncoding { 566 // TODO(lowasser): provide a useful toString 567 final Alphabet alphabet; 568 569 @CheckForNull final Character paddingChar; 570 571 StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 572 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 573 } 574 575 StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 576 this.alphabet = checkNotNull(alphabet); 577 checkArgument( 578 paddingChar == null || !alphabet.matches(paddingChar), 579 "Padding character %s was already in alphabet", 580 paddingChar); 581 this.paddingChar = paddingChar; 582 } 583 584 @Override 585 int maxEncodedSize(int bytes) { 586 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 587 } 588 589 @GwtIncompatible // Writer,OutputStream 590 @Override 591 public OutputStream encodingStream(Writer out) { 592 checkNotNull(out); 593 return new OutputStream() { 594 int bitBuffer = 0; 595 int bitBufferLength = 0; 596 int writtenChars = 0; 597 598 @Override 599 public void write(int b) throws IOException { 600 bitBuffer <<= 8; 601 bitBuffer |= b & 0xFF; 602 bitBufferLength += 8; 603 while (bitBufferLength >= alphabet.bitsPerChar) { 604 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 605 out.write(alphabet.encode(charIndex)); 606 writtenChars++; 607 bitBufferLength -= alphabet.bitsPerChar; 608 } 609 } 610 611 @Override 612 public void flush() throws IOException { 613 out.flush(); 614 } 615 616 @Override 617 public void close() throws IOException { 618 if (bitBufferLength > 0) { 619 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 620 out.write(alphabet.encode(charIndex)); 621 writtenChars++; 622 if (paddingChar != null) { 623 while (writtenChars % alphabet.charsPerChunk != 0) { 624 out.write(paddingChar.charValue()); 625 writtenChars++; 626 } 627 } 628 } 629 out.close(); 630 } 631 }; 632 } 633 634 @Override 635 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 636 checkNotNull(target); 637 checkPositionIndexes(off, off + len, bytes.length); 638 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 639 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 640 } 641 } 642 643 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 644 checkNotNull(target); 645 checkPositionIndexes(off, off + len, bytes.length); 646 checkArgument(len <= alphabet.bytesPerChunk); 647 long bitBuffer = 0; 648 for (int i = 0; i < len; ++i) { 649 bitBuffer |= bytes[off + i] & 0xFF; 650 bitBuffer <<= 8; // Add additional zero byte in the end. 651 } 652 // Position of first character is length of bitBuffer minus bitsPerChar. 653 int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 654 int bitsProcessed = 0; 655 while (bitsProcessed < len * 8) { 656 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 657 target.append(alphabet.encode(charIndex)); 658 bitsProcessed += alphabet.bitsPerChar; 659 } 660 if (paddingChar != null) { 661 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 662 target.append(paddingChar.charValue()); 663 bitsProcessed += alphabet.bitsPerChar; 664 } 665 } 666 } 667 668 @Override 669 int maxDecodedSize(int chars) { 670 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 671 } 672 673 @Override 674 CharSequence trimTrailingPadding(CharSequence chars) { 675 checkNotNull(chars); 676 if (paddingChar == null) { 677 return chars; 678 } 679 char padChar = paddingChar.charValue(); 680 int l; 681 for (l = chars.length() - 1; l >= 0; l--) { 682 if (chars.charAt(l) != padChar) { 683 break; 684 } 685 } 686 return chars.subSequence(0, l + 1); 687 } 688 689 @Override 690 public boolean canDecode(CharSequence chars) { 691 checkNotNull(chars); 692 chars = trimTrailingPadding(chars); 693 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 694 return false; 695 } 696 for (int i = 0; i < chars.length(); i++) { 697 if (!alphabet.canDecode(chars.charAt(i))) { 698 return false; 699 } 700 } 701 return true; 702 } 703 704 @Override 705 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 706 checkNotNull(target); 707 chars = trimTrailingPadding(chars); 708 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 709 throw new DecodingException("Invalid input length " + chars.length()); 710 } 711 int bytesWritten = 0; 712 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 713 long chunk = 0; 714 int charsProcessed = 0; 715 for (int i = 0; i < alphabet.charsPerChunk; i++) { 716 chunk <<= alphabet.bitsPerChar; 717 if (charIdx + i < chars.length()) { 718 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 719 } 720 } 721 int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 722 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 723 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 724 } 725 } 726 return bytesWritten; 727 } 728 729 @Override 730 @GwtIncompatible // Reader,InputStream 731 public InputStream decodingStream(Reader reader) { 732 checkNotNull(reader); 733 return new InputStream() { 734 int bitBuffer = 0; 735 int bitBufferLength = 0; 736 int readChars = 0; 737 boolean hitPadding = false; 738 739 @Override 740 public int read() throws IOException { 741 while (true) { 742 int readChar = reader.read(); 743 if (readChar == -1) { 744 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 745 throw new DecodingException("Invalid input length " + readChars); 746 } 747 return -1; 748 } 749 readChars++; 750 char ch = (char) readChar; 751 if (paddingChar != null && paddingChar.charValue() == ch) { 752 if (!hitPadding 753 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 754 throw new DecodingException("Padding cannot start at index " + readChars); 755 } 756 hitPadding = true; 757 } else if (hitPadding) { 758 throw new DecodingException( 759 "Expected padding character but found '" + ch + "' at index " + readChars); 760 } else { 761 bitBuffer <<= alphabet.bitsPerChar; 762 bitBuffer |= alphabet.decode(ch); 763 bitBufferLength += alphabet.bitsPerChar; 764 765 if (bitBufferLength >= 8) { 766 bitBufferLength -= 8; 767 return (bitBuffer >> bitBufferLength) & 0xFF; 768 } 769 } 770 } 771 } 772 773 @Override 774 public int read(byte[] buf, int off, int len) throws IOException { 775 // Overriding this to work around the fact that InputStream's default implementation of 776 // this method will silently swallow exceptions thrown by the single-byte read() method 777 // (other than on the first call to it), which in this case can cause invalid encoded 778 // strings to not throw an exception. 779 // See https://github.com/google/guava/issues/3542 780 checkPositionIndexes(off, off + len, buf.length); 781 782 int i = off; 783 for (; i < off + len; i++) { 784 int b = read(); 785 if (b == -1) { 786 int read = i - off; 787 return read == 0 ? -1 : read; 788 } 789 buf[i] = (byte) b; 790 } 791 return i - off; 792 } 793 794 @Override 795 public void close() throws IOException { 796 reader.close(); 797 } 798 }; 799 } 800 801 @Override 802 public BaseEncoding omitPadding() { 803 return (paddingChar == null) ? this : newInstance(alphabet, null); 804 } 805 806 @Override 807 public BaseEncoding withPadChar(char padChar) { 808 if (8 % alphabet.bitsPerChar == 0 809 || (paddingChar != null && paddingChar.charValue() == padChar)) { 810 return this; 811 } else { 812 return newInstance(alphabet, padChar); 813 } 814 } 815 816 @Override 817 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 818 for (int i = 0; i < separator.length(); i++) { 819 checkArgument( 820 !alphabet.matches(separator.charAt(i)), 821 "Separator (%s) cannot contain alphabet characters", 822 separator); 823 } 824 if (paddingChar != null) { 825 checkArgument( 826 separator.indexOf(paddingChar.charValue()) < 0, 827 "Separator (%s) cannot contain padding character", 828 separator); 829 } 830 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 831 } 832 833 @LazyInit @CheckForNull private transient BaseEncoding upperCase; 834 @LazyInit @CheckForNull private transient BaseEncoding lowerCase; 835 836 @Override 837 public BaseEncoding upperCase() { 838 BaseEncoding result = upperCase; 839 if (result == null) { 840 Alphabet upper = alphabet.upperCase(); 841 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 842 } 843 return result; 844 } 845 846 @Override 847 public BaseEncoding lowerCase() { 848 BaseEncoding result = lowerCase; 849 if (result == null) { 850 Alphabet lower = alphabet.lowerCase(); 851 result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar); 852 } 853 return result; 854 } 855 856 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 857 return new StandardBaseEncoding(alphabet, paddingChar); 858 } 859 860 @Override 861 public String toString() { 862 StringBuilder builder = new StringBuilder("BaseEncoding."); 863 builder.append(alphabet.toString()); 864 if (8 % alphabet.bitsPerChar != 0) { 865 if (paddingChar == null) { 866 builder.append(".omitPadding()"); 867 } else { 868 builder.append(".withPadChar('").append(paddingChar).append("')"); 869 } 870 } 871 return builder.toString(); 872 } 873 874 @Override 875 public boolean equals(@CheckForNull Object other) { 876 if (other instanceof StandardBaseEncoding) { 877 StandardBaseEncoding that = (StandardBaseEncoding) other; 878 return this.alphabet.equals(that.alphabet) 879 && Objects.equal(this.paddingChar, that.paddingChar); 880 } 881 return false; 882 } 883 884 @Override 885 public int hashCode() { 886 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 887 } 888 } 889 890 static final class Base16Encoding extends StandardBaseEncoding { 891 final char[] encoding = new char[512]; 892 893 Base16Encoding(String name, String alphabetChars) { 894 this(new Alphabet(name, alphabetChars.toCharArray())); 895 } 896 897 private Base16Encoding(Alphabet alphabet) { 898 super(alphabet, null); 899 checkArgument(alphabet.chars.length == 16); 900 for (int i = 0; i < 256; ++i) { 901 encoding[i] = alphabet.encode(i >>> 4); 902 encoding[i | 0x100] = alphabet.encode(i & 0xF); 903 } 904 } 905 906 @Override 907 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 908 checkNotNull(target); 909 checkPositionIndexes(off, off + len, bytes.length); 910 for (int i = 0; i < len; ++i) { 911 int b = bytes[off + i] & 0xFF; 912 target.append(encoding[b]); 913 target.append(encoding[b | 0x100]); 914 } 915 } 916 917 @Override 918 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 919 checkNotNull(target); 920 if (chars.length() % 2 == 1) { 921 throw new DecodingException("Invalid input length " + chars.length()); 922 } 923 int bytesWritten = 0; 924 for (int i = 0; i < chars.length(); i += 2) { 925 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 926 target[bytesWritten++] = (byte) decoded; 927 } 928 return bytesWritten; 929 } 930 931 @Override 932 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 933 return new Base16Encoding(alphabet); 934 } 935 } 936 937 static final class Base64Encoding extends StandardBaseEncoding { 938 Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 939 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 940 } 941 942 private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 943 super(alphabet, paddingChar); 944 checkArgument(alphabet.chars.length == 64); 945 } 946 947 @Override 948 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 949 checkNotNull(target); 950 checkPositionIndexes(off, off + len, bytes.length); 951 int i = off; 952 for (int remaining = len; remaining >= 3; remaining -= 3) { 953 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 954 target.append(alphabet.encode(chunk >>> 18)); 955 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 956 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 957 target.append(alphabet.encode(chunk & 0x3F)); 958 } 959 if (i < off + len) { 960 encodeChunkTo(target, bytes, i, off + len - i); 961 } 962 } 963 964 @Override 965 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 966 checkNotNull(target); 967 chars = trimTrailingPadding(chars); 968 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 969 throw new DecodingException("Invalid input length " + chars.length()); 970 } 971 int bytesWritten = 0; 972 for (int i = 0; i < chars.length(); ) { 973 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 974 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 975 target[bytesWritten++] = (byte) (chunk >>> 16); 976 if (i < chars.length()) { 977 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 978 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 979 if (i < chars.length()) { 980 chunk |= alphabet.decode(chars.charAt(i++)); 981 target[bytesWritten++] = (byte) (chunk & 0xFF); 982 } 983 } 984 } 985 return bytesWritten; 986 } 987 988 @Override 989 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 990 return new Base64Encoding(alphabet, paddingChar); 991 } 992 } 993 994 @GwtIncompatible 995 static Reader ignoringReader(Reader delegate, String toIgnore) { 996 checkNotNull(delegate); 997 checkNotNull(toIgnore); 998 return new Reader() { 999 @Override 1000 public int read() throws IOException { 1001 int readChar; 1002 do { 1003 readChar = delegate.read(); 1004 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1005 return readChar; 1006 } 1007 1008 @Override 1009 public int read(char[] cbuf, int off, int len) throws IOException { 1010 throw new UnsupportedOperationException(); 1011 } 1012 1013 @Override 1014 public void close() throws IOException { 1015 delegate.close(); 1016 } 1017 }; 1018 } 1019 1020 static Appendable separatingAppendable( 1021 Appendable delegate, String separator, int afterEveryChars) { 1022 checkNotNull(delegate); 1023 checkNotNull(separator); 1024 checkArgument(afterEveryChars > 0); 1025 return new Appendable() { 1026 int charsUntilSeparator = afterEveryChars; 1027 1028 @Override 1029 public Appendable append(char c) throws IOException { 1030 if (charsUntilSeparator == 0) { 1031 delegate.append(separator); 1032 charsUntilSeparator = afterEveryChars; 1033 } 1034 delegate.append(c); 1035 charsUntilSeparator--; 1036 return this; 1037 } 1038 1039 @Override 1040 public Appendable append(@CheckForNull CharSequence chars, int off, int len) { 1041 throw new UnsupportedOperationException(); 1042 } 1043 1044 @Override 1045 public Appendable append(@CheckForNull CharSequence chars) { 1046 throw new UnsupportedOperationException(); 1047 } 1048 }; 1049 } 1050 1051 @GwtIncompatible // Writer 1052 static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) { 1053 Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars); 1054 return new Writer() { 1055 @Override 1056 public void write(int c) throws IOException { 1057 separatingAppendable.append((char) c); 1058 } 1059 1060 @Override 1061 public void write(char[] chars, int off, int len) throws IOException { 1062 throw new UnsupportedOperationException(); 1063 } 1064 1065 @Override 1066 public void flush() throws IOException { 1067 delegate.flush(); 1068 } 1069 1070 @Override 1071 public void close() throws IOException { 1072 delegate.close(); 1073 } 1074 }; 1075 } 1076 1077 static final class SeparatedBaseEncoding extends BaseEncoding { 1078 private final BaseEncoding delegate; 1079 private final String separator; 1080 private final int afterEveryChars; 1081 1082 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1083 this.delegate = checkNotNull(delegate); 1084 this.separator = checkNotNull(separator); 1085 this.afterEveryChars = afterEveryChars; 1086 checkArgument( 1087 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1088 } 1089 1090 @Override 1091 CharSequence trimTrailingPadding(CharSequence chars) { 1092 return delegate.trimTrailingPadding(chars); 1093 } 1094 1095 @Override 1096 int maxEncodedSize(int bytes) { 1097 int unseparatedSize = delegate.maxEncodedSize(bytes); 1098 return unseparatedSize 1099 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1100 } 1101 1102 @GwtIncompatible // Writer,OutputStream 1103 @Override 1104 public OutputStream encodingStream(Writer output) { 1105 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1106 } 1107 1108 @Override 1109 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1110 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1111 } 1112 1113 @Override 1114 int maxDecodedSize(int chars) { 1115 return delegate.maxDecodedSize(chars); 1116 } 1117 1118 @Override 1119 public boolean canDecode(CharSequence chars) { 1120 StringBuilder builder = new StringBuilder(); 1121 for (int i = 0; i < chars.length(); i++) { 1122 char c = chars.charAt(i); 1123 if (separator.indexOf(c) < 0) { 1124 builder.append(c); 1125 } 1126 } 1127 return delegate.canDecode(builder); 1128 } 1129 1130 @Override 1131 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1132 StringBuilder stripped = new StringBuilder(chars.length()); 1133 for (int i = 0; i < chars.length(); i++) { 1134 char c = chars.charAt(i); 1135 if (separator.indexOf(c) < 0) { 1136 stripped.append(c); 1137 } 1138 } 1139 return delegate.decodeTo(target, stripped); 1140 } 1141 1142 @Override 1143 @GwtIncompatible // Reader,InputStream 1144 public InputStream decodingStream(Reader reader) { 1145 return delegate.decodingStream(ignoringReader(reader, separator)); 1146 } 1147 1148 @Override 1149 public BaseEncoding omitPadding() { 1150 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1151 } 1152 1153 @Override 1154 public BaseEncoding withPadChar(char padChar) { 1155 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1156 } 1157 1158 @Override 1159 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1160 throw new UnsupportedOperationException("Already have a separator"); 1161 } 1162 1163 @Override 1164 public BaseEncoding upperCase() { 1165 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1166 } 1167 1168 @Override 1169 public BaseEncoding lowerCase() { 1170 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1171 } 1172 1173 @Override 1174 public String toString() { 1175 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1176 } 1177 } 1178}