001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.errorprone.annotations.Immutable; 021import java.security.Key; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.List; 027import java.util.zip.Adler32; 028import java.util.zip.CRC32; 029import java.util.zip.Checksum; 030import javax.annotation.CheckForNull; 031import javax.crypto.spec.SecretKeySpec; 032 033/** 034 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 035 * utilities. 036 * 037 * <p>A comparison of the various hash functions can be found <a 038 * href="http://goo.gl/jS7HH">here</a>. 039 * 040 * @author Kevin Bourrillion 041 * @author Dimitris Andreou 042 * @author Kurt Alfred Kluever 043 * @since 11.0 044 */ 045@ElementTypesAreNonnullByDefault 046public final class Hashing { 047 /** 048 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 049 * the returned function implements is unspecified and subject to change without notice. 050 * 051 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 052 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 053 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 054 * non-cryptographic hash function that will never change behavior, we suggest {@link 055 * #murmur3_128}. 056 * 057 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 058 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 059 * 060 * @param minimumBits a positive integer (can be arbitrarily large) 061 * @return a hash function, described above, that produces hash codes of length {@code 062 * minimumBits} or greater 063 */ 064 public static HashFunction goodFastHash(int minimumBits) { 065 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 066 067 if (bits == 32) { 068 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 069 } 070 if (bits <= 128) { 071 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 072 } 073 074 // Otherwise, join together some 128-bit murmur3s 075 int hashFunctionsNeeded = (bits + 127) / 128; 076 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 077 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 078 int seed = GOOD_FAST_HASH_SEED; 079 for (int i = 1; i < hashFunctionsNeeded; i++) { 080 seed += 1500450271; // a prime; shouldn't matter 081 hashFunctions[i] = murmur3_128(seed); 082 } 083 return new ConcatenatedHashFunction(hashFunctions); 084 } 085 086 /** 087 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 088 * dependent on the hash codes they produce will fail sooner. 089 */ 090 @SuppressWarnings("GoodTime") // reading system time without TimeSource 091 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 092 093 /** 094 * Returns a hash function implementing the <a 095 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 096 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 097 * bug</b> as described in the deprecation text. 098 * 099 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 100 * have the bug. 101 * 102 * @deprecated This implementation produces incorrect hash values from the {@link 103 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 104 * #murmur3_32_fixed(int)} instead. 105 */ 106 @Deprecated 107 public static HashFunction murmur3_32(int seed) { 108 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 109 } 110 111 /** 112 * Returns a hash function implementing the <a 113 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 114 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 115 * bug</b> as described in the deprecation text. 116 * 117 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 118 * have the bug. 119 * 120 * @deprecated This implementation produces incorrect hash values from the {@link 121 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 122 * #murmur3_32_fixed()} instead. 123 */ 124 @Deprecated 125 public static HashFunction murmur3_32() { 126 return Murmur3_32HashFunction.MURMUR3_32; 127 } 128 129 /** 130 * Returns a hash function implementing the <a 131 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 132 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 133 * 134 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 135 * 136 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 137 * HashFunction} returned by the original {@code murmur3_32} method. 138 * 139 * @since 31.0 140 */ 141 public static HashFunction murmur3_32_fixed(int seed) { 142 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 143 } 144 145 /** 146 * Returns a hash function implementing the <a 147 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 148 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 149 * 150 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 151 * 152 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 153 * HashFunction} returned by the original {@code murmur3_32} method. 154 * 155 * @since 31.0 156 */ 157 public static HashFunction murmur3_32_fixed() { 158 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 159 } 160 161 /** 162 * Returns a hash function implementing the <a 163 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 164 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 165 * 166 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 167 */ 168 public static HashFunction murmur3_128(int seed) { 169 return new Murmur3_128HashFunction(seed); 170 } 171 172 /** 173 * Returns a hash function implementing the <a 174 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 175 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 176 * 177 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 178 */ 179 public static HashFunction murmur3_128() { 180 return Murmur3_128HashFunction.MURMUR3_128; 181 } 182 183 /** 184 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 185 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 186 * 187 * @since 15.0 188 */ 189 public static HashFunction sipHash24() { 190 return SipHashFunction.SIP_HASH_24; 191 } 192 193 /** 194 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 195 * SipHash-2-4 algorithm</a> using the given seed. 196 * 197 * @since 15.0 198 */ 199 public static HashFunction sipHash24(long k0, long k1) { 200 return new SipHashFunction(2, 4, k0, k1); 201 } 202 203 /** 204 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 205 * 206 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 207 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 208 * neither fast nor secure. As of January 2017, we suggest: 209 * <ul> 210 * <li>For security: 211 * {@link Hashing#sha256} or a higher-level API. 212 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 213 * </ul> 214 */ 215 @Deprecated 216 public static HashFunction md5() { 217 return Md5Holder.MD5; 218 } 219 220 private static class Md5Holder { 221 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 222 } 223 224 /** 225 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 226 * 227 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 228 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 229 * neither fast nor secure. As of January 2017, we suggest: 230 * <ul> 231 * <li>For security: 232 * {@link Hashing#sha256} or a higher-level API. 233 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 234 * </ul> 235 */ 236 @Deprecated 237 public static HashFunction sha1() { 238 return Sha1Holder.SHA_1; 239 } 240 241 private static class Sha1Holder { 242 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 243 } 244 245 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 246 public static HashFunction sha256() { 247 return Sha256Holder.SHA_256; 248 } 249 250 private static class Sha256Holder { 251 static final HashFunction SHA_256 = 252 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 253 } 254 255 /** 256 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 257 * 258 * @since 19.0 259 */ 260 public static HashFunction sha384() { 261 return Sha384Holder.SHA_384; 262 } 263 264 private static class Sha384Holder { 265 static final HashFunction SHA_384 = 266 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 267 } 268 269 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 270 public static HashFunction sha512() { 271 return Sha512Holder.SHA_512; 272 } 273 274 private static class Sha512Holder { 275 static final HashFunction SHA_512 = 276 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 277 } 278 279 /** 280 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 281 * MD5 (128 hash bits) hash function and the given secret key. 282 * 283 * @param key the secret key 284 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 285 * @since 20.0 286 */ 287 public static HashFunction hmacMd5(Key key) { 288 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 289 } 290 291 /** 292 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 293 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 294 * and the MD5 algorithm. 295 * 296 * @param key the key material of the secret key 297 * @since 20.0 298 */ 299 public static HashFunction hmacMd5(byte[] key) { 300 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 301 } 302 303 /** 304 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 305 * SHA-1 (160 hash bits) hash function and the given secret key. 306 * 307 * @param key the secret key 308 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 309 * @since 20.0 310 */ 311 public static HashFunction hmacSha1(Key key) { 312 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 313 } 314 315 /** 316 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 317 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 318 * array and the SHA-1 algorithm. 319 * 320 * @param key the key material of the secret key 321 * @since 20.0 322 */ 323 public static HashFunction hmacSha1(byte[] key) { 324 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 325 } 326 327 /** 328 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 329 * SHA-256 (256 hash bits) hash function and the given secret key. 330 * 331 * @param key the secret key 332 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 333 * @since 20.0 334 */ 335 public static HashFunction hmacSha256(Key key) { 336 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 337 } 338 339 /** 340 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 341 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 342 * array and the SHA-256 algorithm. 343 * 344 * @param key the key material of the secret key 345 * @since 20.0 346 */ 347 public static HashFunction hmacSha256(byte[] key) { 348 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 349 } 350 351 /** 352 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 353 * SHA-512 (512 hash bits) hash function and the given secret key. 354 * 355 * @param key the secret key 356 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 357 * @since 20.0 358 */ 359 public static HashFunction hmacSha512(Key key) { 360 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 361 } 362 363 /** 364 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 365 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 366 * array and the SHA-512 algorithm. 367 * 368 * @param key the key material of the secret key 369 * @since 20.0 370 */ 371 public static HashFunction hmacSha512(byte[] key) { 372 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 373 } 374 375 private static String hmacToString(String methodName, Key key) { 376 return String.format( 377 "Hashing.%s(Key[algorithm=%s, format=%s])", 378 methodName, key.getAlgorithm(), key.getFormat()); 379 } 380 381 /** 382 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 383 * by RFC 3720, Section 12.1. 384 * 385 * <p>This function is best understood as a <a 386 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 387 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 388 * 389 * @since 18.0 390 */ 391 public static HashFunction crc32c() { 392 return Crc32cHashFunction.CRC_32_C; 393 } 394 395 /** 396 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 397 * 398 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 399 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 400 * 401 * <p>This function is best understood as a <a 402 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 403 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 404 * 405 * @since 14.0 406 */ 407 public static HashFunction crc32() { 408 return ChecksumType.CRC_32.hashFunction; 409 } 410 411 /** 412 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 413 * 414 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 415 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 416 * 417 * <p>This function is best understood as a <a 418 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 419 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 420 * 421 * @since 14.0 422 */ 423 public static HashFunction adler32() { 424 return ChecksumType.ADLER_32.hashFunction; 425 } 426 427 @Immutable 428 enum ChecksumType implements ImmutableSupplier<Checksum> { 429 CRC_32("Hashing.crc32()") { 430 @Override 431 public Checksum get() { 432 return new CRC32(); 433 } 434 }, 435 ADLER_32("Hashing.adler32()") { 436 @Override 437 public Checksum get() { 438 return new Adler32(); 439 } 440 }; 441 442 public final HashFunction hashFunction; 443 444 ChecksumType(String toString) { 445 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 446 } 447 } 448 449 /** 450 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 451 * 452 * <p>This is designed for generating persistent fingerprints of strings. It isn't 453 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 454 * alternatives we've used in the past. 455 * 456 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 457 * means {@link HashCode#asLong} is guaranteed to return the same value that 458 * farmhash::Fingerprint64() would for the same input (when compared using {@link 459 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 460 * 461 * <p>This function is best understood as a <a 462 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 463 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 464 * 465 * @since 20.0 466 */ 467 public static HashFunction farmHashFingerprint64() { 468 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 469 } 470 471 /** 472 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 473 * 474 * <p>This is designed for generating persistent fingerprints of strings. It isn't 475 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 476 * generated using this are byte-wise identical to those created using the C++ version, but note 477 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 478 * Comparisons between the two should take this into account. 479 * 480 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 481 * longer strings. It could have been one or the other throughout. The main advantage of the 482 * combination is that CityHash has a bunch of special cases for short strings that don't need to 483 * be replicated here. The result will never be 0 or 1. 484 * 485 * <p>This function is best understood as a <a 486 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 487 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 488 * 489 * @since 31.1 490 */ 491 public static HashFunction fingerprint2011() { 492 return Fingerprint2011.FINGERPRINT_2011; 493 } 494 495 /** 496 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 497 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 498 * consistentHash(h, n)} equals: 499 * 500 * <ul> 501 * <li>{@code n - 1}, with approximate probability {@code 1/n} 502 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 503 * </ul> 504 * 505 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 506 * following conditions: 507 * 508 * <ul> 509 * <li>You want to assign the same fraction of inputs to each bucket. 510 * <li>When you reduce the number of buckets, you can accept that the most recently added 511 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 512 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 513 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 514 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 515 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 516 * no way for you to specify which of the three buckets is disappearing. Thus, if your 517 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 518 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 519 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 520 * </ul> 521 * 522 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 523 * consistent hashing</a> for more information. 524 */ 525 public static int consistentHash(HashCode hashCode, int buckets) { 526 return consistentHash(hashCode.padToLong(), buckets); 527 } 528 529 /** 530 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 531 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 532 * n)} equals: 533 * 534 * <ul> 535 * <li>{@code n - 1}, with approximate probability {@code 1/n} 536 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 537 * </ul> 538 * 539 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 540 * following conditions: 541 * 542 * <ul> 543 * <li>You want to assign the same fraction of inputs to each bucket. 544 * <li>When you reduce the number of buckets, you can accept that the most recently added 545 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 546 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 547 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 548 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 549 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 550 * no way for you to specify which of the three buckets is disappearing. Thus, if your 551 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 552 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 553 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 554 * </ul> 555 * 556 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 557 * consistent hashing</a> for more information. 558 */ 559 public static int consistentHash(long input, int buckets) { 560 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 561 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 562 int candidate = 0; 563 int next; 564 565 // Jump from bucket to bucket until we go out of range 566 while (true) { 567 next = (int) ((candidate + 1) / generator.nextDouble()); 568 if (next >= 0 && next < buckets) { 569 candidate = next; 570 } else { 571 return candidate; 572 } 573 } 574 } 575 576 /** 577 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 578 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 579 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 580 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 581 * 582 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 583 * have the same bit length 584 */ 585 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 586 Iterator<HashCode> iterator = hashCodes.iterator(); 587 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 588 int bits = iterator.next().bits(); 589 byte[] resultBytes = new byte[bits / 8]; 590 for (HashCode hashCode : hashCodes) { 591 byte[] nextBytes = hashCode.asBytes(); 592 checkArgument( 593 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 594 for (int i = 0; i < nextBytes.length; i++) { 595 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 596 } 597 } 598 return HashCode.fromBytesNoCopy(resultBytes); 599 } 600 601 /** 602 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 603 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 604 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 605 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 606 * 607 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 608 * have the same bit length 609 */ 610 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 611 Iterator<HashCode> iterator = hashCodes.iterator(); 612 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 613 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 614 for (HashCode hashCode : hashCodes) { 615 byte[] nextBytes = hashCode.asBytes(); 616 checkArgument( 617 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 618 for (int i = 0; i < nextBytes.length; i++) { 619 resultBytes[i] += nextBytes[i]; 620 } 621 } 622 return HashCode.fromBytesNoCopy(resultBytes); 623 } 624 625 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 626 static int checkPositiveAndMakeMultipleOf32(int bits) { 627 checkArgument(bits > 0, "Number of bits must be positive"); 628 return (bits + 31) & ~31; 629 } 630 631 /** 632 * Returns a hash function which computes its hash code by concatenating the hash codes of the 633 * underlying hash functions together. This can be useful if you need to generate hash codes of a 634 * specific length. 635 * 636 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 637 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 638 * 639 * @since 19.0 640 */ 641 public static HashFunction concatenating( 642 HashFunction first, HashFunction second, HashFunction... rest) { 643 // We can't use Lists.asList() here because there's no hash->collect dependency 644 List<HashFunction> list = new ArrayList<>(); 645 list.add(first); 646 list.add(second); 647 Collections.addAll(list, rest); 648 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 649 } 650 651 /** 652 * Returns a hash function which computes its hash code by concatenating the hash codes of the 653 * underlying hash functions together. This can be useful if you need to generate hash codes of a 654 * specific length. 655 * 656 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 657 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 658 * 659 * @since 19.0 660 */ 661 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 662 checkNotNull(hashFunctions); 663 // We can't use Iterables.toArray() here because there's no hash->collect dependency 664 List<HashFunction> list = new ArrayList<>(); 665 for (HashFunction hashFunction : hashFunctions) { 666 list.add(hashFunction); 667 } 668 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 669 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 670 } 671 672 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 673 674 private ConcatenatedHashFunction(HashFunction... functions) { 675 super(functions); 676 for (HashFunction function : functions) { 677 checkArgument( 678 function.bits() % 8 == 0, 679 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 680 function.bits(), 681 function); 682 } 683 } 684 685 @Override 686 HashCode makeHash(Hasher[] hashers) { 687 byte[] bytes = new byte[bits() / 8]; 688 int i = 0; 689 for (Hasher hasher : hashers) { 690 HashCode newHash = hasher.hash(); 691 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 692 } 693 return HashCode.fromBytesNoCopy(bytes); 694 } 695 696 @Override 697 public int bits() { 698 int bitSum = 0; 699 for (HashFunction function : functions) { 700 bitSum += function.bits(); 701 } 702 return bitSum; 703 } 704 705 @Override 706 public boolean equals(@CheckForNull Object object) { 707 if (object instanceof ConcatenatedHashFunction) { 708 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 709 return Arrays.equals(functions, other.functions); 710 } 711 return false; 712 } 713 714 @Override 715 public int hashCode() { 716 return Arrays.hashCode(functions); 717 } 718 } 719 720 /** 721 * Linear CongruentialGenerator to use for consistent hashing. See 722 * http://en.wikipedia.org/wiki/Linear_congruential_generator 723 */ 724 private static final class LinearCongruentialGenerator { 725 private long state; 726 727 public LinearCongruentialGenerator(long seed) { 728 this.state = seed; 729 } 730 731 public double nextDouble() { 732 state = 2862933555777941757L * state + 1; 733 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 734 } 735 } 736 737 private Hashing() {} 738}