/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*                                                                       */
/*    This file is part of the HiGHS linear optimization suite           */
/*                                                                       */
/*    Written and engineered 2008-2022 at the University of Edinburgh    */
/*                                                                       */
/*    Available as open-source under the MIT License                     */
/*                                                                       */
/*    Authors: Julian Hall, Ivet Galabova, Leona Gottwald and Michael    */
/*    Feldmeier                                                          */
/*                                                                       */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifndef HIGHS_UTIL_HASH_H_
#define HIGHS_UTIL_HASH_H_

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <new>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "util/HighsInt.h"

#ifdef HIGHS_HAVE_BITSCAN_REVERSE
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanReverse64)
#endif

#if __GNUG__ && __GNUC__ < 5
#define IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T)
#else
#define IS_TRIVIALLY_COPYABLE(T) std::is_trivially_copyable<T>::value
#endif

template <typename T>
struct HighsHashable : std::integral_constant<bool, IS_TRIVIALLY_COPYABLE(T)> {
};

template <typename U, typename V>
struct HighsHashable<std::pair<U, V>>
    : public std::integral_constant<bool, HighsHashable<U>::value &&
                                              HighsHashable<V>::value> {};

template <typename U, typename V>
struct HighsHashable<std::tuple<U, V>> : public HighsHashable<std::pair<U, V>> {
};

template <typename U, typename V, typename... Args>
struct HighsHashable<std::tuple<U, V, Args...>>
    : public std::integral_constant<
          bool, HighsHashable<U>::value &&
                    HighsHashable<std::tuple<V, Args...>>::value> {};

struct HighsHashHelpers {
  using u8 = std::uint8_t;
  using i8 = std::int8_t;

  using u16 = std::uint16_t;
  using i16 = std::int16_t;

  using u32 = std::uint32_t;
  using i32 = std::int32_t;

  using u64 = std::uint64_t;
  using i64 = std::int64_t;

  static constexpr u64 c[] = {
      u64{0xc8497d2a400d9551}, u64{0x80c8963be3e4c2f3},
      u64{0x042d8680e260ae5b}, u64{0x8a183895eeac1536},
      u64{0xa94e9c75f80ad6de}, u64{0x7e92251dec62835e},
      u64{0x07294165cb671455}, u64{0x89b0f6212b0a4292},
      u64{0x31900011b96bf554}, u64{0xa44540f8eee2094f},
      u64{0xce7ffd372e4c64fc}, u64{0x51c9d471bfe6a10f},
      u64{0x758c2a674483826f}, u64{0xf91a20abe63f8b02},
      u64{0xc2a069024a1fcc6f}, u64{0xd5bb18b70c5dbd59},
      u64{0xd510adac6d1ae289}, u64{0x571d069b23050a79},
      u64{0x60873b8872933e06}, u64{0x780481cc19670350},
      u64{0x7a48551760216885}, u64{0xb5d68b918231e6ca},
      u64{0xa7e5571699aa5274}, u64{0x7b6d309b2cfdcf01},
      u64{0x04e77c3d474daeff}, u64{0x4dbf099fd7247031},
      u64{0x5d70dca901130beb}, u64{0x9f8b5f0df4182499},
      u64{0x293a74c9686092da}, u64{0xd09bdab6840f52b3},
      u64{0xc05d47f3ab302263}, u64{0x6b79e62b884b65d6},
      u64{0xa581106fc980c34d}, u64{0xf081b7145ea2293e},
      u64{0xfb27243dd7c3f5ad}, u64{0x5211bf8860ea667f},
      u64{0x9455e65cb2385e7f}, u64{0x0dfaf6731b449b33},
      u64{0x4ec98b3c6f5e68c7}, u64{0x007bfd4a42ae936b},
      u64{0x65c93061f8674518}, u64{0x640816f17127c5d1},
      u64{0x6dd4bab17b7c3a74}, u64{0x34d9268c256fa1ba},
      u64{0x0b4d0c6b5b50d7f4}, u64{0x30aa965bc9fadaff},
      u64{0xc0ac1d0c2771404d}, u64{0xc5e64509abb76ef2},
      u64{0xd606b11990624a36}, u64{0x0d3f05d242ce2fb7},
      u64{0x469a803cb276fe32}, u64{0xa4a44d177a3e23f4},
      u64{0xb9d9a120dcc1ca03}, u64{0x2e15af8165234a2e},
      u64{0x10609ba2720573d4}, u64{0xaa4191b60368d1d5},
      u64{0x333dd2300bc57762}, u64{0xdf6ec48f79fb402f},
      u64{0x5ed20fcef1b734fa}, u64{0x4c94924ec8be21ee},
      u64{0x5abe6ad9d131e631}, u64{0xbe10136a522e602d},
      u64{0x53671115c340e779}, u64{0x9f392fe43e2144da}};

  /// Mersenne prime 2^61 - 1
  static constexpr u64 M61() { return u64{0x1fffffffffffffff}; };

#ifdef HIGHS_HAVE_BUILTIN_CLZ
  static int log2i(uint64_t n) { return 63 - __builtin_clzll(n); }

  static int log2i(unsigned int n) { return 31 - __builtin_clz(n); }
#elif defined(HIGHS_HAVE_BITSCAN_REVERSE)
  static int log2i(uint64_t n) {
    unsigned long result;
    _BitScanReverse64(&result, n);
    return result;
  }

  static int log2i(unsigned int n) {
    unsigned long result;
    _BitScanReverse(&result, (unsigned long)n);
    return result;
  }
#else
  // integer log2 algorithm without floating point arithmetic. It uses an
  // unrolled loop and requires few instructions that can be well optimized.
  static int log2i(uint64_t n) {
    int x = 0;

    auto log2Iteration = [&](int p) {
      if (n >= uint64_t{1} << p) {
        x += p;
        n >>= p;
      }
    };

    log2Iteration(32);
    log2Iteration(16);
    log2Iteration(8);
    log2Iteration(4);
    log2Iteration(2);
    log2Iteration(1);

    return x;
  }

  static int log2i(uint32_t n) {
    int x = 0;

    auto log2Iteration = [&](int p) {
      if (n >= 1u << p) {
        x += p;
        n >>= p;
      }
    };

    log2Iteration(16);
    log2Iteration(8);
    log2Iteration(4);
    log2Iteration(2);
    log2Iteration(1);

    return x;
  }
#endif
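  // Worked example for the variants above: all three compute the index of
  // the most significant set bit of a nonzero input, e.g.
  // log2i(uint64_t{40}) == 5, since 2^5 = 32 <= 40 < 64 = 2^6.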
  /// compute a * b mod 2^61-1
  static u64 multiply_modM61(u64 a, u64 b) {
    u64 ahi = a >> 32;
    u64 bhi = b >> 32;
    u64 alo = a & 0xffffffffu;
    u64 blo = b & 0xffffffffu;

    // compute the different order terms with adicities 2^64, 2^32, 2^0
    u64 term_64 = ahi * bhi;
    u64 term_32 = ahi * blo + bhi * alo;
    u64 term_0 = alo * blo;

    // Partially reduce term_0 and term_32 modulo M61() individually so that
    // we do not need to deal with a possible carry bit (thanks
    // @https://github.com/WTFHCN for catching the bug with this). We do not
    // need to reduce completely with an additional check for the range of the
    // resulting term, as this is done at the end in any case and the reduced
    // sizes do not cause trouble within the available 64 bits.
    term_0 = (term_0 & M61()) + (term_0 >> 61);
    term_0 += ((term_32 >> 29) + (term_32 << 32)) & M61();

    // The lower 61 bits of term_0 are now the lower 61 bits of the result
    // that we need. Now extract the upper 61 bits of the result so that we
    // can compute the result of the multiplication modulo M61()
    u64 ab61 = (term_64 << 3) | (term_0 >> 61);

    // finally take the result modulo M61, which is computed by exploiting
    // that M61 is a Mersenne prime: if a * b = q * 2^61 + r
    // then a * b = (q + r) (mod 2^61 - 1)
    u64 result = (term_0 & M61()) + ab61;
    if (result >= M61()) result -= M61();

    return result;
  }
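  // Worked example of the Mersenne-prime reduction used above: with
  // p = 2^61 - 1 we have 2^61 = 1 (mod p), so writing a product as
  // x = q * 2^61 + r gives x = q + r (mod p). For instance x = 3 * 2^61 + 5
  // reduces to 8 with a single addition; one conditional subtraction then
  // brings the sum back into the range [0, p).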
  static u64 modexp_M61(u64 a, u64 e) {
    // the exponent needs to be greater than zero
    assert(e > 0);
    u64 result = a;

    while (e != 1) {
      // square
      result = multiply_modM61(result, result);

      // multiply with a if the exponent is odd
      if (e & 1) result = multiply_modM61(result, a);

      // shift to the next bit
      e = e >> 1;
    }

    return result;
  }

  /// Mersenne prime 2^31 - 1
  static constexpr u64 M31() { return u32{0x7fffffff}; };

  /// compute a * b mod 2^31-1
  static u32 multiply_modM31(u32 a, u32 b) {
    u64 result = u64(a) * u64(b);
    result = (result >> 31) + (result & M31());
    if (result >= M31()) result -= M31();
    return result;
  }

  static u32 modexp_M31(u32 a, u64 e) {
    // the exponent needs to be greater than zero
    assert(e > 0);
    u32 result = a;

    while (e != 1) {
      // square
      result = multiply_modM31(result, result);

      // multiply with a if the exponent is odd
      if (e & 1) result = multiply_modM31(result, a);

      // shift to the next bit
      e = e >> 1;
    }

    return result;
  }

  template <int k>
  static u64 pair_hash(u32 a, u32 b) {
    return (a + c[2 * k]) * (b + c[2 * k + 1]);
  }

  static void sparse_combine(u64& hash, HighsInt index, u64 value) {
    // we take each value of the sparse hash as coefficient of a polynomial
    // over the finite field modulo the Mersenne prime 2^61-1, where the
    // monomial for a sparse entry has the degree of its index. We evaluate
    // the polynomial at a random constant. This allows the hashes of sparse
    // vectors to be computed independently of each other's nonzero
    // contributions, and therefore allows using whatever access order gives
    // the best cache performance. E.g. we can compute a strong hash value for
    // parallel row and column detection and only need to loop over the
    // nonzeros once, in arbitrary order. This comes at the expense of more
    // expensive hash calculations, as it would be more efficient to evaluate
    // the polynomial with Horner's scheme, but it allows for parallelization
    // and arbitrary order. Since we have 64 random constants available, we
    // slightly improve the scheme by using a lower-degree polynomial in 64
    // variables, which we evaluate at the random vector of 64 constants.

    // make sure the input value is never zero and at most 61 bits are used
    value = ((value << 1) & M61()) | 1;

    // make sure that the constant has at most 61 bits, as otherwise the
    // modulo algorithm for multiplication mod M61 might not work properly
    // due to overflow
    u64 a = c[index & 63] & M61();
    HighsInt degree = (index >> 6) + 1;

    hash += multiply_modM61(value, modexp_M61(a, degree));
    hash = (hash >> 61) + (hash & M61());
    if (hash >= M61()) hash -= M61();
    assert(hash < M61());
  }

  static void sparse_inverse_combine(u64& hash, HighsInt index, u64 value) {
    // same hash algorithm as sparse_combine(), but for updating a hash value
    // to the state before it was changed with a call to sparse_combine().
    // This is easily possible as the hash value just uses finite field
    // arithmetic: we can simply add the additive inverse of the previous
    // contribution. This is a very useful routine for symmetry detection,
    // where during partition refinement the hashes do not need to be
    // recomputed but can be updated with this procedure.

    // make sure the input value is never zero and at most 61 bits are used
    value = ((value << 1) & M61()) | 1;

    u64 a = c[index & 63] & M61();
    HighsInt degree = (index >> 6) + 1;
    // add the additive inverse (M61() - hashvalue) instead of the hash value
    // itself
    hash += M61() - multiply_modM61(value, modexp_M61(a, degree));
    hash = (hash >> 61) + (hash & M61());
    if (hash >= M61()) hash -= M61();
    assert(hash < M61());
  }
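  // A property worth noting, which follows from the modular arithmetic
  // above: for any hash h < M61() and any (index, value) pair, the sequence
  //   sparse_combine(h, index, value);
  //   sparse_inverse_combine(h, index, value);
  // restores the original h, because the second call adds the additive
  // inverse modulo M61() of the contribution added by the first call.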
  /// overload that does not take a value and saves one multiplication call,
  /// useful for sparse hashing of bit vectors
  static void sparse_combine(u64& hash, HighsInt index) {
    u64 a = c[index & 63] & M61();
    HighsInt degree = (index >> 6) + 1;

    hash += modexp_M61(a, degree);
    hash = (hash >> 61) + (hash & M61());
    if (hash >= M61()) hash -= M61();
    assert(hash < M61());
  }

  /// overload that does not take a value and saves one multiplication call,
  /// useful for sparse hashing of bit vectors
  static void sparse_inverse_combine(u64& hash, HighsInt index) {
    // same hash algorithm as the sparse_combine() overload above, but for
    // updating a hash value to the state before that call; see
    // sparse_inverse_combine() with a value argument for the explanation.
    u64 a = c[index & 63] & M61();
    HighsInt degree = (index >> 6) + 1;
    // add the additive inverse (M61() - hashvalue) instead of the hash value
    // itself
    hash += M61() - modexp_M61(a, degree);
    hash = (hash >> 61) + (hash & M61());
    if (hash >= M61()) hash -= M61();
    assert(hash < M61());
  }

  static void sparse_combine32(u32& hash, HighsInt index, u64 value) {
    // same polynomial hashing scheme as sparse_combine(), but over the
    // finite field modulo the Mersenne prime 2^31-1, for use when only a
    // 32-bit hash value is stored. Since we have 64 random constants
    // available, we again use a lower-degree polynomial in 64 variables,
    // evaluated at the random vector of 64 constants.

    // make sure the input value is never zero and at most 31 bits are used
    value = (pair_hash<0>(value, value >> 32) >> 33) | 1;

    // make sure that the constant has at most 31 bits, as otherwise the
    // modulo algorithm for multiplication mod M31 might not work properly
    // due to overflow
    u32 a = c[index & 63] & M31();
    HighsInt degree = (index >> 6) + 1;

    hash += multiply_modM31(value, modexp_M31(a, degree));
    hash = (hash >> 31) + (hash & M31());
    if (hash >= M31()) hash -= M31();
    assert(hash < M31());
  }

  static void sparse_inverse_combine32(u32& hash, HighsInt index, u64 value) {
    // same hash algorithm as sparse_combine32(), but for updating a hash
    // value to the state before it was changed with a call to
    // sparse_combine32(); see sparse_inverse_combine() for the explanation.

    // make sure the input value is never zero and at most 31 bits are used
    value = (pair_hash<0>(value, value >> 32) >> 33) | 1;

    u32 a = c[index & 63] & M31();
    HighsInt degree = (index >> 6) + 1;
    // add the additive inverse (M31() - hashvalue) instead of the hash value
    // itself
    hash += M31() - multiply_modM31(value, modexp_M31(a, degree));
    hash = (hash >> 31) + (hash & M31());
    if (hash >= M31()) hash -= M31();
    assert(hash < M31());
  }
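  // The 31-bit variants above satisfy the same cancellation property as the
  // 61-bit ones: sparse_combine32() followed by sparse_inverse_combine32()
  // with the same (index, value) pair restores the previous hash state.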
  /// multiplicative constant for Fibonacci hashing: floor(2^64 / phi), with
  /// phi the golden ratio; an odd constant that spreads values over the full
  /// 64-bit range
  static constexpr u64 fibonacci_muliplier() { return u64{0x9e3779b97f4a7c15}; }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value, int>::type = 0>
  static u64 vector_hash(const T* vals, size_t numvals) {
    std::array<u32, 2> pair{};
    u64 hash = 0;
    HighsInt k = 0;

    const char* dataptr = (const char*)vals;
    const char* dataend = (const char*)(vals + numvals);

    while (dataptr != dataend) {
      using std::size_t;
      size_t numBytes = std::min(size_t(dataend - dataptr), size_t{256});
      size_t numPairs = (numBytes + 7) / 8;
      size_t lastPairBytes = numBytes - (numPairs - 1) * 8;
      u64 chunkhash[] = {u64{0}, u64{0}};

#define HIGHS_VECHASH_CASE_N(N, B)                         \
  std::memcpy(&pair[0], dataptr, B);                       \
  chunkhash[N & 1] += pair_hash<32 - N>(pair[0], pair[1]); \
  dataptr += B;

      switch (numPairs) {
        case 32:
          if (hash != 0) {
            // make sure hash is reduced mod M61() before multiplying with
            // the next random constant. For vectors of at most 240 bytes we
            // never get here and only use the fast pair hashing scheme. For
            // vectors of 240 to 256 bytes we only have the one additional
            // check for hash != 0 above, which evaluates to false, and only
            // for longer vectors do we ever reduce modulo M61.
            if (hash >= M61()) hash -= M61();
            hash = multiply_modM61(hash, c[(k++) & 63] & M61());
          }
          HIGHS_VECHASH_CASE_N(32, 8)
          // fall through
        case 31:
          HIGHS_VECHASH_CASE_N(31, 8)
          // fall through
        case 30:
          HIGHS_VECHASH_CASE_N(30, 8)
          // fall through
        case 29:
          HIGHS_VECHASH_CASE_N(29, 8)
          // fall through
        case 28:
          HIGHS_VECHASH_CASE_N(28, 8)
          // fall through
        case 27:
          HIGHS_VECHASH_CASE_N(27, 8)
          // fall through
        case 26:
          HIGHS_VECHASH_CASE_N(26, 8)
          // fall through
        case 25:
          HIGHS_VECHASH_CASE_N(25, 8)
          // fall through
        case 24:
          HIGHS_VECHASH_CASE_N(24, 8)
          // fall through
        case 23:
          HIGHS_VECHASH_CASE_N(23, 8)
          // fall through
        case 22:
          HIGHS_VECHASH_CASE_N(22, 8)
          // fall through
        case 21:
          HIGHS_VECHASH_CASE_N(21, 8)
          // fall through
        case 20:
          HIGHS_VECHASH_CASE_N(20, 8)
          // fall through
        case 19:
          HIGHS_VECHASH_CASE_N(19, 8)
          // fall through
        case 18:
          HIGHS_VECHASH_CASE_N(18, 8)
          // fall through
        case 17:
          HIGHS_VECHASH_CASE_N(17, 8)
          // fall through
        case 16:
          HIGHS_VECHASH_CASE_N(16, 8)
          // fall through
        case 15:
          HIGHS_VECHASH_CASE_N(15, 8)
          // fall through
        case 14:
          HIGHS_VECHASH_CASE_N(14, 8)
          // fall through
        case 13:
          HIGHS_VECHASH_CASE_N(13, 8)
          // fall through
        case 12:
          HIGHS_VECHASH_CASE_N(12, 8)
          // fall through
        case 11:
          HIGHS_VECHASH_CASE_N(11, 8)
          // fall through
        case 10:
          HIGHS_VECHASH_CASE_N(10, 8)
          // fall through
        case 9:
          HIGHS_VECHASH_CASE_N(9, 8)
          // fall through
        case 8:
          HIGHS_VECHASH_CASE_N(8, 8)
          // fall through
        case 7:
          HIGHS_VECHASH_CASE_N(7, 8)
          // fall through
        case 6:
          HIGHS_VECHASH_CASE_N(6, 8)
          // fall through
        case 5:
          HIGHS_VECHASH_CASE_N(5, 8)
          // fall through
        case 4:
          HIGHS_VECHASH_CASE_N(4, 8)
          // fall through
        case 3:
          HIGHS_VECHASH_CASE_N(3, 8)
          // fall through
        case 2:
          HIGHS_VECHASH_CASE_N(2, 8)
          // fall through
        case 1:
          HIGHS_VECHASH_CASE_N(1, lastPairBytes)
      }

      hash += (chunkhash[0] >> 3) ^ (chunkhash[1] >> 32);
    }
#undef HIGHS_VECHASH_CASE_N

    return hash * fibonacci_muliplier();
  }
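  // Usage sketch, assuming a vector of a trivially copyable type:
  //   std::vector<double> v = ...;
  //   uint64_t h = HighsHashHelpers::vector_hash(v.data(), v.size());
  // The data is consumed in chunks of up to 256 bytes; within a chunk only
  // the cheap pair_hash scheme is used, and successive chunks are chained
  // with multiplications by random constants modulo M61().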
  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) <= 8) && (sizeof(T) >= 1),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 2> bytes;
    if (sizeof(T) < 4) bytes[0] = 0;
    if (sizeof(T) < 8) bytes[1] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return pair_hash<1>(bytes[0], bytes[1]) ^
           (pair_hash<0>(bytes[0], bytes[1]) >> 32);
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 9) && (sizeof(T) <= 16),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 4> bytes;
    if (sizeof(T) < 12) bytes[2] = 0;
    if (sizeof(T) < 16) bytes[3] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return (pair_hash<0>(bytes[0], bytes[1]) ^
            (pair_hash<1>(bytes[2], bytes[3]) >> 32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 17) && (sizeof(T) <= 24),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 6> bytes;
    if (sizeof(T) < 20) bytes[4] = 0;
    if (sizeof(T) < 24) bytes[5] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return (pair_hash<0>(bytes[0], bytes[1]) ^
            ((pair_hash<1>(bytes[2], bytes[3]) +
              pair_hash<2>(bytes[4], bytes[5])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 25) && (sizeof(T) <= 32),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 8> bytes;
    if (sizeof(T) < 28) bytes[6] = 0;
    if (sizeof(T) < 32) bytes[7] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return ((pair_hash<0>(bytes[0], bytes[1]) +
             pair_hash<1>(bytes[2], bytes[3])) ^
            ((pair_hash<2>(bytes[4], bytes[5]) +
              pair_hash<3>(bytes[6], bytes[7])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 33) && (sizeof(T) <= 40),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 10> bytes;
    if (sizeof(T) < 36) bytes[8] = 0;
    if (sizeof(T) < 40) bytes[9] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return ((pair_hash<0>(bytes[0], bytes[1]) +
             pair_hash<1>(bytes[2], bytes[3])) ^
            ((pair_hash<2>(bytes[4], bytes[5]) +
              pair_hash<3>(bytes[6], bytes[7]) +
              pair_hash<4>(bytes[8], bytes[9])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 41) && (sizeof(T) <= 48),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 12> bytes;
    if (sizeof(T) < 44) bytes[10] = 0;
    if (sizeof(T) < 48) bytes[11] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return ((pair_hash<0>(bytes[0], bytes[1]) +
             pair_hash<1>(bytes[2], bytes[3]) +
             pair_hash<2>(bytes[4], bytes[5])) ^
            ((pair_hash<3>(bytes[6], bytes[7]) +
              pair_hash<4>(bytes[8], bytes[9]) +
              pair_hash<5>(bytes[10], bytes[11])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 49) && (sizeof(T) <= 56),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 14> bytes;
    if (sizeof(T) < 52) bytes[12] = 0;
    if (sizeof(T) < 56) bytes[13] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return ((pair_hash<0>(bytes[0], bytes[1]) +
             pair_hash<1>(bytes[2], bytes[3]) +
             pair_hash<2>(bytes[4], bytes[5])) ^
            ((pair_hash<3>(bytes[6], bytes[7]) +
              pair_hash<4>(bytes[8], bytes[9]) +
              pair_hash<5>(bytes[10], bytes[11]) +
              pair_hash<6>(bytes[12], bytes[13])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value &&
                                        (sizeof(T) >= 57) && (sizeof(T) <= 64),
                                    int>::type = 0>
  static u64 hash(const T& val) {
    std::array<u32, 16> bytes;
    if (sizeof(T) < 60) bytes[14] = 0;
    if (sizeof(T) < 64) bytes[15] = 0;
    std::memcpy(&bytes[0], &val, sizeof(T));
    return ((pair_hash<0>(bytes[0], bytes[1]) +
             pair_hash<1>(bytes[2], bytes[3]) +
             pair_hash<2>(bytes[4], bytes[5]) +
             pair_hash<3>(bytes[6], bytes[7])) ^
            ((pair_hash<4>(bytes[8], bytes[9]) +
              pair_hash<5>(bytes[10], bytes[11]) +
              pair_hash<6>(bytes[12], bytes[13]) +
              pair_hash<7>(bytes[14], bytes[15])) >>
             32)) *
           fibonacci_muliplier();
  }

  template <typename T, typename std::enable_if<HighsHashable<T>::value &&
                                                    (sizeof(T) > 64),
                                                int>::type = 0>
  static u64 hash(const T& val) {
    return vector_hash(&val, 1);
  }
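  // Dispatch example, following from the enable_if guards above: a type such
  // as std::pair<int, int> with sizeof == 8 selects the first overload, a
  // 20-byte struct selects the 17-24 byte overload, and anything larger than
  // 64 bytes falls back to vector_hash.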
  template <typename T,
            typename std::enable_if<HighsHashable<T>::value, int>::type = 0>
  static u64 hash(const std::vector<T>& val) {
    return vector_hash(val.data(), val.size());
  }

  template <typename T,
            typename std::enable_if<
                std::is_same<decltype(*reinterpret_cast<const T*>(0) ==
                                      *reinterpret_cast<const T*>(0)),
                             bool>::value,
                int>::type = 0>
  static bool equal(const T& a, const T& b) {
    return a == b;
  }

  template <typename T,
            typename std::enable_if<HighsHashable<T>::value, int>::type = 0>
  static bool equal(const std::vector<T>& a, const std::vector<T>& b) {
    if (a.size() != b.size()) return false;
    return std::memcmp(a.data(), b.data(), sizeof(T) * a.size()) == 0;
  }

  static constexpr double golden_ratio_reciprocal() {
    return 0.61803398874989484;
  }

  static u32 double_hash_code(double val) {
    // we multiply by an irrational number, so that the buckets into which we
    // put the real numbers do not break on a power-of-two pattern. E.g.
    // consider the use case of detecting parallel rows when we have two
    // parallel rows scaled to have their largest coefficient 1.0 and another
    // coefficient which is 0.5 +- epsilon. Clearly we want to detect those
    // rows as parallel and give them the same hash value for small enough
    // epsilon. The exponent, however, will switch to -2 for the value just
    // below 0.5, and the hash codes would differ. When multiplying with the
    // reciprocal of the golden ratio, the exact 0.5 yields
    // 0.30901699437494742 and 0.5 - 1e-9 yields 0.3090169937569134, which
    // has the same exponent and matches in the most significant bits. Hence
    // it yields the same hash code. Obviously there will now be different
    // values which exhibit the same pattern as the 0.5 case, but they do not
    // have a small denominator like 1/2 in their rational representation;
    // instead they are power-of-two multiples of the golden ratio and
    // therefore irrational, which we do not expect in non-artificial input
    // data.
    int exponent;
    double hashbits = std::frexp(val * golden_ratio_reciprocal(), &exponent);

    // some extra casts to be more verbose about what is happening.
    // We want the exponent to use only 16 bits so that the remaining 16 bits
    // are used for the most significant bits of the mantissa and the sign
    // bit. Casting to unsigned 16 bits first ensures that the value after
    // the cast is defined to be UINT16_MAX - |exponent| when the exponent is
    // negative. Casting the exponent to a uint32_t directly would give the
    // wrong promotion of negative exponents as UINT32_MAX - |exponent|,
    // taking up too many bits and possibly losing information after the
    // 16-bit shift. For the mantissa we take the 15 most significant bits,
    // even though we could squeeze out a few more bits of the exponent. We
    // don't need more bits, as that would make the buckets very small and
    // might miss more values that are equal within epsilon. Therefore the
    // most significant 15 bits of the mantissa and the sign are encoded in
    // the 16 lower bits of the hash code, and the upper 16 bits encode the
    // sign and value of the exponent.
    u32 hashvalue = (u32)(u16)(i16)exponent;
    hashvalue = (hashvalue << 16) | (u32)(u16)(i16)std::ldexp(hashbits, 15);

    return hashvalue;
  }
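  // Layout of the resulting hash code, following from the casts above: the
  // upper 16 bits hold the frexp exponent (negative exponents wrap to values
  // near UINT16_MAX), and the lower 16 bits hold the sign together with the
  // 15 most significant mantissa bits. E.g. 0.5 and 0.5 - 1e-9 produce the
  // same code, whereas without the golden-ratio scaling their exponents, and
  // hence their codes, would differ.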
};

struct HighsHasher {
  template <typename T>
  size_t operator()(const T& x) const {
    return HighsHashHelpers::hash(x);
  }
};

struct HighsVectorHasher {
  template <typename T>
  size_t operator()(const std::vector<T>& vec) const {
    return HighsHashHelpers::vector_hash(vec.data(), vec.size());
  }
};

struct HighsVectorEqual {
  template <typename T>
  bool operator()(const std::vector<T>& vec1,
                  const std::vector<T>& vec2) const {
    if (vec1.size() != vec2.size()) return false;
    return std::equal(vec1.begin(), vec1.end(), vec2.begin());
  }
};

template <typename K, typename V>
struct HighsHashTableEntry {
 private:
  K key_;
  V value_;

 public:
  template <typename K_>
  HighsHashTableEntry(K_&& k) : key_(k), value_() {}
  template <typename K_, typename V_>
  HighsHashTableEntry(K_&& k, V_&& v) : key_(k), value_(v) {}

  const K& key() const { return key_; }
  const V& value() const { return value_; }
  V& value() { return value_; }
};

template <typename T>
struct HighsHashTableEntry<T, void> {
 private:
  T value_;

 public:
  template <typename... Args>
  HighsHashTableEntry(Args&&... args) : value_(std::forward<Args>(args)...) {}

  const T& key() const { return value_; }
  const T& value() const { return value_; }
};

template <typename K, typename V = void>
class HighsHashTable {
  struct OpNewDeleter {
    void operator()(void* ptr) { ::operator delete(ptr); }
  };

 public:
  using u8 = std::uint8_t;
  using i8 = std::int8_t;

  using u16 = std::uint16_t;
  using i16 = std::int16_t;

  using u32 = std::uint32_t;
  using i32 = std::int32_t;

  using u64 = std::uint64_t;
  using i64 = std::int64_t;

  using Entry = HighsHashTableEntry<K, V>;
  using KeyType = K;
  using ValueType = typename std::remove_reference<
      decltype(reinterpret_cast<Entry*>(0x1)->value())>::type;

  std::unique_ptr<Entry[], OpNewDeleter> entries;
  std::unique_ptr<u8[]> metadata;
  u64 tableSizeMask;
  u64 numHashShift;
  u64 numElements = 0;

  template <typename IterType>
  class HashTableIterator {
    u8* pos;
    u8* end;
    Entry* entryEnd;

   public:
    using difference_type = std::ptrdiff_t;
    using value_type = IterType;
    using pointer = IterType*;
    using reference = IterType&;
    using iterator_category = std::forward_iterator_tag;

    HashTableIterator(u8* pos, u8* end, Entry* entryEnd)
        : pos(pos), end(end), entryEnd(entryEnd) {}
    HashTableIterator() = default;

    HashTableIterator operator++(int) {  // postfix
      HashTableIterator oldpos = *this;
      for (++pos; pos != end; ++pos)
        if ((*pos) & 0x80u) break;
      return oldpos;
    }

    HashTableIterator& operator++() {  // prefix
      for (++pos; pos != end; ++pos)
        if ((*pos) & 0x80u) break;
      return *this;
    }

    reference operator*() const { return *(entryEnd - (end - pos)); }
    pointer operator->() const { return (entryEnd - (end - pos)); }

    HashTableIterator operator+(difference_type v) const {
      // advance a copy of this iterator v times and return it
      HashTableIterator result = *this;
      for (difference_type k = 0; k != v; ++k) ++result;
      return result;
    }

    bool operator==(const HashTableIterator& rhs) const {
      return pos == rhs.pos;
    }
    bool operator!=(const HashTableIterator& rhs) const {
      return pos != rhs.pos;
    }
  };

  using const_iterator = HashTableIterator<const Entry>;
  using iterator = HashTableIterator<Entry>;

  HighsHashTable() { makeEmptyTable(128); }
  HighsHashTable(u64 minCapacity) {
    u64 initCapacity = u64{1} << (u64)std::ceil(
                           std::log2(std::max(128.0, 8 * minCapacity / 7.0)));
    makeEmptyTable(initCapacity);
  }

  iterator end() {
    u64 capacity = tableSizeMask + 1;
    return iterator{metadata.get() + capacity, metadata.get() + capacity,
                    entries.get() + capacity};
  };

  const_iterator end() const {
    u64 capacity = tableSizeMask + 1;
    return const_iterator{metadata.get() + capacity,
                          metadata.get() + capacity,
                          entries.get() + capacity};
  };

  const_iterator begin() const {
    if (numElements == 0) return end();
    u64 capacity = tableSizeMask + 1;
    const_iterator iter{metadata.get(), metadata.get() + capacity,
                        entries.get() + capacity};
    if (!occupied(metadata[0])) ++iter;

    return iter;
  };

  iterator begin() {
    if (numElements == 0) return end();
    u64 capacity = tableSizeMask + 1;
    iterator iter{metadata.get(), metadata.get() + capacity,
                  entries.get() + capacity};
    if (!occupied(metadata[0])) ++iter;

    return iter;
  };
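  // Usage sketch; the interface mirrors a simplified std::unordered_map:
  //   HighsHashTable<HighsInt, double> table;
  //   table.insert(1, 2.0);           // returns false if the key exists
  //   double* val = table.find(1);    // nullptr if the key is not present
  //   table[1] += 1.0;                // inserts a default value if absent
  //   table.erase(1);
  //   for (const auto& entry : table) { /* entry.key(), entry.value() */ }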
 private:
  u8 toMetadata(u64 hash) const { return (hash >> numHashShift) | 0x80u; }

  static constexpr u64 maxDistance() { return 127; }

  void makeEmptyTable(u64 capacity) {
    tableSizeMask = capacity - 1;
    numHashShift = 64 - HighsHashHelpers::log2i(capacity);
    assert(capacity == (u64{1} << (64 - numHashShift)));
    numElements = 0;
    metadata = decltype(metadata)(new u8[capacity]{});
    entries =
        decltype(entries)((Entry*)::operator new(sizeof(Entry) * capacity));
  }

  bool occupied(u8 meta) const { return meta & 0x80; }

  u64 distanceFromIdealSlot(u64 pos) const {
    // we store 7 bits of the hash in the metadata. Assuming a decent hash
    // function, it practically never happens that an item travels more than
    // 127 slots from its ideal position; therefore we can compute the
    // distance from the ideal position just as it would normally be done,
    // assuming there is at most one overflow. As an illustration, consider
    // using 3 bits, which gives values from 0 to 7. When an item whose hash
    // has lower bits 7 is placed 3 positions after its ideal position, the
    // lower bits of its position overflow and yield the value 2. Under the
    // assumption that an item never travels through one full cycle of the
    // range 0 to 7, its position would never have lower bits 7 other than at
    // its ideal position. This allows us to compute the distance from the
    // ideal position by simply ignoring overflow. In our example the correct
    // answer is 3, and we get (2 - 7) = -5, which, promoted to an unsigned
    // value and truncated to the lower 3 bits, is exactly 3.
    return ((pos - metadata[pos])) & 0x7f;
  }
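  // Worked example with the full 7 bits: an item whose ideal slot has lower
  // bits 126 is placed 3 slots later, so its position has lower bits
  // (126 + 3) & 127 = 1; the computed distance is (1 - 126) & 0x7f = 3, as
  // required. The 0x80 occupancy bit in metadata[pos] does not affect the
  // result, since it is masked away by the & 0x7f.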
  void growTable() {
    decltype(entries) oldEntries = std::move(entries);
    decltype(metadata) oldMetadata = std::move(metadata);
    u64 oldCapacity = tableSizeMask + 1;

    makeEmptyTable(2 * oldCapacity);

    for (u64 i = 0; i != oldCapacity; ++i)
      if (occupied(oldMetadata[i])) insert(std::move(oldEntries.get()[i]));
  }

  void shrinkTable() {
    decltype(entries) oldEntries = std::move(entries);
    decltype(metadata) oldMetadata = std::move(metadata);
    u64 oldCapacity = tableSizeMask + 1;

    makeEmptyTable(oldCapacity / 2);

    for (u64 i = 0; i != oldCapacity; ++i)
      if (occupied(oldMetadata[i])) insert(std::move(oldEntries.get()[i]));
  }

  bool findPosition(const KeyType& key, u8& meta, u64& startPos, u64& maxPos,
                    u64& pos) const {
    u64 hash = HighsHashHelpers::hash(key);
    startPos = hash >> numHashShift;
    maxPos = (startPos + maxDistance()) & tableSizeMask;
    meta = toMetadata(hash);

    const Entry* entryArray = entries.get();
    pos = startPos;
    do {
      if (!occupied(metadata[pos])) return false;

      if (metadata[pos] == meta &&
          HighsHashHelpers::equal(key, entryArray[pos].key()))
        return true;

      u64 currentDistance = (pos - startPos) & tableSizeMask;
      if (currentDistance > distanceFromIdealSlot(pos)) return false;

      pos = (pos + 1) & tableSizeMask;
    } while (pos != maxPos);

    return false;
  }

 public:
  void clear() {
    if (numElements) {
      u64 capacity = tableSizeMask + 1;
      for (u64 i = 0; i < capacity; ++i)
        if (occupied(metadata[i])) entries.get()[i].~Entry();

      makeEmptyTable(128);
    }
  }

  const ValueType* find(const KeyType& key) const {
    u64 pos, startPos, maxPos;
    u8 meta;
    if (findPosition(key, meta, startPos, maxPos, pos))
      return &(entries.get()[pos].value());

    return nullptr;
  }

  ValueType* find(const KeyType& key) {
    u64 pos, startPos, maxPos;
    u8 meta;
    if (findPosition(key, meta, startPos, maxPos, pos))
      return &(entries.get()[pos].value());

    return nullptr;
  }

  ValueType& operator[](const KeyType& key) {
    Entry* entryArray = entries.get();
    u64 pos, startPos, maxPos;
    u8 meta;
    if (findPosition(key, meta, startPos, maxPos, pos))
      return entryArray[pos].value();

    if (numElements == ((tableSizeMask + 1) * 7) / 8 || pos == maxPos) {
      growTable();
      return (*this)[key];
    }

    using std::swap;

    ValueType& insertLocation = entryArray[pos].value();
    Entry entry(key, ValueType());
    ++numElements;
    do {
      if (!occupied(metadata[pos])) {
        metadata[pos] = meta;
        new (&entryArray[pos]) Entry{std::move(entry)};
        return insertLocation;
      }

      u64 currentDistance = (pos - startPos) & tableSizeMask;
      u64 distanceOfCurrentOccupant = distanceFromIdealSlot(pos);
      if (currentDistance > distanceOfCurrentOccupant) {
        // steal the position
        swap(entry, entryArray[pos]);
        swap(meta, metadata[pos]);
        startPos = (pos - distanceOfCurrentOccupant) & tableSizeMask;
        maxPos = (startPos + maxDistance()) & tableSizeMask;
      }

      pos = (pos + 1) & tableSizeMask;
    } while (pos != maxPos);

    growTable();
    insert(std::move(entry));
    return (*this)[key];
  }
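  // Note on the thresholds visible in operator[] and insert(): the table
  // grows once numElements reaches 7/8 of the capacity, or when an item
  // would have to travel more than maxDistance() slots; erase() shrinks the
  // table when occupancy falls below 1/4, but never below the minimum
  // capacity of 128 slots.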
  template <typename... Args>
  bool insert(Args&&... args) {
    Entry entry(std::forward<Args>(args)...);

    u64 pos, startPos, maxPos;
    u8 meta;
    if (findPosition(entry.key(), meta, startPos, maxPos, pos)) return false;

    if (numElements == ((tableSizeMask + 1) * 7) / 8 || pos == maxPos) {
      growTable();
      return insert(std::move(entry));
    }

    using std::swap;

    Entry* entryArray = entries.get();
    ++numElements;
    do {
      if (!occupied(metadata[pos])) {
        metadata[pos] = meta;
        new (&entryArray[pos]) Entry{std::move(entry)};
        return true;
      }

      u64 currentDistance = (pos - startPos) & tableSizeMask;
      u64 distanceOfCurrentOccupant = distanceFromIdealSlot(pos);
      if (currentDistance > distanceOfCurrentOccupant) {
        // steal the position
        swap(entry, entryArray[pos]);
        swap(meta, metadata[pos]);
        startPos = (pos - distanceOfCurrentOccupant) & tableSizeMask;
        maxPos = (startPos + maxDistance()) & tableSizeMask;
      }

      pos = (pos + 1) & tableSizeMask;
    } while (pos != maxPos);

    growTable();
    insert(std::move(entry));
    return true;
  }

  bool erase(const KeyType& key) {
    u64 pos, startPos, maxPos;
    u8 meta;
    if (!findPosition(key, meta, startPos, maxPos, pos)) return false;

    // delete the element at position pos
    Entry* entryArray = entries.get();
    entryArray[pos].~Entry();
    metadata[pos] = 0;

    // retain at least a quarter of the slots occupied, otherwise shrink the
    // table if it is not at its minimum size already
    --numElements;
    u64 capacity = tableSizeMask + 1;
    if (capacity != 128 && numElements < capacity / 4) {
      shrinkTable();
      return true;
    }

    // shift elements after pos backwards
    while (true) {
      u64 shift = (pos + 1) & tableSizeMask;
      if (!occupied(metadata[shift])) return true;

      u64 dist = distanceFromIdealSlot(shift);
      if (dist == 0) return true;

      entryArray[pos] = std::move(entryArray[shift]);
      metadata[pos] = metadata[shift];
      metadata[shift] = 0;
      pos = shift;
    }
  }

  i64 size() const { return numElements; }

  HighsHashTable(HighsHashTable&&) = default;
  HighsHashTable& operator=(HighsHashTable&&) = default;

  ~HighsHashTable() {
    if (metadata) {
      u64 capacity = tableSizeMask + 1;
      for (u64 i = 0; i < capacity; ++i) {
        if (occupied(metadata[i])) entries.get()[i].~Entry();
      }
    }
  }
};

#endif