Ninja
rapidhash.h
Go to the documentation of this file.
1/*
2 * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
3 * Copyright (C) 2024 Nicolas De Carli
4 *
5 * Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net>
6 *
7 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are
11 * met:
12 *
13 * * Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following disclaimer
17 * in the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * You can contact the author at:
33 * - rapidhash source repository: https://github.com/Nicoshev/rapidhash
34 */
35
36/*
37 * Includes.
38 */
39#include <stdint.h>
40#include <string.h>
41#if defined(_MSC_VER)
42 #include <intrin.h>
43 #if defined(_M_X64) && !defined(_M_ARM64EC)
44 #pragma intrinsic(_umul128)
45 #endif
46#endif
47
48/*
49 * C++ macros.
50 *
51 * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)).
52 */
53#ifdef __cplusplus
54 #define RAPIDHASH_NOEXCEPT noexcept
55 #define RAPIDHASH_CONSTEXPR constexpr
56 #ifndef RAPIDHASH_INLINE
57 #define RAPIDHASH_INLINE inline
58 #endif
59#else
60 #define RAPIDHASH_NOEXCEPT
61 #define RAPIDHASH_CONSTEXPR static const
62 #ifndef RAPIDHASH_INLINE
63 #define RAPIDHASH_INLINE static inline
64 #endif
65#endif
66
67/*
68 * Protection macro, alters behaviour of rapid_mum multiplication function.
69 *
70 * RAPIDHASH_FAST: Normal behavior, max speed.
71 * RAPIDHASH_PROTECTED: Extra protection against entropy loss.
72 */
73#ifndef RAPIDHASH_PROTECTED
74 #define RAPIDHASH_FAST
75#elif defined(RAPIDHASH_FAST)
76 #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
77#endif
78
79/*
80 * Unrolling macros, changes code definition for main hash function.
81 *
82 * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes.
83 * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes.
84 *
85 * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED.
86 *
87 * These macros do not alter the output hash.
88 */
89#ifndef RAPIDHASH_COMPACT
90 #define RAPIDHASH_UNROLLED
91#elif defined(RAPIDHASH_UNROLLED)
92 #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
93#endif
94
95/*
96 * Likely and unlikely macros.
97 */
98#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
99 #define _likely_(x) __builtin_expect(x,1)
100 #define _unlikely_(x) __builtin_expect(x,0)
101#else
102 #define _likely_(x) (x)
103 #define _unlikely_(x) (x)
104#endif
105
106/*
107 * Endianness macros.
108 */
109#ifndef RAPIDHASH_LITTLE_ENDIAN
110 #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
111 #define RAPIDHASH_LITTLE_ENDIAN
112 #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
113 #define RAPIDHASH_BIG_ENDIAN
114 #else
115 #warning "could not determine endianness! Falling back to little endian."
116 #define RAPIDHASH_LITTLE_ENDIAN
117 #endif
118#endif
119
120/*
121 * Default seed.
122 */
123#define RAPID_SEED (0xbdd89aa982704029ull)
124
125/*
126 * Default secret parameters.
127 */
128RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull};
129
130/*
131 * 64*64 -> 128bit multiply function.
132 *
133 * @param A Address of 64-bit number.
134 * @param B Address of 64-bit number.
135 *
136 * Calculates 128-bit C = *A * *B.
137 *
138 * When RAPIDHASH_FAST is defined:
139 * Overwrites A contents with C's low 64 bits.
140 * Overwrites B contents with C's high 64 bits.
141 *
142 * When RAPIDHASH_PROTECTED is defined:
143 * Xors and overwrites A contents with C's low 64 bits.
144 * Xors and overwrites B contents with C's high 64 bits.
145 */
147#if defined(__SIZEOF_INT128__)
148 __uint128_t r=*A; r*=*B;
149 #ifdef RAPIDHASH_PROTECTED
150 *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
151 #else
152 *A=(uint64_t)r; *B=(uint64_t)(r>>64);
153 #endif
154#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
155 #if defined(_M_X64)
156 #ifdef RAPIDHASH_PROTECTED
157 uint64_t a, b;
158 a=_umul128(*A,*B,&b);
159 *A^=a; *B^=b;
160 #else
161 *A=_umul128(*A,*B,B);
162 #endif
163 #else
164 #ifdef RAPIDHASH_PROTECTED
165 uint64_t a, b;
166 b = __umulh(*A, *B);
167 a = *A * *B;
168 *A^=a; *B^=b;
169 #else
170 uint64_t c = __umulh(*A, *B);
171 *A = *A * *B;
172 *B = c;
173 #endif
174 #endif
175#else
176 uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
177 uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
178 lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
179 #ifdef RAPIDHASH_PROTECTED
180 *A^=lo; *B^=hi;
181 #else
182 *A=lo; *B=hi;
183 #endif
184#endif
185}
186
187/*
188 * Multiply and xor mix function.
189 *
190 * @param A 64-bit number.
191 * @param B 64-bit number.
192 *
193 * Calculates 128-bit C = A * B.
194 * Returns 64-bit xor between high and low 64 bits of C.
195 */
197
198/*
199 * Read functions.
200 */
201#ifdef RAPIDHASH_LITTLE_ENDIAN
202RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;}
203RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;}
204#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
205RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);}
206RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);}
207#elif defined(_MSC_VER)
208RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);}
209RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);}
210#else
212 uint64_t v; memcpy(&v, p, 8);
213 return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
214}
216 uint32_t v; memcpy(&v, p, 4);
217 return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
218}
219#endif
220
221/*
222 * Reads and combines 3 bytes of input.
223 *
224 * @param p Buffer to read from.
225 * @param k Length of @p, in bytes.
226 *
227 * Always reads and combines 3 bytes from memory.
228 * Guarantees to read each buffer position at least once.
229 *
230 * Returns a 64-bit value containing all three bytes read.
231 */
232RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];}
233
234/*
235 * rapidhash main function.
236 *
237 * @param key Buffer to be hashed.
238 * @param len @key length, in bytes.
239 * @param seed 64-bit seed used to alter the hash result predictably.
240 * @param secret Triplet of 64-bit secrets used to alter hash result predictably.
241 *
242 * Returns a 64-bit hash.
243 */
244RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT {
245 const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b;
246 if(_likely_(len<=16)){
247 if(_likely_(len>=4)){
248 const uint8_t * plast = p + len - 4;
249 a = (rapid_read32(p) << 32) | rapid_read32(plast);
250 const uint64_t delta = ((len&24)>>(len>>3));
251 b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); }
252 else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;}
253 else a=b=0;
254 }
255 else{
256 size_t i=len;
257 if(_unlikely_(i>48)){
258 uint64_t see1=seed, see2=seed;
259#ifdef RAPIDHASH_UNROLLED
260 while(_likely_(i>=96)){
261 seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
262 see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
263 see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
264 seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed);
265 see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1);
266 see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2);
267 p+=96; i-=96;
268 }
269 if(_unlikely_(i>=48)){
270 seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
271 see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
272 see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
273 p+=48; i-=48;
274 }
275#else
276 do {
277 seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
278 see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
279 see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
280 p+=48; i-=48;
281 } while (_likely_(i>=48));
282#endif
283 seed^=see1^see2;
284 }
285 if(i>16){
286 seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]);
287 if(i>32)
288 seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed);
289 }
290 a=rapid_read64(p+i-16); b=rapid_read64(p+i-8);
291 }
292 a^=secret[1]; b^=seed; rapid_mum(&a,&b);
293 return rapid_mix(a^secret[0]^len,b^secret[1]);
294}
295
296/*
297 * rapidhash default seeded hash function.
298 *
299 * @param key Buffer to be hashed.
300 * @param len @key length, in bytes.
301 * @param seed 64-bit seed used to alter the hash result predictably.
302 *
303 * Calls rapidhash_internal using provided parameters and default secrets.
304 *
305 * Returns a 64-bit hash.
306 */
308 return rapidhash_internal(key, len, seed, rapid_secret);
309}
310
311/*
312 * rapidhash default hash function.
313 *
314 * @param key Buffer to be hashed.
315 * @param len @key length, in bytes.
316 *
317 * Calls rapidhash_withSeed using provided parameters and the default seed.
318 *
319 * Returns a 64-bit hash.
320 */
322 return rapidhash_withSeed(key, len, RAPID_SEED);
323}
#define _unlikely_(x)
Definition rapidhash.h:103
#define RAPIDHASH_CONSTEXPR
Definition rapidhash.h:61
RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:232
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:202
#define _likely_(x)
Definition rapidhash.h:102
RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t *secret) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:244
RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:196
#define RAPIDHASH_NOEXCEPT
Definition rapidhash.h:60
#define RAPIDHASH_INLINE
Definition rapidhash.h:63
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:203
RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:146
RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:321
#define RAPID_SEED
Definition rapidhash.h:123
RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3]
Definition rapidhash.h:128
RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT
Definition rapidhash.h:307
unsigned long long uint64_t
Definition win32port.h:29