Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_BitManipulation.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_BIT_MANIPULATION_HPP
18#define KOKKOS_BIT_MANIPULATION_HPP
19
20#include <Kokkos_Macros.hpp>
21#include <Kokkos_NumericTraits.hpp>
22#include <climits> // CHAR_BIT
23#include <cstring> //memcpy
24#include <type_traits>
25
26namespace Kokkos::Impl {
27
28template <class T>
29KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30 if constexpr (sizeof(T) > 1) {
31 using U = std::make_unsigned_t<T>;
32
33 size_t shift = CHAR_BIT * (sizeof(T) - 1);
34
35 U lo_mask = static_cast<unsigned char>(~0);
36 U hi_mask = lo_mask << shift;
37
38 U val = x;
39
40 for (size_t i = 0; i < sizeof(T) / 2; ++i) {
41 U lo_val = val & lo_mask;
42 U hi_val = val & hi_mask;
43
44 val = (val & ~lo_mask) | (hi_val >> shift);
45 val = (val & ~hi_mask) | (lo_val << shift);
46
47 lo_mask <<= CHAR_BIT;
48 hi_mask >>= CHAR_BIT;
49
50 shift -= 2 * CHAR_BIT;
51 }
52 return val;
53 }
54 // sizeof(T) == 1
55 return x;
56}
57
58template <class T>
59KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) {
60 // From Hacker's Delight (2nd edition) section 5-3
61 unsigned int y = 0;
62 using ::Kokkos::Experimental::digits_v;
63 int n = digits_v<T>;
64 int c = digits_v<T> / 2;
65 do {
66 y = x >> c;
67 if (y != 0) {
68 n -= c;
69 x = y;
70 }
71 c >>= 1;
72 } while (c != 0);
73 return n - static_cast<int>(x);
74}
75
76template <class T>
77KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) {
78 using ::Kokkos::Experimental::digits_v;
79 return digits_v<T> - countl_zero_fallback(static_cast<T>(
80 static_cast<T>(~x) & static_cast<T>(x - 1)));
81}
82
83template <class T>
84KOKKOS_FUNCTION constexpr int popcount_fallback(T x) {
85 int c = 0;
86 for (; x != 0; x &= x - 1) {
87 ++c;
88 }
89 return c;
90}
91
92template <class T>
93inline constexpr bool is_standard_unsigned_integer_type_v =
94 std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95 std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96 std::is_same_v<T, unsigned long long>;
97
98} // namespace Kokkos::Impl
99
100namespace Kokkos {
101
102//<editor-fold desc="[bit.cast], bit_cast">
103// FIXME_SYCL intel/llvm has unqualified calls to bit_cast which are ambiguous
104// if we declare our own bit_cast function
105#ifdef KOKKOS_ENABLE_SYCL
106using sycl::detail::bit_cast;
107#else
108template <class To, class From>
109KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
110 std::is_trivially_copyable_v<To> &&
111 std::is_trivially_copyable_v<From>,
112 To>
113bit_cast(From const& from) noexcept {
114 To to;
115 memcpy(&to, &from, sizeof(To));
116 return to;
117}
118#endif
119//</editor-fold>
120
121//<editor-fold desc="[bit.byteswap], byteswap">
122template <class T>
123KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
124 T value) noexcept {
125 return Impl::byteswap_fallback(value);
126}
127//</editor-fold>
128
129//<editor-fold desc="[bit.count], counting">
130template <class T>
131KOKKOS_FUNCTION constexpr std::enable_if_t<
132 Impl::is_standard_unsigned_integer_type_v<T>, int>
133countl_zero(T x) noexcept {
134 using ::Kokkos::Experimental::digits_v;
135 if (x == 0) return digits_v<T>;
136 // TODO use compiler intrinsics when available
137 return Impl::countl_zero_fallback(x);
138}
139
140template <class T>
141KOKKOS_FUNCTION constexpr std::enable_if_t<
142 Impl::is_standard_unsigned_integer_type_v<T>, int>
143countl_one(T x) noexcept {
144 using ::Kokkos::Experimental::digits_v;
145 using ::Kokkos::Experimental::finite_max_v;
146 if (x == finite_max_v<T>) return digits_v<T>;
147 return countl_zero(static_cast<T>(~x));
148}
149
150template <class T>
151KOKKOS_FUNCTION constexpr std::enable_if_t<
152 Impl::is_standard_unsigned_integer_type_v<T>, int>
153countr_zero(T x) noexcept {
154 using ::Kokkos::Experimental::digits_v;
155 if (x == 0) return digits_v<T>;
156 // TODO use compiler intrinsics when available
157 return Impl::countr_zero_fallback(x);
158}
159
160template <class T>
161KOKKOS_FUNCTION constexpr std::enable_if_t<
162 Impl::is_standard_unsigned_integer_type_v<T>, int>
163countr_one(T x) noexcept {
164 using ::Kokkos::Experimental::digits_v;
165 using ::Kokkos::Experimental::finite_max_v;
166 if (x == finite_max_v<T>) return digits_v<T>;
167 return countr_zero(static_cast<T>(~x));
168}
169
170template <class T>
171KOKKOS_FUNCTION constexpr std::enable_if_t<
172 Impl::is_standard_unsigned_integer_type_v<T>, int>
173popcount(T x) noexcept {
174 if (x == 0) return 0;
175 // TODO use compiler intrinsics when available
176 return Impl::popcount_fallback(x);
177}
178//</editor-fold>
179
180//<editor-fold desc="[bit.pow.two], integral powers of 2">
181template <class T>
182KOKKOS_FUNCTION constexpr std::enable_if_t<
183 Impl::is_standard_unsigned_integer_type_v<T>, bool>
184has_single_bit(T x) noexcept {
185 return x != 0 && (((x & (x - 1)) == 0));
186}
187
188template <class T>
189KOKKOS_FUNCTION constexpr std::enable_if_t<
190 Impl::is_standard_unsigned_integer_type_v<T>, T>
191bit_ceil(T x) noexcept {
192 if (x <= 1) return 1;
193 using ::Kokkos::Experimental::digits_v;
194 return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
195}
196
197template <class T>
198KOKKOS_FUNCTION constexpr std::enable_if_t<
199 Impl::is_standard_unsigned_integer_type_v<T>, T>
200bit_floor(T x) noexcept {
201 if (x == 0) return 0;
202 using ::Kokkos::Experimental::digits_v;
203 return T{1} << (digits_v<T> - 1 - countl_zero(x));
204}
205
206template <class T>
207KOKKOS_FUNCTION constexpr std::enable_if_t<
208 Impl::is_standard_unsigned_integer_type_v<T>, T>
209bit_width(T x) noexcept {
210 if (x == 0) return 0;
211 using ::Kokkos::Experimental::digits_v;
212 return digits_v<T> - countl_zero(x);
213}
214//</editor-fold>
215
216//<editor-fold desc="[bit.rotate], rotating">
217template <class T>
218[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
219 Impl::is_standard_unsigned_integer_type_v<T>, T>
220rotl(T x, int s) noexcept {
221 using Experimental::digits_v;
222 constexpr auto dig = digits_v<T>;
223 int const rem = s % dig;
224 if (rem == 0) return x;
225 if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig));
226 return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem)
227}
228
229template <class T>
230[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
231 Impl::is_standard_unsigned_integer_type_v<T>, T>
232rotr(T x, int s) noexcept {
233 using Experimental::digits_v;
234 constexpr auto dig = digits_v<T>;
235 int const rem = s % dig;
236 if (rem == 0) return x;
237 if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig));
238 return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem)
239}
240//</editor-fold>
241
242} // namespace Kokkos
243
244namespace Kokkos::Impl {
245
246#if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
247 defined(KOKKOS_COMPILER_GNU)
248#define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
249#endif
250
251template <class T>
252KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
253 return byteswap_fallback(x);
254}
255
256template <class T>
257KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
258#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
259 if constexpr (sizeof(T) == 1) {
260 return x;
261 } else if constexpr (sizeof(T) == 2) {
262 return __builtin_bswap16(x);
263 } else if constexpr (sizeof(T) == 4) {
264 return __builtin_bswap32(x);
265 } else if constexpr (sizeof(T) == 8) {
266 return __builtin_bswap64(x);
267 } else if constexpr (sizeof(T) == 16) {
268#if defined(__has_builtin)
269#if __has_builtin(__builtin_bswap128)
270 return __builtin_bswap128(x);
271#endif
272#endif
273 return (__builtin_bswap64(x >> 64) |
274 (static_cast<T>(__builtin_bswap64(x)) << 64));
275 }
276#endif
277
278 return byteswap_fallback(x);
279}
280
281template <class T>
282KOKKOS_IMPL_DEVICE_FUNCTION
283 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
284 countl_zero_builtin_device(T x) noexcept {
285#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
286 if constexpr (sizeof(T) == sizeof(long long int)) {
287 return __clzll(reinterpret_cast<long long int&>(x));
288 } else if constexpr (sizeof(T) == sizeof(int)) {
289 return __clz(reinterpret_cast<int&>(x));
290 } else {
291 using ::Kokkos::Experimental::digits_v;
292 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
293 return __clz(x) - shift;
294 }
295#elif defined(KOKKOS_ENABLE_SYCL)
296 return sycl::clz(x);
297#else
298 return countl_zero_fallback(x);
299#endif
300}
301
302template <class T>
303KOKKOS_IMPL_HOST_FUNCTION
304 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
305 countl_zero_builtin_host(T x) noexcept {
306 using ::Kokkos::Experimental::digits_v;
307 if (x == 0) return digits_v<T>;
308#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
309 if constexpr (std::is_same_v<T, unsigned long long>) {
310 return __builtin_clzll(x);
311 } else if constexpr (std::is_same_v<T, unsigned long>) {
312 return __builtin_clzl(x);
313 } else if constexpr (std::is_same_v<T, unsigned int>) {
314 return __builtin_clz(x);
315 } else {
316 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
317 return __builtin_clz(x) - shift;
318 }
319#else
320 return countl_zero_fallback(x);
321#endif
322}
323
324template <class T>
325KOKKOS_IMPL_DEVICE_FUNCTION
326 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
327 countr_zero_builtin_device(T x) noexcept {
328 using ::Kokkos::Experimental::digits_v;
329 if (x == 0) return digits_v<T>;
330#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
331 if constexpr (sizeof(T) == sizeof(long long int)) {
332 return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
333 } else {
334 return __ffs(reinterpret_cast<int&>(x)) - 1;
335 }
336#elif defined(KOKKOS_ENABLE_SYCL)
337 return sycl::ctz(x);
338#else
339 return countr_zero_fallback(x);
340#endif
341}
342
343template <class T>
344KOKKOS_IMPL_HOST_FUNCTION
345 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
346 countr_zero_builtin_host(T x) noexcept {
347 using ::Kokkos::Experimental::digits_v;
348 if (x == 0) return digits_v<T>;
349#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
350 if constexpr (std::is_same_v<T, unsigned long long>) {
351 return __builtin_ctzll(x);
352 } else if constexpr (std::is_same_v<T, unsigned long>) {
353 return __builtin_ctzl(x);
354 } else {
355 return __builtin_ctz(x);
356 }
357#else
358 return countr_zero_fallback(x);
359#endif
360}
361
362template <class T>
363KOKKOS_IMPL_DEVICE_FUNCTION
364 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
365 popcount_builtin_device(T x) noexcept {
366#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
367 if constexpr (sizeof(T) == sizeof(long long int)) {
368 return __popcll(x);
369 } else {
370 return __popc(x);
371 }
372#elif defined(KOKKOS_ENABLE_SYCL)
373 return sycl::popcount(x);
374#else
375 return popcount_fallback(x);
376#endif
377}
378
379template <class T>
380KOKKOS_IMPL_HOST_FUNCTION
381 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
382 popcount_builtin_host(T x) noexcept {
383#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
384 if constexpr (std::is_same_v<T, unsigned long long>) {
385 return __builtin_popcountll(x);
386 } else if constexpr (std::is_same_v<T, unsigned long>) {
387 return __builtin_popcountl(x);
388 } else {
389 return __builtin_popcount(x);
390 }
391#else
392 return popcount_fallback(x);
393#endif
394}
395
396#undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
397
398} // namespace Kokkos::Impl
399
400namespace Kokkos::Experimental {
401
402template <class To, class From>
403KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
404 std::is_trivially_copyable_v<To> &&
405 std::is_trivially_copyable_v<From>,
406 To>
407bit_cast_builtin(From const& from) noexcept {
408 // qualify the call to avoid ADL
409 return Kokkos::bit_cast<To>(from); // no benefit to call the _builtin variant
410}
411
412template <class T>
413KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
414 T x) noexcept {
415 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
416 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
417}
418
419template <class T>
420KOKKOS_FUNCTION std::enable_if_t<
421 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
422countl_zero_builtin(T x) noexcept {
423 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
424 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
425}
426
427template <class T>
428KOKKOS_FUNCTION std::enable_if_t<
429 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
430countl_one_builtin(T x) noexcept {
431 if (x == finite_max_v<T>) return digits_v<T>;
432 return countl_zero_builtin(static_cast<T>(~x));
433}
434
435template <class T>
436KOKKOS_FUNCTION std::enable_if_t<
437 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
438countr_zero_builtin(T x) noexcept {
439 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
440 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
441}
442
443template <class T>
444KOKKOS_FUNCTION std::enable_if_t<
445 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
446countr_one_builtin(T x) noexcept {
447 if (x == finite_max_v<T>) return digits_v<T>;
448 return countr_zero_builtin(static_cast<T>(~x));
449}
450
451template <class T>
452KOKKOS_FUNCTION std::enable_if_t<
453 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
454popcount_builtin(T x) noexcept {
455 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
456 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
457}
458
459template <class T>
460KOKKOS_FUNCTION std::enable_if_t<
461 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, bool>
462has_single_bit_builtin(T x) noexcept {
463 return has_single_bit(x); // no benefit to call the _builtin variant
464}
465
466template <class T>
467KOKKOS_FUNCTION
468 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
469 bit_ceil_builtin(T x) noexcept {
470 if (x <= 1) return 1;
471 return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
472}
473
474template <class T>
475KOKKOS_FUNCTION
476 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
477 bit_floor_builtin(T x) noexcept {
478 if (x == 0) return 0;
479 return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
480}
481
482template <class T>
483KOKKOS_FUNCTION
484 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
485 bit_width_builtin(T x) noexcept {
486 if (x == 0) return 0;
487 return digits_v<T> - countl_zero_builtin(x);
488}
489
490template <class T>
491[[nodiscard]] KOKKOS_FUNCTION
492 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
493 rotl_builtin(T x, int s) noexcept {
494 return rotl(x, s); // no benefit to call the _builtin variant
495}
496
497template <class T>
498[[nodiscard]] KOKKOS_FUNCTION
499 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
500 rotr_builtin(T x, int s) noexcept {
501 return rotr(x, s); // no benefit to call the _builtin variant
502}
503
504} // namespace Kokkos::Experimental
505
506#endif
ScopeGuard Some user scope issues have been identified with some Kokkos::finalize calls; ScopeGuard a...