 |
Vc
1.4.1
SIMD Vector Classes for C++
|
|
// Compiler identification macros. Each one expands to a number describing the
// version of the compiler it is named after.
// NOTE(review): the same five definitions appear again inside the
// #ifdef/#elif compiler-detection chain (listing lines 90-99); presumably this
// first, unconditional set exists only for the generated documentation —
// confirm against the full header.

// Intel compiler: the value of __INTEL_COMPILER_BUILD_DATE.
48 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
// Clang: major, minor and patchlevel packed into one number as
// major * 0x10000 + minor * 0x100 + patchlevel (e.g. 3.6.0 -> 0x30600).
57 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
// Apple's Clang: same packing as Vc_CLANG, built from the same __clang_*__ macros.
66 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
// GCC: __GNUC__, __GNUC_MINOR__ and __GNUC_PATCHLEVEL__ packed the same way
// (e.g. 6.0.0 -> 0x60000).
75 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
// MSVC: the value of _MSC_FULL_VER, unmodified.
83 #define Vc_MSVC _MSC_FULL_VER
90 #ifdef __INTEL_COMPILER
91 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
92 #elif defined(__clang__) && defined(__apple_build_version__)
93 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
94 #elif defined(__clang__)
95 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
96 #elif defined(__GNUC__)
97 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
98 #elif defined(_MSC_VER)
99 #define Vc_MSVC _MSC_FULL_VER
101 #define Vc_UNSUPPORTED_COMPILER 1
104 #if defined Vc_GCC && Vc_GCC >= 0x60000
105 #define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
106 #pragma GCC diagnostic push
107 #pragma GCC diagnostic ignored "-Wignored-attributes"
109 #define Vc_RESET_DIAGNOSTICS
118 #pragma warning disable 2922
121 #if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
122 # error "Vc requires support for C++11."
123 #elif __cplusplus >= 201402L
125 # if __cplusplus > 201700L
130 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
135 # define Vc_HAVE_MAX_ALIGN_T 1
136 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
138 # define Vc_HAVE_STD_MAX_ALIGN_T 1
141 #if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
142 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
146 # define Vc_CDECL __cdecl
147 # define Vc_VDECL __vectorcall
// Numeric codes from which a Vc_IMPL value is composed and against which it is
// decoded further down (e.g. `(Vc_IMPL & IMPL_MASK) == AVX2`, `(Vc_IMPL & FMA4)`).

// Implementation levels: each value lies entirely within the bits selected by
// IMPL_MASK, and the values increase from Scalar up to AVX2.
157 #define Scalar 0x00100000
158 #define SSE 0x00200000
159 #define SSE2 0x00300000
160 #define SSE3 0x00400000
161 #define SSSE3 0x00500000
162 #define SSE4_1 0x00600000
163 #define SSE4_2 0x00700000
164 #define AVX 0x00800000
165 #define AVX2 0x00900000
// Extension flags: one distinct bit each, all within the bits selected by
// EXT_MASK, so any combination can be OR-ed together and tested individually.
167 #define XOP 0x00000001
168 #define FMA4 0x00000002
169 #define F16C 0x00000004
170 #define POPCNT 0x00000008
171 #define SSE4a 0x00000010
172 #define FMA 0x00000020
173 #define BMI2 0x00000040
// Masks that split a Vc_IMPL value into its two parts: the upper 12 bits hold
// the implementation level, the lower 20 bits hold the extension flags.
175 #define IMPL_MASK 0xFFF00000
176 #define EXT_MASK 0x000FFFFF
190 # elif defined(_M_AMD64)
201 #if defined Vc_ICC && !defined __POPCNT__
202 # if defined __SSE4_2__ || defined __SSE4A__
203 # define __POPCNT__ 1
208 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
213 # if defined(__AVX2__)
214 # define Vc_IMPL_AVX2 1
215 # define Vc_IMPL_AVX 1
216 # elif defined(__AVX__)
217 # define Vc_IMPL_AVX 1
219 # if defined(__SSE4_2__)
220 # define Vc_IMPL_SSE 1
221 # define Vc_IMPL_SSE4_2 1
223 # if defined(__SSE4_1__)
224 # define Vc_IMPL_SSE 1
225 # define Vc_IMPL_SSE4_1 1
227 # if defined(__SSE3__)
228 # define Vc_IMPL_SSE 1
229 # define Vc_IMPL_SSE3 1
231 # if defined(__SSSE3__)
232 # define Vc_IMPL_SSE 1
233 # define Vc_IMPL_SSSE3 1
235 # if defined(__SSE2__)
236 # define Vc_IMPL_SSE 1
237 # define Vc_IMPL_SSE2 1
240 # if defined(Vc_IMPL_SSE)
243 # define Vc_IMPL_Scalar 1
246 # if !defined(Vc_IMPL_Scalar)
248 # define Vc_IMPL_FMA4 1
251 # define Vc_IMPL_XOP 1
254 # define Vc_IMPL_F16C 1
257 # define Vc_IMPL_POPCNT 1
260 # define Vc_IMPL_SSE4a 1
263 # define Vc_IMPL_FMA 1
266 # define Vc_IMPL_BMI2 1
272 # if (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
273 # define Vc_IMPL_AVX2 1
274 # define Vc_IMPL_AVX 1
275 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
276 # define Vc_IMPL_AVX 1
277 # elif (Vc_IMPL & IMPL_MASK) == Scalar
278 # define Vc_IMPL_Scalar 1
279 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
280 # define Vc_IMPL_SSE4_2 1
281 # define Vc_IMPL_SSE4_1 1
282 # define Vc_IMPL_SSSE3 1
283 # define Vc_IMPL_SSE3 1
284 # define Vc_IMPL_SSE2 1
285 # define Vc_IMPL_SSE 1
286 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
287 # define Vc_IMPL_SSE4_1 1
288 # define Vc_IMPL_SSSE3 1
289 # define Vc_IMPL_SSE3 1
290 # define Vc_IMPL_SSE2 1
291 # define Vc_IMPL_SSE 1
292 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
293 # define Vc_IMPL_SSSE3 1
294 # define Vc_IMPL_SSE3 1
295 # define Vc_IMPL_SSE2 1
296 # define Vc_IMPL_SSE 1
297 # elif (Vc_IMPL & IMPL_MASK) == SSE3
298 # define Vc_IMPL_SSE3 1
299 # define Vc_IMPL_SSE2 1
300 # define Vc_IMPL_SSE 1
301 # elif (Vc_IMPL & IMPL_MASK) == SSE2
302 # define Vc_IMPL_SSE2 1
303 # define Vc_IMPL_SSE 1
304 # elif (Vc_IMPL & IMPL_MASK) == SSE
305 # define Vc_IMPL_SSE 1
306 # if defined(__SSE4_2__)
307 # define Vc_IMPL_SSE4_2 1
309 # if defined(__SSE4_1__)
310 # define Vc_IMPL_SSE4_1 1
312 # if defined(__SSE3__)
313 # define Vc_IMPL_SSE3 1
315 # if defined(__SSSE3__)
316 # define Vc_IMPL_SSSE3 1
318 # if defined(__SSE2__)
319 # define Vc_IMPL_SSE2 1
321 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
324 # define Vc_IMPL_SSE3 1
325 # define Vc_IMPL_SSE2 1
326 # define Vc_IMPL_SSE 1
329 # define Vc_IMPL_XOP 1
331 # if (Vc_IMPL & FMA4)
332 # define Vc_IMPL_FMA4 1
334 # if (Vc_IMPL & F16C)
335 # define Vc_IMPL_F16C 1
337 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
338 # define Vc_IMPL_POPCNT 1
340 # if (Vc_IMPL & SSE4a)
341 # define Vc_IMPL_SSE4a 1
344 # define Vc_IMPL_FMA 1
346 # if (Vc_IMPL & BMI2)
347 # define Vc_IMPL_BMI2 1
355 # define Vc_USE_VEX_CODING 1
360 # define Vc_IMPL_SSE4_2 1
361 # define Vc_IMPL_SSE4_1 1
362 # define Vc_IMPL_SSSE3 1
363 # define Vc_IMPL_SSE3 1
364 # define Vc_IMPL_SSE2 1
365 # define Vc_IMPL_SSE 1
368 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
369 # if defined(Vc_IMPL_AVX)
370 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
372 # if defined(Vc_IMPL_AVX2)
378 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX)
379 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
380 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
381 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
405 #if defined Vc_IMPL_AVX2
406 #define Vc_DEFAULT_IMPL_AVX2
407 #elif defined Vc_IMPL_AVX
408 #define Vc_DEFAULT_IMPL_AVX
409 #elif defined Vc_IMPL_SSE
410 #define Vc_DEFAULT_IMPL_SSE
411 #elif defined Vc_IMPL_Scalar
412 #define Vc_DEFAULT_IMPL_Scalar
414 #error "Preprocessor logic broken. Please report a bug."
419 namespace Vc_VERSIONED_NAMESPACE
// Fixed-width integer names declared as aliases of fundamental types, one
// signed/unsigned pair per width.
// NOTE(review): the widths implied by the names (8/16/32/64 bits) rely on the
// target's sizes for char, short, int and long long; nothing visible in this
// listing checks them — confirm for the supported platforms.
422 typedef signed char int8_t;
423 typedef unsigned char uint8_t;
424 typedef signed short int16_t;
425 typedef unsigned short uint16_t;
426 typedef signed int int32_t;
427 typedef unsigned int uint32_t;
428 typedef signed long long int64_t;
429 typedef unsigned long long uint64_t;
486 ImplementationMask = 0xfff
519 ExtraInstructionsMask = 0xfffff000u
535 return static_cast<Implementation>(Features & ImplementationMask);
540 return static_cast<unsigned int>(impl) == current();
548 return static_cast<unsigned int>(low) <= current() &&
549 static_cast<unsigned int>(high) >= current();
554 static constexpr
bool runs_on(
unsigned int extraInstructions)
556 return (extraInstructions & Features & ExtraInstructionsMask) ==
557 (Features & ExtraInstructionsMask);
567 #ifdef Vc_IMPL_Scalar
569 #elif defined(Vc_IMPL_AVX2)
571 #elif defined(Vc_IMPL_AVX)
573 #elif defined(Vc_IMPL_SSE4_2)
575 #elif defined(Vc_IMPL_SSE4_1)
577 #elif defined(Vc_IMPL_SSSE3)
579 #elif defined(Vc_IMPL_SSE3)
581 #elif defined(Vc_IMPL_SSE2)
593 #ifdef Vc_IMPL_POPCNT
602 #ifdef Vc_USE_VEX_CODING
611 #endif // VC_GLOBAL_H_
@ FmaInstructions
Support for FMA instructions (3 operand variant)
@ AlignOnCacheline
Align on boundary of cache line sizes.
@ Bmi2Instructions
Support for BMI2 instructions.
@ Sse4aInstructions
Support for SSE4a instructions.
@ SSE42Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
@ Float16cInstructions
Support for float16 conversions in hardware.
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
@ PopcntInstructions
Support for the population count instruction.
@ SSSE3Impl
x86 SSE + SSE2 + SSE3 + SSSE3
@ SSE3Impl
x86 SSE + SSE2 + SSE3
@ AlignOnPage
Align on boundary of page sizes.
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high.
@ AlignOnVector
Align on boundary of vector sizes.
@ SSE41Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
@ XopInstructions
Support for XOP instructions.
@ Fma4Instructions
Support for FMA4 instructions.
@ VexInstructions
Support for ternary instruction coding (VEX)
@ ScalarImpl
uses only fundamental types
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.