blitz
Version 1.0.2
Toggle main menu visibility
Loading...
Searching...
No Matches
tuning.h
Go to the documentation of this file.
1
// -*- C++ -*-
2
/***************************************************************************
3
* blitz/tuning.h Platform-specific code tuning
4
*
5
* $Id$
6
*
7
* Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
8
*
9
* This file is a part of Blitz.
10
*
11
* Blitz is free software: you can redistribute it and/or modify
12
* it under the terms of the GNU Lesser General Public License
13
* as published by the Free Software Foundation, either version 3
14
* of the License, or (at your option) any later version.
15
*
16
* Blitz is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU Lesser General Public License for more details.
20
*
21
* You should have received a copy of the GNU Lesser General Public
22
* License along with Blitz. If not, see <http://www.gnu.org/licenses/>.
23
*
24
* Suggestions: blitz-devel@lists.sourceforge.net
25
* Bugs: blitz-support@lists.sourceforge.net
26
*
27
* For more information, please see the Blitz++ Home Page:
28
* https://sourceforge.net/projects/blitz/
29
*
30
***************************************************************************/
31
32
#ifndef BZ_TUNING_H
33
#define BZ_TUNING_H
34
35
// Cache geometry estimates used for tuning.
// These estimates should be conservative (i.e. underestimate the
// cache sizes). \todo these can be const ints instead of macros.
// NOTE(review): the values are presumably byte counts -- confirm
// against the code that consumes them.
#define BZ_L1_CACHE_ESTIMATED_SIZE 32768
#define BZ_L2_CACHE_ESTIMATED_SIZE 6291456
// This will work for 32, 16 also
#define BZ_L1_CACHE_LINE_SIZE 64
#define BZ_CACHE_LINES_TO_ALIGN 16
42
43
// Default state of the tuning switches. Each switch is either defined
// (#define) or explicitly left undefined (#undef) here; the
// platform-specific sections further down in this header may override
// individual entries for particular compilers.
#undef BZ_PARTIAL_LOOP_UNROLL
#define BZ_PASS_EXPR_BY_VALUE
#undef BZ_PTR_INC_FASTER_THAN_INDIRECTION
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#undef BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE 128
#undef BZ_INTERLACE_ARRAYS
#define BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
#define BZ_FAST_COMPILE

// Numeric tuning parameters.
#define BZ_TV_EVALUATE_UNROLL_LENGTH 0
#define BZ_MAX_BITS_FOR_BINARY_UNROLL 8
#define BZ_VECTORIZED_LOOP_WIDTH 32
67
68
69
// Define BZ_NEW_EXPRESSION_TEMPLATES unless the including code has
// already defined BZ_DISABLE_NEW_ET to opt out.
#ifndef BZ_DISABLE_NEW_ET
#define BZ_NEW_EXPRESSION_TEMPLATES
#endif
72
73
// BZ_FAST_COMPILE implies two further switches: BZ_ETPARMS_CONSTREF
// (makes BZ_ETPARM(X) expand to "const X&") and BZ_NO_INLINE_ET
// (makes _bz_inline_et expand to nothing). Both are consumed further
// down in this header.
#ifdef BZ_FAST_COMPILE
#define BZ_ETPARMS_CONSTREF
#define BZ_NO_INLINE_ET
#endif
77
78
// Default inlining qualifiers: both expand to plain "inline".
// possibly overridden by specific compilers below
#define _bz_forceinline inline
#define _bz_inline_et inline
81
82
83
/*
 * Platform-specific tuning
 */

// Cray T3E: make sure the Blitz-side unrolling switches are undefined.
#ifdef _CRAYT3E
// The backend compiler on the T3E does a better job of
// loop unrolling.
#undef BZ_PARTIAL_LOOP_UNROLL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif
94
95
// Intel and GNU compiler overrides. The Intel branch is tested first
// and the GNU branch lives in its #else, because icpc also defines
// __GNUC__.
#ifdef __INTEL_COMPILER
// icpc does not vectorize the unrolled loop so this is def. bad
// (#undef first so the override stays valid even if the default
// value set earlier in this header is ever changed)
#undef BZ_TV_EVALUATE_UNROLL_LENGTH
#define BZ_TV_EVALUATE_UNROLL_LENGTH 0

// defines for inlining
#undef _bz_forceinline
#undef _bz_inline_et
#define _bz_forceinline __forceinline
#define _bz_inline_et __forceinline

#else // need this since icpc also defines __GNUC__
#ifdef __GNUC__
// The egcs compiler does a good job of loop unrolling, if
// -funroll-loops is used.
#undef BZ_PARTIAL_LOOP_UNROLL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif
#endif
114
115
// Defining BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE forces
// BZ_KCC_COPY_PROPAGATION_KLUDGE to be undefined.
#ifdef BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
#undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif
118
119
// _bz_inline1 expands to "inline" when BZ_INLINE_GROUP1 is defined,
// and to nothing otherwise.
#ifdef BZ_INLINE_GROUP1
#define _bz_inline1 inline
#else
#define _bz_inline1
#endif

// _bz_inline2 expands to "inline" when BZ_INLINE_GROUP2 is defined,
// and to nothing otherwise.
#ifdef BZ_INLINE_GROUP2
#define _bz_inline2 inline
#else
#define _bz_inline2
#endif
130
131
// override definitions above: when BZ_NO_INLINE_ET is defined,
// _bz_inline_et expands to nothing regardless of any earlier
// (default or compiler-specific) definition.
#ifdef BZ_NO_INLINE_ET
#undef _bz_inline_et
#define _bz_inline_et
#endif
136
137
// BZ_ETPARM(X) wraps a type X: with BZ_ETPARMS_CONSTREF defined it
// expands to "const X&", otherwise to plain "X".
#ifdef BZ_ETPARMS_CONSTREF
#define BZ_ETPARM(X) const X&
#else
#define BZ_ETPARM(X) X
#endif
142
143
// DEC cxx: turn the Blitz-side unrolling and anti-aliasing switches on.
#ifdef __DECCXX
// The DEC cxx compiler has problems with loop unrolling
// because of aliasing. Loop unrolling and anti-aliasing
// is done by Blitz++.

#define BZ_PARTIAL_LOOP_UNROLL
#define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif
152
153
/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro is a pass-through that expands to X itself.
 * Only when compiling with KAI C++ (__KCC) and BZ_USE_NO_PROPAGATE
 * is defined is it replaced by a call to __kai_apply.
 */

#define BZ_NO_PROPAGATE(X) X

#ifdef __KCC
#ifdef BZ_USE_NO_PROPAGATE
extern "C" int __kai_apply(const char*, ...);

// #undef takes the bare macro name only; a parameter list after the
// name is not valid in this directive.
#undef BZ_NO_PROPAGATE
#define BZ_NO_PROPAGATE(X) __kai_apply("(%a)",&X)
#endif
#endif
170
171
#endif
// BZ_TUNING_H
blitz
tuning.h
Generated by
1.17.0