Stokhos Package Browser (Single Doxygen Collection)
Version of the Day
Toggle main menu visibility
Loading...
Searching...
No Matches
src
kokkos
Cuda
Stokhos_Cuda_DeviceProp.hpp
Go to the documentation of this file.
1
// @HEADER
2
// ***********************************************************************
3
//
4
// Stokhos Package
5
// Copyright (2009) Sandia Corporation
6
//
7
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8
// license for use of this work by or on behalf of the U.S. Government.
9
//
10
// Redistribution and use in source and binary forms, with or without
11
// modification, are permitted provided that the following conditions are
12
// met:
13
//
14
// 1. Redistributions of source code must retain the above copyright
15
// notice, this list of conditions and the following disclaimer.
16
//
17
// 2. Redistributions in binary form must reproduce the above copyright
18
// notice, this list of conditions and the following disclaimer in the
19
// documentation and/or other materials provided with the distribution.
20
//
21
// 3. Neither the name of the Corporation nor the names of the
22
// contributors may be used to endorse or promote products derived from
23
// this software without specific prior written permission.
24
//
25
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
//
37
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38
//
39
// ***********************************************************************
40
// @HEADER
41
42
#ifndef STOKHOS_CUDA_DEVICE_PROP_HPP
43
#define STOKHOS_CUDA_DEVICE_PROP_HPP
44
45
#include "Kokkos_Core.hpp"
46
47
#include "Teuchos_TestForException.hpp"
48
49
#include "cuda_runtime_api.h"
50
51
namespace
Stokhos
{
52
53
// Class encapsulating various device attributes
54
class
DeviceProp
{
55
public
:
56
57
typedef
Kokkos::Cuda::size_type
size_type
;
58
59
size_type
compute_capability_major
;
60
size_type
compute_capability_minor
;
61
62
size_type
shared_memory_capacity
;
63
size_type
shared_memory_granularity
;
64
size_type
max_shmem_per_block
;
65
size_type
max_threads_per_block
;
66
size_type
max_threads_per_sm
;
67
size_type
max_blocks_per_sm
;
68
size_type
max_warps_per_sm
;
69
size_type
warp_size
;
70
size_type
warp_granularity
;
71
size_type
max_regs_per_sm
;
72
size_type
max_regs_per_block
;
73
size_type
reg_bank_size
;
74
75
bool
has_shuffle
;
76
bool
has_ldg
;
77
78
DeviceProp
(
int
device_id = -1) :
79
compute_capability_major
(0),
80
compute_capability_minor
(0),
81
shared_memory_capacity
(0),
82
shared_memory_granularity
(0),
83
max_shmem_per_block
(0),
84
max_threads_per_block
(0),
85
max_threads_per_sm
(0),
86
max_blocks_per_sm
(0),
87
max_warps_per_sm
(0),
88
warp_size
(0),
89
warp_granularity
(0),
90
max_regs_per_sm
(0),
91
max_regs_per_block
(0),
92
reg_bank_size
(0),
93
has_shuffle
(false),
94
has_ldg
(false)
95
{
96
// If device_id is negative, use currently selected device
97
if
(device_id < 0)
98
cudaGetDevice(&device_id);
99
100
// Get compute capability
101
int
major, minor;
102
cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
103
device_id);
104
cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
105
device_id);
106
compute_capability_major
= major;
107
compute_capability_minor
= minor;
108
109
// Require compute capability >= 2
110
TEUCHOS_TEST_FOR_EXCEPTION(
111
compute_capability_major
< 2, std::logic_error,
112
"Cuda compute capability >= 2 is required!"
);
113
114
// These come from the CUDA occupancy calculator
115
if
(
compute_capability_major
== 7) {
116
if
(
compute_capability_minor
== 0) {
117
shared_memory_capacity
= 96 * 1024;
118
}
119
else
{
120
shared_memory_capacity
= 64 * 1024;
121
}
122
123
max_shmem_per_block
= 48 * 1024;
124
max_regs_per_block
= 64 * 1024;
125
max_regs_per_sm
= 64 * 1024;
126
shared_memory_granularity
= 256;
127
max_threads_per_block
= 1024;
128
129
if
(
compute_capability_minor
== 0) {
130
max_threads_per_sm
= 2048;
131
max_warps_per_sm
= 64;
132
max_blocks_per_sm
= 32;
133
}
134
else
{
135
max_threads_per_sm
= 1024;
136
max_warps_per_sm
= 32;
137
max_blocks_per_sm
= 16;
138
}
139
140
warp_size
= 32;
141
warp_granularity
= 4;
// ??
142
reg_bank_size
= 256;
143
has_shuffle
=
true
;
144
has_ldg
=
true
;
145
}
146
147
else
if
(
compute_capability_major
== 6) {
148
if
(
compute_capability_minor
== 1)
149
shared_memory_capacity
= 96 * 1024;
150
else
151
shared_memory_capacity
= 64 * 1024;
152
153
if
(
compute_capability_minor
== 0 ||
compute_capability_minor
== 1)
154
max_regs_per_block
= 64 * 1024;
155
else
156
max_regs_per_block
= 32 * 1024;
157
158
max_shmem_per_block
= 48 * 1024;
159
max_regs_per_sm
= 64 * 1024;
160
shared_memory_granularity
= 256;
161
max_threads_per_block
= 1024;
162
163
if
(
compute_capability_minor
== 2) {
164
max_threads_per_sm
= 4096;
165
max_warps_per_sm
= 128;
166
}
167
else
{
168
max_threads_per_sm
= 2048;
169
max_warps_per_sm
= 64;
170
}
171
max_blocks_per_sm
= 32;
172
173
warp_size
= 32;
174
if
(
compute_capability_minor
== 0)
175
warp_granularity
= 2;
176
else
177
warp_granularity
= 4;
178
reg_bank_size
= 256;
179
has_shuffle
=
true
;
180
has_ldg
=
true
;
181
}
182
183
else
if
(
compute_capability_major
== 3) {
184
if
(
compute_capability_minor
>= 7) {
185
shared_memory_capacity
= 112 * 1024;
186
max_shmem_per_block
= 48 * 1024;
187
max_regs_per_sm
= 128 * 1024;
188
max_regs_per_block
= 64 * 1024;
189
}
190
else
{
191
shared_memory_capacity
= 48 * 1024;
192
max_shmem_per_block
= 48 * 1024;
193
max_regs_per_sm
= 64 * 1024;
194
max_regs_per_block
= 64 * 1024;
195
}
196
shared_memory_granularity
= 256;
197
max_threads_per_block
= 1024;
198
max_threads_per_sm
= 2048;
199
max_blocks_per_sm
= 16;
200
max_warps_per_sm
= 64;
201
warp_size
= 32;
202
warp_granularity
= 4;
203
reg_bank_size
= 256;
204
has_shuffle
=
true
;
205
has_ldg
=
true
;
206
}
207
208
else
if
(
compute_capability_major
== 2) {
209
shared_memory_capacity
= 48 * 1024;
210
shared_memory_granularity
= 64;
211
max_shmem_per_block
= 48 * 1024;
212
max_threads_per_block
= 1024;
213
max_threads_per_sm
= 1536;
214
max_blocks_per_sm
= 8;
215
max_warps_per_sm
= 48;
216
warp_size
= 32;
217
warp_granularity
= 2;
218
max_regs_per_sm
= 32 * 1024;
219
max_regs_per_block
= 32 * 1024;
220
reg_bank_size
= 64;
221
has_shuffle
=
false
;
222
has_ldg
=
false
;
223
}
224
225
else
226
TEUCHOS_TEST_FOR_EXCEPTION(
227
true
, std::logic_error,
228
"DeviceProp not configured for compute capability "
<<
229
compute_capability_major
);
230
}
231
232
// Returns number of registers per thread used by the given kernel
233
template
<
typename
Kernel>
234
size_type
235
get_kernel_registers
(Kernel kernel) {
236
#ifdef __CUDACC__
237
typedef
void (*func_ptr_t)();
238
func_ptr_t func_ptr =
reinterpret_cast<
func_ptr_t
>
(kernel);
239
cudaFuncAttributes attrib;
240
cudaFuncGetAttributes(&attrib, func_ptr);
241
return
attrib.numRegs;
242
#else
243
return
0;
244
#endif
245
}
246
247
// Returns number of resident warps per sm for the given kernel
248
template
<
typename
Kernel>
249
size_type
250
get_resident_warps_per_sm
(Kernel kernel) {
251
const
size_type
regs_per_thread =
get_kernel_registers
(kernel);
252
const
size_type
regs_per_warp =
253
(
warp_size
*regs_per_thread +
reg_bank_size
-1) & ~(
reg_bank_size
-1);
254
const
size_type
warps_per_sm =
255
(
max_regs_per_sm
/regs_per_warp) & ~(
warp_granularity
-1);
256
return
warps_per_sm;
257
}
258
};
259
260
}
// namespace Stokhos
261
262
#endif
/* #ifndef STOKHOS_CUDA_DEVICE_PROP_HPP */
Stokhos::DeviceProp::max_warps_per_sm
size_type max_warps_per_sm
Definition
Stokhos_Cuda_DeviceProp.hpp:68
Stokhos::DeviceProp::has_ldg
bool has_ldg
Definition
Stokhos_Cuda_DeviceProp.hpp:76
Stokhos::DeviceProp::shared_memory_granularity
size_type shared_memory_granularity
Definition
Stokhos_Cuda_DeviceProp.hpp:63
Stokhos::DeviceProp::warp_granularity
size_type warp_granularity
Definition
Stokhos_Cuda_DeviceProp.hpp:70
Stokhos::DeviceProp::get_kernel_registers
size_type get_kernel_registers(Kernel kernel)
Definition
Stokhos_Cuda_DeviceProp.hpp:235
Stokhos::DeviceProp::shared_memory_capacity
size_type shared_memory_capacity
Definition
Stokhos_Cuda_DeviceProp.hpp:62
Stokhos::DeviceProp::max_regs_per_sm
size_type max_regs_per_sm
Definition
Stokhos_Cuda_DeviceProp.hpp:71
Stokhos::DeviceProp::DeviceProp
DeviceProp(int device_id=-1)
Definition
Stokhos_Cuda_DeviceProp.hpp:78
Stokhos::DeviceProp::reg_bank_size
size_type reg_bank_size
Definition
Stokhos_Cuda_DeviceProp.hpp:73
Stokhos::DeviceProp::get_resident_warps_per_sm
size_type get_resident_warps_per_sm(Kernel kernel)
Definition
Stokhos_Cuda_DeviceProp.hpp:250
Stokhos::DeviceProp::max_shmem_per_block
size_type max_shmem_per_block
Definition
Stokhos_Cuda_DeviceProp.hpp:64
Stokhos::DeviceProp::compute_capability_minor
size_type compute_capability_minor
Definition
Stokhos_Cuda_DeviceProp.hpp:60
Stokhos::DeviceProp::has_shuffle
bool has_shuffle
Definition
Stokhos_Cuda_DeviceProp.hpp:75
Stokhos::DeviceProp::max_blocks_per_sm
size_type max_blocks_per_sm
Definition
Stokhos_Cuda_DeviceProp.hpp:67
Stokhos::DeviceProp::max_threads_per_block
size_type max_threads_per_block
Definition
Stokhos_Cuda_DeviceProp.hpp:65
Stokhos::DeviceProp::max_regs_per_block
size_type max_regs_per_block
Definition
Stokhos_Cuda_DeviceProp.hpp:72
Stokhos::DeviceProp::max_threads_per_sm
size_type max_threads_per_sm
Definition
Stokhos_Cuda_DeviceProp.hpp:66
Stokhos::DeviceProp::warp_size
size_type warp_size
Definition
Stokhos_Cuda_DeviceProp.hpp:69
Stokhos::DeviceProp::size_type
Kokkos::Cuda::size_type size_type
Definition
Stokhos_Cuda_DeviceProp.hpp:57
Stokhos::DeviceProp::compute_capability_major
size_type compute_capability_major
Definition
Stokhos_Cuda_DeviceProp.hpp:59
Stokhos
Top-level namespace for Stokhos classes and functions.
Definition
Stokhos_AbstractPreconditionerFactory.hpp:48
Generated by
1.17.0