Stokhos Package Browser (Single Doxygen Collection)
Version of the Day
Toggle main menu visibility
Loading...
Searching...
No Matches
src
kokkos
Cuda
Stokhos_Cuda_SymmetricDiagonalSpec.hpp
Go to the documentation of this file.
1
// @HEADER
2
// ***********************************************************************
3
//
4
// Stokhos Package
5
// Copyright (2009) Sandia Corporation
6
//
7
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8
// license for use of this work by or on behalf of the U.S. Government.
9
//
10
// Redistribution and use in source and binary forms, with or without
11
// modification, are permitted provided that the following conditions are
12
// met:
13
//
14
// 1. Redistributions of source code must retain the above copyright
15
// notice, this list of conditions and the following disclaimer.
16
//
17
// 2. Redistributions in binary form must reproduce the above copyright
18
// notice, this list of conditions and the following disclaimer in the
19
// documentation and/or other materials provided with the distribution.
20
//
21
// 3. Neither the name of the Corporation nor the names of the
22
// contributors may be used to endorse or promote products derived from
23
// this software without specific prior written permission.
24
//
25
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
//
37
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38
//
39
// ***********************************************************************
40
// @HEADER
41
42
#ifndef KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP
43
#define KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP
44
45
#include <stdexcept>
46
47
#include "Kokkos_Core.hpp"
48
49
#include "
Stokhos_Multiply.hpp
"
50
#include "
Stokhos_BlockCrsMatrix.hpp
"
51
#include "
Stokhos_SymmetricDiagonalSpec.hpp
"
52
53
//----------------------------------------------------------------------------
54
55
namespace
Stokhos
{
56
57
template
<>
58
class
BlockMultiply
<
SymmetricDiagonalSpec
<
Kokkos
::Cuda > >
59
{
60
public
:
61
typedef
Kokkos::Cuda
execution_space
;
62
typedef
execution_space::size_type
size_type
;
63
typedef
SymmetricDiagonalSpec< Kokkos::Cuda >
block_type
;
64
65
__host__
66
static
dim3
thread_block
(
const
block_type
& block )
67
{
68
const
int
d = block.
dimension
();
69
const
int
warp_size = Kokkos::Impl::CudaTraits::WarpSize;
70
const
int
y = ( Kokkos::Impl::cuda_internal_maximum_warp_count() * warp_size ) / d ;
71
72
if
( 0 == y ) {
73
throw
std::runtime_error( std::string(
"Stokhos::Multiply< SymmetricDiagonalSpec<Cuda> > ERROR: block too large"
) );
74
}
75
76
// dimension X #diagonals to concurrently process
77
return
dim3( d , std::min( y , ( 1 + d ) / 2 ) , 1 );
78
}
79
80
template
<
typename
VectorValue >
81
__host__
82
static
size_type
shmem_size
(
const
block_type
& block )
83
{
84
const
dim3 d =
thread_block
( block );
85
86
return
sizeof
(VectorValue) * d.x * d.y ;
87
}
88
89
__host__
90
static
size_type
matrix_size
(
const
block_type
& block )
91
{
return
block.
matrix_size
(); }
92
93
// Required: blockDim.x == block.dimension()
94
// Required: blockDim.y > 1
95
// Computing: Y[ threadIdx.x ]
96
template
<
typename
MatrixValue ,
typename
VectorValue >
97
__device__
98
static
VectorValue
apply
(
const
block_type
& block ,
99
const
MatrixValue *
const
a ,
100
const
VectorValue *
const
x )
101
{
102
const
int
dimension = block.
dimension
();
103
const
int
dim_half = ( dimension + 1 ) >> 1 ;
104
105
VectorValue *
const
shX = kokkos_impl_cuda_shared_memory<VectorValue>();
106
107
int
ia = -1 ;
108
int
ix = -1 ;
109
110
VectorValue y = 0 ;
111
112
if
( 0 == threadIdx.y ) {
113
// Load vector and multiply the main diagonal (first diagonal)
114
115
shX[ threadIdx.x ] = x[ threadIdx.x ];
// Load 'x'
116
117
y = shX[ threadIdx.x ] * a[ threadIdx.x ];
118
}
119
120
__syncthreads();
// Wait for load of 'x'
121
122
if
( 0 == threadIdx.y && ! ( dimension & 01 ) ) {
123
124
// If even number of diagonals then the last diagonal is half-length
125
// and only used once.
126
127
ix = threadIdx.x ;
128
ia = threadIdx.x + dim_half * dimension ;
129
130
if
( threadIdx.x < dim_half ) {
131
ix += dim_half ;
132
}
133
else
{
134
ix -= dim_half ;
135
ia -= dim_half ;
136
}
137
y += shX[ ix ] * a[ ia ];
138
}
139
140
//------------------------------------
141
142
const
int
A_stride = blockDim.y * dimension ;
143
144
int
d = 1 + threadIdx.y ;
145
146
const
MatrixValue * A = a + d * dimension ;
147
148
for
( ; d < dim_half ; d += blockDim.y , A += A_stride ) {
149
150
ix = threadIdx.x + d ;
if
( dimension <= ix ) ix -= dimension ;
151
ia = threadIdx.x - d ;
if
( ia < 0 ) ia += dimension ;
152
153
// A 'threadIdx.y' group accesses the matrix diagonal
154
// A[ 0 .. dimension - 1 ] twice in the following statement.
155
// Spatial and temporal locality are excellent for L1 cache reuse
156
// for the second access.
157
158
y += shX[ ix ] * A[ threadIdx.x ] +
159
shX[ ia ] * A[ ia ];
160
}
161
162
if
( 0 < threadIdx.y ) {
163
shX[ threadIdx.x + threadIdx.y * dimension ] = y ;
164
}
165
166
__syncthreads();
// Wait for load of contributions to 'y'
167
168
for
( ix = 1 ; ix < blockDim.y ; ++ix ) {
169
y += shX[ threadIdx.x + ix * dimension ];
170
}
171
172
return
y ;
173
}
174
};
175
176
}
/* namespace Stokhos */
177
178
//----------------------------------------------------------------------------
179
180
#endif
/* #ifndef STOKHOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP */
Stokhos_BlockCrsMatrix.hpp
Stokhos_Multiply.hpp
Stokhos_SymmetricDiagonalSpec.hpp
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::size_type
execution_space::size_type size_type
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:62
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::shmem_size
static __host__ size_type shmem_size(const block_type &block)
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:82
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::thread_block
static __host__ dim3 thread_block(const block_type &block)
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:66
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::matrix_size
static __host__ size_type matrix_size(const block_type &block)
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:90
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::block_type
SymmetricDiagonalSpec< Kokkos::Cuda > block_type
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:63
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::execution_space
Kokkos::Cuda execution_space
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:61
Stokhos::BlockMultiply< SymmetricDiagonalSpec< Kokkos::Cuda > >::apply
static __device__ VectorValue apply(const block_type &block, const MatrixValue *const a, const VectorValue *const x)
Definition
Stokhos_Cuda_SymmetricDiagonalSpec.hpp:98
Stokhos::BlockMultiply
Definition
Stokhos_Multiply.hpp:174
Stokhos::SymmetricDiagonalSpec
Symmetric diagonal storage for a dense matrix.
Definition
Stokhos_SymmetricDiagonalSpec.hpp:63
Stokhos::SymmetricDiagonalSpec::dimension
KOKKOS_INLINE_FUNCTION unsigned dimension() const
Dimension of vector block.
Definition
Stokhos_SymmetricDiagonalSpec.hpp:70
Stokhos::SymmetricDiagonalSpec::matrix_size
KOKKOS_INLINE_FUNCTION unsigned matrix_size() const
Storage size for block coefficients.
Definition
Stokhos_SymmetricDiagonalSpec.hpp:93
Kokkos
Definition
Stokhos_CrsMatrix.hpp:663
Stokhos
Top-level namespace for Stokhos classes and functions.
Definition
Stokhos_AbstractPreconditionerFactory.hpp:48
Generated by
1.17.0