Stokhos Package Browser (Single Doxygen Collection)
Version of the Day
Toggle main menu visibility
Loading...
Searching...
No Matches
src
cusp
detail
device
spmm
csr.h
Go to the documentation of this file.
1
/*
2
* Copyright 2008-2009 NVIDIA Corporation
3
*
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
7
*
8
* http://www.apache.org/licenses/LICENSE-2.0
9
*
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
15
*/
16
17
#include <cusp/array1d.h>
18
19
#include <cusp/detail/format_utils.h>
20
21
//#include <iostream>
22
//#include <cusp/print.h>
23
namespace
cusp
24
{
25
namespace
detail
26
{
27
namespace
device
28
{
29
30
//Row-wise AX where X is a 2d array with one thread per row (based on scalar model)
31
template
<
typename
IndexType,
32
typename
ValueType>
33
__global__
void
34
row_spmm_csr_scalar_kernel
(
const
IndexType Anum_rows,
35
const
IndexType xnum_row,
36
const
IndexType
xnum_cols
,
37
const
IndexType * Ar,
38
const
IndexType * Ac,
39
const
ValueType * Aval,
40
const
ValueType *
x
,
41
ValueType *
y
)
42
{
43
const
IndexType
thread_id
= blockDim.x * blockIdx.x + threadIdx.x;
44
const
IndexType grid_size = gridDim.x * blockDim.x;
45
46
47
for
(IndexType row =
thread_id
; row < Anum_rows; row += grid_size)
48
{
49
const
IndexType row_start = Ar[row];
50
const
IndexType row_end = Ar[row+1];
51
const
IndexType r = row_end - row_start;
52
53
for
(IndexType
j
= 0;
j
<
xnum_cols
;
j
++){
54
55
ValueType sum = 0.0;
56
for
(IndexType jj = row_start; jj < row_end; jj++)
57
sum += Aval[jj] *
x
[
j
+
xnum_cols
*Ac[jj]];
58
y
[
j
+
xnum_cols
*row]=sum;
59
}
60
61
}
62
}
63
64
65
66
//Col-wise with one thread per row
67
template
<
typename
IndexType,
68
typename
ValueType>
69
__global__
void
70
column_spmm_csr_scalar_kernel
(
const
IndexType Anum_rows,
71
const
IndexType
xnum_rows
,
72
const
IndexType
xnum_cols
,
73
const
IndexType * Ar,
74
const
IndexType * Ac,
75
const
ValueType * Aval,
76
const
ValueType *
x
,
77
ValueType *
y
)
78
{
79
const
IndexType
thread_id
= blockDim.x * blockIdx.x + threadIdx.x;
80
const
IndexType grid_size = gridDim.x * blockDim.x;
81
for
(IndexType row =
thread_id
; row < Anum_rows; row += grid_size){
82
const
IndexType row_start = Ar[row];
83
const
IndexType row_end = Ar[row+1];
84
85
for
(IndexType
j
= 0;
j
<
xnum_cols
;
j
++){
86
87
ValueType sum = 0;
88
for
(IndexType jj = row_start; jj < row_end; jj++)
89
sum += Aval[jj] *
x
[Ac[jj]+
xnum_rows
*
j
];
//x(Ac[jj], j)
90
y
[
j
*Anum_rows+row]=sum;
91
}
92
}
93
}
94
95
104
105
106
template
<
typename
Matrix1,
107
typename
Vector2,
108
typename
Vector3>
109
void
__spmm_csr_scalar
(
const
Matrix1& A,
110
const
Vector2&
x
,
111
Vector3&
y
, cusp::row_major)
112
{
113
CUSP_PROFILE_SCOPED();
114
typedef
typename
Vector3::index_type IndexType;
115
typedef
typename
Vector3::value_type ValueType;
116
typedef
typename
Vector3::memory_space MemorySpace;
117
const
size_t
BLOCK_SIZE = 256;
118
const
size_t
MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(
row_spmm_csr_scalar_kernel<IndexType, ValueType>
, BLOCK_SIZE, (
size_t
) 0);
119
const
size_t
NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
120
121
122
123
row_spmm_csr_scalar_kernel<IndexType,ValueType>
<<<NUM_BLOCKS, BLOCK_SIZE >>>
124
(A.num_rows,
x
.num_rows,
x
.num_cols,
125
thrust::raw_pointer_cast(&A.row_offsets[0]),
126
thrust::raw_pointer_cast(&A.column_indices[0]),
127
thrust::raw_pointer_cast(&A.values[0]),
128
thrust::raw_pointer_cast(&(
x
.values)[0]),
129
thrust::raw_pointer_cast(&(
y
.values)[0]));
130
131
}
132
133
template
<
typename
Matrix1,
134
typename
Vector2,
135
typename
Vector3>
136
void
__spmm_csr_scalar
(
const
Matrix1& A,
137
const
Vector2&
x
,
138
Vector3&
y
, cusp::column_major)
139
{
140
CUSP_PROFILE_SCOPED();
141
typedef
typename
Vector3::index_type IndexType;
142
typedef
typename
Vector3::value_type ValueType;
143
typedef
typename
Vector3::memory_space MemorySpace;
144
const
size_t
BLOCK_SIZE = 256;
145
const
size_t
MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(
column_spmm_csr_scalar_kernel<IndexType, ValueType>
, BLOCK_SIZE, (
size_t
) 0);
146
const
size_t
NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
147
column_spmm_csr_scalar_kernel<IndexType,ValueType>
<<<NUM_BLOCKS, BLOCK_SIZE>>>
148
(A.num_rows,
x
.num_rows,
x
.num_cols,
149
thrust::raw_pointer_cast(&A.row_offsets[0]),
150
thrust::raw_pointer_cast(&A.column_indices[0]),
151
thrust::raw_pointer_cast(&A.values[0]),
152
thrust::raw_pointer_cast(&(
x
.values)[0]),
153
thrust::raw_pointer_cast(&(
y
.values)[0]));
154
155
}
156
157
158
template
<
typename
Matrix1,
159
typename
Vector2,
160
typename
Vector3>
161
void
spmm_csr_scalar
(
const
Matrix1& A,
162
const
Vector2&
x
,
163
Vector3&
y
)
164
{
165
//Determine if row-wise or col-wise then call appropriate multiply
166
__spmm_csr_scalar
(A,
x
,
y
,
typename
Vector2::orientation());
167
}
168
}
// end namespace device
169
}
// end namespace detail
170
}
// end namespace cusp
171
j
j
Definition
Sacado_Fad_Exp_MP_Vector.hpp:527
cusp::detail::device
Definition
detail/device/MVmultiply.h:35
cusp::detail::device::x
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition
csr_vector.h:265
cusp::detail::device::spmm_csr_scalar
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
Definition
csr.h:161
cusp::detail::device::row_spmm_csr_scalar_kernel
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition
csr.h:34
cusp::detail::device::__spmm_csr_scalar
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
Definition
csr.h:109
cusp::detail::device::y
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition
csr_vector.h:267
cusp::detail::device::xnum_rows
const IndexType xnum_rows
Definition
csr_vector.h:260
cusp::detail::device::xnum_cols
const IndexType const IndexType xnum_cols
Definition
csr_vector.h:261
cusp::detail::device::thread_id
const IndexType thread_id
Definition
csr_vector.h:273
cusp::detail::device::column_spmm_csr_scalar_kernel
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition
csr.h:70
cusp::detail
Definition
block_lu.h:28
cusp
Definition
block_monitor.h:38
Generated by
1.17.0