42#ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
43#define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
46#include "Sacado_ConfigDefs.h"
52#include "KokkosBlas.hpp"
55#include "Kokkos_Core.hpp"
57#include "Stokhos_config.h"
59#define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size)
62template <
class AViewType,
65 class IndexType =
typename AViewType::size_type>
82 KOKKOS_INLINE_FUNCTION
void
85 const IndexType m =
y_.extent(0);
86 const IndexType n =
x_.extent(0);
88 IndexType i_min =
m_c_ * i_tile;
89 bool last_tile = (i_min +
m_c_ >= m);
90 IndexType i_max = (last_tile) ? m : (i_min +
m_c_);
92#ifdef STOKHOS_HAVE_PRAGMA_UNROLL
96 for (IndexType i = i_min; i < i_max; ++i)
99 for (IndexType i = i_min; i < i_max; ++i)
102 for (IndexType
j = 0;
j < n; ++
j)
106 for (IndexType i = i_min; i < i_max; ++i)
107 y_(i) += alphab *
A_(i,
j);
113 typename AViewType::const_type
A_;
114 typename XViewType::const_type
x_;
121template <
class AViewType,
124 class IndexType =
typename AViewType::size_type>
143 KOKKOS_INLINE_FUNCTION
void
146 const IndexType m =
y_.extent(0);
147 const IndexType n =
x_.extent(0);
149 const int j = team.league_rank();
150 const IndexType j_min =
n_c_ *
j;
151 const IndexType nj = (j_min +
n_c_ > n) ? (n - j_min) :
n_c_;
152 const IndexType i_min =
j % m;
154 for (IndexType i = i_min; i < m; ++i)
157 Kokkos::parallel_reduce(
158 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
159 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
162 if (team.team_rank() == 0)
168 for (IndexType i = 0; i < i_min; ++i)
171 Kokkos::parallel_reduce(
172 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
173 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
176 if (team.team_rank() == 0)
186 typename AViewType::const_type
A_;
187 typename XViewType::const_type
x_;
197 typename VA::const_value_type &alpha,
200 typename VY::const_value_type &beta,
204 using IndexType =
typename VA::size_type;
205 using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
208 const size_t m = y.extent(0);
212 const size_t n_tiles_per_thread = ceil(((
double)m) / (N * m_c_star));
213 const size_t m_c = ceil(((
double)m) / (N * n_tiles_per_thread));
214 const size_t n_tiles = N * n_tiles_per_thread;
216 policy_type range(0, n_tiles);
219 functor_type functor(alpha, A, x, beta, y, m_c);
221 Kokkos::parallel_for(
"KokkosBlas::gemv[Update]", range, functor);
229 typename VA::const_value_type &alpha,
232 typename VY::const_value_type &beta,
236 using IndexType =
typename VA::size_type;
237 using team_policy_type = Kokkos::TeamPolicy<execution_space>;
240 const size_t m = y.extent(0);
241 const size_t n = x.extent(0);
243 const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
247 const size_t n_tiles_per_thread = ceil(((
double)n) / (N * m_c_star));
248 const size_t m_c = ceil(((
double)n) / (N * n_tiles_per_thread));
249 const size_t n_per_tile2 = m_c * team_size;
251 const size_t n_i2 = ceil(((
double)n) / n_per_tile2);
253 team_policy_type team(n_i2, team_size);
256 Kokkos::parallel_for(
257 m, KOKKOS_LAMBDA(
const int i) {
261 Kokkos::parallel_for(
262 m, KOKKOS_LAMBDA(
const int i) {
267 functor_type functor(alpha, A, x, y, n_per_tile2);
269 Kokkos::parallel_for(
"KokkosBlas::gemv[InnerProducts]", team, functor);
274template <
typename DA,
typename... PA,
275 typename DX,
typename... PX,
276 typename DY,
typename... PY>
277typename std::enable_if<Kokkos::is_view_mp_vector<Kokkos::View<DA, PA...>>::value &&
278 Kokkos::is_view_mp_vector<Kokkos::View<DX, PX...>>::value &&
279 Kokkos::is_view_mp_vector<Kokkos::View<DY, PY...>>::value>::type
281 typename Kokkos::View<DA, PA...>::const_value_type &alpha,
282 const Kokkos::View<DA, PA...> &A,
283 const Kokkos::View<DX, PX...> &x,
284 typename Kokkos::View<DY, PY...>::const_value_type &beta,
285 const Kokkos::View<DY, PY...> &y)
287 typedef typename Kokkos::View<DA, PA...>::value_type
Scalar;
288 typedef Kokkos::View<DA, PA...> VA;
289 typedef Kokkos::View<DX, PX...> VX;
290 typedef Kokkos::View<DY, PY...> VY;
292 static_assert(VA::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
293 static_assert(VX::rank == 1,
"GEMM: x must have rank 1 (be a vector).");
294 static_assert(VY::rank == 1,
"GEMM: y must have rank 1 (be a vector).");
296 if (trans[0] ==
'n' || trans[0] ==
'N')
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultHostExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY... > >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA... >::const_value_type &alpha, const Kokkos::View< DA, PA... > &A, const Kokkos::View< DX, PX... > &x, typename Kokkos::View< DY, PY... >::const_value_type &beta, const Kokkos::View< DY, PY... > &y)
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
Kokkos::TeamPolicy< execution_space > policy_type
typename policy_type::member_type member_type
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
typename AViewType::non_const_value_type AlphaCoeffType
typename AViewType::execution_space execution_space
typename AViewType::non_const_value_type AlphaCoeffType
typename YViewType::non_const_value_type BetaCoeffType
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)