Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Teuchos_BLAS_MP_Vector.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Teuchos: Common Tools Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef _TEUCHOS_BLAS_MP_VECTOR_HPP_
43#define _TEUCHOS_BLAS_MP_VECTOR_HPP_
44
45#include "Teuchos_BLAS.hpp"
46#include "Sacado_MP_Vector.hpp"
47
48namespace Teuchos
49{
50
51namespace details
52{
53
54template <typename Storage>
55class GivensRotator<Sacado::MP::Vector<Storage>, false>
56{
57public:
60
61 void
63 ScalarType *db,
64 ScalarType *c,
65 ScalarType *s) const
66 {
67 typedef ScalarTraits<ScalarType> STS;
68
69 ScalarType r, roe, scale, z, da_scaled, db_scaled;
70 auto m_da = (STS::magnitude(*da) > STS::magnitude(*db));
71 mask_assign(m_da, roe) = {*da, *db};
72
73 scale = STS::magnitude(*da) + STS::magnitude(*db);
74
75 auto m_scale = scale != STS::zero();
76
77 da_scaled = *da;
78 db_scaled = *db;
79
80 *c = *da;
81 *s = *db;
82
83 ScalarType tmp = STS::one();
84 mask_assign(m_scale, tmp) /= scale;
85
86 mask_assign(m_scale, da_scaled) *= tmp;
87 mask_assign(m_scale, db_scaled) *= tmp;
88
89 r = scale * STS::squareroot(da_scaled * da_scaled + db_scaled * db_scaled);
90 auto m_roe = roe < 0;
91 mask_assign(m_roe, r) = -r;
92
93 tmp = STS::one();
94 mask_assign(m_scale, tmp) /= r;
95
96 mask_assign(m_scale, *c) *= tmp;
97 mask_assign(m_scale, *s) *= tmp;
98
99 mask_assign(!m_scale, *c) = STS::one();
100 mask_assign(!m_scale, *s) = STS::zero();
101
102 mask_assign(*c != STS::zero(), z) /= {STS::one(), *c, STS::zero()};
103 mask_assign(!m_scale, z) = STS::zero();
104 mask_assign(m_da, z) = *s;
105
106 *da = r;
107 *db = z;
108 }
109};
110} // namespace details
111} // namespace Teuchos
112
113//namespace Sacado {
114// namespace MP {
115namespace Teuchos
116{
118template <typename OrdinalType, typename Storage>
119class BLAS<OrdinalType, Sacado::MP::Vector<Storage>> : public Teuchos::DefaultBLASImpl<OrdinalType, Sacado::MP::Vector<Storage>>
120{
121
122 typedef typename Teuchos::ScalarTraits<Sacado::MP::Vector<Storage>>::magnitudeType MagnitudeType;
123 typedef typename Sacado::ValueType<Sacado::MP::Vector<Storage>>::type ValueType;
124 typedef typename Sacado::ScalarType<Sacado::MP::Vector<Storage>>::type scalar_type;
126 typedef Teuchos::DefaultBLASImpl<OrdinalType, Sacado::MP::Vector<Storage>> BLASType;
127
128public:
129 template <typename alpha_type, typename A_type>
130 void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo,
131 Teuchos::ETransp transa, Teuchos::EDiag diag,
132 const OrdinalType m, const OrdinalType n,
133 const alpha_type &alpha,
134 const A_type *A, const OrdinalType lda,
135 ScalarType *B, const OrdinalType ldb) const
136 {
137 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
138 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
139 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
140 A_type A_zero = ScalarTraits<A_type>::zero();
141 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
142 alpha_type alpha_one = ScalarTraits<alpha_type>::one();
143 ScalarType B_one = ScalarTraits<ScalarType>::one();
144 ScalarType temp;
145 OrdinalType NRowA = m;
146 bool BadArgument = false;
147 bool noUnit = (EDiagChar[diag] == 'N');
148 bool noConj = (ETranspChar[transa] == 'T');
149
150 if (!(ESideChar[side] == 'L'))
151 {
152 NRowA = n;
153 }
154
155 // Quick return.
156 if (n == izero || m == izero)
157 {
158 return;
159 }
160 if (m < izero)
161 {
162 std::cout << "BLAS::TRSM Error: M == " << m << std::endl;
163 BadArgument = true;
164 }
165 if (n < izero)
166 {
167 std::cout << "BLAS::TRSM Error: N == " << n << std::endl;
168 BadArgument = true;
169 }
170 if (lda < NRowA)
171 {
172 std::cout << "BLAS::TRSM Error: LDA < " << NRowA << std::endl;
173 BadArgument = true;
174 }
175 if (ldb < m)
176 {
177 std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
178 BadArgument = true;
179 }
180
181 if (!BadArgument)
182 {
183 int i, j, k;
184 // Set the solution to the zero vector.
185 auto alpha_is_zero = (alpha == alpha_zero);
186 for (j = izero; j < n; j++)
187 {
188 for (i = izero; i < m; i++)
189 {
190 mask_assign(alpha_is_zero, B[j * ldb + i]) = B_zero;
191 }
192 }
193
194 auto alpha_is_not_one = (alpha != alpha_one);
195
196 { // Start the operations.
197 if (ESideChar[side] == 'L')
198 {
199 //
200 // Perform computations for OP(A)*X = alpha*B
201 //
202 if (ETranspChar[transa] == 'N')
203 {
204 //
205 // Compute B = alpha*inv( A )*B
206 //
207 if (EUploChar[uplo] == 'U')
208 {
209 // A is upper triangular.
210 for (j = izero; j < n; j++)
211 {
212 // Perform alpha*B if alpha is not 1.
213 for (i = izero; i < m; i++)
214 {
215 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
216 }
217
218 // Perform a backsolve for column j of B.
219 for (k = (m - ione); k > -ione; k--)
220 {
221 // If this entry is zero, we don't have to do anything.
222 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
223
224 if (noUnit)
225 {
226 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
227 }
228 for (i = izero; i < k; i++)
229 {
230 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
231 }
232 }
233 }
234 }
235 else
236 { // A is lower triangular.
237 for (j = izero; j < n; j++)
238 {
239 // Perform alpha*B if alpha is not 1.
240 for (i = izero; i < m; i++)
241 {
242 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
243 }
244 // Perform a forward solve for column j of B.
245 for (k = izero; k < m; k++)
246 {
247 // If this entry is zero, we don't have to do anything.
248 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
249 if (noUnit)
250 {
251 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
252 }
253 for (i = k + ione; i < m; i++)
254 {
255 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
256 }
257 }
258 }
259 } // end if (uplo == 'U')
260 } // if (transa =='N')
261 else
262 {
263 //
264 // Compute B = alpha*inv( A' )*B
265 // or B = alpha*inv( conj(A') )*B
266 //
267 if (EUploChar[uplo] == 'U')
268 {
269 // A is upper triangular.
270 for (j = izero; j < n; j++)
271 {
272 for (i = izero; i < m; i++)
273 {
274 temp = alpha * B[j * ldb + i];
275 if (noConj)
276 {
277 for (k = izero; k < i; k++)
278 {
279 temp -= A[i * lda + k] * B[j * ldb + k];
280 }
281 if (noUnit)
282 {
283 temp /= A[i * lda + i];
284 }
285 }
286 else
287 {
288 for (k = izero; k < i; k++)
289 {
290 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
291 }
292 if (noUnit)
293 {
294 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
295 }
296 }
297 B[j * ldb + i] = temp;
298 }
299 }
300 }
301 else
302 { // A is lower triangular.
303 for (j = izero; j < n; j++)
304 {
305 for (i = (m - ione); i > -ione; i--)
306 {
307 temp = alpha * B[j * ldb + i];
308 if (noConj)
309 {
310 for (k = i + ione; k < m; k++)
311 {
312 temp -= A[i * lda + k] * B[j * ldb + k];
313 }
314 if (noUnit)
315 {
316 temp /= A[i * lda + i];
317 }
318 }
319 else
320 {
321 for (k = i + ione; k < m; k++)
322 {
323 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
324 }
325 if (noUnit)
326 {
327 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
328 }
329 }
330 B[j * ldb + i] = temp;
331 }
332 }
333 }
334 }
335 } // if (side == 'L')
336 else
337 {
338 // side == 'R'
339 //
340 // Perform computations for X*OP(A) = alpha*B
341 //
342 if (ETranspChar[transa] == 'N')
343 {
344 //
345 // Compute B = alpha*B*inv( A )
346 //
347 if (EUploChar[uplo] == 'U')
348 {
349 // A is upper triangular.
350 // Perform a backsolve for column j of B.
351 for (j = izero; j < n; j++)
352 {
353 // Perform alpha*B if alpha is not 1.
354 for (i = izero; i < m; i++)
355 {
356 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
357 }
358 for (k = izero; k < j; k++)
359 {
360 // If this entry is zero, we don't have to do anything.
361 auto A_is_not_zero = (A[j * lda + k] != A_zero);
362 for (i = izero; i < m; i++)
363 {
364 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
365 }
366 }
367 if (noUnit)
368 {
369 temp = B_one / A[j * lda + j];
370 for (i = izero; i < m; i++)
371 {
372 B[j * ldb + i] *= temp;
373 }
374 }
375 }
376 }
377 else
378 { // A is lower triangular.
379 for (j = (n - ione); j > -ione; j--)
380 {
381 // Perform alpha*B if alpha is not 1.
382 for (i = izero; i < m; i++)
383 {
384 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
385 }
386
387 // Perform a forward solve for column j of B.
388 for (k = j + ione; k < n; k++)
389 {
390 // If this entry is zero, we don't have to do anything.
391 auto A_is_not_zero = (A[j * lda + k] != A_zero);
392 for (i = izero; i < m; i++)
393 {
394 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
395 }
396 }
397 if (noUnit)
398 {
399 temp = B_one / A[j * lda + j];
400 for (i = izero; i < m; i++)
401 {
402 B[j * ldb + i] *= temp;
403 }
404 }
405 }
406 } // end if (uplo == 'U')
407 } // if (transa =='N')
408 else
409 {
410 //
411 // Compute B = alpha*B*inv( A' )
412 // or B = alpha*B*inv( conj(A') )
413 //
414 if (EUploChar[uplo] == 'U')
415 {
416 // A is upper triangular.
417 for (k = (n - ione); k > -ione; k--)
418 {
419 if (noUnit)
420 {
421 if (noConj)
422 temp = B_one / A[k * lda + k];
423 else
424 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
425 for (i = izero; i < m; i++)
426 {
427 B[k * ldb + i] *= temp;
428 }
429 }
430 for (j = izero; j < k; j++)
431 {
432 auto A_is_not_zero = (A[k * lda + j] != A_zero);
433 if (noConj)
434 mask_assign(A_is_not_zero, temp) = A[k * lda + j];
435 else
436 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
437 for (i = izero; i < m; i++)
438 {
439 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
440 }
441 }
442 for (i = izero; i < m; i++)
443 {
444 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
445 }
446 }
447 }
448 else
449 { // A is lower triangular.
450 for (k = izero; k < n; k++)
451 {
452 if (noUnit)
453 {
454 if (noConj)
455 temp = B_one / A[k * lda + k];
456 else
457 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
458 for (i = izero; i < m; i++)
459 {
460 B[k * ldb + i] *= temp;
461 }
462 }
463 for (j = k + ione; j < n; j++)
464 {
465 auto A_is_not_zero = (A[k * lda + j] != A_zero);
466 if (noConj)
467 mask_assign(A_is_not_zero, temp) = A[k * lda + j];
468 else
469 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
470 for (i = izero; i < m; i++)
471 {
472 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
473 }
474 }
475 for (i = izero; i < m; i++)
476 {
477 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
478 }
479 }
480 }
481 }
482 }
483 }
484 }
485 }
486}; // class BLAS
487// }
488//}
489
490} // namespace Teuchos
491
492#endif // _TEUCHOS_BLAS__MP_VECTOR_HPP_
KOKKOS_INLINE_FUNCTION MaskedAssign< scalar > mask_assign(bool b, scalar *s)
Teuchos::DefaultBLASImpl< OrdinalType, Sacado::MP::Vector< Storage > > BLASType
Sacado::ScalarType< Sacado::MP::Vector< Storage > >::type scalar_type
Sacado::ValueType< Sacado::MP::Vector< Storage > >::type ValueType
Teuchos::ScalarTraits< Sacado::MP::Vector< Storage > >::magnitudeType MagnitudeType
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
void ROTG(ScalarType *da, ScalarType *db, ScalarType *c, ScalarType *s) const