130 void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo,
131 Teuchos::ETransp transa, Teuchos::EDiag
diag,
132 const OrdinalType m,
const OrdinalType n,
133 const alpha_type &alpha,
134 const A_type *A,
const OrdinalType lda,
137 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
138 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
139 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
140 A_type A_zero = ScalarTraits<A_type>::zero();
141 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
142 alpha_type alpha_one = ScalarTraits<alpha_type>::one();
143 ScalarType B_one = ScalarTraits<ScalarType>::one();
145 OrdinalType NRowA = m;
146 bool BadArgument =
false;
147 bool noUnit = (EDiagChar[
diag] ==
'N');
148 bool noConj = (ETranspChar[transa] ==
'T');
150 if (!(ESideChar[side] ==
'L'))
156 if (n == izero || m == izero)
162 std::cout <<
"BLAS::TRSM Error: M == " << m << std::endl;
167 std::cout <<
"BLAS::TRSM Error: N == " << n << std::endl;
172 std::cout <<
"BLAS::TRSM Error: LDA < " << NRowA << std::endl;
177 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
185 auto alpha_is_zero = (alpha == alpha_zero);
186 for (
j = izero;
j < n;
j++)
188 for (i = izero; i < m; i++)
194 auto alpha_is_not_one = (alpha != alpha_one);
197 if (ESideChar[side] ==
'L')
202 if (ETranspChar[transa] ==
'N')
207 if (EUploChar[uplo] ==
'U')
210 for (
j = izero;
j < n;
j++)
213 for (i = izero; i < m; i++)
219 for (k = (m - ione); k > -ione; k--)
222 auto B_is_not_zero = (B[
j * ldb + k] != B_zero);
226 mask_assign(B_is_not_zero, B[
j * ldb + k]) /= A[k * lda + k];
228 for (i = izero; i < k; i++)
230 mask_assign(B_is_not_zero, B[
j * ldb + i]) -= B[
j * ldb + k] * A[k * lda + i];
237 for (
j = izero;
j < n;
j++)
240 for (i = izero; i < m; i++)
245 for (k = izero; k < m; k++)
248 auto B_is_not_zero = (B[
j * ldb + k] != B_zero);
251 mask_assign(B_is_not_zero, B[
j * ldb + k]) /= A[k * lda + k];
253 for (i = k + ione; i < m; i++)
255 mask_assign(B_is_not_zero, B[
j * ldb + i]) -= B[
j * ldb + k] * A[k * lda + i];
267 if (EUploChar[uplo] ==
'U')
270 for (
j = izero;
j < n;
j++)
272 for (i = izero; i < m; i++)
274 temp = alpha * B[
j * ldb + i];
277 for (k = izero; k < i; k++)
279 temp -= A[i * lda + k] * B[
j * ldb + k];
283 temp /= A[i * lda + i];
288 for (k = izero; k < i; k++)
290 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[
j * ldb + k];
294 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
297 B[
j * ldb + i] = temp;
303 for (
j = izero;
j < n;
j++)
305 for (i = (m - ione); i > -ione; i--)
307 temp = alpha * B[
j * ldb + i];
310 for (k = i + ione; k < m; k++)
312 temp -= A[i * lda + k] * B[
j * ldb + k];
316 temp /= A[i * lda + i];
321 for (k = i + ione; k < m; k++)
323 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[
j * ldb + k];
327 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
330 B[
j * ldb + i] = temp;
342 if (ETranspChar[transa] ==
'N')
347 if (EUploChar[uplo] ==
'U')
351 for (
j = izero;
j < n;
j++)
354 for (i = izero; i < m; i++)
358 for (k = izero; k <
j; k++)
361 auto A_is_not_zero = (A[
j * lda + k] != A_zero);
362 for (i = izero; i < m; i++)
364 mask_assign(A_is_not_zero, B[
j * ldb + i]) -= A[
j * lda + k] * B[k * ldb + i];
369 temp = B_one / A[
j * lda +
j];
370 for (i = izero; i < m; i++)
372 B[
j * ldb + i] *= temp;
379 for (
j = (n - ione);
j > -ione;
j--)
382 for (i = izero; i < m; i++)
388 for (k =
j + ione; k < n; k++)
391 auto A_is_not_zero = (A[
j * lda + k] != A_zero);
392 for (i = izero; i < m; i++)
394 mask_assign(A_is_not_zero, B[
j * ldb + i]) -= A[
j * lda + k] * B[k * ldb + i];
399 temp = B_one / A[
j * lda +
j];
400 for (i = izero; i < m; i++)
402 B[
j * ldb + i] *= temp;
414 if (EUploChar[uplo] ==
'U')
417 for (k = (n - ione); k > -ione; k--)
422 temp = B_one / A[k * lda + k];
424 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
425 for (i = izero; i < m; i++)
427 B[k * ldb + i] *= temp;
430 for (
j = izero;
j < k;
j++)
432 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
436 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda +
j]);
437 for (i = izero; i < m; i++)
439 mask_assign(A_is_not_zero, B[
j * ldb + i]) -= temp * B[k * ldb + i];
442 for (i = izero; i < m; i++)
444 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
450 for (k = izero; k < n; k++)
455 temp = B_one / A[k * lda + k];
457 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
458 for (i = izero; i < m; i++)
460 B[k * ldb + i] *= temp;
463 for (
j = k + ione;
j < n;
j++)
465 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
469 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda +
j]);
470 for (i = izero; i < m; i++)
472 mask_assign(A_is_not_zero, B[
j * ldb + i]) -= temp * B[k * ldb + i];
475 for (i = izero; i < m; i++)
477 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;