Epetra Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
Epetra_JadMatrix.cpp
Go to the documentation of this file.
1
2//@HEADER
3// ************************************************************************
4//
5// Epetra: Linear Algebra Services Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41//@HEADER
42
43#include "Epetra_JadMatrix.h"
44#include "Epetra_Map.h"
45#include "Epetra_Import.h"
46#include "Epetra_Export.h"
47#include "Epetra_Vector.h"
48#include "Epetra_MultiVector.h"
49#include "Epetra_Comm.h"
50#include "Epetra_Util.h"
52#include "Epetra_RowMatrix.h"
53#include "Epetra_CrsMatrix.h"
54
55//==============================================================================
58 Values_(0),
59 Indices_(0),
60 IndexOffset_(0),
61 Profile_(0),
62 RowPerm_(0),
63 InvRowPerm_(0),
65{
66 SetMaps(Matrix.RowMatrixRowMap(), Matrix.RowMatrixColMap(), Matrix.OperatorDomainMap(), Matrix.OperatorRangeMap());
67 if (!Matrix.Filled()) throw Matrix.RowMatrixRowMap().ReportError("Input matrix must have called FillComplete()", -1);
68 Allocate(Matrix);
69 SetLabel("Epetra::JadMatrix");
70}
71
72//==============================================================================
74
75//==============================================================================
76int Epetra_JadMatrix::UpdateValues(const Epetra_RowMatrix & Matrix, bool CheckStructure) {
77
78 int NumEntries;
79 int * Indices = 0;
80 double * Values =0;
81
82 int ierr = 0;
83
84 try { // If matrix is an Epetra_CrsMatrix, we can get date much more cheaply
85
86 const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
87
88 for (int i1=0; i1<NumMyRows_; i1++) {
89
90 EPETRA_CHK_ERR(A.ExtractMyRowView(i1, NumEntries, Values, Indices)); // Get the current row based on the permutation
91 int i = InvRowPerm_[i1]; // Determine permuted row location
92 for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
93 if (CheckStructure)
94 for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
95 }
96 }
97 catch (...) { // Otherwise just live with RowMatrix interface
98
101 Indices = curIndices.Values();
102 Values = curValues.Values();
103 for (int i1=0; i1<NumMyRows_; i1++) {
104 EPETRA_CHK_ERR(Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices)); // Get current row based on the permutation
105 int i = InvRowPerm_[i1]; // Determine permuted row location
106 for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
107 if (CheckStructure)
108 for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
109 }
110 }
111
112 HaveNumericConstants_ = false;
113 EPETRA_CHK_ERR(ierr);
114 return(ierr);
115}
116
117//==============================================================================
119
120 // Allocate IndexOffset storage
121 int numMyRows = Matrix.NumMyRows();
122 int numMyNonzeros = Matrix.NumMyNonzeros();
123
125
126 // Next compute permutation of rows
127 RowPerm_.Resize(numMyRows);
128 InvRowPerm_.Resize(numMyRows);
129 Profile_.Resize(numMyRows);
130 for (int i=0; i<numMyRows; i++) {
131 int NumEntries;
132 Matrix.NumMyRowEntries(i, NumEntries);
133 Profile_[i] = NumEntries;
134 RowPerm_[i] = i;
135 }
136
137 Epetra_Util sorter;
138 int * RowPerm = RowPerm_.Values();
139 sorter.Sort(false, numMyRows, Profile_.Values(), 0, 0, 1, &RowPerm, 0, 0);
140 //cout << "Profile = " << Profile_ << std::endl;
141 //cout << "RowPerm = " << RowPerm_ << std::endl;
142 for (int i=0; i<numMyRows; i++) InvRowPerm_[RowPerm[i]] = i; // Compute inverse row permutation
143 //cout << "InvRowPerm = " << InvRowPerm_ << std::endl;
144
145 // Now build IndexOffsets: These contain the lengths of the jagged diagonals
146
147 for (int i=0; i<NumJaggedDiagonals_; i++) IndexOffset_[i] = 0;
148
149 int curOffset = numMyRows;
150 int * curIndex = IndexOffset_.Values(); // point to first index (will be incremented immediately below)
151 for (int i=1; i<NumJaggedDiagonals_+1; i++) {
152 curIndex++;
153 while (*curIndex==0) {
154 if (Profile_[curOffset-1]<i) curOffset--;
155 else *curIndex = *(curIndex-1) + curOffset; // Set the length of the current jagged diagonal (plus scan sum)
156 }
157 }
158
159 Values_.Resize(numMyNonzeros);
160 Indices_.Resize(numMyNonzeros);
161
162 int NumEntries;
163 int * Indices = 0;
164 double * Values =0;
165
166 try { // If matrix is an Epetra_CrsMatrix, we can get data much more cheaply
167
168 const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
169
170 for (int i1=0; i1<numMyRows; i1++) {
171
172 A.ExtractMyRowView(i1, NumEntries, Values, Indices); // Get the current row
173 int i = InvRowPerm_[i1]; // Determine permuted row location
174 //cout << "i1, i, NumEntries = " << i1 <<" "<< i <<" "<< NumEntries << std::endl;
175 for (int j=0; j< NumEntries; j++) {
176 Values_[IndexOffset_[j]+i] = Values[j];
177 Indices_[IndexOffset_[j]+i] = Indices[j];
178 }
179 }
180 }
181 catch (...) { // Otherwise just live with RowMatrix interface
182
185 Indices = curIndices.Values();
186 Values = curValues.Values();
187 for (int i1=0; i1<numMyRows; i1++) {
188 Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices); // Get current row based on the permutation
189 int i = InvRowPerm_[i1]; // Determine permuted row location
190 for (int j=0; j< NumEntries; j++) {
191 Values_[IndexOffset_[j]+i] = Values[j];
192 Indices_[IndexOffset_[j]+i] = Indices[j];
193 }
194 }
195 }
196}
197//=============================================================================
198int Epetra_JadMatrix::NumMyRowEntries(int MyRow, int & NumEntries) const {
199 int i = InvRowPerm_[MyRow]; // Determine permuted row location
200 NumEntries = Profile_[i]; // NNZ in current row
201 return(0);
202}
203//=============================================================================
204int Epetra_JadMatrix::ExtractMyRowCopy(int MyRow, int Length, int & NumEntries, double *Values, int * Indices) const {
205
206 if(MyRow < 0 || MyRow >= NumMyRows_)
207 EPETRA_CHK_ERR(-1); // Not in Row range
208
209 int i = InvRowPerm_[MyRow]; // Determine permuted row location
210 NumEntries = Profile_[i]; // NNZ in current row
211 if(NumEntries > Length)
212 EPETRA_CHK_ERR(-2); // Not enough space for copy. Needed size is passed back in NumEntries
213
214 for (int j=0; j< NumEntries; j++) Values[j] = Values_[IndexOffset_[j]+i];
215 for (int j=0; j< NumEntries; j++) Indices[j] = Indices_[IndexOffset_[j]+i];
216 return(0);
217}
218//=============================================================================
220 //
221 // This function forms the product Y = A * Y or Y = A' * X
222 //
223
224 int NumVectors = X.NumVectors();
225 if (NumVectors!=Y.NumVectors()) {
226 EPETRA_CHK_ERR(-1); // Need same number of vectors in each MV
227 }
228
229 double** Xp = (double**) X.Pointers();
230 double** Yp = (double**) Y.Pointers();
231 int LDX = X.ConstantStride() ? X.Stride() : 0;
232 int LDY = Y.ConstantStride() ? Y.Stride() : 0;
233 UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
234 UpdateExportVector(NumVectors);
235
236 if (!TransA) {
237
238 // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
239 if (Importer()!=0) {
241 Xp = (double**)ImportVector_->Pointers();
242 LDX = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
243 }
244
245 // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
246 if (Exporter()!=0) {
247 Yp = (double**)ExportVector_->Pointers();
248 LDY = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
249 }
250
251 // Do actual computation
252 if (NumVectors==1)
253 GeneralMV(TransA, *Xp, *Yp);
254 else
255 GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
256 if (Exporter()!=0) {
257 Y.PutScalar(0.0); // Make sure target is zero
258 Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
259 }
260 // Handle case of rangemap being a local replicated map
261 if (!OperatorRangeMap().DistributedGlobal() && Comm().NumProc()>1) EPETRA_CHK_ERR(Y.Reduce());
262 }
263 else { // Transpose operation
264
265
266 // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
267
268 if (Exporter()!=0) {
270 Xp = (double**)ExportVector_->Pointers();
271 LDX = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
272 }
273
274 // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
275 if (Importer()!=0) {
276 Yp = (double**)ImportVector_->Pointers();
277 LDY = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
278 }
279
280 // Do actual computation
281 if (NumVectors==1)
282 GeneralMV(TransA, *Xp, *Yp);
283 else
284 GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
285 if (Importer()!=0) {
286 Y.PutScalar(0.0); // Make sure target is zero
287 EPETRA_CHK_ERR(Y.Export(*ImportVector_, *Importer(), Add)); // Fill Y with Values from export vector
288 }
289 // Handle case of rangemap being a local replicated map
290 if (!OperatorDomainMap().DistributedGlobal() && Comm().NumProc()>1) EPETRA_CHK_ERR(Y.Reduce());
291 }
292
293 UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
294 return(0);
295}
296//=======================================================================================================
297void Epetra_JadMatrix::GeneralMM(bool TransA, double ** X, int LDX, double ** Y, int LDY, int NumVectors) const {
298
299 if (LDX==0 || LDY==0 || NumVectors==1) {// Can't unroll RHS if X or Y not strided
300 for (int k=0; k<NumVectors; k++) GeneralMV(TransA, X[k], Y[k]);
301 }
302 else if (NumVectors==2) // Special 2 RHS case (does unrolling in both NumVectors and NumJaggedDiagonals)
303 GeneralMM2RHS(TransA, X[0], LDX, Y[0], LDY);
304 // Otherwise unroll RHS only
305 else
306 GeneralMM3RHS(TransA, X, LDX, Y, LDY, NumVectors);
307
308 return;
309}
310//=======================================================================================================
311void Epetra_JadMatrix::GeneralMM3RHS(bool TransA, double ** X, int ldx, double ** Y, int ldy, int NumVectors) const {
312
313#ifdef _CRAY
314#define Pragma(S) _Pragma(S)
315#else
316#define Pragma(S)
317#endif
318
319 // Routine for 3 or more RHS
320
321 const double * Values = Values_.Values();
322 const int * Indices = Indices_.Values();
323 const int * IndexOffset = IndexOffset_.Values();
324 const int * RowPerm = RowPerm_.Values();
325 for (int j=0; j<NumVectors; j++) {
326 double * y = Y[j];
327 if (!TransA)
328 for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
329 else
330 for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
331 }
332
333 int nv = NumVectors%5; if (nv==0) nv=5;
334 double * x = X[0];
335 double * y = Y[0];
336
337
338 for (int k=0; k<NumVectors; k+=5) {
339
340 for (int j=0; j<NumJaggedDiagonals_; j++) {
341 const int * curIndices = Indices+IndexOffset[j];
342 const double * curValues = Values+IndexOffset[j];
343 int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
344 switch (nv){
345 case 1:
346 {
347 if (!TransA) {
348Pragma("_CRI ivdep")
349 for (int i=0; i<jaggedDiagonalLength; i++) {
350 int ix = curIndices[i];
351 int iy = RowPerm[i];
352 double val = curValues[i];
353 y[iy] += val*x[ix];
354 }
355 }
356 else {
357Pragma("_CRI ivdep")
358 for (int i=0; i<jaggedDiagonalLength; i++) {
359 int iy = curIndices[i];
360 int ix = RowPerm[i];
361 double val = curValues[i];
362 y[iy] += val*x[ix];
363 }
364 }
365 break;
366 }
367 case 2:
368 {
369 if (!TransA) {
370Pragma("_CRI ivdep")
371 for (int i=0; i<jaggedDiagonalLength; i++) {
372 int ix = curIndices[i];
373 int iy = RowPerm[i];
374 double val = curValues[i];
375 y[iy] += val*x[ix];
376 iy+=ldy; ix+=ldx;
377 y[iy] += val*x[ix];
378 }
379 }
380 else {
381Pragma("_CRI ivdep")
382 for (int i=0; i<jaggedDiagonalLength; i++) {
383 int iy = curIndices[i];
384 int ix = RowPerm[i];
385 double val = curValues[i];
386 y[iy] += val*x[ix];
387 iy+=ldy; ix+=ldx;
388 y[iy] += val*x[ix];
389 }
390 }
391 break;
392 }
393 case 3:
394 {
395 if (!TransA) {
396Pragma("_CRI ivdep")
397 for (int i=0; i<jaggedDiagonalLength; i++) {
398 int ix = curIndices[i];
399 int iy = RowPerm[i];
400 double val = curValues[i];
401 y[iy] += val*x[ix];
402 iy+=ldy; ix+=ldx;
403 y[iy] += val*x[ix];
404 iy+=ldy; ix+=ldx;
405 y[iy] += val*x[ix];
406 }
407 }
408 else {
409Pragma("_CRI ivdep")
410 for (int i=0; i<jaggedDiagonalLength; i++) {
411 int iy = curIndices[i];
412 int ix = RowPerm[i];
413 double val = curValues[i];
414 y[iy] += val*x[ix];
415 iy+=ldy; ix+=ldx;
416 y[iy] += val*x[ix];
417 iy+=ldy; ix+=ldx;
418 y[iy] += val*x[ix];
419 }
420 }
421 break;
422 }
423 case 4:
424 {
425 if (!TransA) {
426Pragma("_CRI ivdep")
427 for (int i=0; i<jaggedDiagonalLength; i++) {
428 int ix = curIndices[i];
429 int iy = RowPerm[i];
430 double val = curValues[i];
431 y[iy] += val*x[ix];
432 iy+=ldy; ix+=ldx;
433 y[iy] += val*x[ix];
434 iy+=ldy; ix+=ldx;
435 y[iy] += val*x[ix];
436 iy+=ldy; ix+=ldx;
437 y[iy] += val*x[ix];
438 }
439 }
440 else {
441Pragma("_CRI ivdep")
442 for (int i=0; i<jaggedDiagonalLength; i++) {
443 int iy = curIndices[i];
444 int ix = RowPerm[i];
445 double val = curValues[i];
446 y[iy] += val*x[ix];
447 iy+=ldy; ix+=ldx;
448 y[iy] += val*x[ix];
449 iy+=ldy; ix+=ldx;
450 y[iy] += val*x[ix];
451 iy+=ldy; ix+=ldx;
452 y[iy] += val*x[ix];
453 }
454 }
455 break;
456 }
457 case 5:
458 {
459 if (!TransA) {
460Pragma("_CRI ivdep")
461 for (int i=0; i<jaggedDiagonalLength; i++) {
462 int ix = curIndices[i];
463 int iy = RowPerm[i];
464 double val = curValues[i];
465 y[iy] += val*x[ix];
466 iy+=ldy; ix+=ldx;
467 y[iy] += val*x[ix];
468 iy+=ldy; ix+=ldx;
469 y[iy] += val*x[ix];
470 iy+=ldy; ix+=ldx;
471 y[iy] += val*x[ix];
472 iy+=ldy; ix+=ldx;
473 y[iy] += val*x[ix];
474 }
475 }
476 else {
477Pragma("_CRI ivdep")
478 for (int i=0; i<jaggedDiagonalLength; i++) {
479 int iy = curIndices[i];
480 int ix = RowPerm[i];
481 double val = curValues[i];
482 y[iy] += val*x[ix];
483 iy+=ldy; ix+=ldx;
484 y[iy] += val*x[ix];
485 iy+=ldy; ix+=ldx;
486 y[iy] += val*x[ix];
487 iy+=ldy; ix+=ldx;
488 y[iy] += val*x[ix];
489 iy+=ldy; ix+=ldx;
490 y[iy] += val*x[ix];
491 }
492 }
493 break;
494 }
495 }
496 }
497 x += nv*ldx;
498 y += nv*ldy;
499 nv = 5; // After initial remainder, we will always do 5 RHS
500 }
501 return;
502}
503//=======================================================================================================
504void Epetra_JadMatrix::GeneralMM2RHS(bool TransA, double * x, int ldx, double * y, int ldy) const {
505
506 // special 2 rhs case
507
508 const double * Values = Values_.Values();
509 const int * Indices = Indices_.Values();
510 const int * IndexOffset = IndexOffset_.Values();
511 const int * RowPerm = RowPerm_.Values();
512 if (!TransA)
513 for (int i=0; i<NumMyRows_; i++) {
514 y[i] = 0.0;
515 y[i+ldy] = 0.0;
516 }
517 else
518 for (int i=0; i<NumMyCols_; i++) {
519 y[i] = 0.0;
520 y[i+ldy] = 0.0;
521 }
522
523 int j = 0;
524 while (j<NumJaggedDiagonals_) {
525 int j0 = j;
526 int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
527 j++;
528 // check if other diagonals have same length up to a max of 2
529 while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<2)) j++;
530
531 int numDiags = j-j0;
532 assert(numDiags<3 && numDiags>0);
533 assert(j<NumJaggedDiagonals_+1);
534
535 switch (numDiags){
536 case 1:
537 {
538 const int * curIndices = Indices+IndexOffset[j0];
539 const double * curValues = Values+IndexOffset[j0];
540 if (!TransA) {
541Pragma("_CRI ivdep")
542 for (int i=0; i<jaggedDiagonalLength; i++) {
543 int ix = curIndices[i];
544 int iy = RowPerm[i];
545 y[iy] += curValues[i]*x[ix];
546 iy+=ldy; ix+=ldx;
547 y[iy] += curValues[i]*x[ix];
548 }
549 }
550 else {
551Pragma("_CRI ivdep")
552 for (int i=0; i<jaggedDiagonalLength; i++){
553 int iy = curIndices[i];
554 int ix = RowPerm[i];
555 y[iy] += curValues[i]*x[ix];
556 iy+=ldy; ix+=ldx;
557 y[iy] += curValues[i]*x[ix];
558 }
559 }
560 break;
561 }
562 case 2:
563 {
564 const int * curIndices0 = Indices+IndexOffset[j0];
565 const double * curValues0 = Values+IndexOffset[j0++];
566 const int * curIndices1 = Indices+IndexOffset[j0];
567 const double * curValues1 = Values+IndexOffset[j0];
568 if (!TransA) {
569Pragma("_CRI ivdep")
570 for (int i=0; i<jaggedDiagonalLength; i++) {
571 int ix0 = curIndices0[i];
572 int ix1 = curIndices1[i];
573 int iy = RowPerm[i];
574 y[iy] +=
575 curValues0[i]*x[ix0] +
576 curValues1[i]*x[ix1];
577 iy+=ldy; ix0+=ldx; ix1+=ldx;
578 y[iy] +=
579 curValues0[i]*x[ix0] +
580 curValues1[i]*x[ix1];
581 }
582 }
583 else {
584Pragma("_CRI ivdep")
585 for (int i=0; i<jaggedDiagonalLength; i++) {
586 int iy0 = curIndices0[i];
587 int iy1 = curIndices1[i];
588 int ix = RowPerm[i];
589 double xval = x[ix];
590 y[iy0] += curValues0[i]*xval;
591 y[iy1] += curValues1[i]*xval;
592 ix+=ldx; iy0+=ldy; iy1+=ldy;
593 xval = x[ix];
594 y[iy0] += curValues0[i]*xval;
595 y[iy1] += curValues1[i]*xval;
596 }
597 }
598 }
599 break;
600 }
601 }
602 return;
603}
604//=======================================================================================================
605void Epetra_JadMatrix::GeneralMV(bool TransA, double * x, double * y) const {
606
607 const double * Values = Values_.Values();
608 const int * Indices = Indices_.Values();
609 const int * IndexOffset = IndexOffset_.Values();
610 const int * RowPerm = RowPerm_.Values();
611 if (!TransA)
612 for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
613 else
614 for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
615
616 int j = 0;
617 while (j<NumJaggedDiagonals_) {
618 int j0 = j;
619 int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
620 j++;
621 // check if other diagonals have same length up to a max of 5
622 while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<5)) j++;
623
624 int numDiags = j-j0;
625 assert(numDiags<6 && numDiags>0);
626 assert(j<NumJaggedDiagonals_+1);
627
628 switch (numDiags){
629 case 1:
630 {
631 const int * curIndices = Indices+IndexOffset[j0];
632 const double * curValues = Values+IndexOffset[j0];
633 if (!TransA) {
634Pragma("_CRI ivdep")
635 for (int i=0; i<jaggedDiagonalLength; i++)
636 y[RowPerm[i]] += curValues[i]*x[curIndices[i]];
637 }
638 else {
639Pragma("_CRI ivdep")
640 for (int i=0; i<jaggedDiagonalLength; i++)
641 y[curIndices[i]] += curValues[i]*x[RowPerm[i]];
642 }
643 break;
644 }
645 case 2:
646 {
647 const int * curIndices0 = Indices+IndexOffset[j0];
648 const double * curValues0 = Values+IndexOffset[j0++];
649 const int * curIndices1 = Indices+IndexOffset[j0];
650 const double * curValues1 = Values+IndexOffset[j0];
651 if (!TransA) {
652Pragma("_CRI ivdep")
653 for (int i=0; i<jaggedDiagonalLength; i++) {
654 y[RowPerm[i]] +=
655 curValues0[i]*x[curIndices0[i]] +
656 curValues1[i]*x[curIndices1[i]];
657 }
658 }
659 else {
660 //Pragma("_CRI ivdep") (Collisions possible)
661 for (int i=0; i<jaggedDiagonalLength; i++) {
662 double xval = x[RowPerm[i]];
663 y[curIndices0[i]] += curValues0[i]*xval;
664 y[curIndices1[i]] += curValues1[i]*xval;
665 }
666 }
667 }
668 break;
669 case 3:
670 {
671 const int * curIndices0 = Indices+IndexOffset[j0];
672 const double * curValues0 = Values+IndexOffset[j0++];
673 const int * curIndices1 = Indices+IndexOffset[j0];
674 const double * curValues1 = Values+IndexOffset[j0++];
675 const int * curIndices2 = Indices+IndexOffset[j0];
676 const double * curValues2 = Values+IndexOffset[j0];
677 if (!TransA) {
678Pragma("_CRI ivdep")
679 for (int i=0; i<jaggedDiagonalLength; i++) {
680 y[RowPerm[i]] +=
681 curValues0[i]*x[curIndices0[i]] +
682 curValues1[i]*x[curIndices1[i]] +
683 curValues2[i]*x[curIndices2[i]];
684 }
685 }
686 else {
687 //Pragma("_CRI ivdep") (Collisions possible)
688 for (int i=0; i<jaggedDiagonalLength; i++) {
689 double xval = x[RowPerm[i]];
690 y[curIndices0[i]] += curValues0[i]*xval;
691 y[curIndices1[i]] += curValues1[i]*xval;
692 y[curIndices2[i]] += curValues2[i]*xval;
693 }
694 }
695 }
696 break;
697 case 4:
698 {
699 const int * curIndices0 = Indices+IndexOffset[j0];
700 const double * curValues0 = Values+IndexOffset[j0++];
701 const int * curIndices1 = Indices+IndexOffset[j0];
702 const double * curValues1 = Values+IndexOffset[j0++];
703 const int * curIndices2 = Indices+IndexOffset[j0];
704 const double * curValues2 = Values+IndexOffset[j0++];
705 const int * curIndices3 = Indices+IndexOffset[j0];
706 const double * curValues3 = Values+IndexOffset[j0];
707 if (!TransA) {
708Pragma("_CRI ivdep")
709 for (int i=0; i<jaggedDiagonalLength; i++) {
710 y[RowPerm[i]] +=
711 curValues0[i]*x[curIndices0[i]] +
712 curValues1[i]*x[curIndices1[i]] +
713 curValues2[i]*x[curIndices2[i]] +
714 curValues3[i]*x[curIndices3[i]];
715 }
716 }
717 else {
718 //Pragma("_CRI ivdep") (Collisions possible)
719 for (int i=0; i<jaggedDiagonalLength; i++) {
720 double xval = x[RowPerm[i]];
721 y[curIndices0[i]] += curValues0[i]*xval;
722 y[curIndices1[i]] += curValues1[i]*xval;
723 y[curIndices2[i]] += curValues2[i]*xval;
724 y[curIndices3[i]] += curValues3[i]*xval;
725 }
726 }
727 }
728 break;
729 case 5:
730 {
731 const int * curIndices0 = Indices+IndexOffset[j0];
732 const double * curValues0 = Values+IndexOffset[j0++];
733 const int * curIndices1 = Indices+IndexOffset[j0];
734 const double * curValues1 = Values+IndexOffset[j0++];
735 const int * curIndices2 = Indices+IndexOffset[j0];
736 const double * curValues2 = Values+IndexOffset[j0++];
737 const int * curIndices3 = Indices+IndexOffset[j0];
738 const double * curValues3 = Values+IndexOffset[j0++];
739 const int * curIndices4 = Indices+IndexOffset[j0];
740 const double * curValues4 = Values+IndexOffset[j0];
741 if (!TransA) {
742Pragma("_CRI ivdep")
743 for (int i=0; i<jaggedDiagonalLength; i++) {
744 y[RowPerm[i]] +=
745 curValues0[i]*x[curIndices0[i]] +
746 curValues1[i]*x[curIndices1[i]] +
747 curValues2[i]*x[curIndices2[i]] +
748 curValues3[i]*x[curIndices3[i]] +
749 curValues4[i]*x[curIndices4[i]];
750 }
751 }
752 else {
753 // Pragma("_CRI ivdep") (Collisions possible)
754 for (int i=0; i<jaggedDiagonalLength; i++) {
755 double xval = x[RowPerm[i]];
756 y[curIndices0[i]] += curValues0[i]*xval;
757 y[curIndices1[i]] += curValues1[i]*xval;
758 y[curIndices2[i]] += curValues2[i]*xval;
759 y[curIndices3[i]] += curValues3[i]*xval;
760 y[curIndices4[i]] += curValues4[i]*xval;
761 }
762 }
763 }
764 break;
765 }
766 }
767 return;
768}
#define EPETRA_CHK_ERR(a)
#define Pragma(S)
void SetMaps(const Epetra_Map &RowMap, const Epetra_Map &ColMap)
Set maps (Version 1); call this function or the next, but not both.
virtual const Epetra_Map & OperatorDomainMap() const
Returns the Epetra_Map object associated with the domain of this operator.
Epetra_MultiVector * ImportVector_
virtual const Epetra_Export * Exporter() const
Returns the Epetra_Export object that contains the export operations for distributed operations,...
virtual const Epetra_Map & OperatorRangeMap() const
Returns the Epetra_Map object associated with the range of this operator (same as domain).
virtual int MaxNumEntries() const
Returns the maximum number of nonzero entries across all rows on this processor.
virtual long long NumGlobalNonzeros64() const
Epetra_BasicRowMatrix(const Epetra_Comm &Comm)
Epetra_BasicRowMatrix constructor.
virtual const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this matrix.
virtual const Epetra_Import * Importer() const
Returns the Epetra_Import object that contains the import operations for distributed operations,...
void UpdateExportVector(int NumVectors) const
void UpdateImportVector(int NumVectors) const
virtual const Epetra_Map & RowMatrixRowMap() const
Returns the Row Map object needed for implementing Epetra_RowMatrix.
Epetra_MultiVector * ExportVector_
void UpdateFlops(int Flops_in) const
Increment Flop count for this object.
Epetra_CrsMatrix: A class for constructing and using real-valued double-precision sparse compressed r...
int ExtractMyRowView(int MyRow, int &NumEntries, double *&Values, int *&Indices) const
Returns a view of the specified local row values via pointers to internal data.
int Export(const Epetra_SrcDistObject &A, const Epetra_Import &Importer, Epetra_CombineMode CombineMode, const Epetra_OffsetIndex *Indexor=0)
Exports an Epetra_DistObject using the Epetra_Import object.
Epetra_IntSerialDenseVector: A class for constructing and using dense vectors.
int * Values()
Returns pointer to the values in vector.
void GeneralMM(bool TransA, double **X, int LDX, double **Y, int LDY, int NumVectors) const
Epetra_IntSerialDenseVector IndexOffset_
int UpdateValues(const Epetra_RowMatrix &Matrix, bool CheckStructure=false)
Update values using a matrix with identical structure.
int NumMyRowEntries(int MyRow, int &NumEntries) const
Return the current number of values stored for the specified local row.
Epetra_IntSerialDenseVector InvRowPerm_
Epetra_IntSerialDenseVector Profile_
virtual ~Epetra_JadMatrix()
Epetra_JadMatrix Destructor.
int Multiply(bool TransA, const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Returns the result of a Epetra_JadMatrix multiplied by a Epetra_MultiVector X in Y.
void GeneralMM3RHS(bool TransA, double **X, int LDX, double **Y, int LDY, int NumVectors) const
Epetra_JadMatrix(const Epetra_RowMatrix &Matrix)
Epetra_JadMatrix constuctor.
int ExtractMyRowCopy(int MyRow, int Length, int &NumEntries, double *Values, int *Indices) const
Returns a copy of the specified local row in user-provided arrays.
Epetra_IntSerialDenseVector RowPerm_
void Allocate(const Epetra_RowMatrix &Matrix)
void GeneralMV(bool TransA, double *x, double *y) const
Epetra_IntSerialDenseVector Indices_
Epetra_SerialDenseVector Values_
void GeneralMM2RHS(bool TransA, double *x, int ldx, double *y, int ldy) const
Epetra_MultiVector: A class for constructing and using dense multi-vectors, vectors and matrices in p...
int NumVectors() const
Returns the number of vectors in the multi-vector.
bool ConstantStride() const
Returns true if this multi-vector has constant stride between vectors.
int Stride() const
Returns the stride between vectors in the multi-vector (only meaningful if ConstantStride() is true).
double ** Pointers() const
Get pointer to individual vector pointers.
int PutScalar(double ScalarConstant)
Initialize all values in a multi-vector with constant value.
virtual int ReportError(const std::string Message, int ErrorCode) const
Error reporting method.
virtual void SetLabel(const char *const Label)
Epetra_Object Label definition using char *.
virtual const Epetra_Map & OperatorDomainMap() const =0
Returns the Epetra_Map object associated with the domain of this operator.
virtual const Epetra_Map & OperatorRangeMap() const =0
Returns the Epetra_Map object associated with the range of this operator.
Epetra_RowMatrix: A pure virtual class for using real-valued double-precision row matrices.
virtual int NumMyRows() const =0
Returns the number of matrix rows owned by the calling processor.
virtual const Epetra_Map & RowMatrixColMap() const =0
Returns the Epetra_Map object associated with the columns of this matrix.
virtual const Epetra_Map & RowMatrixRowMap() const =0
Returns the Epetra_Map object associated with the rows of this matrix.
virtual int NumMyNonzeros() const =0
Returns the number of nonzero entries in the calling processor's portion of the matrix.
virtual int NumMyRowEntries(int MyRow, int &NumEntries) const =0
Returns the number of nonzero entries in MyRow.
virtual int ExtractMyRowCopy(int MyRow, int Length, int &NumEntries, double *Values, int *Indices) const =0
Returns a copy of the specified local row in user-provided arrays.
virtual bool Filled() const =0
If FillComplete() has been called, this query returns true, otherwise it returns false.
Epetra_SerialDenseVector: A class for constructing and using dense vectors.
double * Values() const
Returns pointer to the values in vector.
Epetra_Util: The Epetra Util Wrapper Class.
Definition Epetra_Util.h:79
static void EPETRA_LIB_DLL_EXPORT Sort(bool SortAscending, int NumKeys, T *Keys, int NumDoubleCompanions, double **DoubleCompanions, int NumIntCompanions, int **IntCompanions, int NumLongLongCompanions, long long **LongLongCompanions)
Epetra_Util Sort Routine (Shell sort)