Sacado Package Browser (Single Doxygen Collection)
Version of the Day
Toggle main menu visibility
Loading...
Searching...
No Matches
test
UnitTests
Fad_CommTests.hpp
Go to the documentation of this file.
1
// @HEADER
2
// ***********************************************************************
3
//
4
// Sacado Package
5
// Copyright (2006) Sandia Corporation
6
//
7
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8
// the U.S. Government retains certain rights in this software.
9
//
10
// This library is free software; you can redistribute it and/or modify
11
// it under the terms of the GNU Lesser General Public License as
12
// published by the Free Software Foundation; either version 2.1 of the
13
// License, or (at your option) any later version.
14
//
15
// This library is distributed in the hope that it will be useful, but
16
// WITHOUT ANY WARRANTY; without even the implied warranty of
17
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
// Lesser General Public License for more details.
19
//
20
// You should have received a copy of the GNU Lesser General Public
21
// License along with this library; if not, write to the Free Software
22
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
// USA
24
// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25
// (etphipp@sandia.gov).
26
//
27
// ***********************************************************************
28
// @HEADER
29
#include "Teuchos_TestingHelpers.hpp"
30
#include "Teuchos_CommHelpers.hpp"
31
#include "Teuchos_DefaultComm.hpp"
32
#include "Teuchos_Array.hpp"
33
#include "Teuchos_Comm.hpp"
34
35
#include "
Sacado_mpl_apply.hpp
"
36
#include "
Sacado_Random.hpp
"
37
38
using
Teuchos::RCP;
39
using
Teuchos::rcp;
40
using
Teuchos::ValueTypeSerializer;
41
42
template
<
typename
ArrayType>
43
bool
checkFadArrays
(
const
ArrayType& x,
44
const
ArrayType& x2,
45
const
std::string& tag,
46
Teuchos::FancyOStream& out) {
47
typedef
typename
ArrayType::value_type
FadType
;
48
49
// Check sizes match
50
bool
success = (x.size() == x2.size());
51
out << tag <<
" Fad array size test"
;
52
if
(success)
53
out <<
" passed"
;
54
else
55
out <<
" failed"
;
56
out <<
": \n\tExpected: "
<< x.size() <<
", \n\tGot: "
<< x2.size()
57
<<
"."
<< std::endl;
58
59
// Check Fads match
60
const
int
sz = x.size();
61
for
(
int
i=0; i<sz; i++) {
62
bool
success2 =
Sacado::IsEqual<FadType>::eval
(x[i], x2[i]);
63
out << tag <<
" Fad array comparison test "
<< i;
64
if
(success2)
65
out <<
" passed"
;
66
else
67
out <<
" failed"
;
68
out <<
": \n\tExpected: "
<< x[i] <<
", \n\tGot: "
<< x2[i] <<
"."
69
<< std::endl;
70
success = success && success2;
71
}
72
73
return
success;
74
}
75
76
template
<
typename
Ordinal>
77
bool
checkResultOnAllProcs
(
78
const
Teuchos::Comm<Ordinal> &comm,
79
Teuchos::FancyOStream &out,
80
const
bool
result
81
)
82
{
83
out <<
"\nChecking that the above test passed in all processes ..."
;
84
int
thisResult = ( result ? 1 : 0 );
85
int
sumResult = -1;
86
Teuchos::reduceAll(comm,Teuchos::REDUCE_SUM,
Ordinal
(1),&thisResult,
87
&sumResult);
88
const
bool
passed = sumResult==Teuchos::size(comm);
89
if
(passed)
90
out <<
" passed\n"
;
91
else
92
out <<
" (sumResult="
<<sumResult<<
"!=numProcs="
<<Teuchos::size(comm)<<
") failed\n"
;
93
return
passed;
94
}
95
96
// Defines the Teuchos unit tests that exercise Teuchos communication
// routines (broadcast, gatherAll, reduceAll, scan, send/receive) on arrays of
// FadType and of the nested type Sacado::mpl::apply<FadType,FadType>::type.
//
//   FadType - the Fad type under test
//   FAD     - token used to build the test-group name (FAD##_Comm) and, in
//             stringized form, the prefix of every output tag
//
// The expansion refers to `Ordinal`, `rnd`, `out` and `success`, none of
// which are declared in this macro; they must be in scope where it is used
// (presumably `out` and `success` come from TEUCHOS_UNIT_TEST -- confirm).
#define FAD_BASE_COMM_TESTS(FadType, FAD) \
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_Broadcast ) { \
  /* Broadcast an array of Fad values from rank 0, first with the default */ \
  /* serialization and then with the explicit serializer `fts`. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x is the reference data; x2 starts zeroed, x3 is left as constructed */ \
  Teuchos::Array<FadType> x(n), x2(n), x3(n); \
  for (int row=0; row<n; ++row) { \
    x[row] = FadType(p, rnd.number()); \
    for (int d=0; d<p; ++d) \
      x[row].fastAccessDx(d) = rnd.number(); \
    x2[row] = FadType(p, 0.0); \
  } \
  /* Only rank 0 starts out holding the data that is broadcast */ \
  if (comm->getRank() == 0) { \
    x2 = x; \
    x3 = x; \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::broadcast(*comm, 0, n, &x2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(x, x2, fad_name+" Broadcast", out)); \
  \
  Teuchos::broadcast(*comm, fts, 0, n, &x3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(x, x3, fad_name+" Broadcast FTS", out)); \
  \
  success = success1 && success2; \
} \
132
\
133
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_GatherAll ) { \
  /* Gather n Fad values from every process into arrays of length n*size, */ \
  /* with the default serialization and with the serializer `fts`. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int size = comm->getSize(); \
  const int rank = comm->getRank(); \
  const int N = n*size; \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  Teuchos::Array<FadType> x(n), x2(N), x3(N), x4(N); \
  /* Local contribution: depends on this rank and on the entry index */ \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, (rank+1)*(i+1)); \
    for (int d=0; d<p; ++d) \
      x[i].fastAccessDx(d) = (rank+1)*(i+1)*(d+1); \
  } \
  /* x2 is the zeroed receive buffer; x3 is the expected gathered result, */ \
  /* built from the same formula for every process index */ \
  for (int proc=0; proc<size; ++proc) { \
    for (int i=0; i<n; ++i) { \
      const int idx = n*proc+i; \
      x2[idx] = FadType(p, 0.0); \
      x3[idx] = FadType(p, (proc+1)*(i+1)); \
      for (int d=0; d<p; ++d) \
        x3[idx].fastAccessDx(d) = (proc+1)*(i+1)*(d+1); \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(x3, x2, fad_name+" Gather All", out)); \
  \
  Teuchos::gatherAll(*comm, fts, n, &x[0], N, &x4[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(x3, x4, fad_name+" Gather All FTS", out)); \
  \
  success = success1 && success2; \
} \
174
\
175
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SumAll ) { \
  /* Sum-reduce an array of Fad values over all processes.  Every process */ \
  /* contributes the same x, so the expected sum is num_proc times x. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int num_proc = comm->getSize(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, sums: expected result, sums2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)); \
    sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
    sums2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1); \
      sums[i].fastAccessDx(d) = 2.0*(i+1)*num_proc; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(sums, sums2, fad_name+" Sum All", out)); \
  \
  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(sums, sums3, fad_name+" Sum All FTS", out)); \
  \
  success = success1 && success2; \
} \
212
\
213
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MaxAll ) { \
  /* Max-reduce an array of Fad values over all processes.  The local */ \
  /* input scales with (rank+1); the expected result scales with num_proc. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int rank = comm->getRank(); \
  const int num_proc = comm->getSize(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, maxs: expected result, maxs2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
    maxs2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
      maxs[i].fastAccessDx(d) = 2.0*(i+1)*num_proc; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(maxs, maxs2, fad_name+" Max All", out)); \
  \
  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(maxs, maxs3, fad_name+" Max All FTS", out)); \
  \
  success = success1 && success2; \
} \
251
\
252
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MinAll ) { \
  /* Min-reduce an array of Fad values over all processes.  The local */ \
  /* input scales with (rank+1); the expected result uses a factor of 1. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int rank = comm->getRank(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, mins: expected result, mins2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    mins[i] = FadType(p, 1.0*(i+1)); \
    mins2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
      mins[i].fastAccessDx(d) = 2.0*(i+1); \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(mins, mins2, fad_name+" Min All", out)); \
  \
  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(mins, mins3, fad_name+" Min All FTS", out)); \
  \
  success = success1 && success2; \
} \
289
\
290
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanSum ) { \
  /* Sum-scan an array of Fad values.  Every process contributes the same */ \
  /* x; the expected result on this process is (rank+1) times x. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int rank = comm->getRank(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, sums: expected result, sums2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)); \
    sums[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    sums2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1); \
      sums[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(sums, sums2, fad_name+" Scan Sum", out)); \
  \
  Teuchos::scan(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(sums, sums3, fad_name+" Scan Sum FTS", out)); \
  \
  success = success1 && success2; \
} \
327
\
328
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMax ) { \
  /* Max-scan an array of Fad values.  The local input scales with */ \
  /* (rank+1) and the expected result is built from the same formula. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int rank = comm->getRank(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, maxs: expected result, maxs2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    maxs[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    maxs2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
      maxs[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(maxs, maxs2, fad_name+" Scan Max", out)); \
  \
  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(maxs, maxs3, fad_name+" Scan Max FTS", out)); \
  \
  success = success1 && success2; \
} \
365
\
366
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMin ) { \
  /* Min-scan an array of Fad values.  The local input scales with */ \
  /* (rank+1); the expected result uses a factor of 1. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p = 5; \
  const int rank = comm->getRank(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), p); \
  \
  /* x: input, mins: expected result, mins2: zeroed output buffer */ \
  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
  for (int i=0; i<n; ++i) { \
    x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
    mins[i] = FadType(p, 1.0*(i+1)); \
    mins2[i] = FadType(p, 0.0); \
    for (int d=0; d<p; ++d) { \
      x[i].fastAccessDx(d) = 2.0*(i+1)*(rank+1); \
      mins[i].fastAccessDx(d) = 2.0*(i+1); \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(mins, mins2, fad_name+" Scan Min", out)); \
  \
  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(mins, mins3, fad_name+" Scan Min FTS", out)); \
  \
  success = success1 && success2; \
} \
403
\
404
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SendReceive ) { \
  /* Send an array of Fad values from rank 0 to rank 1, with the default */ \
  /* serialization and with the serializer `fts`. */ \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_proc = comm->getSize(); \
  if (num_proc <= 1) { \
    /* No second process to exchange data with */ \
    success = true; \
  } \
  else { \
    const int rank = comm->getRank(); \
    const int n = 7; \
    const int p = 5; \
    ValueTypeSerializer<int,FadType> fts( \
      rcp(new ValueTypeSerializer<int,double>), p); \
    \
    Teuchos::Array<FadType> x(n), x2(n), x3(n); \
    for (int i=0; i<n; ++i) { \
      x[i] = FadType(p, 1.0*(i+1)); \
      for (int d=0; d<p; ++d) \
        x[i].fastAccessDx(d) = 2.0*(i+1)*(d+1); \
      x2[i] = FadType(p, 0.0); \
    } \
    /* Every process except the receiver (rank 1) starts with copies of x */ \
    if (rank != 1) { \
      x2 = x; \
      x3 = x; \
    } \
    \
    const std::string fad_name(#FAD); \
    if (rank == 0) \
      Teuchos::send(*comm, n, &x[0], 1); \
    else if (rank == 1) \
      Teuchos::receive(*comm, 0, n, &x2[0]); \
    const bool success1 = checkResultOnAllProcs( \
      *comm, out, checkFadArrays(x, x2, fad_name+" Send/Receive", out)); \
    \
    if (rank == 0) \
      Teuchos::send(*comm, fts, n, &x[0], 1); \
    else if (rank == 1) \
      Teuchos::receive(*comm, fts, 0, n, &x3[0]); \
    const bool success2 = checkResultOnAllProcs( \
      *comm, out, checkFadArrays(x, x3, fad_name+" Send/Receive FTS", out)); \
    \
    success = success1 && success2; \
  } \
} \
447
\
448
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_Broadcast ) { \
  /* Broadcast an array of nested Fad values from rank 0, with the default */ \
  /* serialization and with the nested serializer `ffts`. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
  for (int i=0; i<n; ++i) { \
    /* Random value part of the outer Fad */ \
    FadType val(p1, rnd.number()); \
    for (int k=0; k<p1; ++k) \
      val.fastAccessDx(k) = rnd.number(); \
    x[i] = FadFadType(p2, val); \
    /* Random derivative parts of the outer Fad */ \
    for (int j=0; j<p2; ++j) { \
      FadType deriv(p1, rnd.number()); \
      for (int k=0; k<p1; ++k) \
        deriv.fastAccessDx(k) = rnd.number(); \
      x[i].fastAccessDx(j) = deriv; \
    } \
    /* Zeroed receive buffer */ \
    x2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) \
      x2[i].fastAccessDx(j) = zero; \
  } \
  /* Only rank 0 starts out holding the data that is broadcast */ \
  if (comm->getRank() == 0) { \
    x2 = x; \
    x3 = x; \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::broadcast(*comm, 0, n, &x2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(x, x2, fad_name+"<"+#FAD+"> Broadcast", out)); \
  \
  Teuchos::broadcast(*comm, ffts, 0, n, &x3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(x, x3, fad_name+"<"+#FAD+"> Broadcast FTS", out)); \
  \
  success = success1 && success2; \
} \
496
\
497
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_GatherAll ) { \
  /* Gather n nested Fad values from every process into arrays of length */ \
  /* n*size, with the default serialization and with `ffts`. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int size = comm->getSize(); \
  const int rank = comm->getRank(); \
  const int N = n*size; \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  Teuchos::Array<FadFadType> x(n), x2(N), x3(N), x4(N); \
  /* Local contribution: depends on this rank and on the entry index */ \
  for (int i=0; i<n; ++i) { \
    FadType inner(p1, (rank+1)*(i+1)); \
    for (int k=0; k<p1; ++k) \
      inner.fastAccessDx(k) = (rank+1)*(i+1)*(k+1); \
    x[i] = FadFadType(p2, inner); \
    for (int j=0; j<p2; ++j) \
      x[i].fastAccessDx(j) = inner; \
  } \
  /* x2 is the zeroed receive buffer; x3 is the expected gathered result, */ \
  /* built from the same formula for every process index */ \
  for (int proc=0; proc<size; ++proc) { \
    for (int i=0; i<n; ++i) { \
      const int idx = n*proc+i; \
      x2[idx] = FadFadType(p2, zero); \
      for (int j=0; j<p2; ++j) \
        x2[idx].fastAccessDx(j) = zero; \
      FadType inner(p1, (proc+1)*(i+1)); \
      for (int k=0; k<p1; ++k) \
        inner.fastAccessDx(k) = (proc+1)*(i+1)*(k+1); \
      x3[idx] = FadFadType(p2, inner); \
      for (int j=0; j<p2; ++j) \
        x3[idx].fastAccessDx(j) = inner; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(x3, x2, fad_name+"<"+#FAD+"> Gather All", out)); \
  \
  Teuchos::gatherAll(*comm, ffts, n, &x[0], N, &x4[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(x3, x4, fad_name+"<"+#FAD+"> Gather All FTS", out)); \
  \
  success = success1 && success2; \
} \
551
\
552
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SumAll ) { \
  /* Sum-reduce an array of nested Fad values over all processes, once */ \
  /* with the default serialization and once with the nested serializer */ \
  /* `ffts`.  Every process contributes the same x, so the expected sum */ \
  /* is num_proc times x. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  int n = 7; \
  int p1 = 5; \
  int p2 = 5; \
  int num_proc = comm->getSize(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
  /* Input data */ \
  for (int i=0; i<n; i++) { \
    FadType f(p1, 1.0*(i+1)); \
    for (int k=0; k<p1; k++) \
      f.fastAccessDx(k) = 2.0*(i+1); \
    x[i] = FadFadType(p2, f); \
    for (int j=0; j<p2; j++) { \
      x[i].fastAccessDx(j) = f; \
    } \
  } \
  /* Expected result */ \
  for (int i=0; i<n; i++) { \
    FadType f(p1, 1.0*(i+1)*num_proc); \
    for (int k=0; k<p1; k++) \
      f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
    sums[i] = FadFadType(p2, f); \
    for (int j=0; j<p2; j++) \
      sums[i].fastAccessDx(j) = f; \
  } \
  /* Zeroed output buffer for the first reduction */ \
  for (int i=0; i<n; i++) { \
    sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
    for (int j=0; j<p2; j++) \
      sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
  } \
  \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
  bool success1 = checkFadArrays( \
    sums, sums2, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
  success1 = checkResultOnAllProcs(*comm, out, success1); \
  \
  /* The serializer variant carries the "FTS" suffix, like every other */ \
  /* test in this macro, so the two checks can be told apart in the output */ \
  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
  bool success2 = checkFadArrays( \
    sums, sums3, std::string(#FAD)+"<"+#FAD+"> Sum All FTS", out); \
  success2 = checkResultOnAllProcs(*comm, out, success2); \
  \
  success = success1 && success2; \
} \
602
\
603
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MaxAll ) { \
  /* Max-reduce an array of nested Fad values over all processes.  The */ \
  /* local input scales with (rank+1); the expected result with num_proc. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int rank = comm->getRank(); \
  const int num_proc = comm->getSize(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  /* x: input, maxs: expected result, maxs2: zeroed output buffer */ \
  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
  for (int i=0; i<n; ++i) { \
    FadType local(p1, 1.0*(i+1)*(rank+1)); \
    FadType expected(p1, 1.0*(i+1)*num_proc); \
    for (int k=0; k<p1; ++k) { \
      local.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
      expected.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
    } \
    x[i] = FadFadType(p2, local); \
    maxs[i] = FadFadType(p2, expected); \
    maxs2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) { \
      x[i].fastAccessDx(j) = local; \
      maxs[i].fastAccessDx(j) = expected; \
      maxs2[i].fastAccessDx(j) = zero; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(maxs, maxs2, fad_name+"<"+#FAD+"> Max All", out)); \
  \
  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(maxs, maxs3, fad_name+"<"+#FAD+"> Max All FTS", out)); \
  \
  success = success1 && success2; \
} \
654
\
655
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MinAll ) { \
  /* Min-reduce an array of nested Fad values over all processes.  The */ \
  /* local input scales with (rank+1); the expected result uses factor 1. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int rank = comm->getRank(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  /* x: input, mins: expected result, mins2: zeroed output buffer */ \
  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
  for (int i=0; i<n; ++i) { \
    FadType local(p1, 1.0*(i+1)*(rank+1)); \
    FadType expected(p1, 1.0*(i+1)); \
    for (int k=0; k<p1; ++k) { \
      local.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
      expected.fastAccessDx(k) = 2.0*(i+1); \
    } \
    x[i] = FadFadType(p2, local); \
    mins[i] = FadFadType(p2, expected); \
    mins2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) { \
      x[i].fastAccessDx(j) = local; \
      mins[i].fastAccessDx(j) = expected; \
      mins2[i].fastAccessDx(j) = zero; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(mins, mins2, fad_name+"<"+#FAD+"> Min All", out)); \
  \
  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(mins, mins3, fad_name+"<"+#FAD+"> Min All FTS", out)); \
  \
  success = success1 && success2; \
} \
705
\
706
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanSum ) { \
  /* Sum-scan an array of nested Fad values.  Every process contributes */ \
  /* the same x; the expected result on this process is (rank+1) times x. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int rank = comm->getRank(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  /* x: input, sums: expected result, sums2: zeroed output buffer */ \
  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
  for (int i=0; i<n; ++i) { \
    FadType local(p1, 1.0*(i+1)); \
    FadType expected(p1, 1.0*(i+1)*(rank+1)); \
    for (int k=0; k<p1; ++k) { \
      local.fastAccessDx(k) = 2.0*(i+1); \
      expected.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
    } \
    x[i] = FadFadType(p2, local); \
    sums[i] = FadFadType(p2, expected); \
    sums2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) { \
      x[i].fastAccessDx(j) = local; \
      sums[i].fastAccessDx(j) = expected; \
      sums2[i].fastAccessDx(j) = zero; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(sums, sums2, fad_name+"<"+#FAD+"> Scan Sum", out)); \
  \
  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(sums, sums3, fad_name+"<"+#FAD+"> Scan Sum FTS", out)); \
  \
  success = success1 && success2; \
} \
756
\
757
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMax ) { \
  /* Max-scan an array of nested Fad values.  The local input scales with */ \
  /* (rank+1) and the expected result is built from the same formula. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int rank = comm->getRank(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  /* x: input, maxs: expected result, maxs2: zeroed output buffer */ \
  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
  for (int i=0; i<n; ++i) { \
    FadType local(p1, 1.0*(i+1)*(rank+1)); \
    FadType expected(p1, 1.0*(i+1)*(rank+1)); \
    for (int k=0; k<p1; ++k) { \
      local.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
      expected.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
    } \
    x[i] = FadFadType(p2, local); \
    maxs[i] = FadFadType(p2, expected); \
    maxs2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) { \
      x[i].fastAccessDx(j) = local; \
      maxs[i].fastAccessDx(j) = expected; \
      maxs2[i].fastAccessDx(j) = zero; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(maxs, maxs2, fad_name+"<"+#FAD+"> Scan Max", out)); \
  \
  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(maxs, maxs3, fad_name+"<"+#FAD+"> Scan Max FTS", out)); \
  \
  success = success1 && success2; \
} \
807
\
808
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMin ) { \
  /* Min-scan an array of nested Fad values.  The local input scales with */ \
  /* (rank+1); the expected result uses a factor of 1. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int n = 7; \
  const int p1 = 5; \
  const int p2 = 5; \
  const int rank = comm->getRank(); \
  RCP< ValueTypeSerializer<int,FadType> > fts = \
    rcp(new ValueTypeSerializer<int,FadType>( \
          rcp(new ValueTypeSerializer<int,double>), p1)); \
  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
  \
  const FadType zero(p1, 0.0); \
  /* x: input, mins: expected result, mins2: zeroed output buffer */ \
  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
  for (int i=0; i<n; ++i) { \
    FadType local(p1, 1.0*(i+1)*(rank+1)); \
    FadType expected(p1, 1.0*(i+1)); \
    for (int k=0; k<p1; ++k) { \
      local.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
      expected.fastAccessDx(k) = 2.0*(i+1); \
    } \
    x[i] = FadFadType(p2, local); \
    mins[i] = FadFadType(p2, expected); \
    mins2[i] = FadFadType(p2, zero); \
    for (int j=0; j<p2; ++j) { \
      x[i].fastAccessDx(j) = local; \
      mins[i].fastAccessDx(j) = expected; \
      mins2[i].fastAccessDx(j) = zero; \
    } \
  } \
  \
  const std::string fad_name(#FAD); \
  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
  const bool success1 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(mins, mins2, fad_name+"<"+#FAD+"> Scan Min", out)); \
  \
  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
  const bool success2 = checkResultOnAllProcs( \
    *comm, out, \
    checkFadArrays(mins, mins3, fad_name+"<"+#FAD+"> Scan Min FTS", out)); \
  \
  success = success1 && success2; \
} \
858
\
859
TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SendReceive ) { \
  /* Send an array of nested Fad values from rank 0 to rank 1, with the */ \
  /* default serialization and with the nested serializer `ffts`. */ \
  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
    comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_proc = comm->getSize(); \
  if (num_proc <= 1) { \
    /* No second process to exchange data with */ \
    success = true; \
  } \
  else { \
    const int rank = comm->getRank(); \
    const int n = 7; \
    const int p1 = 5; \
    const int p2 = 5; \
    RCP< ValueTypeSerializer<int,FadType> > fts = \
      rcp(new ValueTypeSerializer<int,FadType>( \
            rcp(new ValueTypeSerializer<int,double>), p1)); \
    ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
    \
    const FadType zero(p1, 0.0); \
    Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
    for (int i=0; i<n; ++i) { \
      FadType inner(p1, 1.0*(i+1)); \
      for (int k=0; k<p1; ++k) \
        inner.fastAccessDx(k) = 2.0*(i+1)*(k+1); \
      x[i] = FadFadType(p2, inner); \
      x2[i] = FadFadType(p2, zero); \
      for (int j=0; j<p2; ++j) { \
        x[i].fastAccessDx(j) = inner; \
        x2[i].fastAccessDx(j) = zero; \
      } \
    } \
    /* Every process except the receiver (rank 1) starts with copies of x */ \
    if (rank != 1) { \
      x2 = x; \
      x3 = x; \
    } \
    \
    const std::string fad_name(#FAD); \
    if (rank == 0) \
      Teuchos::send(*comm, n, &x[0], 1); \
    else if (rank == 1) \
      Teuchos::receive(*comm, 0, n, &x2[0]); \
    const bool success1 = checkResultOnAllProcs( \
      *comm, out, \
      checkFadArrays(x, x2, fad_name+"<"+#FAD+"> Send/Receive", out)); \
    \
    if (rank == 0) \
      Teuchos::send(*comm, ffts, n, &x[0], 1); \
    else if (rank == 1) \
      Teuchos::receive(*comm, ffts, 0, n, &x3[0]); \
    const bool success2 = checkResultOnAllProcs( \
      *comm, out, \
      checkFadArrays(x, x3, fad_name+"<"+#FAD+"> Send/Receive FTS", out)); \
    \
    success = success1 && success2; \
  } \
}
911
912
#if defined(HAVE_SACADO_KOKKOS) && defined(HAVE_SACADO_TEUCHOSKOKKOSCOMM)
913
914
#include "Kokkos_Core.hpp"
915
916
#define FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device) \
917
/* Broadcasts a Kokkos view of Fad values from rank 0 and compares what */ \
/* arrives with the locally generated h_x, first without and then with */ \
/* an explicit serializer. */ \
TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_Broadcast ) { \
  const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_entries = 7; \
  const int num_derivs = 5; \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), num_derivs); \
  \
  using DevView = Kokkos::View<FadType*,Device>; \
  using HostView = DevView::HostMirror; \
  /* NOTE(review): the num_derivs+1 extent is presumably the Fad storage */ \
  /* dimension (value plus derivatives) - confirm. */ \
  DevView x("x",num_entries,num_derivs+1), \
          x2("x2",num_entries,num_derivs+1), \
          x3("x3",num_entries,num_derivs+1); \
  HostView h_x = Kokkos::create_mirror_view(x); \
  HostView h_x2 = Kokkos::create_mirror_view(x2); \
  HostView h_x3 = Kokkos::create_mirror_view(x3); \
  for (int i = 0; i < num_entries; ++i) { \
    /* Random value and derivatives for the reference data. */ \
    h_x[i] = FadType(num_derivs, rnd.number()); \
    for (int j = 0; j < num_derivs; ++j) \
      h_x[i].fastAccessDx(j) = rnd.number(); \
    /* Zero-valued entry with num_derivs derivatives in the receive buffer. */ \
    h_x2[i] = FadType(num_derivs, 0.0); \
  } \
  Kokkos::deep_copy(x, h_x); \
  Kokkos::deep_copy(x2, h_x2); \
  \
  /* On rank 0 the broadcast buffers are assigned from the reference views. */ \
  if (comm->getRank() == 0) { \
    x2 = x; \
    x3 = x; \
    h_x2 = h_x; \
    h_x3 = h_x; \
  } \
  \
  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI, so */ \
  /* the device views are handed to them only when HostSpace can access */ \
  /* the memory space of Device; otherwise the host mirrors are used. */ \
  const bool host_accessible = \
    Kokkos::Impl::MemorySpaceAccess< \
      Kokkos::HostSpace, \
      typename Device::memory_space >::accessible; \
  const std::string label(#FAD); \
  \
  /* Pass 1: broadcast without an explicit serializer. */ \
  if (host_accessible) { \
    Teuchos::broadcast(*comm, 0, num_entries, x2); \
    Kokkos::deep_copy(h_x2, x2); \
  } \
  else { \
    Teuchos::broadcast(*comm, 0, num_entries, h_x2); \
  } \
  const bool plain_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_x, h_x2, label+" Broadcast", out)); \
  \
  /* Pass 2: broadcast through the serializer fts. */ \
  if (host_accessible) { \
    Teuchos::broadcast(*comm, fts, 0, num_entries, x3); \
    Kokkos::deep_copy(h_x3, x3); \
  } \
  else { \
    Teuchos::broadcast(*comm, fts, 0, num_entries, h_x3); \
  } \
  const bool serialized_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_x, h_x3, label+" Broadcast FTS", out)); \
  \
  success = plain_ok && serialized_ok; \
} \
979
/* Sum-reduces a Kokkos view of Fad values over all ranks and compares the */ \
/* result with the per-rank data scaled by the number of processes, first */ \
/* without and then with an explicit serializer. */ \
TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_SumAll ) { \
  const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_entries = 7; \
  const int num_derivs = 5; \
  const int num_proc = comm->getSize(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), num_derivs); \
  \
  using DevView = Kokkos::View<FadType*,Device>; \
  using HostView = DevView::HostMirror; \
  DevView x("x",num_entries,num_derivs+1), \
          sums("sums",num_entries,num_derivs+1), \
          sums2("sums2",num_entries,num_derivs+1), \
          sums3("sums3",num_entries,num_derivs+1); \
  HostView h_x = Kokkos::create_mirror_view(x); \
  HostView h_sums = Kokkos::create_mirror_view(sums); \
  HostView h_sums2 = Kokkos::create_mirror_view(sums2); \
  HostView h_sums3 = Kokkos::create_mirror_view(sums3); \
  for (int i = 0; i < num_entries; ++i) { \
    /* Every rank contributes the same entry; the expected sum is that */ \
    /* entry multiplied by num_proc. */ \
    h_x[i] = FadType(num_derivs, 1.0*(i+1)); \
    h_sums[i] = FadType(num_derivs, 1.0*(i+1)*num_proc); \
    for (int j = 0; j < num_derivs; ++j) { \
      h_x[i].fastAccessDx(j) = 2.0*(i+1); \
      h_sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
    } \
    /* Zero-valued entry with num_derivs derivatives in the result buffer. */ \
    h_sums2[i] = FadType(num_derivs, 0.0); \
  } \
  Kokkos::deep_copy(x, h_x); \
  Kokkos::deep_copy(sums, h_sums); \
  Kokkos::deep_copy(sums2, h_sums2); \
  \
  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI, so */ \
  /* the device views are handed to them only when HostSpace can access */ \
  /* the memory space of Device; otherwise the host mirrors are used. */ \
  const bool host_accessible = \
    Kokkos::Impl::MemorySpaceAccess< \
      Kokkos::HostSpace, \
      typename Device::memory_space >::accessible; \
  const std::string label(#FAD); \
  \
  /* Pass 1: reduction without an explicit serializer. */ \
  if (host_accessible) { \
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, num_entries, x, sums2); \
    Kokkos::deep_copy(h_sums2, sums2); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, Teuchos::REDUCE_SUM, num_entries, h_x, h_sums2); \
  } \
  const bool plain_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_sums, h_sums2, label+" Sum All", out)); \
  \
  /* Pass 2: reduction through the serializer fts. */ \
  if (host_accessible) { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_SUM, num_entries, x, sums3); \
    Kokkos::deep_copy(h_sums3, sums3); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_SUM, num_entries, h_x, h_sums3); \
  } \
  const bool serialized_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_sums, h_sums3, label+" Sum All FTS", out)); \
  \
  success = plain_ok && serialized_ok; \
} \
1044
/* Max-reduces a Kokkos view of Fad values whose entries are scaled by */ \
/* rank+1 and compares the result with the entries scaled by the number */ \
/* of processes, first without and then with an explicit serializer. */ \
TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MaxAll ) { \
  const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_entries = 7; \
  const int num_derivs = 5; \
  const int my_rank = comm->getRank(); \
  const int num_proc = comm->getSize(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), num_derivs); \
  \
  using DevView = Kokkos::View<FadType*,Device>; \
  using HostView = DevView::HostMirror; \
  DevView x("x",num_entries,num_derivs+1), \
          maxs("maxs",num_entries,num_derivs+1), \
          maxs2("maxs2",num_entries,num_derivs+1), \
          maxs3("maxs3",num_entries,num_derivs+1); \
  HostView h_x = Kokkos::create_mirror_view(x); \
  HostView h_maxs = Kokkos::create_mirror_view(maxs); \
  HostView h_maxs2 = Kokkos::create_mirror_view(maxs2); \
  HostView h_maxs3 = Kokkos::create_mirror_view(maxs3); \
  for (int i = 0; i < num_entries; ++i) { \
    /* Local data grows with the rank; the expected maximum uses the */ \
    /* factor num_proc, i.e. rank+1 of the highest rank. */ \
    h_x[i] = FadType(num_derivs, 1.0*(i+1)*(my_rank+1)); \
    h_maxs[i] = FadType(num_derivs, 1.0*(i+1)*num_proc); \
    for (int j = 0; j < num_derivs; ++j) { \
      h_x[i].fastAccessDx(j) = 2.0*(i+1)*(my_rank+1); \
      h_maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
    } \
    /* Zero-valued entry with num_derivs derivatives in the result buffer. */ \
    h_maxs2[i] = FadType(num_derivs, 0.0); \
  } \
  Kokkos::deep_copy(x, h_x); \
  Kokkos::deep_copy(maxs, h_maxs); \
  Kokkos::deep_copy(maxs2, h_maxs2); \
  \
  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI, so */ \
  /* the device views are handed to them only when HostSpace can access */ \
  /* the memory space of Device; otherwise the host mirrors are used. */ \
  const bool host_accessible = \
    Kokkos::Impl::MemorySpaceAccess< \
      Kokkos::HostSpace, \
      typename Device::memory_space >::accessible; \
  const std::string label(#FAD); \
  \
  /* Pass 1: reduction without an explicit serializer. */ \
  if (host_accessible) { \
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, num_entries, x, maxs2); \
    Kokkos::deep_copy(h_maxs2, maxs2); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, Teuchos::REDUCE_MAX, num_entries, h_x, h_maxs2); \
  } \
  const bool plain_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_maxs, h_maxs2, label+" Max All", out)); \
  \
  /* Pass 2: reduction through the serializer fts. */ \
  if (host_accessible) { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_MAX, num_entries, x, maxs3); \
    Kokkos::deep_copy(h_maxs3, maxs3); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_MAX, num_entries, h_x, h_maxs3); \
  } \
  const bool serialized_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_maxs, h_maxs3, label+" Max All FTS", out)); \
  \
  success = plain_ok && serialized_ok; \
} \
1110
/* Min-reduces a Kokkos view of Fad values whose entries are scaled by */ \
/* rank+1 and compares the result with the unscaled entries, first */ \
/* without and then with an explicit serializer. */ \
TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MinAll ) { \
  const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  \
  const int num_entries = 7; \
  const int num_derivs = 5; \
  const int my_rank = comm->getRank(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), num_derivs); \
  \
  using DevView = Kokkos::View<FadType*,Device>; \
  using HostView = DevView::HostMirror; \
  DevView x("x",num_entries,num_derivs+1), \
          mins("mins",num_entries,num_derivs+1), \
          mins2("mins2",num_entries,num_derivs+1), \
          mins3("mins3",num_entries,num_derivs+1); \
  HostView h_x = Kokkos::create_mirror_view(x); \
  HostView h_mins = Kokkos::create_mirror_view(mins); \
  HostView h_mins2 = Kokkos::create_mirror_view(mins2); \
  HostView h_mins3 = Kokkos::create_mirror_view(mins3); \
  for (int i = 0; i < num_entries; ++i) { \
    /* Local data grows with the rank; the expected minimum uses the */ \
    /* factor 1, i.e. rank+1 of rank 0. */ \
    h_x[i] = FadType(num_derivs, 1.0*(i+1)*(my_rank+1)); \
    h_mins[i] = FadType(num_derivs, 1.0*(i+1)); \
    for (int j = 0; j < num_derivs; ++j) { \
      h_x[i].fastAccessDx(j) = 2.0*(i+1)*(my_rank+1); \
      h_mins[i].fastAccessDx(j) = 2.0*(i+1); \
    } \
    /* Zero-valued entry with num_derivs derivatives in the result buffer. */ \
    h_mins2[i] = FadType(num_derivs, 0.0); \
  } \
  Kokkos::deep_copy(x, h_x); \
  Kokkos::deep_copy(mins, h_mins); \
  Kokkos::deep_copy(mins2, h_mins2); \
  \
  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI, so */ \
  /* the device views are handed to them only when HostSpace can access */ \
  /* the memory space of Device; otherwise the host mirrors are used. */ \
  const bool host_accessible = \
    Kokkos::Impl::MemorySpaceAccess< \
      Kokkos::HostSpace, \
      typename Device::memory_space >::accessible; \
  const std::string label(#FAD); \
  \
  /* Pass 1: reduction without an explicit serializer. */ \
  if (host_accessible) { \
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, num_entries, x, mins2); \
    Kokkos::deep_copy(h_mins2, mins2); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, Teuchos::REDUCE_MIN, num_entries, h_x, h_mins2); \
  } \
  const bool plain_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_mins, h_mins2, label+" Min All", out)); \
  \
  /* Pass 2: reduction through the serializer fts. */ \
  if (host_accessible) { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_MIN, num_entries, x, mins3); \
    Kokkos::deep_copy(h_mins3, mins3); \
  } \
  else { \
    Teuchos::reduceAll( \
      *comm, fts, Teuchos::REDUCE_MIN, num_entries, h_x, h_mins3); \
  } \
  const bool serialized_ok = checkResultOnAllProcs( \
    *comm, out, checkFadArrays(h_mins, h_mins3, label+" Min All FTS", out)); \
  \
  success = plain_ok && serialized_ok; \
}
1175
1176
// Expands the Kokkos comm tests for Kokkos::OpenMP; expands to nothing
// when KOKKOS_ENABLE_OPENMP is not defined.
#if defined(KOKKOS_ENABLE_OPENMP)
#  define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
     using Kokkos::OpenMP; \
     FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, OpenMP)
#else
#  define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD)
#endif
1183
1184
// Expands the Kokkos comm tests for Kokkos::Threads; expands to nothing
// when KOKKOS_ENABLE_THREADS is not defined.
#if defined(KOKKOS_ENABLE_THREADS)
#  define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
     using Kokkos::Threads; \
     FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Threads)
#else
#  define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD)
#endif
1191
1192
// Expands the Kokkos comm tests for Kokkos::Cuda; expands to nothing
// when KOKKOS_ENABLE_CUDA is not defined.
#if defined(KOKKOS_ENABLE_CUDA)
#  define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
     using Kokkos::Cuda; \
     FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Cuda)
#else
#  define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD)
#endif
1199
1200
// Expands the Kokkos comm tests for Kokkos::Experimental::HIP; expands to
// nothing when KOKKOS_ENABLE_HIP is not defined.
// NOTE(review): FAD_KOKKOS_COMM_TESTS does not list this macro, so it looks
// unused within this file - confirm whether that is intentional.
#if defined(KOKKOS_ENABLE_HIP)
#  define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
     using Kokkos::Experimental::HIP; \
     FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, HIP)
#else
#  define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD)
#endif
1207
1208
// Expands the Kokkos comm tests for Kokkos::Serial; expands to nothing
// when KOKKOS_ENABLE_SERIAL is not defined.
#if defined(KOKKOS_ENABLE_SERIAL)
#  define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD) \
     using Kokkos::Serial; \
     FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Serial)
#else
#  define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
#endif
1215
1216
// Expands the Kokkos comm tests for the OpenMP, Threads, Cuda and Serial
// wrappers, in that order; each wrapper decides on its own whether it
// expands to anything.
// NOTE(review): the HIP wrapper is not part of this list - confirm whether
// that is intentional.
#define FAD_KOKKOS_COMM_TESTS(FadType, FAD) \
  FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
  FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
  FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
  FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1221
1222
#else
1223
1224
// Fallback used when HAVE_SACADO_KOKKOS and HAVE_SACADO_TEUCHOSKOKKOSCOMM
// are not both defined: the Kokkos comm tests expand to nothing.
#define FAD_KOKKOS_COMM_TESTS(FadType, FAD)
1225
1226
#endif
1227
1228
// Instantiates the comm unit tests for one Fad type by expanding
// FAD_BASE_COMM_TESTS. FAD_KOKKOS_COMM_TESTS is not expanded here.
#define FAD_COMM_TESTS(FadType, FAD) FAD_BASE_COMM_TESTS(FadType, FAD)
Ordinal
int Ordinal
Definition
CacheFad_CommTests.cpp:36
checkResultOnAllProcs
bool checkResultOnAllProcs(const Teuchos::Comm< Ordinal > &comm, Teuchos::FancyOStream &out, const bool result)
Definition
Fad_CommTests.hpp:77
checkFadArrays
bool checkFadArrays(const ArrayType &x, const ArrayType &x2, const std::string &tag, Teuchos::FancyOStream &out)
Definition
Fad_CommTests.hpp:43
Sacado_Random.hpp
Sacado_mpl_apply.hpp
FadType
Sacado::Fad::DFad< double > FadType
Definition
blas_example.cpp:49
Sacado::IsEqual::eval
static SACADO_INLINE_FUNCTION bool eval(const T &x, const T &y)
Definition
Sacado_Traits.hpp:417
Generated by
1.17.0