Tpetra parallel linear algebra
Version of the Day
Toggle main menu visibility
Loading...
Searching...
No Matches
core
src
Tpetra_Details_radixSort.hpp
1
/*
2
// @HEADER
3
// ***********************************************************************
4
//
5
// Tpetra: Templated Linear Algebra Services Package
6
// Copyright (2008) Sandia Corporation
7
//
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
10
//
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are
13
// met:
14
//
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
17
//
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
21
//
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
25
//
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
//
38
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39
//
40
// ************************************************************************
41
// @HEADER
42
*/
43
44
#ifndef TPETRA_DETAILS_RADIXSORT_HPP
45
#define TPETRA_DETAILS_RADIXSORT_HPP
46
47
#include "TpetraCore_config.h"
48
#include "Kokkos_Macros.hpp"
49
50
namespace
Tpetra
51
{
52
namespace
Details
53
{
54
66
template
<
typename
KeyType,
typename
ValueType,
typename
IndexType>
67
KOKKOS_INLINE_FUNCTION
void
68
radixSortKeysAndValues
(KeyType* keys, KeyType* keysAux, ValueType* values, ValueType* valuesAux, IndexType n, IndexType upperBound)
69
{
70
if
(n <= 1)
71
return
;
72
KeyType mask = 0xF;
73
bool
inAux =
false
;
74
//maskPos counts the low bit index of mask (0, 4, 8, ...)
75
IndexType maskPos = 0;
76
//Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
77
IndexType sortBits = 0;
78
while
(upperBound)
79
{
80
upperBound >>= 1;
81
sortBits++;
82
}
83
for
(IndexType s = 0; s < (sortBits + 3) / 4; s++)
84
{
85
//Count the number of elements in each bucket
86
IndexType count[16] = {0};
87
IndexType offset[17] = {0};
88
if
(!inAux)
89
{
90
for
(IndexType i = 0; i < n; i++)
91
{
92
count[(keys[i] & mask) >> maskPos]++;
93
}
94
}
95
else
96
{
97
for
(IndexType i = 0; i < n; i++)
98
{
99
count[(keysAux[i] & mask) >> maskPos]++;
100
}
101
}
102
//get offset as the prefix sum for count
103
for
(IndexType i = 0; i < 16; i++)
104
{
105
offset[i + 1] = offset[i] + count[i];
106
}
107
//now for each element in [lo, hi), move it to its offset in the other buffer
108
//this branch should be ok because whichBuf is the same on all threads
109
if
(!inAux)
110
{
111
//copy from *Over to *Aux
112
for
(IndexType i = 0; i < n; i++)
113
{
114
IndexType bucket = (keys[i] & mask) >> maskPos;
115
keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
116
valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
117
count[bucket]--;
118
}
119
}
120
else
121
{
122
//copy from *Aux to *Over
123
for
(IndexType i = 0; i < n; i++)
124
{
125
IndexType bucket = (keysAux[i] & mask) >> maskPos;
126
keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
127
values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
128
count[bucket]--;
129
}
130
}
131
inAux = !inAux;
132
mask = mask << 4;
133
maskPos += 4;
134
}
135
if
(inAux)
136
{
137
//need to deep copy from aux arrays to main
138
for
(IndexType i = 0; i < n; i++)
139
{
140
keys[i] = keysAux[i];
141
values[i] = valuesAux[i];
142
}
143
}
144
}
145
146
}
//namespace Details
147
}
//namespace Tpetra
148
149
#endif
//include guard
150
Tpetra::Details
Nonmember function that computes a residual Computes R = B - A * X.
Definition
Tpetra_KokkosRefactor_Details_MultiVectorLocalDeepCopy.hpp:51
Tpetra::Details::radixSortKeysAndValues
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Definition
Tpetra_Details_radixSort.hpp:68
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Generated by
1.17.0