152 const PadCrsAction action,
153 const RowPtr& row_ptr_beg,
154 const RowPtr& row_ptr_end,
155 Indices& indices_wdv,
157 const Padding& padding,
162 using Kokkos::view_alloc;
163 using Kokkos::WithoutInitializing;
165 std::unique_ptr<std::string> prefix;
167 const size_t maxNumToPrint = verbose ?
170 std::ostringstream os;
171 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
172 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
173 os <<
"Start" << endl;
174 std::cerr << os.str();
176 Kokkos::HostSpace hostSpace;
179 std::ostringstream os;
180 os << *prefix <<
"On input: ";
182 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
184 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
189 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
191 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
194 os <<
", indices.extent(0): " << indices_wdv.extent(0)
195 <<
", values.extent(0): " << values_wdv.extent(0)
199 std::cerr << os.str();
202 if (row_ptr_beg.size() == 0) {
204 std::ostringstream os;
205 os << *prefix <<
"Done; local matrix has no rows" << endl;
206 std::cerr << os.str();
211 const size_t lclNumRows(row_ptr_beg.size() - 1);
212 RowPtr newAllocPerRow =
213 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
214 verbose, prefix.get());
216 std::ostringstream os;
217 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
218 std::cerr << os.str();
224 auto row_ptr_end_h = create_mirror_view(
225 hostSpace, row_ptr_end, verbose, prefix.get());
227 Kokkos::deep_copy(exec_space_instance, row_ptr_end_h, row_ptr_end);
228 auto row_ptr_beg_h = create_mirror_view(
229 hostSpace, row_ptr_beg, verbose, prefix.get());
231 Kokkos::deep_copy(exec_space_instance, row_ptr_beg_h, row_ptr_beg);
239 exec_space_instance.fence();
241 auto newAllocPerRow_h = create_mirror_view(
242 hostSpace, newAllocPerRow, verbose, prefix.get());
243 using host_range_type = Kokkos::RangePolicy<
244 Kokkos::DefaultHostExecutionSpace,
size_t>;
245 Kokkos::parallel_reduce
246 (
"Tpetra::CrsGraph: Compute new allocation size per row",
247 host_range_type(0, lclNumRows),
248 [&] (
const size_t lclRowInd,
size_t& lclIncrease) {
249 const size_t start = row_ptr_beg_h[lclRowInd];
250 const size_t end = row_ptr_beg_h[lclRowInd+1];
251 TEUCHOS_ASSERT( end >= start );
252 const size_t oldAllocSize = end - start;
253 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
254 TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
261 auto result = padding.get_result(lclRowInd);
262 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
263 if (newNumEnt > oldAllocSize) {
264 lclIncrease += (newNumEnt - oldAllocSize);
265 newAllocPerRow_h[lclRowInd] = newNumEnt;
268 newAllocPerRow_h[lclRowInd] = oldAllocSize;
273 std::ostringstream os;
274 os << *prefix <<
"increase: " << increase <<
", ";
278 std::cerr << os.str();
285 Kokkos::deep_copy(
execution_space(), newAllocPerRow, newAllocPerRow_h);
288 using inds_value_type =
289 typename Indices::t_dev::non_const_value_type;
290 using vals_value_type =
typename Values::t_dev::non_const_value_type;
293 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
294 const size_t newIndsSize = size_t(indices_old.size()) + increase;
295 auto indices_new = make_uninitialized_view<typename Indices::t_dev>(
296 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
299 typename Values::t_dev values_new;
300 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
301 if (action == PadCrsAction::INDICES_AND_VALUES) {
302 const size_t newValsSize = newIndsSize;
305 values_new = make_initialized_view<typename Values::t_dev>(
306 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
310 std::ostringstream os;
311 os << *prefix <<
"Repack" << endl;
312 std::cerr << os.str();
315 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
316 Kokkos::parallel_scan(
317 "Tpetra::CrsGraph or CrsMatrix repack",
318 range_type(
size_t(0),
size_t(lclNumRows+1)),
319 KOKKOS_LAMBDA (
const size_t lclRow,
size_t& newRowBeg,
320 const bool finalPass)
325 const size_t row_beg = row_ptr_beg[lclRow];
326 const size_t row_end =
327 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
328 const size_t numEnt = row_end - row_beg;
329 const size_t newRowAllocSize =
330 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
332 if (lclRow < lclNumRows) {
333 const Kokkos::pair<size_t, size_t> oldRange(
334 row_beg, row_beg + numEnt);
335 const Kokkos::pair<size_t, size_t> newRange(
336 newRowBeg, newRowBeg + numEnt);
337 auto oldColInds = Kokkos::subview(indices_old, oldRange);
338 auto newColInds = Kokkos::subview(indices_new, newRange);
341 memcpy(newColInds.data(), oldColInds.data(),
342 numEnt *
sizeof(inds_value_type));
343 if (action == PadCrsAction::INDICES_AND_VALUES) {
345 Kokkos::subview(values_old, oldRange);
346 auto newVals = Kokkos::subview(values_new, newRange);
347 memcpy(newVals.data(), oldVals.data(),
348 numEnt *
sizeof(vals_value_type));
352 row_ptr_beg[lclRow] = newRowBeg;
353 if (lclRow < lclNumRows) {
354 row_ptr_end[lclRow] = newRowBeg + numEnt;
357 newRowBeg += newRowAllocSize;
362 std::ostringstream os;
366 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
368 Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
375 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
377 Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
382 std::cout << os.str();
385 indices_wdv = Indices(indices_new);
386 values_wdv = Values(values_new);
390 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
391 auto values_h = values_wdv.getHostView(Access::ReadOnly);
392 std::ostringstream os;
403 std::ostringstream os;
404 os << *prefix <<
"Done" << endl;
405 std::cerr << os.str();
413 typename Pointers::value_type
const row,
414 Pointers
const& row_ptrs,
415 InOutIndices& cur_indices,
416 size_t& num_assigned,
417 InIndices
const& new_indices,
419 std::function<
void(
size_t const,
size_t const,
size_t const)> cb)
421 if (new_indices.size() == 0) {
425 if (cur_indices.size() == 0) {
427 return Teuchos::OrdinalTraits<size_t>::invalid();
430 using offset_type =
typename std::decay<
decltype (row_ptrs[0])>::type;
431 using ordinal_type =
typename std::decay<
decltype (cur_indices[0])>::type;
433 const offset_type start = row_ptrs[row];
434 offset_type end = start +
static_cast<offset_type
> (num_assigned);
435 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t (0) :
436 row_ptrs[row + 1] - end;
437 const size_t num_new_indices =
static_cast<size_t> (new_indices.size ());
438 size_t num_inserted = 0;
440 size_t numIndicesLookup = num_assigned + num_new_indices;
443 const size_t useLookUpTableThreshold = 400;
445 if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
448 for (
size_t k = 0; k < num_new_indices; ++k) {
449 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
450 offset_type row_offset = start;
451 for (; row_offset < end; ++row_offset) {
452 if (idx == cur_indices[row_offset]) {
457 if (row_offset == end) {
458 if (num_inserted >= num_avail) {
459 return Teuchos::OrdinalTraits<size_t>::invalid();
462 cur_indices[end++] = idx;
466 cb(k, start, row_offset - start);
472 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
475 for (
size_t k = 0; k < num_assigned; k++) {
476 idxLookup[cur_indices[start+k]] = start+k;
480 for (
size_t k = 0; k < num_new_indices; k++) {
481 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
482 offset_type row_offset;
484 auto it = idxLookup.find(idx);
485 if (it == idxLookup.end()) {
486 if (num_inserted >= num_avail) {
487 return Teuchos::OrdinalTraits<size_t>::invalid();
491 cur_indices[end++] = idx;
492 idxLookup[idx] = row_offset;
497 row_offset = it->second;
500 cb(k, start, row_offset - start);
504 num_assigned += num_inserted;