40 #ifndef TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
41 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include "Kokkos_Core.hpp"
79 #ifndef DOXYGEN_SHOULD_SKIP_THIS
89 namespace UnpackAndCombineCrsGraphImpl {
// Unpack a single row from the communication buffer `imports`.
//
// gids_out : receives `num_ent` entries read from imports(offset + k).
// pids_out : if non-empty, receives the next `num_ent` entries
//            (imports(offset + num_ent + k)), each cast to int.
// imports  : packed input buffer.
// num_ent  : number of entries in the row.
//
// NOTE(review): this listing omits some original lines (the return type, the
// `offset` parameter declaration, braces and the return statements). The
// reference text later in this file gives the signature as
// `KOKKOS_FUNCTION int unpackRow(..., const size_t offset, const size_t num_ent)`.
100 template<
class Packet,
class GO,
class Device,
class BufferDevice>
102 unpackRow (
const Kokkos::View<GO*,Device,Kokkos::MemoryUnmanaged>& gids_out,
103 const Kokkos::View<int*,Device,Kokkos::MemoryUnmanaged>& pids_out,
104 const Kokkos::View<const Packet*,BufferDevice>& imports,
106 const size_t num_ent)
108 using size_type =
typename Kokkos::View<GO*,Device>::size_type;
// The row's indices occupy the first num_ent packets starting at `offset`.
116 for (size_type k=0; k<num_ent; k++)
117 gids_out(k) = imports(offset+k);
// Process ranks are read only when the caller passed a non-empty pids_out;
// they are stored immediately after the num_ent indices.
120 if (pids_out.size() > 0) {
121 for (size_type k=0; k<num_ent; k++) {
122 pids_out(k) =
static_cast<int>(imports(offset+num_ent+k));
// Reduction functor that unpacks one imported row per index `i` and appends
// the unpacked indices to the target graph's index array.
//
// The reduction value is a (error code, row) pair; the caller reports it as
// "error code ... for the first bad row ...".
//
// NOTE(review): this listing omits some original lines (the remaining template
// parameters, the struct header, the max_num_ent / unpack_pids member
// declarations, several initializers, braces and return statements).
139 template<
class LocalOrdinal,
146 using LO = LocalOrdinal;
147 using GO =
typename IndicesView::value_type;
148 using packet_type = Packet;
149 using row_ptrs_type = RowView;
150 using indices_type = IndicesView;
151 using buffer_device_type = BufferDevice;
153 using device_type =
typename IndicesView::device_type;
154 using execution_space =
typename device_type::execution_space;
156 using num_packets_per_lid_type = Kokkos::View<const size_t*, buffer_device_type>;
157 using offsets_type = Kokkos::View<const size_t*, device_type>;
158 using input_buffer_type = Kokkos::View<const packet_type*, buffer_device_type>;
159 using import_lids_type = Kokkos::View<const LO*, buffer_device_type>;
161 using gids_scratch_type = Kokkos::View<GO*, device_type>;
162 using pids_scratch_type = Kokkos::View<int*,device_type>;
// Target graph storage: per-row begin/end positions and the index array.
164 row_ptrs_type row_ptrs_beg;
165 row_ptrs_type row_ptrs_end;
166 indices_type indices;
// Packed input: the buffer, per-row packet counts, target rows, and the
// starting position of each row's packets within the buffer.
167 input_buffer_type imports;
168 num_packets_per_lid_type num_packets_per_lid;
169 import_lids_type import_lids;
170 offsets_type offsets;
// Tokens select which slice of the scratch arrays a given call to
// operator() uses (see the acquire/release calls below).
173 Kokkos::Experimental::UniqueToken<execution_space,
174 Kokkos::Experimental::UniqueTokenScope::Global> tokens;
175 gids_scratch_type gids_scratch;
176 pids_scratch_type pids_scratch;
// Reduction result: first = error code (0 means no error), second = row index.
179 using value_type = Kokkos::pair<int, LO>;
// Constructor: stores the input views and allocates scratch space of
// tokens.size() * max_num_ent entries for both indices and process ranks.
182 const row_ptrs_type& row_ptrs_beg_in,
183 const row_ptrs_type& row_ptrs_end_in,
184 const indices_type& indices_in,
185 const input_buffer_type& imports_in,
186 const num_packets_per_lid_type& num_packets_per_lid_in,
187 const import_lids_type& import_lids_in,
188 const offsets_type& offsets_in,
189 const size_t max_num_ent_in,
190 const bool unpack_pids_in) :
191 row_ptrs_beg(row_ptrs_beg_in),
192 row_ptrs_end(row_ptrs_end_in),
195 num_packets_per_lid(num_packets_per_lid_in),
196 import_lids(import_lids_in),
198 max_num_ent(max_num_ent_in),
199 unpack_pids(unpack_pids_in),
200 tokens(execution_space()),
201 gids_scratch(
"gids_scratch", tokens.size() * max_num_ent),
202 pids_scratch(
"pids_scratch", tokens.size() * max_num_ent)
// Reduction identity: error code 0 and an invalid row index.
205 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const
207 using Tpetra::Details::OrdinalTraits;
208 dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
// Combine two partial results. `src` is considered only when it carries a
// valid row; it is preferred when `dst` has no valid row yet or when src's row
// is smaller. NOTE(review): the assignment inside the inner branch is not
// shown in this listing -- presumably dst takes src's values; confirm.
211 KOKKOS_INLINE_FUNCTION
void
212 join(
volatile value_type& dst,
const volatile value_type& src)
const
218 using Tpetra::Details::OrdinalTraits;
219 if (src.second != OrdinalTraits<LO>::invalid()) {
224 if (dst.second == OrdinalTraits<LO>::invalid() ||
225 src.second < dst.second) {
// Unpack the i-th imported row and append its indices to the target row
// import_lids(i). Error codes written to `dst`:
//   1 - unpack_pids is set but the packet count for this row is odd,
//   2 - the row's packets would extend past the end of `imports`,
//   3 - set after the unpackRow call (the guarding condition is missing from
//       this listing; presumably a nonzero `err`).
231 KOKKOS_INLINE_FUNCTION
232 void operator()(
const LO i, value_type& dst)
const
235 using Kokkos::subview;
236 using Kokkos::MemoryUnmanaged;
237 using size_type =
typename execution_space::size_type;
238 using slice =
typename Kokkos::pair<size_type, size_type>;
240 using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
241 using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
// With process ranks packed, each entry uses two packets (index + rank).
243 const size_t num_packets_this_lid = num_packets_per_lid(i);
244 const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2
245 : num_packets_this_lid;
246 if (unpack_pids && num_packets_this_lid%2 != 0) {
249 dst = Kokkos::make_pair(1, i);
// Bounds check of this row's packets against the import buffer.
259 const size_t buf_size = imports.size();
260 const size_t offset = offsets(i);
262 if (offset > buf_size || offset + num_packets_this_lid > buf_size) {
263 dst = Kokkos::make_pair(2, i);
// Scratch slice for this call: [token*max_num_ent, token*max_num_ent+num_ent).
// The rank slice is empty when unpack_pids is false, which makes unpackRow
// skip the rank copy.
270 const size_type token = tokens.acquire();
271 const size_t a =
static_cast<size_t>(token) * max_num_ent;
272 const size_t b = a + num_ent;
273 gids_out_type gids_out = subview(gids_scratch, slice(a, b));
274 pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
276 const int err =
unpackRow (gids_out, pids_out, imports, offset, num_ent);
// Error path: the token is released here as well as on the normal path below.
279 dst = Kokkos::make_pair(3, i);
280 tokens.release(token);
// Append each unpacked index at the current end of the target row and advance
// that row's end position by one.
284 auto import_lid = import_lids(i);
285 for (
size_t k = 0; k < num_ent; ++k) {
286 indices(row_ptrs_end(import_lid)) = gids_out(k);
288 row_ptrs_end(import_lid) += 1;
291 tokens.release(token);
// Perform the unpack operation for the graph: pad the CRS arrays, then run the
// unpack functor over all imported rows and throw std::runtime_error if it
// reports a nonzero error code.
//
// NOTE(review): this listing omits some original lines (return type and name,
// trailing parameters, the range_policy / functor alias heads, the body of the
// empty-input branch, the remaining padCrsArrays arguments, the computation
// that fills `offsets`, and the declaration of max_num_ent). The reference
// text later in this file gives the name as `unpackAndCombine` with trailing
// parameters `const int myRank, const bool verbose`.
302 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node,
303 class RowView,
class IndicesView,
class BufferDevice>
306 (
const RowView& row_ptrs_beg,
307 const RowView& row_ptrs_end,
308 IndicesView& indices,
309 const Kokkos::View<
const GlobalOrdinal*, BufferDevice,
310 Kokkos::MemoryUnmanaged>& imports,
311 const Kokkos::View<
const size_t*, BufferDevice,
312 Kokkos::MemoryUnmanaged>& num_packets_per_lid,
313 const Kokkos::View<
const LocalOrdinal*, BufferDevice,
314 Kokkos::MemoryUnmanaged>& import_lids,
315 const typename CrsGraph<LocalOrdinal, GlobalOrdinal,
316 Node>::padding_type& padding,
317 const bool unpack_pids,
321 using LO = LocalOrdinal;
322 using GO = GlobalOrdinal;
323 using device_type =
typename Node::device_type;
324 using execution_space =
typename BufferDevice::execution_space;
326 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
327 using unpack_functor_type =
330 const char prefix[] =
331 "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: ";
// Special case for zero imported rows; the branch body is not shown in this
// listing (presumably an early return).
333 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
334 if (num_import_lids == 0) {
340 padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding,
// One offset slot per imported row plus one extra.
344 Kokkos::View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
// Max-reduction over the per-row packet counts to find the largest number of
// entries in any imported row (packet count halved when ranks are packed).
351 Kokkos::parallel_reduce
353 range_policy (0, LO (num_packets_per_lid.size ())),
354 KOKKOS_LAMBDA (
const LO i,
size_t& running_max_num_ent) {
355 const size_t num_packets_this_lid = num_packets_per_lid(i);
356 const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2 :
357 num_packets_this_lid;
358 if (num_ent > running_max_num_ent) {
359 running_max_num_ent = num_ent;
361 }, Kokkos::Max<size_t> (max_num_ent));
// max_num_ent is passed to the functor, which uses it to size its scratch
// arrays.
364 unpack_functor_type f (row_ptrs_beg, row_ptrs_end, indices, imports,
365 num_packets_per_lid, import_lids, offsets,
366 max_num_ent, unpack_pids);
// Run the unpack over all imported rows; x receives (error code, row).
368 typename unpack_functor_type::value_type x;
369 Kokkos::parallel_reduce(range_policy(0,
static_cast<LO
>(num_import_lids)), f, x);
370 auto x_h = x.to_std_pair();
371 TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
372 prefix <<
"UnpackAndCombineFunctor reported error code "
373 << x_h.first <<
" for the first bad row " << x_h.second);
// Count entries contributed by three sources: the first num_same_ids local
// rows, the rows listed in permute_from_lids, and the imported rows (each
// contributing num_packets_per_lid(i) / 2 entries).
//
// NOTE(review): this listing omits some original lines (return type and name,
// the declarations of `count` and `num_items`, the heads of the first two
// parallel_reduce calls, braces, and the return). The name is presumably
// unpackAndCombineWithOwningPIDsCount, matching the call made by the public
// wrapper of that name -- confirm against the full file.
376 template<
class Packet,
class LocalGraph,
class BufferDevice>
379 const LocalGraph& local_graph,
380 const Kokkos::View<
const typename LocalGraph::data_type*,
381 typename LocalGraph::device_type,
382 Kokkos::MemoryUnmanaged> permute_from_lids,
383 const Kokkos::View<const Packet*, BufferDevice>& ,
384 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
385 const size_t num_same_ids)
387 using Kokkos::parallel_reduce;
388 using local_graph_type = LocalGraph;
389 using LO =
typename local_graph_type::data_type;
390 using device_type =
typename local_graph_type::device_type;
391 using execution_space =
typename device_type::execution_space;
392 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
// Same IDs: sum the row lengths of local rows 0 .. num_same_ids-1.
398 num_items =
static_cast<LO
>(num_same_ids);
402 range_policy(0, num_items),
403 KOKKOS_LAMBDA(
const LO lid,
size_t& update) {
404 update +=
static_cast<size_t>(local_graph.row_map[lid+1]
405 -local_graph.row_map[lid]);
// Permuted IDs: sum the row lengths of the source rows in permute_from_lids.
411 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
415 range_policy(0, num_items),
416 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
417 const LO lid = permute_from_lids(i);
418 update +=
static_cast<size_t>(local_graph.row_map[lid+1]
419 - local_graph.row_map[lid]);
// Remote IDs: half of each row's packet count is added to the running total.
426 size_t tot_num_ent = 0;
427 parallel_reduce(
"SumReduce",
428 num_packets_per_lid.size(),
429 KOKKOS_LAMBDA(
const int& i,
size_t& lsum) {
430 lsum += num_packets_per_lid(i) / 2;
431 }, Kokkos::Sum<size_t>(tot_num_ent));
432 count += tot_num_ent;
// Setup row pointers for remotes: for every imported row i, atomically add
// num_packets_per_lid(i) / 2 to tgt_rowptr(import_lids(i)).
//
// NOTE(review): this listing omits some original lines (return type and name,
// the range policy argument of parallel_for, closing braces). The reference
// text later in this file gives the name as `setupRowPointersForRemotes`
// returning void.
439 template<
class Packet,
class LO,
class Device,
class BufferDevice>
442 const Kokkos::View<size_t*, Device>& tgt_rowptr,
443 const Kokkos::View<const LO*, BufferDevice>& import_lids,
444 const Kokkos::View<const Packet*, BufferDevice>& ,
445 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid)
447 using Kokkos::parallel_reduce;
448 using device_type = Device;
449 using execution_space =
typename device_type::execution_space;
450 using size_type =
typename Kokkos::View<size_t*,device_type>::size_type;
451 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
453 const size_type N = num_packets_per_lid.extent(0);
// The add is atomic, so several imports may update the same target row.
454 parallel_for(
"Setup row pointers for remotes",
456 KOKKOS_LAMBDA(
const size_t i){
457 using atomic_incr_type =
typename std::remove_reference<decltype(tgt_rowptr(0))>::type;
458 const size_t num_packets_this_lid = num_packets_per_lid(i);
459 const size_t num_ent = num_packets_this_lid / 2;
460 Kokkos::atomic_fetch_add(&tgt_rowptr(import_lids(i)), atomic_incr_type(num_ent));
// Parallel scan over i in [0, new_start_row.extent(0)). In the visible lines,
// tgt_rowptr(i) is overwritten with the running scan value `update`, and that
// value is also copied into new_start_row(i).
//
// NOTE(review): this listing omits some original lines (return type, the
// parallel_scan call head, the use of `final`, and the statement that advances
// `update`). Presumably `update` accumulates cur_val, turning per-row lengths
// into row offsets -- confirm against the full file.
465 template<
class Device>
467 makeCrsRowPtrFromLengths(
468 const Kokkos::View<size_t*,Device,Kokkos::MemoryUnmanaged>& tgt_rowptr,
469 const Kokkos::View<size_t*,Device>& new_start_row)
471 using Kokkos::parallel_scan;
472 using device_type = Device;
473 using execution_space =
typename device_type::execution_space;
474 using size_type =
typename Kokkos::View<size_t*,device_type>::size_type;
475 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
476 const size_type N = new_start_row.extent(0);
479 KOKKOS_LAMBDA(
const size_t& i,
size_t& update,
const bool&
final) {
// The old value is read before the slot is overwritten.
480 auto cur_val = tgt_rowptr(i);
482 tgt_rowptr(i) = update;
483 new_start_row(i) = tgt_rowptr(i);
// Copy the first num_same_ids rows of local_graph into the target arrays.
// For row i (source and target row index are both i):
//   - new_start_row(i) is atomically advanced by the row's length,
//   - each column index is converted with local_col_map.getGlobalElement and
//     written to tgt_colind starting at tgt_rowptr(i),
//   - tgt_pids gets src_pids(src_col), or -1 when that equals my_pid.
//
// NOTE(review): this listing omits some original lines (return type and name,
// the `my_pid` parameter, the parallel_for call head, closing braces). The
// name is taken from the call `copyDataFromSameIDs<LocalGraph,LocalMap>(...)`
// elsewhere in this file.
490 template<
class LocalGraph,
class LocalMap>
493 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
494 typename LocalMap::device_type>& tgt_colind,
495 const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
496 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
497 const Kokkos::View<size_t*, typename LocalMap::device_type>& tgt_rowptr,
498 const Kokkos::View<const int*, typename LocalMap::device_type>& src_pids,
499 const LocalGraph& local_graph,
500 const LocalMap& local_col_map,
501 const size_t num_same_ids,
504 using Kokkos::parallel_for;
505 using device_type =
typename LocalMap::device_type;
506 using LO =
typename LocalMap::local_ordinal_type;
507 using execution_space =
typename device_type::execution_space;
508 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
511 range_policy(0, num_same_ids),
512 KOKKOS_LAMBDA(
const size_t i) {
513 using atomic_incr_type =
typename std::remove_reference<decltype(new_start_row(0))>::type;
515 const LO src_lid =
static_cast<LO
>(i);
516 size_t src_row = local_graph.row_map(src_lid);
518 const LO tgt_lid =
static_cast<LO
>(i);
519 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr = number of entries in the source row.
521 const size_t nsr = local_graph.row_map(src_lid+1)
522 - local_graph.row_map(src_lid);
523 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// (j - src_row) is the position within the row; it is added to tgt_row to get
// the destination slot.
525 for (
size_t j=local_graph.row_map(src_lid);
526 j<local_graph.row_map(src_lid+1); ++j) {
527 LO src_col = local_graph.entries(j);
528 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
529 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// Copy permuted rows of local_graph into the target arrays.
// For each i, source row permute_from_lids(i) is copied into target row
// permute_to_lids(i):
//   - new_start_row(tgt_lid) is atomically advanced by the source row length,
//   - each column index is converted with local_col_map.getGlobalElement and
//     written to tgt_colind starting at tgt_rowptr(tgt_lid),
//   - tgt_pids gets src_pids(src_col), or -1 when that equals my_pid.
//
// NOTE(review): this listing omits some original lines (return type, the
// `my_pid` parameter, the parallel_for call head, closing braces).
535 template<
class LocalGraph,
class LocalMap,
class BufferDevice>
537 copyDataFromPermuteIDs(
538 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
539 typename LocalMap::device_type>& tgt_colind,
540 const Kokkos::View<
int*,
541 typename LocalMap::device_type>& tgt_pids,
542 const Kokkos::View<
size_t*,
543 typename LocalMap::device_type>& new_start_row,
544 const Kokkos::View<
size_t*,
545 typename LocalMap::device_type>& tgt_rowptr,
546 const Kokkos::View<
const int*,
547 typename LocalMap::device_type>& src_pids,
548 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
549 BufferDevice, Kokkos::MemoryUnmanaged>& permute_to_lids,
550 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
551 BufferDevice, Kokkos::MemoryUnmanaged>& permute_from_lids,
552 const LocalGraph& local_graph,
553 const LocalMap& local_col_map,
556 using Kokkos::parallel_for;
557 using device_type =
typename LocalMap::device_type;
558 using LO =
typename LocalMap::local_ordinal_type;
559 using execution_space =
typename device_type::execution_space;
560 using size_type =
typename Kokkos::View<LO*,device_type>::size_type;
561 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
563 const size_type num_permute_to_lids = permute_to_lids.extent(0);
566 range_policy(0, num_permute_to_lids),
567 KOKKOS_LAMBDA(
const size_t i) {
568 using atomic_incr_type =
typename std::remove_reference<decltype(new_start_row(0))>::type;
570 const LO src_lid = permute_from_lids(i);
571 const size_t src_row = local_graph.row_map(src_lid);
573 const LO tgt_lid = permute_to_lids(i);
574 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr = number of entries in the source row.
576 size_t nsr = local_graph.row_map(src_lid+1)
577 - local_graph.row_map(src_lid);
578 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// (j - src_row) is the position within the row; it is added to tgt_row to get
// the destination slot.
580 for (
size_t j=local_graph.row_map(src_lid);
581 j<local_graph.row_map(src_lid+1); ++j) {
582 LO src_col = local_graph.entries(j);
583 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
584 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// Unpack every imported row directly into the target CRS arrays.
// For import i: reserve num_packets_per_lid(i)/2 slots in row import_lids(i)
// by atomically advancing new_start_row, unpack indices and process ranks
// into those slots via unpackRow, then replace any rank equal to my_pid by -1.
// Return values of unpackRow are summed into the reduction variable; a
// nonzero gbl_err_count raises std::invalid_argument.
//
// NOTE(review): this listing omits some original lines (return type, part of
// the import_lids parameter, the trailing parameters including `my_pid`, the
// declaration of gbl_err_count and the reduction's result argument, braces).
590 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDevice>
592 unpackAndCombineIntoCrsArrays2(
593 const Kokkos::View<typename LocalMap::global_ordinal_type*, typename LocalMap::device_type>& tgt_colind,
594 const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
595 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
596 const Kokkos::View<const size_t*, typename LocalMap::device_type>& offsets,
598 const typename LocalMap::local_ordinal_type*,
600 Kokkos::MemoryUnmanaged>& import_lids,
601 const Kokkos::View<const Packet*, BufferDevice>& imports,
602 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
608 using Kokkos::subview;
609 using Kokkos::MemoryUnmanaged;
610 using Kokkos::parallel_reduce;
611 using Kokkos::atomic_fetch_add;
613 using device_type =
typename LocalMap::device_type;
614 using LO =
typename LocalMap::local_ordinal_type;
615 using GO =
typename LocalMap::global_ordinal_type;
616 using execution_space =
typename device_type::execution_space;
617 using size_type =
typename Kokkos::View<LO*, device_type>::size_type;
618 using slice =
typename Kokkos::pair<size_type, size_type>;
619 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
621 using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
622 using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
624 const size_type num_import_lids = import_lids.size();
625 const char prefix[] =
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays2: ";
629 parallel_reduce(
"Unpack and combine into CRS",
630 range_policy(0, num_import_lids),
631 KOKKOS_LAMBDA(
const size_t i,
int& err) {
632 using atomic_incr_type =
typename std::remove_reference< decltype( new_start_row(0) )>::type;
// Each entry occupies two packets here, so the entry count is half the
// packet count.
633 const size_t num_packets_this_lid = num_packets_per_lid(i);
634 const size_t num_ent = num_packets_this_lid / 2;
635 const size_t offset = offsets(i);
636 const LO lcl_row = import_lids(i);
// atomic_fetch_add returns the previous value, which is the first free slot
// of the target row; [start_row, end_row) is now reserved for this import.
637 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
638 const size_t end_row = start_row + num_ent;
// Unpack straight into the reserved slices of the output arrays.
640 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
641 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
643 err +=
unpackRow (gids_out, pids_out, imports, offset, num_ent);
// Entries whose rank equals my_pid are marked with -1.
646 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
647 const int pid = pids_out(j);
648 pids_out(j) = (pid != my_pid) ? pid : -1;
652 TEUCHOS_TEST_FOR_EXCEPTION(gbl_err_count != 0,
653 std::invalid_argument, prefix <<
654 "Attempting to unpack PIDs, but num_ent is not even; this should never "
655 "happen! Please report this bug to the Tpetra developers.");
// Build the target CRS arrays (tgt_rowptr, tgt_colind, tgt_pids) from three
// sources: same-ID rows, permuted rows, and imported (remote) rows.
//
// Visible steps, in order:
//   1. store per-row lengths in tgt_rowptr for same-ID and permuted rows,
//   2. add the remote row lengths (setupRowPointersForRemotes),
//   3. run makeCrsRowPtrFromLengths on tgt_rowptr and new_start_row,
//   4. check that tgt_rowptr's last entry equals tgt_num_nonzeros,
//   5. copy same-ID and permuted rows, then unpack the remote rows.
//
// NOTE(review): this listing omits some original lines (return type and name,
// parts of three parameter declarations, parallel_for call heads, the body of
// the first loop over [0, N+1), the computation that fills `offsets`, the
// matching #endif, and the body of the empty-imports branch). The name is
// taken from the `prefix` string below.
660 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDevice>
663 const LocalGraph & local_graph,
664 const LocalMap & local_col_map,
665 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
667 Kokkos::MemoryUnmanaged>& import_lids,
668 const Kokkos::View<const Packet*, BufferDevice>& imports,
669 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
670 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
672 Kokkos::MemoryUnmanaged>& permute_to_lids,
673 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
675 Kokkos::MemoryUnmanaged>& permute_from_lids,
676 const Kokkos::View<
size_t*,
677 typename LocalMap::device_type,
678 Kokkos::MemoryUnmanaged>& tgt_rowptr,
679 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
680 typename LocalMap::device_type,
681 Kokkos::MemoryUnmanaged>& tgt_colind,
682 const Kokkos::View<
const int*,
683 typename LocalMap::device_type,
684 Kokkos::MemoryUnmanaged>& src_pids,
685 const Kokkos::View<
int*,
686 typename LocalMap::device_type,
687 Kokkos::MemoryUnmanaged>& tgt_pids,
688 const size_t num_same_ids,
689 const size_t tgt_num_rows,
690 const size_t tgt_num_nonzeros,
691 const int my_tgt_pid)
694 using Kokkos::subview;
695 using Kokkos::parallel_for;
696 using Kokkos::MemoryUnmanaged;
697 using packet_type = Packet;
698 using local_map_type = LocalMap;
699 using local_graph_type = LocalGraph;
700 using buffer_device_type = BufferDevice;
701 using device_type =
typename LocalMap::device_type;
702 using LO =
typename LocalMap::local_ordinal_type;
703 using execution_space =
typename device_type::execution_space;
704 using size_type =
typename Kokkos::View<LO*, device_type>::size_type;
705 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
707 const char prefix[] =
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays: ";
709 const size_t N = tgt_num_rows;
710 const size_t mynnz = tgt_num_nonzeros;
714 const int my_pid = my_tgt_pid;
// Loop over all N+1 row-pointer slots; its body is not shown in this listing
// (presumably it zeroes tgt_rowptr -- confirm).
723 range_policy(0, N+1),
724 KOKKOS_LAMBDA(
const size_t i) {
// Same IDs: target row i gets the length of source row i.
731 range_policy(0, num_same_ids),
732 KOKKOS_LAMBDA(
const size_t i) {
733 const LO tgt_lid =
static_cast<LO
>(i);
734 const LO src_lid =
static_cast<LO
>(i);
735 tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
736 - local_graph.row_map(src_lid);
// Permuted IDs: target row permute_to_lids(i) gets the length of source row
// permute_from_lids(i).
741 const size_type num_permute_to_lids = permute_to_lids.extent(0);
743 range_policy(0, num_permute_to_lids),
744 KOKKOS_LAMBDA(
const size_t i) {
745 const LO tgt_lid = permute_to_lids(i);
746 const LO src_lid = permute_from_lids(i);
747 tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
748 - local_graph.row_map(src_lid);
// One offset slot per imported row plus one extra.
753 const size_type num_import_lids = import_lids.extent(0);
754 View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
// Debug-only consistency check: the final offset must equal the length of the
// import buffer.
757 #ifdef HAVE_TPETRA_DEBUG
759 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
760 const bool condition =
761 nth_offset_h !=
static_cast<size_t>(imports.extent(0));
762 TEUCHOS_TEST_FOR_EXCEPTION
763 (condition, std::logic_error, prefix
764 <<
"The final offset in bytes " << nth_offset_h
765 <<
" != imports.size() = " << imports.extent(0)
766 <<
". Please report this bug to the Tpetra developers.");
// Remote IDs: add each imported row's entry count to its target row.
771 setupRowPointersForRemotes<packet_type,LO,device_type,buffer_device_type>(
772 tgt_rowptr, import_lids, imports, num_packets_per_lid);
// new_start_row is later advanced by the copy/unpack routines as they write
// into each row.
776 View<size_t*, device_type> new_start_row(
"new_start_row", N+1);
779 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
// The last row pointer must equal the expected number of nonzeros.
781 auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
782 bool condition = nth_tgt_rowptr_h != mynnz;
783 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
784 prefix <<
"CRS_rowptr[last] = " <<
785 nth_tgt_rowptr_h <<
"!= mynnz = " << mynnz <<
".");
// Fill tgt_colind / tgt_pids from the local graph (same IDs, then permutes).
789 copyDataFromSameIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
790 tgt_rowptr, src_pids, local_graph, local_col_map, num_same_ids, my_pid);
792 copyDataFromPermuteIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
793 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
794 local_graph, local_col_map, my_pid);
// Empty import buffer: the branch body is not shown in this listing
// (presumably an early return).
796 if (imports.extent(0) <= 0) {
// Finally unpack the imported rows into the remaining slots.
800 unpackAndCombineIntoCrsArrays2<
801 packet_type,local_graph_type,local_map_type,buffer_device_type>(
802 tgt_colind, tgt_pids, new_start_row, offsets, import_lids, imports,
803 num_packets_per_lid, local_graph, local_col_map, my_pid);
// Host-facing wrapper: validates the argument sizes, wraps the Teuchos
// ArrayViews as views via create_mirror_view_from_raw_host_array-style calls
// (the callee names are missing from this listing), and forwards to the
// Impl-level counting routine.
//
// Throws std::invalid_argument when
//   - permuteToLIDs and permuteFromLIDs differ in size,
//   - the source graph is not locally indexed,
//   - importLIDs and numPacketsPerLID differ in size.
//
// NOTE(review): this listing omits some original lines (return type and name,
// several parameters, type aliases, the definitions of locallyIndexed /
// local_graph / imports_d, and the call heads of the wrapping helpers). The
// name is taken from the `prefix` string below.
859 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
863 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
865 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
870 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
871 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
873 using Kokkos::MemoryUnmanaged;
875 using device_type =
typename Node::device_type;
879 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
881 TEUCHOS_TEST_FOR_EXCEPTION
882 (permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
883 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size() <<
" != "
884 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
888 TEUCHOS_TEST_FOR_EXCEPTION
889 (! locallyIndexed, std::invalid_argument, prefix <<
"The input "
890 "CrsGraph 'sourceGraph' must be locally indexed.");
891 TEUCHOS_TEST_FOR_EXCEPTION
892 (importLIDs.size() != numPacketsPerLID.size(), std::invalid_argument,
893 prefix <<
"importLIDs.size() = " << importLIDs.size() <<
" != "
894 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
// Wrap the host arrays for use by the kernels; the `true` argument is
// presumably the `copy` flag of create_mirror_view_from_raw_host_array.
897 auto permute_from_lids_d =
899 permuteFromLIDs.getRawPtr(),
900 permuteFromLIDs.size(),
true,
901 "permute_from_lids");
905 imports.size(),
true,
907 auto num_packets_per_lid_d =
909 numPacketsPerLID.getRawPtr(),
910 numPacketsPerLID.size(),
true,
911 "num_packets_per_lid");
// Delegate the actual counting to the Impl-level routine.
914 packet_type,local_graph_type,buffer_device_type>(
915 local_graph, permute_from_lids_d, imports_d, num_packets_per_lid_d, numSameIDs);
// Host-facing wrapper around the Impl-level unpackAndCombineIntoCrsArrays.
//
// Visible steps:
//   1. validate sizes (CRS_rowptr vs TargetNumRows+1, permute lists,
//      importLIDs vs numPacketsPerLID); each failure throws
//      std::invalid_argument,
//   2. resize TargetPids to TargetNumNonzeros if needed and fill it with -1,
//   3. wrap all host arrays as views on the device / buffer device,
//   4. call the Impl routine,
//   5. construct host views over the caller's CRS_rowptr, CRS_colind and
//      TargetPids storage (the copies back are presumably in lines missing
//      from this listing -- confirm).
//
// NOTE(review): this listing omits some original lines (return type and name,
// several parameters, the crs_graph_type alias, the definition of local_graph,
// the call heads of the wrapping helpers, and the deep copies). The name is
// taken from the `prefix` string below.
931 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
935 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
937 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
941 const size_t numSameIDs,
942 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
943 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
944 size_t TargetNumRows,
945 size_t TargetNumNonzeros,
946 const int MyTargetPID,
947 const Teuchos::ArrayView<size_t>& CRS_rowptr,
948 const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
949 const Teuchos::ArrayView<const int>& SourcePids,
950 Teuchos::Array<int>& TargetPids)
954 using Teuchos::outArg;
955 using Teuchos::REDUCE_MAX;
956 using Teuchos::reduceAll;
957 using LO = LocalOrdinal;
958 using GO = GlobalOrdinal;
960 using packet_type =
typename crs_graph_type::packet_type;
961 using local_graph_type =
typename crs_graph_type::local_graph_type;
962 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
963 using device_type =
typename Node::device_type;
964 using size_type =
typename Teuchos::ArrayView<const LO>::size_type;
966 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
// Argument validation.
968 TEUCHOS_TEST_FOR_EXCEPTION(
969 TargetNumRows + 1 !=
static_cast<size_t>(CRS_rowptr.size()),
970 std::invalid_argument, prefix <<
"CRS_rowptr.size() = " <<
971 CRS_rowptr.size() <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
973 TEUCHOS_TEST_FOR_EXCEPTION(
974 permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
975 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size()
976 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
977 const size_type numImportLIDs = importLIDs.size();
979 TEUCHOS_TEST_FOR_EXCEPTION(
980 numImportLIDs != numPacketsPerLID.size(), std::invalid_argument,
981 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != "
982 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
// Make TargetPids hold exactly TargetNumNonzeros entries, all set to -1.
985 if (
static_cast<size_t>(TargetPids.size()) != TargetNumNonzeros) {
986 TargetPids.resize(TargetNumNonzeros);
988 TargetPids.assign(TargetNumNonzeros, -1);
992 auto local_col_map = sourceGraph.
getColMap()->getLocalMap();
// Instances used only to select the output device of the wrapping helper.
995 device_type outputDevice;
996 buffer_device_type bufferOutputDevice;
// Wrap each host array as a view; the `true` argument is presumably the
// `copy` flag of create_mirror_view_from_raw_host_array.
998 Kokkos::View<const LO*, buffer_device_type> import_lids_d =
1000 (bufferOutputDevice, importLIDs.getRawPtr(),
1001 importLIDs.size(),
true,
"import_lids");
1003 Kokkos::View<const packet_type*, buffer_device_type> imports_d =
1005 (bufferOutputDevice, imports.getRawPtr(),
1006 imports.size(),
true,
"imports");
1008 Kokkos::View<const size_t*, buffer_device_type> num_packets_per_lid_d =
1010 numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
1011 true,
"num_packets_per_lid");
1013 Kokkos::View<const LO*, buffer_device_type> permute_to_lids_d =
1015 permuteToLIDs.getRawPtr(), permuteToLIDs.size(),
1016 true,
"permute_to_lids");
1018 Kokkos::View<const LO*, buffer_device_type> permute_from_lids_d =
1020 permuteFromLIDs.getRawPtr(), permuteFromLIDs.size(),
1021 true,
"permute_from_lids");
1023 Kokkos::View<size_t*, device_type> crs_rowptr_d =
1025 CRS_rowptr.getRawPtr(), CRS_rowptr.size(),
1026 true,
"crs_rowptr");
1028 Kokkos::View<GO*, device_type> crs_colind_d =
1030 CRS_colind.getRawPtr(), CRS_colind.size(),
1031 true,
"crs_colidx");
1033 Kokkos::View<const int*, device_type> src_pids_d =
1035 SourcePids.getRawPtr(), SourcePids.size(),
1038 Kokkos::View<int*, device_type> tgt_pids_d =
1040 TargetPids.getRawPtr(), TargetPids.size(),
// Run the Impl-level routine on the wrapped views.
1043 using local_map_type = decltype(local_col_map);
1045 packet_type,local_graph_type,local_map_type,buffer_device_type>(
1046 local_graph, local_col_map, import_lids_d, imports_d, num_packets_per_lid_d,
1047 permute_to_lids_d, permute_from_lids_d, crs_rowptr_d, crs_colind_d, src_pids_d,
1048 tgt_pids_d, numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID);
// Host views constructed directly over the caller's output storage.
1053 typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1054 CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1057 typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1058 CRS_colind.getRawPtr(), CRS_colind.size());
1061 typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1062 TargetPids.getRawPtr(), TargetPids.size());
// Macro taking (LO, GO, NT) whose expansion names
// Details::unpackAndCombineIntoCrsArrays<LO, GO, NT> and
// Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT> with their full
// parameter lists -- presumably explicit template instantiations.
// NOTE(review): this listing omits some of the macro's continuation lines
// (e.g. the `template` / return-type lines and a few parameters).
1070 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT( LO, GO, NT ) \
1072 Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>( \
1073 const CrsGraph<LO, GO, NT> &, \
1074 const Teuchos::ArrayView<const LO>&, \
1075 const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type>&, \
1076 const Teuchos::ArrayView<const size_t>&, \
1079 const CombineMode, \
1081 const Teuchos::ArrayView<const LO>&, \
1082 const Teuchos::ArrayView<const LO>&, \
1086 const Teuchos::ArrayView<size_t>&, \
1087 const Teuchos::ArrayView<GO>&, \
1088 const Teuchos::ArrayView<const int>&, \
1089 Teuchos::Array<int>&); \
1091 Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>( \
1092 const CrsGraph<LO, GO, NT> &, \
1093 const Teuchos::ArrayView<const LO> &, \
1094 const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type> &, \
1095 const Teuchos::ArrayView<const size_t>&, \
1100 const Teuchos::ArrayView<const LO>&, \
1101 const Teuchos::ArrayView<const LO>&);
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsetsFromConstantCount.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Functions for manipulating CRS arrays.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
KOKKOS_FUNCTION int unpackRow(const Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, const Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
void setupRowPointersForRemotes(const Kokkos::View< size_t *, Device > &tgt_rowptr, const Kokkos::View< const LO *, BufferDevice > &import_lids, const Kokkos::View< const Packet *, BufferDevice > &, const Kokkos::View< const size_t *, BufferDevice > &num_packets_per_lid)
Setup row pointers for remotes.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_graph_type getLocalGraph() const
Get the local graph.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
Unpacks and combines a single row of the CrsGraph.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &sourceGraph, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t, Distributor &, CombineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &sourceGraph, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t, Distributor &, const CombineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GlobalOrdinal > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.