Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_unpackCrsGraphAndCombine_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
41 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
51 #include "Tpetra_CrsGraph_decl.hpp"
54 #include "Kokkos_Core.hpp"
55 #include <memory>
56 #include <string>
57 
76 
77 namespace Tpetra {
78 
79 #ifndef DOXYGEN_SHOULD_SKIP_THIS
80 // Forward declaration of Distributor
81 class Distributor;
82 #endif // DOXYGEN_SHOULD_SKIP_THIS
83 
84 //
85 // Users must never rely on anything in the Details namespace.
86 //
87 namespace Details {
88 
89 namespace UnpackAndCombineCrsGraphImpl {
90 
100 template<class Packet, class GO, class Device, class BufferDevice>
101 KOKKOS_FUNCTION int
102 unpackRow (const Kokkos::View<GO*,Device,Kokkos::MemoryUnmanaged>& gids_out,
103  const Kokkos::View<int*,Device,Kokkos::MemoryUnmanaged>& pids_out,
104  const Kokkos::View<const Packet*,BufferDevice>& imports,
105  const size_t offset,
106  const size_t num_ent)
107 {
108  using size_type = typename Kokkos::View<GO*,Device>::size_type;
109 
110  if (num_ent == 0) {
111  // Empty rows always take zero bytes, to ensure sparsity.
112  return 0;
113  }
114 
115  // Unpack GIDs
116  for (size_type k=0; k<num_ent; k++)
117  gids_out(k) = imports(offset+k);
118 
119  // Unpack PIDs
120  if (pids_out.size() > 0) {
121  for (size_type k=0; k<num_ent; k++) {
122  pids_out(k) = static_cast<int>(imports(offset+num_ent+k));
123  }
124  }
125 
126  return 0;
127 }
128 
139 template<class LocalOrdinal,
140  class Packet,
141  class RowView,
142  class IndicesView,
143  class BufferDevice>
145 
146  using LO = LocalOrdinal;
147  using GO = typename IndicesView::value_type;
148  using packet_type = Packet;
149  using row_ptrs_type = RowView;
150  using indices_type = IndicesView;
151  using buffer_device_type = BufferDevice;
152 
153  using device_type = typename IndicesView::device_type;
154  using execution_space = typename device_type::execution_space;
155 
156  using num_packets_per_lid_type = Kokkos::View<const size_t*, buffer_device_type>;
157  using offsets_type = Kokkos::View<const size_t*, device_type>;
158  using input_buffer_type = Kokkos::View<const packet_type*, buffer_device_type>;
159  using import_lids_type = Kokkos::View<const LO*, buffer_device_type>;
160 
161  using gids_scratch_type = Kokkos::View<GO*, device_type>;
162  using pids_scratch_type = Kokkos::View<int*,device_type>;
163 
164  row_ptrs_type row_ptrs_beg;
165  row_ptrs_type row_ptrs_end;
166  indices_type indices;
167  input_buffer_type imports;
168  num_packets_per_lid_type num_packets_per_lid;
169  import_lids_type import_lids;
170  offsets_type offsets;
171  size_t max_num_ent;
172  bool unpack_pids;
173  Kokkos::Experimental::UniqueToken<execution_space,
174  Kokkos::Experimental::UniqueTokenScope::Global> tokens;
175  gids_scratch_type gids_scratch;
176  pids_scratch_type pids_scratch;
177 
178  public:
179  using value_type = Kokkos::pair<int, LO>;
180 
182  const row_ptrs_type& row_ptrs_beg_in,
183  const row_ptrs_type& row_ptrs_end_in,
184  const indices_type& indices_in,
185  const input_buffer_type& imports_in,
186  const num_packets_per_lid_type& num_packets_per_lid_in,
187  const import_lids_type& import_lids_in,
188  const offsets_type& offsets_in,
189  const size_t max_num_ent_in,
190  const bool unpack_pids_in) :
191  row_ptrs_beg(row_ptrs_beg_in),
192  row_ptrs_end(row_ptrs_end_in),
193  indices(indices_in),
194  imports(imports_in),
195  num_packets_per_lid(num_packets_per_lid_in),
196  import_lids(import_lids_in),
197  offsets(offsets_in),
198  max_num_ent(max_num_ent_in),
199  unpack_pids(unpack_pids_in),
200  tokens(execution_space()),
201  gids_scratch("gids_scratch", tokens.size() * max_num_ent),
202  pids_scratch("pids_scratch", tokens.size() * max_num_ent)
203  {}
204 
205  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const
206  {
207  using Tpetra::Details::OrdinalTraits;
208  dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
209  }
210 
211  KOKKOS_INLINE_FUNCTION void
212  join(volatile value_type& dst, const volatile value_type& src) const
213  {
214  // `dst` should reflect the first (least) bad index and
215  // all other associated error codes and data. Thus, we need only
216  // check if the `src` object shows an error and if its associated
217  // bad index is less than `dst`'s bad index.
218  using Tpetra::Details::OrdinalTraits;
219  if (src.second != OrdinalTraits<LO>::invalid()) {
220  // An error in the src; check if
221  // 1. `dst` shows errors
222  // 2. If `dst` does show errors, if src's bad index is less than
223  // *this' bad index
224  if (dst.second == OrdinalTraits<LO>::invalid() ||
225  src.second < dst.second) {
226  dst = src;
227  }
228  }
229  }
230 
231  KOKKOS_INLINE_FUNCTION
232  void operator()(const LO i, value_type& dst) const
233  {
234  using Kokkos::View;
235  using Kokkos::subview;
236  using Kokkos::MemoryUnmanaged;
237  using size_type = typename execution_space::size_type;
238  using slice = typename Kokkos::pair<size_type, size_type>;
239 
240  using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
241  using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
242 
243  const size_t num_packets_this_lid = num_packets_per_lid(i);
244  const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2
245  : num_packets_this_lid;
246  if (unpack_pids && num_packets_this_lid%2 != 0) {
247  // Attempting to unpack PIDs, but num_packets_this_lid is not even; this
248  // should never
249  dst = Kokkos::make_pair(1, i);
250  return;
251  }
252 
253  // Only unpack data if there is a nonzero number to unpack
254  if (num_ent == 0) {
255  return;
256  }
257 
258  // there is actually something in the row
259  const size_t buf_size = imports.size();
260  const size_t offset = offsets(i);
261 
262  if (offset > buf_size || offset + num_packets_this_lid > buf_size) {
263  dst = Kokkos::make_pair(2, i); // out of bounds
264  return;
265  }
266 
267  // Get subviews in to the scratch arrays. The token returned from acquire
268  // is an integer in [0, tokens.size()). It is used to grab a unique (to
269  // this thread) subview of the scratch arrays.
270  const size_type token = tokens.acquire();
271  const size_t a = static_cast<size_t>(token) * max_num_ent;
272  const size_t b = a + num_ent;
273  gids_out_type gids_out = subview(gids_scratch, slice(a, b));
274  pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
275 
276  const int err = unpackRow (gids_out, pids_out, imports, offset, num_ent);
277 
278  if (err != 0) {
279  dst = Kokkos::make_pair(3, i);
280  tokens.release(token);
281  return;
282  }
283 
284  auto import_lid = import_lids(i);
285  for (size_t k = 0; k < num_ent; ++k) {
286  indices(row_ptrs_end(import_lid)) = gids_out(k);
287  // this is OK; don't need atomic, since LIDs to pack don't have repeats.
288  row_ptrs_end(import_lid) += 1;
289  }
290 
291  tokens.release(token);
292  }
293 
294 };
295 
302 template<class LocalOrdinal, class GlobalOrdinal, class Node,
303  class RowView, class IndicesView, class BufferDevice>
304 void
306 (const RowView& row_ptrs_beg,
307  const RowView& row_ptrs_end,
308  IndicesView& indices,
309  const Kokkos::View<const GlobalOrdinal*, BufferDevice,
310  Kokkos::MemoryUnmanaged>& imports,
311  const Kokkos::View<const size_t*, BufferDevice,
312  Kokkos::MemoryUnmanaged>& num_packets_per_lid,
313  const Kokkos::View<const LocalOrdinal*, BufferDevice,
314  Kokkos::MemoryUnmanaged>& import_lids,
315  const typename CrsGraph<LocalOrdinal, GlobalOrdinal,
316  Node>::padding_type& padding,
317  const bool unpack_pids,
318  const int myRank,
319  const bool verbose)
320 {
321  using LO = LocalOrdinal;
322  using GO = GlobalOrdinal;
323  using device_type = typename Node::device_type;
324  using execution_space = typename BufferDevice::execution_space;
325  using range_policy =
326  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
327  using unpack_functor_type =
329 
330  const char prefix[] =
331  "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: ";
332 
333  const size_t num_import_lids = static_cast<size_t>(import_lids.extent(0));
334  if (num_import_lids == 0) {
335  // Nothing to unpack
336  return;
337  }
338 
339  // Resize row pointers and indices to accommodate incoming data
340  padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding,
341  myRank, verbose);
342 
343  // Get the offsets
344  Kokkos::View<size_t*, device_type> offsets("offsets", num_import_lids+1);
345  computeOffsetsFromCounts(offsets, num_packets_per_lid);
346 
347  // Determine the maximum number of entries in any row in the graph. The
348  // maximum number of entries is needed to allocate unpack buffers on the
349  // device.
350  size_t max_num_ent;
351  Kokkos::parallel_reduce
352  ("MaxReduce",
353  range_policy (0, LO (num_packets_per_lid.size ())),
354  KOKKOS_LAMBDA (const LO i, size_t& running_max_num_ent) {
355  const size_t num_packets_this_lid = num_packets_per_lid(i);
356  const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2 :
357  num_packets_this_lid;
358  if (num_ent > running_max_num_ent) {
359  running_max_num_ent = num_ent;
360  }
361  }, Kokkos::Max<size_t> (max_num_ent));
362 
363  // Now do the actual unpack!
364  unpack_functor_type f (row_ptrs_beg, row_ptrs_end, indices, imports,
365  num_packets_per_lid, import_lids, offsets,
366  max_num_ent, unpack_pids);
367 
368  typename unpack_functor_type::value_type x;
369  Kokkos::parallel_reduce(range_policy(0, static_cast<LO>(num_import_lids)), f, x);
370  auto x_h = x.to_std_pair();
371  TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
372  prefix << "UnpackAndCombineFunctor reported error code "
373  << x_h.first << " for the first bad row " << x_h.second);
374 }
375 
376 template<class Packet, class LocalGraph, class BufferDevice>
377 size_t
379  const LocalGraph& local_graph,
380  const Kokkos::View<const typename LocalGraph::data_type*,
381  typename LocalGraph::device_type,
382  Kokkos::MemoryUnmanaged> permute_from_lids,
383  const Kokkos::View<const Packet*, BufferDevice>& /* imports */,
384  const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
385  const size_t num_same_ids)
386 {
387  using Kokkos::parallel_reduce;
388  using local_graph_type = LocalGraph;
389  using LO = typename local_graph_type::data_type;
390  using device_type = typename local_graph_type::device_type;
391  using execution_space = typename device_type::execution_space;
392  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
393 
394  size_t count = 0;
395  LO num_items;
396 
397  // Number of graph entries to unpack (returned by this function).
398  num_items = static_cast<LO>(num_same_ids);
399  if (num_items) {
400  size_t kcnt = 0;
401  parallel_reduce(
402  range_policy(0, num_items),
403  KOKKOS_LAMBDA(const LO lid, size_t& update) {
404  update += static_cast<size_t>(local_graph.row_map[lid+1]
405  -local_graph.row_map[lid]);
406  }, kcnt);
407  count += kcnt;
408  }
409 
410  // Count entries copied directly from the source graph with permuting.
411  num_items = static_cast<LO>(permute_from_lids.extent(0));
412  if (num_items) {
413  size_t kcnt = 0;
414  parallel_reduce(
415  range_policy(0, num_items),
416  KOKKOS_LAMBDA(const LO i, size_t& update) {
417  const LO lid = permute_from_lids(i);
418  update += static_cast<size_t>(local_graph.row_map[lid+1]
419  - local_graph.row_map[lid]);
420  }, kcnt);
421  count += kcnt;
422  }
423 
424  {
425  // Count entries received from other MPI processes.
426  size_t tot_num_ent = 0;
427  parallel_reduce("SumReduce",
428  num_packets_per_lid.size(),
429  KOKKOS_LAMBDA(const int& i, size_t& lsum) {
430  lsum += num_packets_per_lid(i) / 2;
431  }, Kokkos::Sum<size_t>(tot_num_ent));
432  count += tot_num_ent;
433  }
434 
435  return count;
436 }
437 
439 template<class Packet, class LO, class Device, class BufferDevice>
440 void
442  const Kokkos::View<size_t*, Device>& tgt_rowptr,
443  const Kokkos::View<const LO*, BufferDevice>& import_lids,
444  const Kokkos::View<const Packet*, BufferDevice>& /* imports */,
445  const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid)
446 {
447  using Kokkos::parallel_reduce;
448  using device_type = Device;
449  using execution_space = typename device_type::execution_space;
450  using size_type = typename Kokkos::View<size_t*,device_type>::size_type;
451  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
452 
453  const size_type N = num_packets_per_lid.extent(0);
454  parallel_for("Setup row pointers for remotes",
455  range_policy(0, N),
456  KOKKOS_LAMBDA(const size_t i){
457  using atomic_incr_type = typename std::remove_reference<decltype(tgt_rowptr(0))>::type;
458  const size_t num_packets_this_lid = num_packets_per_lid(i);
459  const size_t num_ent = num_packets_this_lid / 2;
460  Kokkos::atomic_fetch_add(&tgt_rowptr(import_lids(i)), atomic_incr_type(num_ent));
461  });
462 }
463 
464 // Convert array of row lengths to a CRS pointer array
465 template<class Device>
466 void
467 makeCrsRowPtrFromLengths(
468  const Kokkos::View<size_t*,Device,Kokkos::MemoryUnmanaged>& tgt_rowptr,
469  const Kokkos::View<size_t*,Device>& new_start_row)
470 {
471  using Kokkos::parallel_scan;
472  using device_type = Device;
473  using execution_space = typename device_type::execution_space;
474  using size_type = typename Kokkos::View<size_t*,device_type>::size_type;
475  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
476  const size_type N = new_start_row.extent(0);
477  parallel_scan(
478  range_policy(0, N),
479  KOKKOS_LAMBDA(const size_t& i, size_t& update, const bool& final) {
480  auto cur_val = tgt_rowptr(i);
481  if (final) {
482  tgt_rowptr(i) = update;
483  new_start_row(i) = tgt_rowptr(i);
484  }
485  update += cur_val;
486  }
487  );
488 }
489 
490 template<class LocalGraph, class LocalMap>
491 void
492 copyDataFromSameIDs(
493  const Kokkos::View<typename LocalMap::global_ordinal_type*,
494  typename LocalMap::device_type>& tgt_colind,
495  const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
496  const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
497  const Kokkos::View<size_t*, typename LocalMap::device_type>& tgt_rowptr,
498  const Kokkos::View<const int*, typename LocalMap::device_type>& src_pids,
499  const LocalGraph& local_graph,
500  const LocalMap& local_col_map,
501  const size_t num_same_ids,
502  const int my_pid)
503 {
504  using Kokkos::parallel_for;
505  using device_type = typename LocalMap::device_type;
506  using LO = typename LocalMap::local_ordinal_type;
507  using execution_space = typename device_type::execution_space;
508  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
509 
510  parallel_for(
511  range_policy(0, num_same_ids),
512  KOKKOS_LAMBDA(const size_t i) {
513  using atomic_incr_type =typename std::remove_reference<decltype(new_start_row(0))>::type;
514 
515  const LO src_lid = static_cast<LO>(i);
516  size_t src_row = local_graph.row_map(src_lid);
517 
518  const LO tgt_lid = static_cast<LO>(i);
519  const size_t tgt_row = tgt_rowptr(tgt_lid);
520 
521  const size_t nsr = local_graph.row_map(src_lid+1)
522  - local_graph.row_map(src_lid);
523  Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
524 
525  for (size_t j=local_graph.row_map(src_lid);
526  j<local_graph.row_map(src_lid+1); ++j) {
527  LO src_col = local_graph.entries(j);
528  tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
529  tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
530  }
531  }
532  );
533 }
534 
535 template<class LocalGraph, class LocalMap, class BufferDevice>
536 void
537 copyDataFromPermuteIDs(
538  const Kokkos::View<typename LocalMap::global_ordinal_type*,
539  typename LocalMap::device_type>& tgt_colind,
540  const Kokkos::View<int*,
541  typename LocalMap::device_type>& tgt_pids,
542  const Kokkos::View<size_t*,
543  typename LocalMap::device_type>& new_start_row,
544  const Kokkos::View<size_t*,
545  typename LocalMap::device_type>& tgt_rowptr,
546  const Kokkos::View<const int*,
547  typename LocalMap::device_type>& src_pids,
548  const Kokkos::View<const typename LocalMap::local_ordinal_type*,
549  BufferDevice, Kokkos::MemoryUnmanaged>& permute_to_lids,
550  const Kokkos::View<const typename LocalMap::local_ordinal_type*,
551  BufferDevice, Kokkos::MemoryUnmanaged>& permute_from_lids,
552  const LocalGraph& local_graph,
553  const LocalMap& local_col_map,
554  const int my_pid)
555 {
556  using Kokkos::parallel_for;
557  using device_type = typename LocalMap::device_type;
558  using LO = typename LocalMap::local_ordinal_type;
559  using execution_space = typename device_type::execution_space;
560  using size_type = typename Kokkos::View<LO*,device_type>::size_type;
561  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
562 
563  const size_type num_permute_to_lids = permute_to_lids.extent(0);
564 
565  parallel_for(
566  range_policy(0, num_permute_to_lids),
567  KOKKOS_LAMBDA(const size_t i) {
568  using atomic_incr_type = typename std::remove_reference<decltype(new_start_row(0))>::type;
569 
570  const LO src_lid = permute_from_lids(i);
571  const size_t src_row = local_graph.row_map(src_lid);
572 
573  const LO tgt_lid = permute_to_lids(i);
574  const size_t tgt_row = tgt_rowptr(tgt_lid);
575 
576  size_t nsr = local_graph.row_map(src_lid+1)
577  - local_graph.row_map(src_lid);
578  Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
579 
580  for (size_t j=local_graph.row_map(src_lid);
581  j<local_graph.row_map(src_lid+1); ++j) {
582  LO src_col = local_graph.entries(j);
583  tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
584  tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
585  }
586  }
587  );
588 }
589 
590 template<class Packet, class LocalGraph, class LocalMap, class BufferDevice>
591 void
592 unpackAndCombineIntoCrsArrays2(
593  const Kokkos::View<typename LocalMap::global_ordinal_type*, typename LocalMap::device_type>& tgt_colind,
594  const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
595  const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
596  const Kokkos::View<const size_t*, typename LocalMap::device_type>& offsets,
597  const Kokkos::View<
598  const typename LocalMap::local_ordinal_type*,
599  BufferDevice,
600  Kokkos::MemoryUnmanaged>& import_lids,
601  const Kokkos::View<const Packet*, BufferDevice>& imports,
602  const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
603  const LocalGraph& /* local_graph */,
604  const LocalMap /*& local_col_map*/,
605  const int my_pid)
606 {
607  using Kokkos::View;
608  using Kokkos::subview;
609  using Kokkos::MemoryUnmanaged;
610  using Kokkos::parallel_reduce;
611  using Kokkos::atomic_fetch_add;
612 
613  using device_type = typename LocalMap::device_type;
614  using LO = typename LocalMap::local_ordinal_type;
615  using GO = typename LocalMap::global_ordinal_type;
616  using execution_space = typename device_type::execution_space;
617  using size_type = typename Kokkos::View<LO*, device_type>::size_type;
618  using slice = typename Kokkos::pair<size_type, size_type>;
619  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
620 
621  using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
622  using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
623 
624  const size_type num_import_lids = import_lids.size();
625  const char prefix[] = "UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays2: ";
626 
627  // RemoteIDs: Loop structure following UnpackAndCombine
628  int gbl_err_count;
629  parallel_reduce("Unpack and combine into CRS",
630  range_policy(0, num_import_lids),
631  KOKKOS_LAMBDA(const size_t i, int& err) {
632  using atomic_incr_type = typename std::remove_reference< decltype( new_start_row(0) )>::type;
633  const size_t num_packets_this_lid = num_packets_per_lid(i);
634  const size_t num_ent = num_packets_this_lid / 2;
635  const size_t offset = offsets(i);
636  const LO lcl_row = import_lids(i);
637  const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
638  const size_t end_row = start_row + num_ent;
639 
640  gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
641  pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
642 
643  err += unpackRow (gids_out, pids_out, imports, offset, num_ent);
644 
645  // Correct target PIDs.
646  for (size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
647  const int pid = pids_out(j);
648  pids_out(j) = (pid != my_pid) ? pid : -1;
649  }
650  }, gbl_err_count);
651 
652  TEUCHOS_TEST_FOR_EXCEPTION(gbl_err_count != 0,
653  std::invalid_argument, prefix <<
654  "Attempting to unpack PIDs, but num_ent is not even; this should never "
655  "happen! Please report this bug to the Tpetra developers.");
656 
657  return;
658 }
659 
660 template<class Packet, class LocalGraph, class LocalMap, class BufferDevice>
661 void
663  const LocalGraph & local_graph,
664  const LocalMap & local_col_map,
665  const Kokkos::View<const typename LocalMap::local_ordinal_type*,
666  BufferDevice,
667  Kokkos::MemoryUnmanaged>& import_lids,
668  const Kokkos::View<const Packet*, BufferDevice>& imports,
669  const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
670  const Kokkos::View<const typename LocalMap::local_ordinal_type*,
671  BufferDevice,
672  Kokkos::MemoryUnmanaged>& permute_to_lids,
673  const Kokkos::View<const typename LocalMap::local_ordinal_type*,
674  BufferDevice,
675  Kokkos::MemoryUnmanaged>& permute_from_lids,
676  const Kokkos::View<size_t*,
677  typename LocalMap::device_type,
678  Kokkos::MemoryUnmanaged>& tgt_rowptr,
679  const Kokkos::View<typename LocalMap::global_ordinal_type*,
680  typename LocalMap::device_type,
681  Kokkos::MemoryUnmanaged>& tgt_colind,
682  const Kokkos::View<const int*,
683  typename LocalMap::device_type,
684  Kokkos::MemoryUnmanaged>& src_pids,
685  const Kokkos::View<int*,
686  typename LocalMap::device_type,
687  Kokkos::MemoryUnmanaged>& tgt_pids,
688  const size_t num_same_ids,
689  const size_t tgt_num_rows,
690  const size_t tgt_num_nonzeros,
691  const int my_tgt_pid)
692 {
693  using Kokkos::View;
694  using Kokkos::subview;
695  using Kokkos::parallel_for;
696  using Kokkos::MemoryUnmanaged;
697  using packet_type = Packet;
698  using local_map_type = LocalMap;
699  using local_graph_type = LocalGraph;
700  using buffer_device_type = BufferDevice;
701  using device_type = typename LocalMap::device_type;
702  using LO = typename LocalMap::local_ordinal_type;
703  using execution_space = typename device_type::execution_space;
704  using size_type = typename Kokkos::View<LO*, device_type>::size_type;
705  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
706 
707  const char prefix[] = "UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays: ";
708 
709  const size_t N = tgt_num_rows;
710  const size_t mynnz = tgt_num_nonzeros;
711 
712  // In the case of reduced communicators, the sourceGraph won't have
713  // the right "my_pid", so thus we have to supply it.
714  const int my_pid = my_tgt_pid;
715 
716  // FIXME (mfh 24 Jun 2019)
717  //
718  // 1. Only zero the entries of tgt_rowptr that actually need it.
719  // 2. Consider merging these three kernels into one.
720 
721  // Zero the rowptr
722  parallel_for(
723  range_policy(0, N+1),
724  KOKKOS_LAMBDA(const size_t i) {
725  tgt_rowptr(i) = 0;
726  }
727  );
728 
729  // same IDs: Always first, always in the same place
730  parallel_for(
731  range_policy(0, num_same_ids),
732  KOKKOS_LAMBDA(const size_t i) {
733  const LO tgt_lid = static_cast<LO>(i);
734  const LO src_lid = static_cast<LO>(i);
735  tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
736  - local_graph.row_map(src_lid);
737  }
738  );
739 
740  // Permute IDs: Still local, but reordered
741  const size_type num_permute_to_lids = permute_to_lids.extent(0);
742  parallel_for(
743  range_policy(0, num_permute_to_lids),
744  KOKKOS_LAMBDA(const size_t i) {
745  const LO tgt_lid = permute_to_lids(i);
746  const LO src_lid = permute_from_lids(i);
747  tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
748  - local_graph.row_map(src_lid);
749  }
750  );
751 
752  // Get the offsets from the number of packets per LID
753  const size_type num_import_lids = import_lids.extent(0);
754  View<size_t*, device_type> offsets("offsets", num_import_lids+1);
755  computeOffsetsFromCounts(offsets, num_packets_per_lid);
756 
757 #ifdef HAVE_TPETRA_DEBUG
758  {
759  auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
760  const bool condition =
761  nth_offset_h != static_cast<size_t>(imports.extent(0));
762  TEUCHOS_TEST_FOR_EXCEPTION
763  (condition, std::logic_error, prefix
764  << "The final offset in bytes " << nth_offset_h
765  << " != imports.size() = " << imports.extent(0)
766  << ". Please report this bug to the Tpetra developers.");
767  }
768 #endif // HAVE_TPETRA_DEBUG
769 
770  // Setup row pointers for remotes
771  setupRowPointersForRemotes<packet_type,LO,device_type,buffer_device_type>(
772  tgt_rowptr, import_lids, imports, num_packets_per_lid);
773 
774  // If multiple processes contribute to the same row, we may need to
775  // update row offsets. This tracks that.
776  View<size_t*, device_type> new_start_row("new_start_row", N+1);
777 
778  // Turn row length into a real CRS row pointer
779  makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
780  {
781  auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
782  bool condition = nth_tgt_rowptr_h != mynnz;
783  TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
784  prefix << "CRS_rowptr[last] = " <<
785  nth_tgt_rowptr_h << "!= mynnz = " << mynnz << ".");
786  }
787 
788  // SameIDs: Copy the data over
789  copyDataFromSameIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
790  tgt_rowptr, src_pids, local_graph, local_col_map, num_same_ids, my_pid);
791 
792  copyDataFromPermuteIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
793  tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
794  local_graph, local_col_map, my_pid);
795 
796  if (imports.extent(0) <= 0) {
797  return;
798  }
799 
800  unpackAndCombineIntoCrsArrays2<
801  packet_type,local_graph_type,local_map_type,buffer_device_type>(
802  tgt_colind, tgt_pids, new_start_row, offsets, import_lids, imports,
803  num_packets_per_lid, local_graph, local_col_map, my_pid);
804 
805  return;
806 }
807 
808 } // namespace UnpackAndCombineCrsGraphImpl
809 
859 template<class LocalOrdinal, class GlobalOrdinal, class Node>
860 size_t
863  const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
864  const Teuchos::ArrayView<const typename CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::packet_type> &imports,
865  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
866  size_t /* constantNumPackets */,
867  Distributor &/* distor */,
868  CombineMode /* combineMode */,
869  size_t numSameIDs,
870  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
871  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
872 {
873  using Kokkos::MemoryUnmanaged;
874  using Kokkos::View;
875  using device_type = typename Node::device_type;
876  using packet_type = typename CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::packet_type;
877  using local_graph_type = typename CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::local_graph_type;
878  using buffer_device_type = typename CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::buffer_device_type;
879  const char prefix[] = "unpackAndCombineWithOwningPIDsCount: ";
880 
881  TEUCHOS_TEST_FOR_EXCEPTION
882  (permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
883  prefix << "permuteToLIDs.size() = " << permuteToLIDs.size() << " != "
884  "permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
885  // FIXME (mfh 26 Jan 2015) If there are no entries on the calling
886  // process, then the graph is neither locally nor globally indexed.
887  const bool locallyIndexed = sourceGraph.isLocallyIndexed();
888  TEUCHOS_TEST_FOR_EXCEPTION
889  (! locallyIndexed, std::invalid_argument, prefix << "The input "
890  "CrsGraph 'sourceGraph' must be locally indexed.");
891  TEUCHOS_TEST_FOR_EXCEPTION
892  (importLIDs.size() != numPacketsPerLID.size(), std::invalid_argument,
893  prefix << "importLIDs.size() = " << importLIDs.size() << " != "
894  "numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
895 
896  auto local_graph = sourceGraph.getLocalGraph();
897  auto permute_from_lids_d =
899  permuteFromLIDs.getRawPtr(),
900  permuteFromLIDs.size(), true,
901  "permute_from_lids");
902  auto imports_d =
903  create_mirror_view_from_raw_host_array(buffer_device_type(),
904  imports.getRawPtr(),
905  imports.size(), true,
906  "imports");
907  auto num_packets_per_lid_d =
908  create_mirror_view_from_raw_host_array(buffer_device_type(),
909  numPacketsPerLID.getRawPtr(),
910  numPacketsPerLID.size(), true,
911  "num_packets_per_lid");
912 
914  packet_type,local_graph_type,buffer_device_type>(
915  local_graph, permute_from_lids_d, imports_d, num_packets_per_lid_d, numSameIDs);
916 }
917 
/// \brief Host-side driver that unpacks and combines graph data into
///   caller-supplied CRS arrays.
///
/// The visible body checks the argument sizes, fills \c TargetPids with
/// -1, wraps the host arrays in Kokkos views, hands everything to an
/// implementation routine, and finally copies \c CRS_rowptr,
/// \c CRS_colind and \c TargetPids back into the caller's host arrays.
///
/// NOTE(review): this copy of the listing jumps from line 932 to 935 and
/// also skips lines 999, 1004, 1024, 1029, 1034, 1039 and 1044, so the
/// function name, the declaration of \c sourceGraph and several call
/// openers are absent here. Each gap is marked inline below; restore the
/// missing lines from upstream before compiling.
///
/// \c sourceGraph (declared on one of the missing lines) is the graph
/// whose getLocalGraph() and getColMap()->getLocalMap() results are
/// passed to the implementation routine.
///
/// \param importLIDs [in] Local indices; must have the same number of
///   entries as \c numPacketsPerLID.
/// \param imports [in] Entries of type CrsGraph::packet_type, mirrored
///   to \c buffer_device_type before the call.
/// \param numPacketsPerLID [in] One count per entry of \c importLIDs.
/// \param numSameIDs [in] Forwarded unchanged to the implementation.
/// \param permuteToLIDs [in] Must have the same number of entries as
///   \c permuteFromLIDs.
/// \param permuteFromLIDs [in] See \c permuteToLIDs.
/// \param TargetNumRows [in] \c CRS_rowptr must hold TargetNumRows + 1
///   entries.
/// \param TargetNumNonzeros [in] Length that \c TargetPids is given
///   before the call.
/// \param MyTargetPID [in] Forwarded unchanged to the implementation.
/// \param CRS_rowptr [in/out] Copied to the device, then overwritten
///   with the device result.
/// \param CRS_colind [in/out] Copied to the device, then overwritten
///   with the device result.
/// \param SourcePids [in] Copied to the device; not copied back.
/// \param TargetPids [out] Set to TargetNumNonzeros entries of -1, then
///   overwritten with the device result.
///
/// The three unnamed parameters (constantNumPackets, distor,
/// combineMode) are accepted but never read.
///
/// \throws std::invalid_argument if any of the three size checks at the
///   top of the body fails.
931 template<class LocalOrdinal, class GlobalOrdinal, class Node>
932 void
// NOTE(review): listing lines 933-934 are absent from this copy. They
// should hold the function name with its opening parenthesis and the
// `sourceGraph` parameter that the body uses -- restore from upstream.
935  const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
936  const Teuchos::ArrayView<const typename CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::packet_type>& imports,
937  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
938  const size_t /* constantNumPackets */,
939  Distributor& /* distor */,
940  const CombineMode /* combineMode */,
941  const size_t numSameIDs,
942  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
943  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
944  size_t TargetNumRows,
945  size_t TargetNumNonzeros,
946  const int MyTargetPID,
947  const Teuchos::ArrayView<size_t>& CRS_rowptr,
948  const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
949  const Teuchos::ArrayView<const int>& SourcePids,
950  Teuchos::Array<int>& TargetPids)
951 {
     // NOTE(review): View, outArg, REDUCE_MAX and reduceAll are not
     // referenced in the visible lines of this body (views are spelled
     // Kokkos::View in full) -- candidates for removal once confirmed
     // against the complete source.
952  using Kokkos::View;
953  using Kokkos::deep_copy;
954  using Teuchos::outArg;
955  using Teuchos::REDUCE_MAX;
956  using Teuchos::reduceAll;
957  using LO = LocalOrdinal;
958  using GO = GlobalOrdinal;
959  using crs_graph_type = CrsGraph<LO, GO, Node>;
960  using packet_type = typename crs_graph_type::packet_type;
961  using local_graph_type = typename crs_graph_type::local_graph_type;
962  using buffer_device_type = typename crs_graph_type::buffer_device_type;
963  using device_type = typename Node::device_type;
964  using size_type = typename Teuchos::ArrayView<const LO>::size_type;
965 
966  const char prefix[] = "Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
967 
     // Argument validation: each failed check throws std::invalid_argument
     // with a message that starts with `prefix`.
     // NOTE(review): the "!= ..." literals in the first two checks have no
     // leading space, so the printed size runs straight into "!=".
968  TEUCHOS_TEST_FOR_EXCEPTION(
969  TargetNumRows + 1 != static_cast<size_t>(CRS_rowptr.size()),
970  std::invalid_argument, prefix << "CRS_rowptr.size() = " <<
971  CRS_rowptr.size() << "!= TargetNumRows+1 = " << TargetNumRows+1 << ".");
972 
973  TEUCHOS_TEST_FOR_EXCEPTION(
974  permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
975  prefix << "permuteToLIDs.size() = " << permuteToLIDs.size()
976  << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
977  const size_type numImportLIDs = importLIDs.size();
978 
979  TEUCHOS_TEST_FOR_EXCEPTION(
980  numImportLIDs != numPacketsPerLID.size(), std::invalid_argument,
981  prefix << "importLIDs.size() = " << numImportLIDs << " != "
982  "numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
983 
984  // Preseed TargetPids with -1 for local
     // NOTE(review): if Teuchos::Array::assign(n, value) sets the length
     // the way std::vector::assign does, the guarded resize() is redundant
     // -- confirm before removing.
985  if (static_cast<size_t>(TargetPids.size()) != TargetNumNonzeros) {
986  TargetPids.resize(TargetNumNonzeros);
987  }
988  TargetPids.assign(TargetNumNonzeros, -1);
989 
990  // Grab pointers for sourceGraph
     // getColMap() is dereferenced without a null check here.
991  auto local_graph = sourceGraph.getLocalGraph();
992  auto local_col_map = sourceGraph.getColMap()->getLocalMap();
993 
994  // Convert input arrays to Kokkos::View
     // Two device instances are used as tags: the packed/index inputs go to
     // buffer_device_type, the CRS and PID arrays go to device_type. In the
     // visible calls the `true` argument is the helper's `copy` parameter.
995  device_type outputDevice;
996  buffer_device_type bufferOutputDevice;
997 
998  Kokkos::View<const LO*, buffer_device_type> import_lids_d =
     // NOTE(review): listing line 999 is absent; presumably the callee name
     // create_mirror_view_from_raw_host_array, as in the sibling
     // conversions below -- confirm against upstream.
1000  (bufferOutputDevice, importLIDs.getRawPtr(),
1001  importLIDs.size(), true, "import_lids");
1002 
1003  Kokkos::View<const packet_type*, buffer_device_type> imports_d =
     // NOTE(review): listing line 1004 is absent; presumably the same
     // callee name as above -- confirm.
1005  (bufferOutputDevice, imports.getRawPtr(),
1006  imports.size(), true, "imports");
1007 
1008  Kokkos::View<const size_t*, buffer_device_type> num_packets_per_lid_d =
1009  create_mirror_view_from_raw_host_array(bufferOutputDevice,
1010  numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
1011  true, "num_packets_per_lid");
1012 
1013  Kokkos::View<const LO*, buffer_device_type> permute_to_lids_d =
1014  create_mirror_view_from_raw_host_array(bufferOutputDevice,
1015  permuteToLIDs.getRawPtr(), permuteToLIDs.size(),
1016  true, "permute_to_lids");
1017 
1018  Kokkos::View<const LO*, buffer_device_type> permute_from_lids_d =
1019  create_mirror_view_from_raw_host_array(bufferOutputDevice,
1020  permuteFromLIDs.getRawPtr(), permuteFromLIDs.size(),
1021  true, "permute_from_lids");
1022 
     // The next four views live on device_type rather than the buffer
     // device.
     // NOTE(review): listing lines 1024, 1029, 1034 and 1039 are absent;
     // each presumably opens a create_mirror_view_from_raw_host_array call
     // whose first argument is `outputDevice` (otherwise unused in the
     // visible lines) -- confirm against upstream.
1023  Kokkos::View<size_t*, device_type> crs_rowptr_d =
1025  CRS_rowptr.getRawPtr(), CRS_rowptr.size(),
1026  true, "crs_rowptr");
1027 
1028  Kokkos::View<GO*, device_type> crs_colind_d =
1030  CRS_colind.getRawPtr(), CRS_colind.size(),
1031  true, "crs_colidx");
1032 
1033  Kokkos::View<const int*, device_type> src_pids_d =
1035  SourcePids.getRawPtr(), SourcePids.size(),
1036  true, "src_pids");
1037 
1038  Kokkos::View<int*, device_type> tgt_pids_d =
1040  TargetPids.getRawPtr(), TargetPids.size(),
1041  true, "tgt_pids");
1042 
1043  using local_map_type = decltype(local_col_map);
     // NOTE(review): listing line 1044 is absent; it should name the
     // implementation routine that takes the template arguments and views
     // below -- restore from upstream.
1045  packet_type,local_graph_type,local_map_type,buffer_device_type>(
1046  local_graph, local_col_map, import_lids_d, imports_d, num_packets_per_lid_d,
1047  permute_to_lids_d, permute_from_lids_d, crs_rowptr_d, crs_colind_d, src_pids_d,
1048  tgt_pids_d, numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID);
1049 
1050  // FIXME (mfh 25 Jun 2019) HostMirror of CudaUVMSpace is CudaUVMSpace!!!
1051 
1052  // Copy outputs back to host
     // Each HostMirror-typed view is built directly on the caller's raw
     // pointer, so the deep_copy writes the device result into the
     // caller's array. SourcePids is input-only and is not copied back.
1053  typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1054  CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1055  deep_copy(crs_rowptr_h, crs_rowptr_d);
1056 
1057  typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1058  CRS_colind.getRawPtr(), CRS_colind.size());
1059  deep_copy(crs_colind_h, crs_colind_d);
1060 
1061  typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1062  TargetPids.getRawPtr(), TargetPids.size());
1063  deep_copy(tgt_pids_h, tgt_pids_d);
1064 
1065 }
1066 
1067 } // namespace Details
1068 } // namespace Tpetra
1069 
// Explicit-instantiation macro. For one (LO, GO, NT) triple it emits two
// `template` instantiations:
//   - void   Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>(...)
//   - size_t Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>(...)
// The parameter lists are spelled out type by type and must stay in step
// with the corresponding function definitions.
// The names are qualified with `Details::` only, so the macro has to be
// expanded in a scope where Details, CrsGraph, Distributor and
// CombineMode resolve (presumably inside namespace Tpetra -- confirm at
// the expansion site).
// Keep comments outside the macro body: every line but the last ends in
// a continuation backslash.
1070 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT( LO, GO, NT ) \
1071  template void \
1072  Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>( \
1073  const CrsGraph<LO, GO, NT> &, \
1074  const Teuchos::ArrayView<const LO>&, \
1075  const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type>&, \
1076  const Teuchos::ArrayView<const size_t>&, \
1077  const size_t, \
1078  Distributor&, \
1079  const CombineMode, \
1080  const size_t, \
1081  const Teuchos::ArrayView<const LO>&, \
1082  const Teuchos::ArrayView<const LO>&, \
1083  size_t, \
1084  size_t, \
1085  const int, \
1086  const Teuchos::ArrayView<size_t>&, \
1087  const Teuchos::ArrayView<GO>&, \
1088  const Teuchos::ArrayView<const int>&, \
1089  Teuchos::Array<int>&); \
1090  template size_t \
1091  Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>( \
1092  const CrsGraph<LO, GO, NT> &, \
1093  const Teuchos::ArrayView<const LO> &, \
1094  const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type> &, \
1095  const Teuchos::ArrayView<const size_t>&, \
1096  size_t, \
1097  Distributor &, \
1098  CombineMode, \
1099  size_t, \
1100  const Teuchos::ArrayView<const LO>&, \
1101  const Teuchos::ArrayView<const LO>&);
1102 
1103 #endif // TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Functions for manipulating CRS arrays.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
KOKKOS_FUNCTION int unpackRow(const Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, const Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
void setupRowPointersForRemotes(const Kokkos::View< size_t *, Device > &tgt_rowptr, const Kokkos::View< const LO *, BufferDevice > &import_lids, const Kokkos::View< const Packet *, BufferDevice > &, const Kokkos::View< const size_t *, BufferDevice > &num_packets_per_lid)
Setup row pointers for remotes.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_graph_type getLocalGraph() const
Get the local graph.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &sourceGraph, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t, Distributor &, CombineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &sourceGraph, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t, Distributor &, const CombineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GlobalOrdinal > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.