Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <sstream>
56 #include <stdexcept>
57 #include <string>
58 
81 
82 namespace Tpetra {
83 
84 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 // Forward declaration of Distributor
86 class Distributor;
87 #endif // DOXYGEN_SHOULD_SKIP_THIS
88 
89 //
90 // Users must never rely on anything in the Details namespace.
91 //
92 namespace Details {
93 
94 namespace PackCrsMatrixImpl {
102 template<class OutputOffsetsViewType,
103  class CountsViewType,
104  class InputOffsetsViewType,
105  class InputLocalRowIndicesViewType,
106  class InputLocalRowPidsViewType,
107  const bool debug =
108 #ifdef HAVE_TPETRA_DEBUG
109  true
110 #else
111  false
112 #endif // HAVE_TPETRA_DEBUG
113  >
115 public:
116  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
117  typedef typename CountsViewType::non_const_value_type count_type;
118  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
119  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
120  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
121  // output Views drive where execution happens.
122  typedef typename OutputOffsetsViewType::device_type device_type;
123  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
124  typename device_type::execution_space>::value,
125  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
126  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
127  "OutputOffsetsViewType must be a Kokkos::View.");
128  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
129  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
130  static_assert (std::is_integral<output_offset_type>::value,
131  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
132  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
133  "CountsViewType must be a Kokkos::View.");
134  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
135  "CountsViewType must be a nonconst Kokkos::View.");
136  static_assert (std::is_integral<count_type>::value,
137  "The type of each entry of CountsViewType must be a built-in integer type.");
138  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
139  "InputOffsetsViewType must be a Kokkos::View.");
140  static_assert (std::is_integral<input_offset_type>::value,
141  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
142  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
143  "InputLocalRowIndicesViewType must be a Kokkos::View.");
144  static_assert (std::is_integral<local_row_index_type>::value,
145  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
146 
147  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
148  const CountsViewType& counts,
149  const InputOffsetsViewType& rowOffsets,
150  const InputLocalRowIndicesViewType& lclRowInds,
151  const InputLocalRowPidsViewType& lclRowPids,
152  const count_type sizeOfLclCount,
153  const count_type sizeOfGblColInd,
154  const count_type sizeOfPid,
155  const count_type sizeOfValue) :
156  outputOffsets_ (outputOffsets),
157  counts_ (counts),
158  rowOffsets_ (rowOffsets),
159  lclRowInds_ (lclRowInds),
160  lclRowPids_ (lclRowPids),
161  sizeOfLclCount_ (sizeOfLclCount),
162  sizeOfGblColInd_ (sizeOfGblColInd),
163  sizeOfPid_ (sizeOfPid),
164  sizeOfValue_ (sizeOfValue),
165  error_ ("error") // don't forget this, or you'll get segfaults!
166  {
167  if (debug) {
168  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
169 
170  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
171  std::ostringstream os;
172  os << "lclRowInds.extent(0) = " << numRowsToPack
173  << " != counts.extent(0) = " << counts_.extent (0)
174  << ".";
175  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
176  }
177  if (static_cast<size_t> (numRowsToPack + 1) !=
178  static_cast<size_t> (outputOffsets_.extent (0))) {
179  std::ostringstream os;
180  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
181  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
182  << ".";
183  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
184  }
185  }
186  }
187 
188  KOKKOS_INLINE_FUNCTION void
189  operator() (const local_row_index_type& curInd,
190  output_offset_type& update,
191  const bool final) const
192  {
193  if (debug) {
194  if (curInd < static_cast<local_row_index_type> (0)) {
195  error_ () = 1;
196  return;
197  }
198  }
199 
200  if (final) {
201  if (debug) {
202  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
203  error_ () = 2;
204  return;
205  }
206  }
207  outputOffsets_(curInd) = update;
208  }
209 
210  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
211  const auto lclRow = lclRowInds_(curInd);
212  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
213  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
214  error_ () = 3;
215  return;
216  }
217  // count_type could differ from the type of each row offset.
218  // For example, row offsets might each be 64 bits, but if their
219  // difference always fits in 32 bits, we may then safely use a
220  // 32-bit count_type.
221  const count_type count =
222  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
223 
224  // We pack first the number of entries in the row, then that
225  // many global column indices, then that many pids (if any),
226  // then that many values. However, if the number of entries in
227  // the row is zero, we pack nothing.
228  const count_type numBytes = (count == 0) ?
229  static_cast<count_type> (0) :
230  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
231  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
232  sizeOfValue_);
233 
234  if (final) {
235  counts_(curInd) = numBytes;
236  }
237  update += numBytes;
238  }
239  }
240 
241  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
242  // have join both with and without volatile! Otherwise intrawarp
243  // joins are really slow on GPUs.
244 
246  int getError () const {
247  auto error_h = Kokkos::create_mirror_view (error_);
248  Kokkos::deep_copy (error_h, error_);
249  return error_h ();
250  }
251 
252 private:
253  OutputOffsetsViewType outputOffsets_;
254  CountsViewType counts_;
255  typename InputOffsetsViewType::const_type rowOffsets_;
256  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
257  typename InputLocalRowPidsViewType::const_type lclRowPids_;
258  count_type sizeOfLclCount_;
259  count_type sizeOfGblColInd_;
260  count_type sizeOfPid_;
261  count_type sizeOfValue_;
262  Kokkos::View<int, device_type> error_;
263 };
264 
274 template<class OutputOffsetsViewType,
275  class CountsViewType,
276  class InputOffsetsViewType,
277  class InputLocalRowIndicesViewType,
278  class InputLocalRowPidsViewType>
279 typename CountsViewType::non_const_value_type
280 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
281  const CountsViewType& counts,
282  const InputOffsetsViewType& rowOffsets,
283  const InputLocalRowIndicesViewType& lclRowInds,
284  const InputLocalRowPidsViewType& lclRowPids,
285  const typename CountsViewType::non_const_value_type sizeOfLclCount,
286  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
287  const typename CountsViewType::non_const_value_type sizeOfPid,
288  const typename CountsViewType::non_const_value_type sizeOfValue)
289 {
290  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
291  CountsViewType, typename InputOffsetsViewType::const_type,
292  typename InputLocalRowIndicesViewType::const_type,
293  typename InputLocalRowPidsViewType::const_type> functor_type;
294  typedef typename CountsViewType::non_const_value_type count_type;
295  typedef typename OutputOffsetsViewType::size_type size_type;
296  typedef typename OutputOffsetsViewType::execution_space execution_space;
297  typedef typename functor_type::local_row_index_type LO;
298  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
299  const char prefix[] = "computeNumPacketsAndOffsets: ";
300 
301  count_type count = 0;
302  const count_type numRowsToPack = lclRowInds.extent (0);
303 
304  if (numRowsToPack == 0) {
305  return count;
306  }
307  else {
308  TEUCHOS_TEST_FOR_EXCEPTION
309  (rowOffsets.extent (0) <= static_cast<size_type> (1),
310  std::invalid_argument, prefix << "There is at least one row to pack, "
311  "but the matrix has no rows. lclRowInds.extent(0) = " <<
312  numRowsToPack << ", but rowOffsets.extent(0) = " <<
313  rowOffsets.extent (0) << " <= 1.");
314  TEUCHOS_TEST_FOR_EXCEPTION
315  (outputOffsets.extent (0) !=
316  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
317  prefix << "Output dimension does not match number of rows to pack. "
318  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
319  << " != lclRowInds.extent(0) + 1 = "
320  << static_cast<size_type> (numRowsToPack + 1) << ".");
321  TEUCHOS_TEST_FOR_EXCEPTION
322  (counts.extent (0) != numRowsToPack, std::invalid_argument,
323  prefix << "counts.extent(0) = " << counts.extent (0)
324  << " != numRowsToPack = " << numRowsToPack << ".");
325 
326  functor_type f (outputOffsets, counts, rowOffsets,
327  lclRowInds, lclRowPids, sizeOfLclCount,
328  sizeOfGblColInd, sizeOfPid, sizeOfValue);
329  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
330 
331  // At least in debug mode, this functor checks for errors.
332  const int errCode = f.getError ();
333  TEUCHOS_TEST_FOR_EXCEPTION
334  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
335  << errCode << " != 0.");
336 
337 #if 0
338  size_t total = 0;
339  for (LO k = 0; k < numRowsToPack; ++k) {
340  total += counts[k];
341  }
342  if (outputOffsets(numRowsToPack) != total) {
343  if (errStr.get () == NULL) {
344  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
345  }
346  std::ostringstream& os = *errStr;
347  os << prefix
348  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
349  << outputOffsets(numRowsToPack) << " != sum of counts = "
350  << total << "." << std::endl;
351  if (numRowsToPack != 0) {
352  // Only print the array if it's not too long.
353  if (numRowsToPack < static_cast<LO> (10)) {
354  os << "outputOffsets: [";
355  for (LO i = 0; i <= numRowsToPack; ++i) {
356  os << outputOffsets(i);
357  if (static_cast<LO> (i + 1) <= numRowsToPack) {
358  os << ",";
359  }
360  }
361  os << "]" << std::endl;
362  os << "counts: [";
363  for (LO i = 0; i < numRowsToPack; ++i) {
364  os << counts(i);
365  if (static_cast<LO> (i + 1) < numRowsToPack) {
366  os << ",";
367  }
368  }
369  os << "]" << std::endl;
370  }
371  else {
372  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
373  << outputOffsets(numRowsToPack-1) << "." << std::endl;
374  }
375  }
376  count = outputOffsets(numRowsToPack);
377  return {false, errStr};
378  }
379 #endif // HAVE_TPETRA_DEBUG
380 
381  // Get last entry of outputOffsets, which is the sum of the entries
382  // of counts. Don't assume UVM.
383  using Tpetra::Details::getEntryOnHost;
384  return static_cast<count_type> (getEntryOnHost (outputOffsets,
385  numRowsToPack));
386  }
387 }
388 
404 template<class ST, class ColumnMap, class BufferDeviceType>
405 KOKKOS_FUNCTION
406 Kokkos::pair<int, size_t>
407 packCrsMatrixRow (const ColumnMap& col_map,
408  const Kokkos::View<char*, BufferDeviceType>& exports,
410  const typename PackTraits<int>::input_array_type& pids_in,
411  const typename PackTraits<ST>::input_array_type& vals_in,
412  const size_t offset,
413  const size_t num_ent,
414  const size_t num_bytes_per_value,
415  const bool pack_pids)
416 {
417  using Kokkos::subview;
418  using LO = typename ColumnMap::local_ordinal_type;
419  using GO = typename ColumnMap::global_ordinal_type;
420  using return_type = Kokkos::pair<int, size_t>;
421 
422  if (num_ent == 0) {
423  // Empty rows always take zero bytes, to ensure sparsity.
424  return return_type (0, 0);
425  }
426 
427  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
428  const size_t num_ent_beg = offset;
429  const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
430 
431  const size_t gids_beg = num_ent_beg + num_ent_len;
432  const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
433 
434  const size_t pids_beg = gids_beg + gids_len;
435  const size_t pids_len = pack_pids ?
436  num_ent * PackTraits<int>::packValueCount (int (0)) :
437  static_cast<size_t> (0);
438 
439  const size_t vals_beg = gids_beg + gids_len + pids_len;
440  const size_t vals_len = num_ent * num_bytes_per_value;
441 
442  char* const num_ent_out = exports.data () + num_ent_beg;
443  char* const gids_out = exports.data () + gids_beg;
444  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
445  char* const vals_out = exports.data () + vals_beg;
446 
447  size_t num_bytes_out = 0;
448  int error_code = 0;
449  num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
450 
451  {
452  // Copy column indices one at a time, so that we don't need
453  // temporary storage.
454  for (size_t k = 0; k < num_ent; ++k) {
455  const LO lid = lids_in[k];
456  const GO gid = col_map.getGlobalElement (lid);
457  num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
458  }
459  // Copy PIDs one at a time, so that we don't need temporary storage.
460  if (pack_pids) {
461  for (size_t k = 0; k < num_ent; ++k) {
462  const LO lid = lids_in[k];
463  const int pid = pids_in[lid];
464  num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
465  }
466  }
467  const auto p =
468  PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
469  error_code += p.first;
470  num_bytes_out += p.second;
471  }
472 
473  if (error_code != 0) {
474  return return_type (10, num_bytes_out);
475  }
476 
477  const size_t expected_num_bytes =
478  num_ent_len + gids_len + pids_len + vals_len;
479  if (num_bytes_out != expected_num_bytes) {
480  return return_type (11, num_bytes_out);
481  }
482  return return_type (0, num_bytes_out);
483 }
484 
485 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
486 struct PackCrsMatrixFunctor {
487  typedef LocalMatrix local_matrix_type;
488  typedef LocalMap local_map_type;
489  typedef typename local_matrix_type::value_type ST;
490  typedef typename local_map_type::local_ordinal_type LO;
491  typedef typename local_map_type::global_ordinal_type GO;
492  typedef typename local_matrix_type::device_type DT;
493 
494  typedef Kokkos::View<const size_t*, BufferDeviceType>
495  num_packets_per_lid_view_type;
496  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
497  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
498  using export_lids_view_type = typename PackTraits<LO>::input_array_type;
499  using source_pids_view_type = typename PackTraits<int>::input_array_type;
500 
501  typedef typename num_packets_per_lid_view_type::non_const_value_type
502  count_type;
503  typedef typename offsets_view_type::non_const_value_type
504  offset_type;
505  typedef Kokkos::pair<int, LO> value_type;
506 
507  static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
508  "local_map_type::local_ordinal_type and "
509  "local_matrix_type::ordinal_type must be the same.");
510 
511  local_matrix_type local_matrix;
512  local_map_type local_col_map;
513  exports_view_type exports;
514  num_packets_per_lid_view_type num_packets_per_lid;
515  export_lids_view_type export_lids;
516  source_pids_view_type source_pids;
517  offsets_view_type offsets;
518  size_t num_bytes_per_value;
519  bool pack_pids;
520 
521  PackCrsMatrixFunctor (const local_matrix_type& local_matrix_in,
522  const local_map_type& local_col_map_in,
523  const exports_view_type& exports_in,
524  const num_packets_per_lid_view_type& num_packets_per_lid_in,
525  const export_lids_view_type& export_lids_in,
526  const source_pids_view_type& source_pids_in,
527  const offsets_view_type& offsets_in,
528  const size_t num_bytes_per_value_in,
529  const bool pack_pids_in) :
530  local_matrix (local_matrix_in),
531  local_col_map (local_col_map_in),
532  exports (exports_in),
533  num_packets_per_lid (num_packets_per_lid_in),
534  export_lids (export_lids_in),
535  source_pids (source_pids_in),
536  offsets (offsets_in),
537  num_bytes_per_value (num_bytes_per_value_in),
538  pack_pids (pack_pids_in)
539  {
540  const LO numRows = local_matrix_in.numRows ();
541  const LO rowMapDim =
542  static_cast<LO> (local_matrix.graph.row_map.extent (0));
543  TEUCHOS_TEST_FOR_EXCEPTION
544  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
545  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
546  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
547  }
548 
549  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
550  {
551  using ::Tpetra::Details::OrdinalTraits;
552  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
553  }
554 
555  KOKKOS_INLINE_FUNCTION void
556  join (volatile value_type& dst, const volatile value_type& src) const
557  {
558  // `dst` should reflect the first (least) bad index and all other
559  // associated error codes and data, so prefer keeping it.
560  if (src.first != 0 && dst.first == 0) {
561  dst = src;
562  }
563  }
564 
565  KOKKOS_INLINE_FUNCTION
566  void operator() (const LO i, value_type& dst) const
567  {
568  const size_t offset = offsets[i];
569  const LO export_lid = export_lids[i];
570  const size_t buf_size = exports.size();
571  const size_t num_bytes = num_packets_per_lid(i);
572  const size_t num_ent =
573  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
574  - local_matrix.graph.row_map[export_lid]);
575 
576  // Only pack this row's data if it has a nonzero number of
577  // entries. We can do this because receiving processes get the
578  // number of packets, and will know that zero packets means zero
579  // entries.
580  if (num_ent == 0) {
581  return;
582  }
583 
584  if (export_lid >= local_matrix.numRows ()) {
585  if (dst.first != 0) { // keep only the first error
586  dst = Kokkos::make_pair (1, i); // invalid row
587  }
588  return;
589  }
590  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
591  if (dst.first != 0) { // keep only the first error
592  dst = Kokkos::make_pair (2, i); // out of bounds
593  }
594  return;
595  }
596 
597  // We can now pack this row
598 
599  // Since the matrix is locally indexed on the calling process, we
600  // have to use its column Map (which it _must_ have in this case)
601  // to convert to global indices.
602  const auto row_beg = local_matrix.graph.row_map[export_lid];
603  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
604  auto vals_in = subview (local_matrix.values,
605  Kokkos::make_pair (row_beg, row_end));
606  auto lids_in = subview (local_matrix.graph.entries,
607  Kokkos::make_pair (row_beg, row_end));
608  typedef local_map_type LMT;
609  typedef BufferDeviceType BDT;
610  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
611  source_pids, vals_in, offset,
612  num_ent, num_bytes_per_value,
613  pack_pids);
614  int error_code_this_row = p.first;
615  size_t num_bytes_packed_this_row = p.second;
616  if (error_code_this_row != 0) {
617  if (dst.first != 0) { // keep only the first error
618  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
619  }
620  }
621  else if (num_bytes_packed_this_row != num_bytes) {
622  if (dst.first != 0) { // keep only the first error
623  dst = Kokkos::make_pair (3, i);
624  }
625  }
626  }
627 };
628 
636 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
637 void
638 do_pack (const LocalMatrix& local_matrix,
639  const LocalMap& local_map,
640  const Kokkos::View<char*, BufferDeviceType>& exports,
641  const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
643  const typename PackTraits<int>::input_array_type& source_pids,
644  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
645  const size_t num_bytes_per_value,
646  const bool pack_pids)
647 {
648  using LO = typename LocalMap::local_ordinal_type;
649  using DT = typename LocalMatrix::device_type;
650  using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
651  const char prefix[] = "Tpetra::Details::do_pack: ";
652 
653  if (export_lids.extent (0) != 0) {
654  TEUCHOS_TEST_FOR_EXCEPTION
655  (static_cast<size_t> (offsets.extent (0)) !=
656  static_cast<size_t> (export_lids.extent (0) + 1),
657  std::invalid_argument, prefix << "offsets.extent(0) = "
658  << offsets.extent (0) << " != export_lids.extent(0) (= "
659  << export_lids.extent (0) << ") + 1.");
660  TEUCHOS_TEST_FOR_EXCEPTION
661  (export_lids.extent (0) != num_packets_per_lid.extent (0),
662  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
663  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
664  << num_packets_per_lid.extent (0) << ".");
665  // If exports has nonzero length at this point, then the matrix
666  // has at least one entry to pack. Thus, if packing process
667  // ranks, we had better have at least one process rank to pack.
668  TEUCHOS_TEST_FOR_EXCEPTION
669  (pack_pids && exports.extent (0) != 0 &&
670  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
671  "pack_pids is true, and exports.extent(0) = " <<
672  exports.extent (0) << " != 0, meaning that we need to pack at "
673  "least one matrix entry, but source_pids.extent(0) = 0.");
674  }
675 
676  using pack_functor_type =
677  PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
678  pack_functor_type f (local_matrix, local_map, exports,
679  num_packets_per_lid, export_lids,
680  source_pids, offsets, num_bytes_per_value,
681  pack_pids);
682 
683  typename pack_functor_type::value_type result;
684  range_type range (0, num_packets_per_lid.extent (0));
685  Kokkos::parallel_reduce (range, f, result);
686 
687  if (result.first != 0) {
688  // We can't deep_copy from AnonymousSpace Views, so we can't print
689  // out any information from them in case of error.
690  TEUCHOS_TEST_FOR_EXCEPTION
691  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
692  "reported error code " << result.first << " for the first "
693  "bad row " << result.second << ".");
694  }
695 }
696 
726 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
727 void
729  Kokkos::DualView<char*, BufferDeviceType>& exports,
730  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
731  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
732  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
733  size_t& constant_num_packets,
734  const bool pack_pids,
735  Distributor& /* dist */)
736 {
737  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
738  "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
739  "Import/Export"
740  );
741  using Kokkos::View;
742  typedef BufferDeviceType DT;
743  typedef typename DT::execution_space execution_space;
744  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
745  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
746  constexpr bool debug = false;
747 
748  auto local_matrix = sourceMatrix.getLocalMatrix ();
749  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
750 
751  // Setting this to zero tells the caller to expect a possibly
752  // different ("nonconstant") number of packets per local index
753  // (i.e., a possibly different number of entries per row).
754  constant_num_packets = 0;
755 
756  const size_t num_export_lids =
757  static_cast<size_t> (export_lids.extent (0));
758  TEUCHOS_TEST_FOR_EXCEPTION
759  (num_export_lids !=
760  static_cast<size_t> (num_packets_per_lid.extent (0)),
761  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
762  << num_export_lids << " != num_packets_per_lid.extent(0) = "
763  << num_packets_per_lid.extent (0) << ".");
764  if (num_export_lids != 0) {
765  TEUCHOS_TEST_FOR_EXCEPTION
766  (num_packets_per_lid.data () == NULL, std::invalid_argument,
767  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
768  "num_packets_per_lid.data() = "
769  << num_packets_per_lid.data () << " == NULL.");
770  }
771 
772  const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
773  const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
774  const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
775 
776  size_t num_bytes_per_value = 0;
778  // Assume ST is default constructible; packValueCount wants an instance.
779  num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
780  }
781  else {
782  // Since the packed data come from the source matrix, we can use
783  // the source matrix to get the number of bytes per Scalar value
784  // stored in the matrix. This assumes that all Scalar values in
785  // the source matrix require the same number of bytes. If the
786  // source matrix has no entries on the calling process, then we
787  // hope that some process does have some idea how big a Scalar
788  // value is. Of course, if no processes have any entries, then no
789  // values should be packed (though this does assume that in our
790  // packing scheme, rows with zero entries take zero bytes).
791  size_t num_bytes_per_value_l = 0;
792  if (local_matrix.values.extent(0) > 0) {
793  const ST& val = local_matrix.values(0);
794  num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
795  }
796  using Teuchos::reduceAll;
797  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
798  Teuchos::REDUCE_MAX,
799  num_bytes_per_value_l,
800  Teuchos::outArg (num_bytes_per_value));
801  }
802 
803  if (num_export_lids == 0) {
804  exports = exports_view_type ("exports", 0);
805  return;
806  }
807 
808  // Array of offsets into the pack buffer.
809  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
810 
811  // Compute number of packets per LID (row to send), as well as
812  // corresponding offsets (the prefix sum of the packet counts).
813  const size_t count =
814  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
815  local_matrix.graph.row_map, export_lids,
816  export_pids,
817  num_bytes_per_lid, num_bytes_per_gid,
818  num_bytes_per_pid, num_bytes_per_value);
819 
820  // Resize the output pack buffer if needed.
821  if (count > static_cast<size_t> (exports.extent (0))) {
822  exports = exports_view_type ("exports", count);
823  if (debug) {
824  std::ostringstream os;
825  os << "*** exports resized to " << count << std::endl;
826  std::cerr << os.str ();
827  }
828  }
829  if (debug) {
830  std::ostringstream os;
831  os << "*** count: " << count << ", exports.extent(0): "
832  << exports.extent (0) << std::endl;
833  std::cerr << os.str ();
834  }
835 
836  // If exports has nonzero length at this point, then the matrix has
837  // at least one entry to pack. Thus, if packing process ranks, we
838  // had better have at least one process rank to pack.
839  TEUCHOS_TEST_FOR_EXCEPTION
840  (pack_pids && exports.extent (0) != 0 &&
841  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
842  "pack_pids is true, and exports.extent(0) = " <<
843  exports.extent (0) << " != 0, meaning that we need to pack at least "
844  "one matrix entry, but export_pids.extent(0) = 0.");
845 
846  typedef typename std::decay<decltype (local_matrix)>::type
847  local_matrix_type;
848  typedef typename std::decay<decltype (local_col_map)>::type
849  local_map_type;
850 
851  exports.modify_device ();
852  auto exports_d = exports.view_device ();
853  do_pack<local_matrix_type, local_map_type, DT>
854  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
855  export_lids, export_pids, offsets, num_bytes_per_value,
856  pack_pids);
857  // If we got this far, we succeeded.
858 }
859 
860 } // namespace PackCrsMatrixImpl
861 
// Pack rows of a CrsMatrix into a host Teuchos::Array<char> buffer
// (Teuchos::Array / Teuchos::ArrayView flavor of the interface).
//
// What the visible lines do:
//  1. Create buffer_device_type Views for numPacketsPerLID (output only, so
//     it is created with copy = false) and for exportLIDs (input only, so it
//     is created with copy = true).
//  2. Call PackCrsMatrixImpl::packCrsMatrix with pack_pids = false, a
//     default-constructed PID View, and a default-constructed exports
//     DualView.
//  3. Deep-copy the per-LID packet counts back into numPacketsPerLID.
//  4. Resize `exports` to exports_dv.extent(0) if the sizes differ, then
//     deep-copy exports_dv.d_view into it.
//
// Parameters visible here:
//  exports            - host output buffer; resized and overwritten.
//  numPacketsPerLID   - host output array; overwritten with the counts.
//  exportLIDs         - host input array; read only.
//  constantNumPackets - forwarded by reference to the Impl routine.
//  distor             - forwarded by reference to the Impl routine.
//
// NOTE(review): the embedded listing numbers jump from 863 to 865, so the
// line that names this function and declares `sourceMatrix` is not present
// in this listing. The explicit-instantiation macro at the end of the file
// instantiates Details::packCrsMatrix with a leading
// `const CrsMatrix<ST, LO, GO, NT>&` followed by exactly the parameter types
// below -- confirm against the original header.
862 template<typename ST, typename LO, typename GO, typename NT>
863 void
865  Teuchos::Array<char>& exports,
866  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
867  const Teuchos::ArrayView<const LO>& exportLIDs,
868  size_t& constantNumPackets,
869  Distributor& distor)
870 {
871  using local_matrix_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type;
872  using device_type = typename local_matrix_type::device_type;
873  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
// Host-side device type: execution space of the HostMirror of a device_type
// View, paired with Kokkos::HostSpace. Used below to wrap the raw host
// pointers of the Teuchos arrays in Views.
874  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
875  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
876 
877  // Convert all Teuchos::Array to Kokkos::View
878 
879  // This is an output array, so we don't have to copy to device here.
880  // However, we'll have to remember to copy back to host when done.
881  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
882  create_mirror_view_from_raw_host_array (buffer_device_type (),
883  numPacketsPerLID.getRawPtr (),
884  numPacketsPerLID.size (), false,
885  "num_packets_per_lid");
886  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
887  // DualView through here, instead of recreating a device View from a
888  // host ArrayView that itself came from a DualView.
889  //
890  // This is an input array, so we have to copy to device here.
891  // However, we never need to copy it back to host.
892  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
893  create_mirror_view_from_raw_host_array (buffer_device_type (),
894  exportLIDs.getRawPtr (),
895  exportLIDs.size (), true,
896  "export_lids");
897 
// pack_pids is false for this entry point, so export_pids_d is left
// default-constructed and exports_dv is handed over default-constructed.
// NOTE(review): exports_dv is presumably allocated inside the Impl routine,
// since its extent is read back below -- confirm.
898  Kokkos::View<int*, device_type> export_pids_d; // output arg
899  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
900  constexpr bool pack_pids = false;
901  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
902  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
903  export_pids_d, constantNumPackets, pack_pids, distor);
904 
905  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
906  // copy them back to host.
// Unmanaged host View wrapping the caller's numPacketsPerLID storage.
907  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
908  (numPacketsPerLID.getRawPtr (),
909  numPacketsPerLID.size ());
910  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
911 
912  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
913  // exports_dv above, then we have two host copies for exports_h.
914 
915  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
916  // to copy them back to host.
// Make the host array exactly as long as the packed buffer before wrapping
// it, so that source and destination of the deep_copy have equal extents.
917  if (static_cast<size_t> (exports.size ()) !=
918  static_cast<size_t> (exports_dv.extent (0))) {
919  exports.resize (exports_dv.extent (0));
920  }
921  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
922  exports.size ());
923  Kokkos::deep_copy (exports_h, exports_dv.d_view);
924 }
925 
// DualView flavor of the pack routine.
//
// Takes the exports buffer, the per-LID packet counts and the export LIDs as
// Kokkos::DualView objects on DistObject's buffer_device_type, and forwards
// the device views of the latter two (plus the exports DualView itself) to
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and a zero-length
// PID View.
//
// Parameters:
//  sourceMatrix       - matrix passed through to the Impl routine.
//  exports            - output DualView, passed through by reference.
//  numPacketsPerLID   - output counts; marked as modified on device here.
//  exportLIDs         - input LIDs; must not need a device sync on entry
//                       (enforced by TEUCHOS_ASSERT below).
//  constantNumPackets - forwarded by reference to the Impl routine.
//  distor             - forwarded by reference to the Impl routine.
//
// NOTE(review): the embedded listing numbers jump from 927 to 929, so the
// line that names this function is not present in this listing. The
// explicit-instantiation macro at the end of the file instantiates
// Details::packCrsMatrixNew with this parameter list -- confirm against the
// original header.
926 template<typename ST, typename LO, typename GO, typename NT>
927 void
929  const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
930  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
931  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
932  const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
933  size_t& constantNumPackets,
934  Distributor& distor
935 )
936 {
937  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
938  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
939 
940  // Create an empty array of PIDs, since the interface needs it.
941  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
942  constexpr bool pack_pids = false;
943 
944  // Write-only device access
// The parameter is a const reference, so the sync-state calls are made on a
// non-const copy of the DualView; the device view itself is then taken from
// the original parameter.
// NOTE(review): this relies on the copy sharing its data and modified flags
// with the original -- confirm against Kokkos::DualView copy semantics.
945  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
946  numPacketsPerLID_nc.clear_sync_state ();
947  numPacketsPerLID_nc.modify_device ();
948  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
949 
950  // Read-only device access
// No sync is performed here; the caller must hand in exportLIDs already
// up to date on device.
951  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
952  auto exportLIDs_d = exportLIDs.view_device ();
953 
// Profiling region object that lives until the end of this function, so it
// covers the Impl call below.
954  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
955  "Tpetra::Details::packCrsMatrixNew",
956  "Import/Export"
957  );
958  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
959  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
960  exportPIDs_d, constantNumPackets, pack_pids, distor);
961 }
962 
// Pack rows of a CrsMatrix together with process IDs (pack_pids = true).
//
// Host Teuchos::ArrayView inputs are mirrored into Views, the packing is
// delegated to PackCrsMatrixImpl::packCrsMatrix, and the per-LID packet
// counts are copied back into numPacketsPerLID. The packed bytes stay in the
// exports_dv DualView; the visible lines make no host copy of it.
//
// When Tpetra::Details::Behavior::verbose() is true, "start"/"done" messages
// and any exception thrown by the Impl call or the deep_copy are written to
// std::cerr with a "Proc <rank>: packCrsMatrixWithOwningPIDs: " prefix.
// Exceptions are always rethrown.
//
// Parameters visible here:
//  exports_dv         - output DualView, passed through by reference.
//  numPacketsPerLID   - host output array; overwritten with the counts.
//  exportLIDs         - host input array; read only.
//  sourcePIDs         - host input array of PIDs; read only.
//  constantNumPackets - forwarded by reference to the Impl routine.
//  distor             - forwarded by reference to the Impl routine.
//
// NOTE(review): the embedded listing numbers jump from 964 to 966 and from
// 1022 to 1024. The first gap is the line naming this function and declaring
// `sourceMatrix`; the explicit-instantiation macro at the end of the file
// instantiates Details::packCrsMatrixWithOwningPIDs with a leading
// `const CrsMatrix<ST, LO, GO, NT>&`. The second gap is the first line of the
// export_pids_d initializer (see the note there). Confirm both against the
// original header.
963 template<typename ST, typename LO, typename GO, typename NT>
964 void
966  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
967  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
968  const Teuchos::ArrayView<const LO>& exportLIDs,
969  const Teuchos::ArrayView<const int>& sourcePIDs,
970  size_t& constantNumPackets,
971  Distributor& distor)
972 {
973  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
974  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
975  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
976  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
977 
// NOTE(review): outputDevice is not referenced on any line visible in this
// listing; it may be used on the missing line 1023 -- confirm.
978  typename local_matrix_type::device_type outputDevice;
979 
980  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
// prefix is only allocated in verbose mode; every later dereference of it is
// guarded by `if (verbose)`.
981  std::unique_ptr<std::string> prefix;
982  if (verbose) {
// Rank used in the log prefix: -1 if the matrix has no Map, -2 if the Map
// has no communicator, otherwise the communicator's rank.
983  const int myRank = [&] () {
984  auto map = sourceMatrix.getMap ();
985  if (map.get () == nullptr) {
986  return -1;
987  }
988  auto comm = map->getComm ();
989  if (comm.get () == nullptr) {
990  return -2;
991  }
992  return comm->getRank ();
993  } ();
994  std::ostringstream os;
995  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
996  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
997 
// Each message is assembled in its own ostringstream and written to
// std::cerr with a single insertion.
998  std::ostringstream os2;
999  os2 << *prefix << "start" << std::endl;
1000  std::cerr << os2.str ();
1001  }
1002 
1003  // Convert all Teuchos::Array to Kokkos::View
1004 
1005  // This is an output array, so we don't have to copy to device here.
1006  // However, we'll have to remember to copy back to host when done.
1007  auto num_packets_per_lid_d =
1008  create_mirror_view_from_raw_host_array (buffer_device_type (),
1009  numPacketsPerLID.getRawPtr (),
1010  numPacketsPerLID.size (), false,
1011  "num_packets_per_lid");
1012 
1013  // This is an input array, so we have to copy to device here.
1014  // However, we never need to copy it back to host.
1015  auto export_lids_d =
1016  create_mirror_view_from_raw_host_array (buffer_device_type (),
1017  exportLIDs.getRawPtr (),
1018  exportLIDs.size (), true,
1019  "export_lids");
1020  // This is an input array, so we have to copy to device here.
1021  // However, we never need to copy it back to host.
// NOTE(review): the first line of this initializer (listing line 1023) is
// missing. By analogy with export_lids_d it is presumably a call to
// create_mirror_view_from_raw_host_array, but which device object it passes
// cannot be determined from this listing -- confirm.
1022  auto export_pids_d =
1024  sourcePIDs.getRawPtr (),
1025  sourcePIDs.size (), true,
1026  "export_pids");
1027  constexpr bool pack_pids = true;
// Delegate the packing. On any exception, log it in verbose mode and
// rethrow unchanged.
1028  try {
1029  PackCrsMatrixImpl::packCrsMatrix
1030  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1031  export_pids_d, constantNumPackets, pack_pids, distor);
1032  }
1033  catch (std::exception& e) {
1034  if (verbose) {
1035  std::ostringstream os;
1036  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1037  << e.what () << std::endl;
1038  std::cerr << os.str ();
1039  }
1040  throw;
1041  }
1042  catch (...) {
1043  if (verbose) {
1044  std::ostringstream os;
1045  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1046  "not a subclass of std::exception" << std::endl;
1047  std::cerr << os.str ();
1048  }
1049  throw;
1050  }
1051 
// The copy-back is skipped entirely when there are no counts to copy, so no
// host View is built over an empty array.
1052  if (numPacketsPerLID.size () != 0) {
1053  try {
1054  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1055  // so we have to copy them back to host.
1056  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1057  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1058  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1059  }
1060  catch (std::exception& e) {
1061  if (verbose) {
1062  std::ostringstream os;
1063  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1064  std::cerr << os.str ();
1065  }
1066  throw;
1067  }
1068  catch (...) {
1069  if (verbose) {
1070  std::ostringstream os;
1071  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1072  "of std::exception" << std::endl;
1073  std::cerr << os.str ();
1074  }
1075  throw;
1076  }
1077  }
1078 
1079  if (verbose) {
1080  std::ostringstream os;
1081  os << *prefix << "done" << std::endl;
1082  std::cerr << os.str ();
1083  }
1084 }
1085 
1086 } // namespace Details
1087 } // namespace Tpetra
1088 
// Explicit-instantiation macro. For one (ST, LO, GO, NT) combination it
// emits `template void` explicit instantiations of the three pack entry
// points in namespace Details: packCrsMatrix, packCrsMatrixNew and
// packCrsMatrixWithOwningPIDs. The names are written as `Details::...`, so
// the macro is meant to be expanded where `Details`, `CrsMatrix`,
// `DistObject` and `Distributor` resolve unqualified.
// NOTE(review): presumably that means inside namespace Tpetra -- confirm at
// the point of use.
1089 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1090  template void \
1091  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1092  Teuchos::Array<char>&, \
1093  const Teuchos::ArrayView<size_t>&, \
1094  const Teuchos::ArrayView<const LO>&, \
1095  size_t&, \
1096  Distributor&); \
1097  template void \
1098  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1099  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1100  const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1101  const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102  size_t&, \
1103  Distributor&); \
1104  template void \
1105  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1106  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1107  const Teuchos::ArrayView<size_t>&, \
1108  const Teuchos::ArrayView<const LO>&, \
1109  const Teuchos::ArrayView<const int>&, \
1110  size_t&, \
1111  Distributor&);
1112 
1113 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
Compute the number of packets and offsets for the pack procedure.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.