Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <sstream>
56 #include <stdexcept>
57 #include <string>
58 
81 
82 namespace Tpetra {
83 
84 //
85 // Users must never rely on anything in the Details namespace.
86 //
87 namespace Details {
88 
89 namespace PackCrsMatrixImpl {
97 template<class OutputOffsetsViewType,
98  class CountsViewType,
99  class InputOffsetsViewType,
100  class InputLocalRowIndicesViewType,
101  class InputLocalRowPidsViewType,
102  const bool debug =
103 #ifdef HAVE_TPETRA_DEBUG
104  true
105 #else
106  false
107 #endif // HAVE_TPETRA_DEBUG
108  >
110 public:
111  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112  typedef typename CountsViewType::non_const_value_type count_type;
113  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
116  // output Views drive where execution happens.
117  typedef typename OutputOffsetsViewType::device_type device_type;
118  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
119  typename device_type::execution_space>::value,
120  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122  "OutputOffsetsViewType must be a Kokkos::View.");
123  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125  static_assert (std::is_integral<output_offset_type>::value,
126  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128  "CountsViewType must be a Kokkos::View.");
129  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130  "CountsViewType must be a nonconst Kokkos::View.");
131  static_assert (std::is_integral<count_type>::value,
132  "The type of each entry of CountsViewType must be a built-in integer type.");
133  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134  "InputOffsetsViewType must be a Kokkos::View.");
135  static_assert (std::is_integral<input_offset_type>::value,
136  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138  "InputLocalRowIndicesViewType must be a Kokkos::View.");
139  static_assert (std::is_integral<local_row_index_type>::value,
140  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
141 
142  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
143  const CountsViewType& counts,
144  const InputOffsetsViewType& rowOffsets,
145  const InputLocalRowIndicesViewType& lclRowInds,
146  const InputLocalRowPidsViewType& lclRowPids,
147  const count_type sizeOfLclCount,
148  const count_type sizeOfGblColInd,
149  const count_type sizeOfPid,
150  const count_type sizeOfValue) :
151  outputOffsets_ (outputOffsets),
152  counts_ (counts),
153  rowOffsets_ (rowOffsets),
154  lclRowInds_ (lclRowInds),
155  lclRowPids_ (lclRowPids),
156  sizeOfLclCount_ (sizeOfLclCount),
157  sizeOfGblColInd_ (sizeOfGblColInd),
158  sizeOfPid_ (sizeOfPid),
159  sizeOfValue_ (sizeOfValue),
160  error_ ("error") // don't forget this, or you'll get segfaults!
161  {
162  if (debug) {
163  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
164 
165  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166  std::ostringstream os;
167  os << "lclRowInds.extent(0) = " << numRowsToPack
168  << " != counts.extent(0) = " << counts_.extent (0)
169  << ".";
170  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
171  }
172  if (static_cast<size_t> (numRowsToPack + 1) !=
173  static_cast<size_t> (outputOffsets_.extent (0))) {
174  std::ostringstream os;
175  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
177  << ".";
178  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
179  }
180  }
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  operator() (const local_row_index_type& curInd,
185  output_offset_type& update,
186  const bool final) const
187  {
188  if (debug) {
189  if (curInd < static_cast<local_row_index_type> (0)) {
190  error_ () = 1;
191  return;
192  }
193  }
194 
195  if (final) {
196  if (debug) {
197  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
198  error_ () = 2;
199  return;
200  }
201  }
202  outputOffsets_(curInd) = update;
203  }
204 
205  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206  const auto lclRow = lclRowInds_(curInd);
207  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
209  error_ () = 3;
210  return;
211  }
212  // count_type could differ from the type of each row offset.
213  // For example, row offsets might each be 64 bits, but if their
214  // difference always fits in 32 bits, we may then safely use a
215  // 32-bit count_type.
216  const count_type count =
217  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
218 
219  // We pack first the number of entries in the row, then that
220  // many global column indices, then that many pids (if any),
221  // then that many values. However, if the number of entries in
222  // the row is zero, we pack nothing.
223  const count_type numBytes = (count == 0) ?
224  static_cast<count_type> (0) :
225  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
227  sizeOfValue_);
228 
229  if (final) {
230  counts_(curInd) = numBytes;
231  }
232  update += numBytes;
233  }
234  }
235 
236  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
237  // have join both with and without volatile! Otherwise intrawarp
238  // joins are really slow on GPUs.
239 
241  int getError () const {
242  auto error_h = Kokkos::create_mirror_view (error_);
243  Kokkos::deep_copy (error_h, error_);
244  return error_h ();
245  }
246 
247 private:
248  OutputOffsetsViewType outputOffsets_;
249  CountsViewType counts_;
250  typename InputOffsetsViewType::const_type rowOffsets_;
251  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252  typename InputLocalRowPidsViewType::const_type lclRowPids_;
253  count_type sizeOfLclCount_;
254  count_type sizeOfGblColInd_;
255  count_type sizeOfPid_;
256  count_type sizeOfValue_;
257  Kokkos::View<int, device_type> error_;
258 };
259 
269 template<class OutputOffsetsViewType,
270  class CountsViewType,
271  class InputOffsetsViewType,
272  class InputLocalRowIndicesViewType,
273  class InputLocalRowPidsViewType>
274 typename CountsViewType::non_const_value_type
275 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
276  const CountsViewType& counts,
277  const InputOffsetsViewType& rowOffsets,
278  const InputLocalRowIndicesViewType& lclRowInds,
279  const InputLocalRowPidsViewType& lclRowPids,
280  const typename CountsViewType::non_const_value_type sizeOfLclCount,
281  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
282  const typename CountsViewType::non_const_value_type sizeOfPid,
283  const typename CountsViewType::non_const_value_type sizeOfValue)
284 {
285  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
286  CountsViewType, typename InputOffsetsViewType::const_type,
287  typename InputLocalRowIndicesViewType::const_type,
288  typename InputLocalRowPidsViewType::const_type> functor_type;
289  typedef typename CountsViewType::non_const_value_type count_type;
290  typedef typename OutputOffsetsViewType::size_type size_type;
291  typedef typename OutputOffsetsViewType::execution_space execution_space;
292  typedef typename functor_type::local_row_index_type LO;
293  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
294  const char prefix[] = "computeNumPacketsAndOffsets: ";
295 
296  count_type count = 0;
297  const count_type numRowsToPack = lclRowInds.extent (0);
298 
299  if (numRowsToPack == 0) {
300  return count;
301  }
302  else {
303  TEUCHOS_TEST_FOR_EXCEPTION
304  (rowOffsets.extent (0) <= static_cast<size_type> (1),
305  std::invalid_argument, prefix << "There is at least one row to pack, "
306  "but the matrix has no rows. lclRowInds.extent(0) = " <<
307  numRowsToPack << ", but rowOffsets.extent(0) = " <<
308  rowOffsets.extent (0) << " <= 1.");
309  TEUCHOS_TEST_FOR_EXCEPTION
310  (outputOffsets.extent (0) !=
311  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
312  prefix << "Output dimension does not match number of rows to pack. "
313  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
314  << " != lclRowInds.extent(0) + 1 = "
315  << static_cast<size_type> (numRowsToPack + 1) << ".");
316  TEUCHOS_TEST_FOR_EXCEPTION
317  (counts.extent (0) != numRowsToPack, std::invalid_argument,
318  prefix << "counts.extent(0) = " << counts.extent (0)
319  << " != numRowsToPack = " << numRowsToPack << ".");
320 
321  functor_type f (outputOffsets, counts, rowOffsets,
322  lclRowInds, lclRowPids, sizeOfLclCount,
323  sizeOfGblColInd, sizeOfPid, sizeOfValue);
324  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
325 
326  // At least in debug mode, this functor checks for errors.
327  const int errCode = f.getError ();
328  TEUCHOS_TEST_FOR_EXCEPTION
329  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
330  << errCode << " != 0.");
331 
332 #if 0
333  size_t total = 0;
334  for (LO k = 0; k < numRowsToPack; ++k) {
335  total += counts[k];
336  }
337  if (outputOffsets(numRowsToPack) != total) {
338  if (errStr.get () == NULL) {
339  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
340  }
341  std::ostringstream& os = *errStr;
342  os << prefix
343  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
344  << outputOffsets(numRowsToPack) << " != sum of counts = "
345  << total << "." << std::endl;
346  if (numRowsToPack != 0) {
347  // Only print the array if it's not too long.
348  if (numRowsToPack < static_cast<LO> (10)) {
349  os << "outputOffsets: [";
350  for (LO i = 0; i <= numRowsToPack; ++i) {
351  os << outputOffsets(i);
352  if (static_cast<LO> (i + 1) <= numRowsToPack) {
353  os << ",";
354  }
355  }
356  os << "]" << std::endl;
357  os << "counts: [";
358  for (LO i = 0; i < numRowsToPack; ++i) {
359  os << counts(i);
360  if (static_cast<LO> (i + 1) < numRowsToPack) {
361  os << ",";
362  }
363  }
364  os << "]" << std::endl;
365  }
366  else {
367  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
368  << outputOffsets(numRowsToPack-1) << "." << std::endl;
369  }
370  }
371  count = outputOffsets(numRowsToPack);
372  return {false, errStr};
373  }
374 #endif // HAVE_TPETRA_DEBUG
375 
376  // Get last entry of outputOffsets, which is the sum of the entries
377  // of counts. Don't assume UVM.
378  using Tpetra::Details::getEntryOnHost;
379  return static_cast<count_type> (getEntryOnHost (outputOffsets,
380  numRowsToPack));
381  }
382 }
383 
399 template<class ST, class ColumnMap, class BufferDeviceType>
400 KOKKOS_FUNCTION
401 Kokkos::pair<int, size_t>
402 packCrsMatrixRow (const ColumnMap& col_map,
403  const Kokkos::View<char*, BufferDeviceType>& exports,
405  const typename PackTraits<int>::input_array_type& pids_in,
406  const typename PackTraits<ST>::input_array_type& vals_in,
407  const size_t offset,
408  const size_t num_ent,
409  const size_t num_bytes_per_value,
410  const bool pack_pids)
411 {
412  using Kokkos::subview;
413  using LO = typename ColumnMap::local_ordinal_type;
414  using GO = typename ColumnMap::global_ordinal_type;
415  using return_type = Kokkos::pair<int, size_t>;
416 
417  if (num_ent == 0) {
418  // Empty rows always take zero bytes, to ensure sparsity.
419  return return_type (0, 0);
420  }
421 
422  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
423  const size_t num_ent_beg = offset;
424  const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
425 
426  const size_t gids_beg = num_ent_beg + num_ent_len;
427  const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
428 
429  const size_t pids_beg = gids_beg + gids_len;
430  const size_t pids_len = pack_pids ?
431  num_ent * PackTraits<int>::packValueCount (int (0)) :
432  static_cast<size_t> (0);
433 
434  const size_t vals_beg = gids_beg + gids_len + pids_len;
435  const size_t vals_len = num_ent * num_bytes_per_value;
436 
437  char* const num_ent_out = exports.data () + num_ent_beg;
438  char* const gids_out = exports.data () + gids_beg;
439  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
440  char* const vals_out = exports.data () + vals_beg;
441 
442  size_t num_bytes_out = 0;
443  int error_code = 0;
444  num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
445 
446  {
447  // Copy column indices one at a time, so that we don't need
448  // temporary storage.
449  for (size_t k = 0; k < num_ent; ++k) {
450  const LO lid = lids_in[k];
451  const GO gid = col_map.getGlobalElement (lid);
452  num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
453  }
454  // Copy PIDs one at a time, so that we don't need temporary storage.
455  if (pack_pids) {
456  for (size_t k = 0; k < num_ent; ++k) {
457  const LO lid = lids_in[k];
458  const int pid = pids_in[lid];
459  num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
460  }
461  }
462  const auto p =
463  PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
464  error_code += p.first;
465  num_bytes_out += p.second;
466  }
467 
468  if (error_code != 0) {
469  return return_type (10, num_bytes_out);
470  }
471 
472  const size_t expected_num_bytes =
473  num_ent_len + gids_len + pids_len + vals_len;
474  if (num_bytes_out != expected_num_bytes) {
475  return return_type (11, num_bytes_out);
476  }
477  return return_type (0, num_bytes_out);
478 }
479 
480 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
481 struct PackCrsMatrixFunctor {
482  typedef LocalMatrix local_matrix_device_type;
483  typedef LocalMap local_map_type;
484  typedef typename local_matrix_device_type::value_type ST;
485  typedef typename local_map_type::local_ordinal_type LO;
486  typedef typename local_map_type::global_ordinal_type GO;
487  typedef typename local_matrix_device_type::device_type DT;
488 
489  typedef Kokkos::View<const size_t*, BufferDeviceType>
490  num_packets_per_lid_view_type;
491  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
493  using export_lids_view_type = typename PackTraits<LO>::input_array_type;
494  using source_pids_view_type = typename PackTraits<int>::input_array_type;
495 
496  typedef typename num_packets_per_lid_view_type::non_const_value_type
497  count_type;
498  typedef typename offsets_view_type::non_const_value_type
499  offset_type;
500  typedef Kokkos::pair<int, LO> value_type;
501 
502  static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503  "local_map_type::local_ordinal_type and "
504  "local_matrix_device_type::ordinal_type must be the same.");
505 
506  local_matrix_device_type local_matrix;
507  local_map_type local_col_map;
508  exports_view_type exports;
509  num_packets_per_lid_view_type num_packets_per_lid;
510  export_lids_view_type export_lids;
511  source_pids_view_type source_pids;
512  offsets_view_type offsets;
513  size_t num_bytes_per_value;
514  bool pack_pids;
515 
516  PackCrsMatrixFunctor (const local_matrix_device_type& local_matrix_in,
517  const local_map_type& local_col_map_in,
518  const exports_view_type& exports_in,
519  const num_packets_per_lid_view_type& num_packets_per_lid_in,
520  const export_lids_view_type& export_lids_in,
521  const source_pids_view_type& source_pids_in,
522  const offsets_view_type& offsets_in,
523  const size_t num_bytes_per_value_in,
524  const bool pack_pids_in) :
525  local_matrix (local_matrix_in),
526  local_col_map (local_col_map_in),
527  exports (exports_in),
528  num_packets_per_lid (num_packets_per_lid_in),
529  export_lids (export_lids_in),
530  source_pids (source_pids_in),
531  offsets (offsets_in),
532  num_bytes_per_value (num_bytes_per_value_in),
533  pack_pids (pack_pids_in)
534  {
535  const LO numRows = local_matrix_in.numRows ();
536  const LO rowMapDim =
537  static_cast<LO> (local_matrix.graph.row_map.extent (0));
538  TEUCHOS_TEST_FOR_EXCEPTION
539  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
540  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
541  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
542  }
543 
544  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
545  {
546  using ::Tpetra::Details::OrdinalTraits;
547  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
548  }
549 
550  KOKKOS_INLINE_FUNCTION void
551  join (volatile value_type& dst, const volatile value_type& src) const
552  {
553  // `dst` should reflect the first (least) bad index and all other
554  // associated error codes and data, so prefer keeping it.
555  if (src.first != 0 && dst.first == 0) {
556  dst = src;
557  }
558  }
559 
560  KOKKOS_INLINE_FUNCTION
561  void operator() (const LO i, value_type& dst) const
562  {
563  const size_t offset = offsets[i];
564  const LO export_lid = export_lids[i];
565  const size_t buf_size = exports.size();
566  const size_t num_bytes = num_packets_per_lid(i);
567  const size_t num_ent =
568  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569  - local_matrix.graph.row_map[export_lid]);
570 
571  // Only pack this row's data if it has a nonzero number of
572  // entries. We can do this because receiving processes get the
573  // number of packets, and will know that zero packets means zero
574  // entries.
575  if (num_ent == 0) {
576  return;
577  }
578 
579  if (export_lid >= local_matrix.numRows ()) {
580  if (dst.first != 0) { // keep only the first error
581  dst = Kokkos::make_pair (1, i); // invalid row
582  }
583  return;
584  }
585  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586  if (dst.first != 0) { // keep only the first error
587  dst = Kokkos::make_pair (2, i); // out of bounds
588  }
589  return;
590  }
591 
592  // We can now pack this row
593 
594  // Since the matrix is locally indexed on the calling process, we
595  // have to use its column Map (which it _must_ have in this case)
596  // to convert to global indices.
597  const auto row_beg = local_matrix.graph.row_map[export_lid];
598  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599  auto vals_in = subview (local_matrix.values,
600  Kokkos::make_pair (row_beg, row_end));
601  auto lids_in = subview (local_matrix.graph.entries,
602  Kokkos::make_pair (row_beg, row_end));
603  typedef local_map_type LMT;
604  typedef BufferDeviceType BDT;
605  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606  source_pids, vals_in, offset,
607  num_ent, num_bytes_per_value,
608  pack_pids);
609  int error_code_this_row = p.first;
610  size_t num_bytes_packed_this_row = p.second;
611  if (error_code_this_row != 0) {
612  if (dst.first != 0) { // keep only the first error
613  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
614  }
615  }
616  else if (num_bytes_packed_this_row != num_bytes) {
617  if (dst.first != 0) { // keep only the first error
618  dst = Kokkos::make_pair (3, i);
619  }
620  }
621  }
622 };
623 
631 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
632 void
633 do_pack (const LocalMatrix& local_matrix,
634  const LocalMap& local_map,
635  const Kokkos::View<char*, BufferDeviceType>& exports,
636  const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
638  const typename PackTraits<int>::input_array_type& source_pids,
639  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640  const size_t num_bytes_per_value,
641  const bool pack_pids)
642 {
643  using LO = typename LocalMap::local_ordinal_type;
644  using DT = typename LocalMatrix::device_type;
645  using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
646  const char prefix[] = "Tpetra::Details::do_pack: ";
647 
648  if (export_lids.extent (0) != 0) {
649  TEUCHOS_TEST_FOR_EXCEPTION
650  (static_cast<size_t> (offsets.extent (0)) !=
651  static_cast<size_t> (export_lids.extent (0) + 1),
652  std::invalid_argument, prefix << "offsets.extent(0) = "
653  << offsets.extent (0) << " != export_lids.extent(0) (= "
654  << export_lids.extent (0) << ") + 1.");
655  TEUCHOS_TEST_FOR_EXCEPTION
656  (export_lids.extent (0) != num_packets_per_lid.extent (0),
657  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
658  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
659  << num_packets_per_lid.extent (0) << ".");
660  // If exports has nonzero length at this point, then the matrix
661  // has at least one entry to pack. Thus, if packing process
662  // ranks, we had better have at least one process rank to pack.
663  TEUCHOS_TEST_FOR_EXCEPTION
664  (pack_pids && exports.extent (0) != 0 &&
665  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
666  "pack_pids is true, and exports.extent(0) = " <<
667  exports.extent (0) << " != 0, meaning that we need to pack at "
668  "least one matrix entry, but source_pids.extent(0) = 0.");
669  }
670 
671  using pack_functor_type =
672  PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
673  pack_functor_type f (local_matrix, local_map, exports,
674  num_packets_per_lid, export_lids,
675  source_pids, offsets, num_bytes_per_value,
676  pack_pids);
677 
678  typename pack_functor_type::value_type result;
679  range_type range (0, num_packets_per_lid.extent (0));
680  Kokkos::parallel_reduce (range, f, result);
681 
682  if (result.first != 0) {
683  // We can't deep_copy from AnonymousSpace Views, so we can't print
684  // out any information from them in case of error.
685  TEUCHOS_TEST_FOR_EXCEPTION
686  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
687  "reported error code " << result.first << " for the first "
688  "bad row " << result.second << ".");
689  }
690 }
691 
721 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
722 void
724  Kokkos::DualView<char*, BufferDeviceType>& exports,
725  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
728  size_t& constant_num_packets,
729  const bool pack_pids)
730 {
731  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
732  "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
733  "Import/Export"
734  );
735  using Kokkos::View;
736  typedef BufferDeviceType DT;
737  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
738  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
739  constexpr bool debug = false;
740 
741  auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
742  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
743 
744  // Setting this to zero tells the caller to expect a possibly
745  // different ("nonconstant") number of packets per local index
746  // (i.e., a possibly different number of entries per row).
747  constant_num_packets = 0;
748 
749  const size_t num_export_lids =
750  static_cast<size_t> (export_lids.extent (0));
751  TEUCHOS_TEST_FOR_EXCEPTION
752  (num_export_lids !=
753  static_cast<size_t> (num_packets_per_lid.extent (0)),
754  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
755  << num_export_lids << " != num_packets_per_lid.extent(0) = "
756  << num_packets_per_lid.extent (0) << ".");
757  if (num_export_lids != 0) {
758  TEUCHOS_TEST_FOR_EXCEPTION
759  (num_packets_per_lid.data () == NULL, std::invalid_argument,
760  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
761  "num_packets_per_lid.data() = "
762  << num_packets_per_lid.data () << " == NULL.");
763  }
764 
765  const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
766  const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
767  const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
768 
769  size_t num_bytes_per_value = 0;
771  // Assume ST is default constructible; packValueCount wants an instance.
772  num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
773  }
774  else {
775  // Since the packed data come from the source matrix, we can use
776  // the source matrix to get the number of bytes per Scalar value
777  // stored in the matrix. This assumes that all Scalar values in
778  // the source matrix require the same number of bytes. If the
779  // source matrix has no entries on the calling process, then we
780  // hope that some process does have some idea how big a Scalar
781  // value is. Of course, if no processes have any entries, then no
782  // values should be packed (though this does assume that in our
783  // packing scheme, rows with zero entries take zero bytes).
784  size_t num_bytes_per_value_l = 0;
785  if (local_matrix.values.extent(0) > 0) {
786  const ST& val = local_matrix.values(0);
787  num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
788  }
789  using Teuchos::reduceAll;
790  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
791  Teuchos::REDUCE_MAX,
792  num_bytes_per_value_l,
793  Teuchos::outArg (num_bytes_per_value));
794  }
795 
796  if (num_export_lids == 0) {
797  exports = exports_view_type ("exports", 0);
798  return;
799  }
800 
801  // Array of offsets into the pack buffer.
802  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
803 
804  // Compute number of packets per LID (row to send), as well as
805  // corresponding offsets (the prefix sum of the packet counts).
806  const size_t count =
807  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808  local_matrix.graph.row_map, export_lids,
809  export_pids,
810  num_bytes_per_lid, num_bytes_per_gid,
811  num_bytes_per_pid, num_bytes_per_value);
812 
813  // Resize the output pack buffer if needed.
814  if (count > static_cast<size_t> (exports.extent (0))) {
815  exports = exports_view_type ("exports", count);
816  if (debug) {
817  std::ostringstream os;
818  os << "*** exports resized to " << count << std::endl;
819  std::cerr << os.str ();
820  }
821  }
822  if (debug) {
823  std::ostringstream os;
824  os << "*** count: " << count << ", exports.extent(0): "
825  << exports.extent (0) << std::endl;
826  std::cerr << os.str ();
827  }
828 
829  // If exports has nonzero length at this point, then the matrix has
830  // at least one entry to pack. Thus, if packing process ranks, we
831  // had better have at least one process rank to pack.
832  TEUCHOS_TEST_FOR_EXCEPTION
833  (pack_pids && exports.extent (0) != 0 &&
834  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
835  "pack_pids is true, and exports.extent(0) = " <<
836  exports.extent (0) << " != 0, meaning that we need to pack at least "
837  "one matrix entry, but export_pids.extent(0) = 0.");
838 
839  typedef typename std::decay<decltype (local_matrix)>::type
840  local_matrix_device_type;
841  typedef typename std::decay<decltype (local_col_map)>::type
842  local_map_type;
843 
844  exports.modify_device ();
845  auto exports_d = exports.view_device ();
846  do_pack<local_matrix_device_type, local_map_type, DT>
847  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
848  export_lids, export_pids, offsets, num_bytes_per_value,
849  pack_pids);
850  // If we got this far, we succeeded.
851 }
852 
853 } // namespace PackCrsMatrixImpl
854 
855 template<typename ST, typename LO, typename GO, typename NT>
856 void
858  Teuchos::Array<char>& exports,
859  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
860  const Teuchos::ArrayView<const LO>& exportLIDs,
861  size_t& constantNumPackets)
862 {
863  using local_matrix_device_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type;
864  using device_type = typename local_matrix_device_type::device_type;
865  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
866  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
868 
869  // Convert all Teuchos::Array to Kokkos::View
870 
871  // This is an output array, so we don't have to copy to device here.
872  // However, we'll have to remember to copy back to host when done.
873  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
874  create_mirror_view_from_raw_host_array (buffer_device_type (),
875  numPacketsPerLID.getRawPtr (),
876  numPacketsPerLID.size (), false,
877  "num_packets_per_lid");
878  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
879  // DualView through here, instead of recreating a device View from a
880  // host ArrayView that itself came from a DualView.
881  //
882  // This is an input array, so we have to copy to device here.
883  // However, we never need to copy it back to host.
884  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
885  create_mirror_view_from_raw_host_array (buffer_device_type (),
886  exportLIDs.getRawPtr (),
887  exportLIDs.size (), true,
888  "export_lids");
889 
890  Kokkos::View<int*, device_type> export_pids_d; // output arg
891  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
892  constexpr bool pack_pids = false;
893  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
894  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
895  export_pids_d, constantNumPackets, pack_pids);
896 
897  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
898  // copy them back to host.
899  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
900  (numPacketsPerLID.getRawPtr (),
901  numPacketsPerLID.size ());
902  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
903 
904  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
905  // exports_dv above, then we have two host copies for exports_h.
906 
907  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
908  // to copy them back to host.
909  if (static_cast<size_t> (exports.size ()) !=
910  static_cast<size_t> (exports_dv.extent (0))) {
911  exports.resize (exports_dv.extent (0));
912  }
913  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
914  exports.size ());
915  Kokkos::deep_copy (exports_h, exports_dv.d_view);
916 }
917 
918 template<typename ST, typename LO, typename GO, typename NT>
919 void
921  const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
922  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
923  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
924  const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
925  size_t& constantNumPackets)
926 {
927  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
928  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
929 
930  // Create an empty array of PIDs, since the interface needs it.
931  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
932  constexpr bool pack_pids = false;
933 
934  // Write-only device access
935  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
936  numPacketsPerLID_nc.clear_sync_state ();
937  numPacketsPerLID_nc.modify_device ();
938  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
939 
940  // Read-only device access
941  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
942  auto exportLIDs_d = exportLIDs.view_device ();
943 
944  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
945  "Tpetra::Details::packCrsMatrixNew",
946  "Import/Export"
947  );
948  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
949  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
950  exportPIDs_d, constantNumPackets, pack_pids);
951 }
952 
953 template<typename ST, typename LO, typename GO, typename NT>
954 void
956  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
957  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
958  const Teuchos::ArrayView<const LO>& exportLIDs,
959  const Teuchos::ArrayView<const int>& sourcePIDs,
960  size_t& constantNumPackets)
961 {
962  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type local_matrix_device_type;
963  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
964  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
966 
967  typename local_matrix_device_type::device_type outputDevice;
968 
969  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
970  std::unique_ptr<std::string> prefix;
971  if (verbose) {
972  const int myRank = [&] () {
973  auto map = sourceMatrix.getMap ();
974  if (map.get () == nullptr) {
975  return -1;
976  }
977  auto comm = map->getComm ();
978  if (comm.get () == nullptr) {
979  return -2;
980  }
981  return comm->getRank ();
982  } ();
983  std::ostringstream os;
984  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
985  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
986 
987  std::ostringstream os2;
988  os2 << *prefix << "start" << std::endl;
989  std::cerr << os2.str ();
990  }
991 
992  // Convert all Teuchos::Array to Kokkos::View
993 
994  // This is an output array, so we don't have to copy to device here.
995  // However, we'll have to remember to copy back to host when done.
996  auto num_packets_per_lid_d =
997  create_mirror_view_from_raw_host_array (buffer_device_type (),
998  numPacketsPerLID.getRawPtr (),
999  numPacketsPerLID.size (), false,
1000  "num_packets_per_lid");
1001 
1002  // This is an input array, so we have to copy to device here.
1003  // However, we never need to copy it back to host.
1004  auto export_lids_d =
1005  create_mirror_view_from_raw_host_array (buffer_device_type (),
1006  exportLIDs.getRawPtr (),
1007  exportLIDs.size (), true,
1008  "export_lids");
1009  // This is an input array, so we have to copy to device here.
1010  // However, we never need to copy it back to host.
1011  auto export_pids_d =
1013  sourcePIDs.getRawPtr (),
1014  sourcePIDs.size (), true,
1015  "export_pids");
1016  constexpr bool pack_pids = true;
1017  try {
1018  PackCrsMatrixImpl::packCrsMatrix
1019  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1020  export_pids_d, constantNumPackets, pack_pids);
1021  }
1022  catch (std::exception& e) {
1023  if (verbose) {
1024  std::ostringstream os;
1025  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1026  << e.what () << std::endl;
1027  std::cerr << os.str ();
1028  }
1029  throw;
1030  }
1031  catch (...) {
1032  if (verbose) {
1033  std::ostringstream os;
1034  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1035  "not a subclass of std::exception" << std::endl;
1036  std::cerr << os.str ();
1037  }
1038  throw;
1039  }
1040 
1041  if (numPacketsPerLID.size () != 0) {
1042  try {
1043  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1044  // so we have to copy them back to host.
1045  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1046  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1047  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1048  }
1049  catch (std::exception& e) {
1050  if (verbose) {
1051  std::ostringstream os;
1052  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1053  std::cerr << os.str ();
1054  }
1055  throw;
1056  }
1057  catch (...) {
1058  if (verbose) {
1059  std::ostringstream os;
1060  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1061  "of std::exception" << std::endl;
1062  std::cerr << os.str ();
1063  }
1064  throw;
1065  }
1066  }
1067 
1068  if (verbose) {
1069  std::ostringstream os;
1070  os << *prefix << "done" << std::endl;
1071  std::cerr << os.str ();
1072  }
1073 }
1074 
1075 } // namespace Details
1076 } // namespace Tpetra
1077 
// Explicit instantiation macro.  For one (ST, LO, GO, NT) combination
// it instantiates the three public packing entry points:
// Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs.
//
// NOTE(review): the names are written relative to the Tpetra
// namespace (Details::..., CrsMatrix, DistObject), so this presumably
// has to be expanded inside namespace Tpetra -- confirm against the
// instantiation files.
#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
  template void \
  Details::packCrsMatrix<ST, LO, GO, NT> ( \
    const CrsMatrix<ST, LO, GO, NT>&, \
    Teuchos::Array<char>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&); \
  template void \
  Details::packCrsMatrixNew<ST, LO, GO, NT> ( \
    const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    size_t&); \
  template void \
  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> ( \
    const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&);
1098 
1099 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
"Local" part of Map suitable for Kokkos kernels.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Implementation details of Tpetra.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Compute the number of packets and offsets for the pack procedure.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
static bool verbose()
Whether Tpetra is in verbose mode.
typename Node::device_type device_type
The Kokkos device type.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Base class for distributed Tpetra objects that support data redistribution.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.