Intrepid2
Intrepid2_Data.hpp
Go to the documentation of this file.
1 //
2 // Intrepid2_Data.hpp
3 // QuadraturePerformance
4 //
5 // Created by Roberts, Nathan V on 8/24/20.
6 //
7 
8 #ifndef Intrepid2_Data_h
9 #define Intrepid2_Data_h
10 
12 #include "Intrepid2_ScalarView.hpp"
13 #include "Intrepid2_Utils.hpp"
14 
20 namespace Intrepid2 {
32  {
37  };
38 
43  {
44  int logicalExtent;
45  DataVariationType variationType;
46  int dataExtent;
47  int variationModulus; // should be equal to dataExtent variationType other than MODULAR and CONSTANT
48  int blockPlusDiagonalLastNonDiagonal = -1; // only relevant for variationType == BLOCK_PLUS_DIAGONAL
49  };
50 
52  KOKKOS_INLINE_FUNCTION
54  {
55  const int myNominalExtent = myData.logicalExtent;
56 #ifdef HAVE_INTREPID2_DEBUG
57  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myNominalExtent != otherData.logicalExtent, std::invalid_argument, "both Data objects must match in their logical extent in the specified dimension");
58 #endif
59  const DataVariationType & myVariation = myData.variationType;
60  const DataVariationType & otherVariation = otherData.variationType;
61 
62  const int & myVariationModulus = myData.variationModulus;
63  const int & otherVariationModulus = otherData.variationModulus;
64 
65  int myDataExtent = myData.dataExtent;
66  int otherDataExtent = otherData.dataExtent;
67 
69  combinedDimensionInfo.logicalExtent = myNominalExtent;
70 
71  switch (myVariation)
72  {
73  case CONSTANT:
74  switch (otherVariation)
75  {
76  case CONSTANT:
77  case MODULAR:
78  case GENERAL:
80  combinedDimensionInfo = otherData;
81  }
82  break;
83  case MODULAR:
84  switch (otherVariation)
85  {
86  case CONSTANT:
87  combinedDimensionInfo = myData;
88  break;
89  case MODULAR:
90  if (myVariationModulus == otherVariationModulus)
91  {
92  // in this case, expect both to have the same data extent
93  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myDataExtent != otherDataExtent, std::logic_error, "Unexpected data extent/modulus combination");
94  combinedDimensionInfo.variationType = MODULAR;
95  combinedDimensionInfo.dataExtent = myDataExtent;
96  combinedDimensionInfo.variationModulus = myVariationModulus;
97  }
98  else
99  {
100  // both modular with two different moduli
101  // we could do something clever with e.g. least common multiples, but for now we just use GENERAL
102  // (this is not a use case we anticipate being a common one)
103  combinedDimensionInfo.variationType = GENERAL;
104  combinedDimensionInfo.dataExtent = myNominalExtent;
105  combinedDimensionInfo.variationModulus = myNominalExtent;
106  }
107  break;
108  case BLOCK_PLUS_DIAGONAL:
109  combinedDimensionInfo.variationType = GENERAL;
110  combinedDimensionInfo.dataExtent = myNominalExtent;
111  combinedDimensionInfo.variationModulus = myNominalExtent;
112  break;
113  case GENERAL:
114  // otherData is GENERAL: its info dominates
115  combinedDimensionInfo = otherData;
116  break;
117  }
118  break;
119  case BLOCK_PLUS_DIAGONAL:
120  switch (otherVariation)
121  {
122  case CONSTANT:
123  combinedDimensionInfo = myData;
124  break;
125  case MODULAR:
126  combinedDimensionInfo.variationType = GENERAL;
127  combinedDimensionInfo.dataExtent = myNominalExtent;
128  combinedDimensionInfo.variationModulus = myNominalExtent;
129  break;
130  case GENERAL:
131  // otherData is GENERAL: its info dominates
132  combinedDimensionInfo = otherData;
133  break;
134  case BLOCK_PLUS_DIAGONAL:
135  combinedDimensionInfo.variationType = GENERAL;
136  combinedDimensionInfo.dataExtent = max(myDataExtent,otherDataExtent);
137  combinedDimensionInfo.variationModulus = combinedDimensionInfo.dataExtent;
138  // for this case, we want to take the minimum of the two Data objects' blockPlusDiagonalLastNonDiagonal as the combined object's blockPlusDiagonalLastNonDiagonal
139  combinedDimensionInfo.blockPlusDiagonalLastNonDiagonal = min(myData.blockPlusDiagonalLastNonDiagonal, otherData.blockPlusDiagonalLastNonDiagonal);
140  }
141  break;
142  case GENERAL:
143  switch (otherVariation)
144  {
145  case CONSTANT:
146  case MODULAR:
147  case GENERAL:
148  case BLOCK_PLUS_DIAGONAL:
149  combinedDimensionInfo = myData;
150  }
151  }
152  return combinedDimensionInfo;
153  }
154 
163 template<class DataScalar,typename DeviceType>
164 class ZeroView {
165 public:
166  static ScalarView<DataScalar,DeviceType> zeroView()
167  {
168  static ScalarView<DataScalar,DeviceType> zeroView = ScalarView<DataScalar,DeviceType>("zero",1);
169  static bool havePushedFinalizeHook = false;
170  if (!havePushedFinalizeHook)
171  {
172  Kokkos::push_finalize_hook( [=] {
173  zeroView = ScalarView<DataScalar,DeviceType>();
174  });
175  havePushedFinalizeHook = true;
176  }
177  return zeroView;
178  }
179 };
180 
198  template<class DataScalar,typename DeviceType>
199  class Data {
200  public:
201  using value_type = DataScalar;
202  using execution_space = typename DeviceType::execution_space;
203  private:
204  ordinal_type dataRank_;
205  Kokkos::View<DataScalar*, DeviceType> data1_; // the rank 1 data that is explicitly stored
206  Kokkos::View<DataScalar**, DeviceType> data2_; // the rank 2 data that is explicitly stored
207  Kokkos::View<DataScalar***, DeviceType> data3_; // the rank 3 data that is explicitly stored
208  Kokkos::View<DataScalar****, DeviceType> data4_; // the rank 4 data that is explicitly stored
209  Kokkos::View<DataScalar*****, DeviceType> data5_; // the rank 5 data that is explicitly stored
210  Kokkos::View<DataScalar******, DeviceType> data6_; // the rank 6 data that is explicitly stored
211  Kokkos::View<DataScalar*******, DeviceType> data7_; // the rank 7 data that is explicitly stored
212  Kokkos::Array<int,7> extents_; // logical extents in each dimension
213  Kokkos::Array<DataVariationType,7> variationType_; // for each dimension, whether the data varies in that dimension
214  Kokkos::Array<int,7> variationModulus_; // for each dimension, a value by which indices should be modulused (only used when variationType_ is MODULAR)
215  int blockPlusDiagonalLastNonDiagonal_ = -1; // last row/column that is part of the non-diagonal part of the matrix indicated by BLOCK_PLUS_DIAGONAL (if any dimensions are thus marked)
216 
217  bool hasNontrivialModulusUNUSED_; // this is a little nutty, but having this UNUSED member variable improves performance, probably by shifting the alignment of underlyingMatchesLogical_. This is true with nvcc; it may also be true with Apple clang
218  bool underlyingMatchesLogical_; // if true, this Data object has the same rank and extent as the underlying view
219  Kokkos::Array<ordinal_type,7> activeDims_;
220  int numActiveDims_; // how many of the 7 entries are actually filled in
221 
222  ordinal_type rank_;
223 
224  using reference_type = typename ScalarView<DataScalar,DeviceType>::reference_type;
225  using const_reference_type = typename ScalarView<const DataScalar,DeviceType>::reference_type;
226  // we use reference_type as the return for operator() for performance reasons, especially significant when using Sacado types
227  using return_type = const_reference_type;
228 
229  ScalarView<DataScalar,DeviceType> zeroView_; // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
230 
232  KOKKOS_INLINE_FUNCTION
233  static int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
234  {
235  return (lastNondiagonal + 1) * (lastNondiagonal + 1);
236  }
237 
239  KOKKOS_INLINE_FUNCTION
240  static int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
241  {
242  return i * (lastNondiagonal + 1) + j;
243  }
244 
246  KOKKOS_INLINE_FUNCTION
247  static int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
248  {
249  return i - (lastNondiagonal + 1) + numNondiagonalEntries;
250  }
251 
253  KOKKOS_INLINE_FUNCTION
254  int getUnderlyingViewExtent(const int &dim) const
255  {
256  switch (dataRank_)
257  {
258  case 1: return data1_.extent_int(dim);
259  case 2: return data2_.extent_int(dim);
260  case 3: return data3_.extent_int(dim);
261  case 4: return data4_.extent_int(dim);
262  case 5: return data5_.extent_int(dim);
263  case 6: return data6_.extent_int(dim);
264  case 7: return data7_.extent_int(dim);
265  default: return -1;
266  }
267  }
268 
271  {
272  zeroView_ = ZeroView<DataScalar,DeviceType>::zeroView(); // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
273  // check that rank is compatible with the claimed extents:
274  for (int d=rank_; d<7; d++)
275  {
276  INTREPID2_TEST_FOR_EXCEPTION(extents_[d] > 1, std::invalid_argument, "Nominal extents may not be > 1 in dimensions beyond the rank of the container");
277  }
278 
279  numActiveDims_ = 0;
280  int blockPlusDiagonalCount = 0;
281  underlyingMatchesLogical_ = true;
282  for (ordinal_type i=0; i<7; i++)
283  {
284  if (variationType_[i] == GENERAL)
285  {
286  if (extents_[i] != 0)
287  {
288  variationModulus_[i] = extents_[i];
289  }
290  else
291  {
292  variationModulus_[i] = 1;
293  }
294  activeDims_[numActiveDims_] = i;
295  numActiveDims_++;
296  }
297  else if (variationType_[i] == MODULAR)
298  {
299  underlyingMatchesLogical_ = false;
300  if (extents_[i] != getUnderlyingViewExtent(numActiveDims_))
301  {
302  const int dataExtent = getUnderlyingViewExtent(numActiveDims_);
303  const int logicalExtent = extents_[i];
304  const int modulus = dataExtent;
305 
306  INTREPID2_TEST_FOR_EXCEPTION( dataExtent * (logicalExtent / dataExtent) != logicalExtent, std::invalid_argument, "data extent must evenly divide logical extent");
307 
308  variationModulus_[i] = modulus;
309  }
310  else
311  {
312  variationModulus_[i] = extents_[i];
313  }
314  activeDims_[numActiveDims_] = i;
315  numActiveDims_++;
316  }
317  else if (variationType_[i] == BLOCK_PLUS_DIAGONAL)
318  {
319  underlyingMatchesLogical_ = false;
320  blockPlusDiagonalCount++;
321  if (blockPlusDiagonalCount == 1) // first dimension thus marked --> active
322  {
323 
324 #ifdef HAVE_INTREPID2_DEBUG
325  const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
326  const int dataExtent = getUnderlyingViewExtent(numActiveDims_); // flat storage of all matrix entries
327  const int logicalExtent = extents_[i];
328  const int numDiagonalEntries = logicalExtent - (blockPlusDiagonalLastNonDiagonal_ + 1);
329  const int expectedDataExtent = numNondiagonalEntries + numDiagonalEntries;
330  INTREPID2_TEST_FOR_EXCEPTION(dataExtent != expectedDataExtent, std::invalid_argument, ("BLOCK_PLUS_DIAGONAL data extent of " + std::to_string(dataExtent) + " does not match expected based on blockPlusDiagonalLastNonDiagonal setting of " + std::to_string(blockPlusDiagonalLastNonDiagonal_)).c_str());
331 #endif
332 
333  activeDims_[numActiveDims_] = i;
334  numActiveDims_++;
335  }
336  variationModulus_[i] = getUnderlyingViewExtent(numActiveDims_);
337  INTREPID2_TEST_FOR_EXCEPTION(variationType_[i+1] != BLOCK_PLUS_DIAGONAL, std::invalid_argument, "BLOCK_PLUS_DIAGONAL ranks must be contiguous");
338  i++; // skip over the next BLOCK_PLUS_DIAGONAL
339  variationModulus_[i] = 1; // trivial modulus (should not ever be used)
340  INTREPID2_TEST_FOR_EXCEPTION(blockPlusDiagonalCount > 1, std::invalid_argument, "BLOCK_PLUS_DIAGONAL can only apply to two ranks");
341  }
342  else // CONSTANT
343  {
344  if (i < rank_)
345  {
346  underlyingMatchesLogical_ = false;
347  }
348  variationModulus_[i] = 1; // trivial modulus
349  }
350  }
351 
352  if (rank_ != dataRank_)
353  {
354  underlyingMatchesLogical_ = false;
355  }
356 
357  for (int d=numActiveDims_; d<7; d++)
358  {
359  // for *inactive* dims, the activeDims_ map just is the identity
360  // (this allows getEntry() to work even when the logical rank of the Data object is lower than that of the underlying View. This can happen for gradients in 1D.)
361  activeDims_[d] = d;
362  }
363  for (int d=0; d<7; d++)
364  {
365  INTREPID2_TEST_FOR_EXCEPTION(variationModulus_[d] == 0, std::logic_error, "variationModulus should not ever be 0");
366  }
367  }
368 
369  public:
371  template<bool passThroughBlockDiagonalArgs>
373  {
374  template<class ViewType, class ...IntArgs>
375  static KOKKOS_INLINE_FUNCTION reference_type get(const ViewType &view, const IntArgs&... intArgs)
376  {
377  return view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
378  }
379  };
380 
382  template<bool passThroughBlockDiagonalArgs>
384  {
385  template<class ViewType, class ...IntArgs>
386  static KOKKOS_INLINE_FUNCTION const_reference_type get(const ViewType &view, const IntArgs&... intArgs)
387  {
388  return view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
389  }
390  };
391 
392  template<class BinaryOperator, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
393  class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB, bool includeInnerLoop=false>
395  {
396  private:
397  ThisUnderlyingViewType this_underlying_;
398  AUnderlyingViewType A_underlying_;
399  BUnderlyingViewType B_underlying_;
400  BinaryOperator binaryOperator_;
401  int innerLoopSize_;
402  public:
403  InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
404  BinaryOperator binaryOperator)
405  :
406  this_underlying_(this_underlying),
407  A_underlying_(A_underlying),
408  B_underlying_(B_underlying),
409  binaryOperator_(binaryOperator)
410  {
411  INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
412  }
413 
414  InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
415  BinaryOperator binaryOperator, int innerLoopSize)
416  :
417  this_underlying_(this_underlying),
418  A_underlying_(A_underlying),
419  B_underlying_(B_underlying),
420  binaryOperator_(binaryOperator),
421  innerLoopSize_(innerLoopSize)
422  {
423  INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
424  }
425 
426  template<class ...IntArgs, bool M=includeInnerLoop>
427  KOKKOS_INLINE_FUNCTION
428  enable_if_t<!M, void>
429  operator()(const IntArgs&... args) const
430  {
431  auto & result = ArgExtractorThis::get( this_underlying_, args... );
432  const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
433  const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
434 
435  result = binaryOperator_(A_val,B_val);
436  }
437 
438  template<class ...IntArgs, bool M=includeInnerLoop>
439  KOKKOS_INLINE_FUNCTION
440  enable_if_t<M, void>
441  operator()(const IntArgs&... args) const
442  {
443  using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
444  for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
445  {
446  auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
447  const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
448  const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
449 
450  result = binaryOperator_(A_val,B_val);
451  }
452  }
453  };
454 
456  template<class BinaryOperator, class PolicyType, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
457  class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB>
458  void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying,
459  AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
460  BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
461  {
462  using Functor = InPlaceCombinationFunctor<BinaryOperator, ThisUnderlyingViewType, AUnderlyingViewType, BUnderlyingViewType, ArgExtractorThis, ArgExtractorA, ArgExtractorB>;
463  Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
464  Kokkos::parallel_for("compute in-place", policy, functor);
465  }
466 
468  template<class BinaryOperator, int rank>
469  enable_if_t<rank != 7, void>
470  storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
471  {
472  auto policy = dataExtentRangePolicy<rank>();
473 
474  // shallow copy of this to avoid implicit references to this in calls to getWritableEntry() below
475  Data<DataScalar,DeviceType> thisData = *this;
476 
477  const bool A_1D = A.getUnderlyingViewRank() == 1;
478  const bool B_1D = B.getUnderlyingViewRank() == 1;
479  const bool this_1D = this->getUnderlyingViewRank() == 1;
480  const bool A_constant = A_1D && (A.getUnderlyingViewSize() == 1);
481  const bool B_constant = B_1D && (B.getUnderlyingViewSize() == 1);
482  const bool this_constant = this_1D && (this->getUnderlyingViewSize() == 1);
483  const bool A_full = A.underlyingMatchesLogical();
484  const bool B_full = B.underlyingMatchesLogical();
485  const bool this_full = this->underlyingMatchesLogical();
486 
488 
489  const FullArgExtractor<reference_type> fullArgs;
490  const FullArgExtractorData<true> fullArgsData; // true: pass through block diagonal args. This is due to the behavior of dataExtentRangePolicy() for block diagonal args.
491  const FullArgExtractorWritableData<true> fullArgsWritable; // true: pass through block diagonal args. This is due to the behavior of dataExtentRangePolicy() for block diagonal args.
492 
499 
500  // this lambda returns -1 if there is not a rank-1 underlying view whose data extent matches the logical extent in the corresponding dimension;
501  // otherwise, it returns the logical index of the corresponding dimension.
502  auto get1DArgIndex = [](const Data<DataScalar,DeviceType> &data) -> int
503  {
504  const auto & variationTypes = data.getVariationTypes();
505  for (int d=0; d<rank; d++)
506  {
507  if (variationTypes[d] == GENERAL)
508  {
509  return d;
510  }
511  }
512  return -1;
513  };
514  if (this_constant)
515  {
516  // then A, B are constant, too
517  auto thisAE = constArg;
518  auto AAE = constArg;
519  auto BAE = constArg;
520  auto & this_underlying = this->getUnderlyingView<1>();
521  auto & A_underlying = A.getUnderlyingView<1>();
522  auto & B_underlying = B.getUnderlyingView<1>();
523  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
524  }
525  else if (this_full && A_full && B_full)
526  {
527  auto thisAE = fullArgs;
528  auto AAE = fullArgs;
529  auto BAE = fullArgs;
530 
531  auto & this_underlying = this->getUnderlyingView<rank>();
532  auto & A_underlying = A.getUnderlyingView<rank>();
533  auto & B_underlying = B.getUnderlyingView<rank>();
534 
535  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
536  }
537  else if (A_constant)
538  {
539  auto AAE = constArg;
540  auto & A_underlying = A.getUnderlyingView<1>();
541  if (this_full)
542  {
543  auto thisAE = fullArgs;
544  auto & this_underlying = this->getUnderlyingView<rank>();
545 
546  if (B_full)
547  {
548  auto BAE = fullArgs;
549  auto & B_underlying = B.getUnderlyingView<rank>();
550  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
551  }
552  else // this_full, not B_full: B may have modular data, etc.
553  {
554  auto BAE = fullArgsData;
555  storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
556  }
557  }
558  else // this is not full
559  {
560  // below, we optimize for the case of 1D data in B, when A is constant. Still need to handle other cases…
561  if (B_1D && (get1DArgIndex(B) != -1) )
562  {
563  // since A is constant, that implies that this_1D is true, and has the same 1DArgIndex
564  const int argIndex = get1DArgIndex(B);
565  auto & B_underlying = B.getUnderlyingView<1>();
566  auto & this_underlying = this->getUnderlyingView<1>();
567  switch (argIndex)
568  {
569  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0); break;
570  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1); break;
571  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2); break;
572  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3); break;
573  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4); break;
574  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5); break;
575  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
576  }
577  }
578  else
579  {
580  // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
581  auto thisAE = fullArgsWritable;
582  auto BAE = fullArgsData;
583  storeInPlaceCombination(policy, thisData, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
584  }
585  }
586  }
587  else if (B_constant)
588  {
589  auto BAE = constArg;
590  auto & B_underlying = B.getUnderlyingView<1>();
591  if (this_full)
592  {
593  auto thisAE = fullArgs;
594  auto & this_underlying = this->getUnderlyingView<rank>();
595  if (A_full)
596  {
597  auto AAE = fullArgs;
598  auto & A_underlying = A.getUnderlyingView<rank>();
599 
600  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
601  }
602  else // this_full, not A_full: A may have modular data, etc.
603  {
604  // use A (the Data object). This could be further optimized by using A's underlying View and an appropriately-defined ArgExtractor.
605  auto AAE = fullArgsData;
606  storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
607  }
608  }
609  else // this is not full
610  {
611  // below, we optimize for the case of 1D data in A, when B is constant. Still need to handle other cases…
612  if (A_1D && (get1DArgIndex(A) != -1) )
613  {
614  // since B is constant, that implies that this_1D is true, and has the same 1DArgIndex as A
615  const int argIndex = get1DArgIndex(A);
616  auto & A_underlying = A.getUnderlyingView<1>();
617  auto & this_underlying = this->getUnderlyingView<1>();
618  switch (argIndex)
619  {
620  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE); break;
621  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE); break;
622  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE); break;
623  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE); break;
624  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE); break;
625  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE); break;
626  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
627  }
628  }
629  else
630  {
631  // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
632  auto thisAE = fullArgsWritable;
633  auto AAE = fullArgsData;
634  storeInPlaceCombination(policy, thisData, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
635  }
636  }
637  }
638  else // neither A nor B constant
639  {
640  if (this_1D && (get1DArgIndex(thisData) != -1))
641  {
642  // possible ways that "this" could have full-extent, 1D data
643  // 1. A constant, B 1D
644  // 2. A 1D, B constant
645  // 3. A 1D, B 1D
646  // The constant possibilities are already addressed above, leaving us with (3). Note that A and B don't have to be full-extent, however
647  const int argThis = get1DArgIndex(thisData);
648  const int argA = get1DArgIndex(A); // if not full-extent, will be -1
649  const int argB = get1DArgIndex(B); // ditto
650 
651  auto & A_underlying = A.getUnderlyingView<1>();
652  auto & B_underlying = B.getUnderlyingView<1>();
653  auto & this_underlying = this->getUnderlyingView<1>();
654  if ((argA != -1) && (argB != -1))
655  {
656 #ifdef INTREPID2_HAVE_DEBUG
657  INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error, "Unexpected 1D arg combination.");
658  INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error, "Unexpected 1D arg combination.");
659 #endif
660  switch (argThis)
661  {
662  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0); break;
663  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1); break;
664  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2); break;
665  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3); break;
666  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4); break;
667  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5); break;
668  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
669  }
670  }
671  else if (argA != -1)
672  {
673  // B is not full-extent in dimension argThis; use the Data object
674  switch (argThis)
675  {
676  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData); break;
677  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData); break;
678  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData); break;
679  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData); break;
680  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData); break;
681  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData); break;
682  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
683  }
684  }
685  else
686  {
687  // A is not full-extent in dimension argThis; use the Data object
688  switch (argThis)
689  {
690  case 0: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0); break;
691  case 1: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1); break;
692  case 2: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2); break;
693  case 3: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3); break;
694  case 4: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4); break;
695  case 5: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5); break;
696  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
697  }
698  }
699  }
700  else if (this_full)
701  {
702  // This case uses A,B Data objects; could be optimized by dividing into subcases and using underlying Views with appropriate ArgExtractors.
703  auto & this_underlying = this->getUnderlyingView<rank>();
704  auto thisAE = fullArgs;
705 
706  if (A_full)
707  {
708  auto & A_underlying = A.getUnderlyingView<rank>();
709  auto AAE = fullArgs;
710 
711  if (B_1D && (get1DArgIndex(B) != -1))
712  {
713  const int argIndex = get1DArgIndex(B);
714  auto & B_underlying = B.getUnderlyingView<1>();
715  switch (argIndex)
716  {
717  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0); break;
718  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1); break;
719  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2); break;
720  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3); break;
721  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4); break;
722  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5); break;
723  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
724  }
725  }
726  else
727  {
728  // A is full; B is not full, but not constant or full-extent 1D
729  // unoptimized in B access:
730  auto BAE = fullArgsData;
731  storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
732  }
733  }
734  else // A is not full
735  {
736  if (A_1D && (get1DArgIndex(A) != -1))
737  {
738  const int argIndex = get1DArgIndex(A);
739  auto & A_underlying = A.getUnderlyingView<1>();
740  if (B_full)
741  {
742  auto & B_underlying = B.getUnderlyingView<rank>();
743  auto BAE = fullArgs;
744  switch (argIndex)
745  {
746  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE); break;
747  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE); break;
748  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE); break;
749  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE); break;
750  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE); break;
751  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE); break;
752  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
753  }
754  }
755  else
756  {
757  auto BAE = fullArgsData;
758  switch (argIndex)
759  {
760  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE); break;
761  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE); break;
762  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE); break;
763  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE); break;
764  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE); break;
765  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE); break;
766  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
767  }
768  }
769  }
770  else // A not full, and not full-extent 1D
771  {
772  // unoptimized in A, B accesses.
773  auto AAE = fullArgsData;
774  auto BAE = fullArgsData;
775  storeInPlaceCombination(policy, this_underlying, A, B, binaryOperator, thisAE, AAE, BAE);
776  }
777  }
778  }
779  else
780  {
781  // completely un-optimized case: we use Data objects for this, A, B.
782  auto thisAE = fullArgsWritable;
783  auto AAE = fullArgsData;
784  auto BAE = fullArgsData;
785  storeInPlaceCombination(policy, thisData, A, B, binaryOperator, thisAE, AAE, BAE);
786  }
787  }
788  }
789 
791  template<class BinaryOperator, int rank>
792  enable_if_t<rank == 7, void>
793  storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
794  {
795  auto policy = dataExtentRangePolicy<rank>();
796 
797  using DataType = Data<DataScalar,DeviceType>;
798  using ThisAE = FullArgExtractorWritableData<true>;
801 
802  const ordinal_type dim6 = getDataExtent(6);
803  const bool includeInnerLoop = true;
804  using Functor = InPlaceCombinationFunctor<BinaryOperator, DataType, DataType, DataType, ThisAE, AAE, BAE, includeInnerLoop>;
805  Functor functor(*this, A, B, binaryOperator, dim6);
806  Kokkos::parallel_for("compute in-place", policy, functor);
807  }
808  public:
810  template<class UnaryOperator>
811  void applyOperator(UnaryOperator unaryOperator)
812  {
813  using ExecutionSpace = typename DeviceType::execution_space;
814 
815  switch (dataRank_)
816  {
817  case 1:
818  {
819  const int dataRank = 1;
820  auto view = getUnderlyingView<dataRank>();
821 
822  const int dataExtent = this->getDataExtent(0);
823  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,dataExtent);
824  Kokkos::parallel_for("apply operator in-place", policy,
825  KOKKOS_LAMBDA (const int &i0) {
826  view(i0) = unaryOperator(view(i0));
827  });
828 
829  }
830  break;
831  case 2:
832  {
833  const int dataRank = 2;
834  auto policy = dataExtentRangePolicy<dataRank>();
835  auto view = getUnderlyingView<dataRank>();
836 
837  Kokkos::parallel_for("apply operator in-place", policy,
838  KOKKOS_LAMBDA (const int &i0, const int &i1) {
839  view(i0,i1) = unaryOperator(view(i0,i1));
840  });
841  }
842  break;
843  case 3:
844  {
845  const int dataRank = 3;
846  auto policy = dataExtentRangePolicy<dataRank>();
847  auto view = getUnderlyingView<dataRank>();
848 
849  Kokkos::parallel_for("apply operator in-place", policy,
850  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2) {
851  view(i0,i1,i2) = unaryOperator(view(i0,i1,i2));
852  });
853  }
854  break;
855  case 4:
856  {
857  const int dataRank = 4;
858  auto policy = dataExtentRangePolicy<dataRank>();
859  auto view = getUnderlyingView<dataRank>();
860 
861  Kokkos::parallel_for("apply operator in-place", policy,
862  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3) {
863  view(i0,i1,i2,i3) = unaryOperator(view(i0,i1,i2,i3));
864  });
865  }
866  break;
867  case 5:
868  {
869  const int dataRank = 5;
870  auto policy = dataExtentRangePolicy<dataRank>();
871  auto view = getUnderlyingView<dataRank>();
872 
873  Kokkos::parallel_for("apply operator in-place", policy,
874  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4) {
875  view(i0,i1,i2,i3,i4) = unaryOperator(view(i0,i1,i2,i3,i4));
876  });
877  }
878  break;
879  case 6:
880  {
881  const int dataRank = 6;
882  auto policy = dataExtentRangePolicy<dataRank>();
883  auto view = getUnderlyingView<dataRank>();
884 
885  Kokkos::parallel_for("apply operator in-place", policy,
886  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
887  view(i0,i1,i2,i3,i4,i5) = unaryOperator(view(i0,i1,i2,i3,i4,i5));
888  });
889  }
890  break;
891  case 7:
892  {
893  const int dataRank = 7;
894  auto policy6 = dataExtentRangePolicy<6>();
895  auto view = getUnderlyingView<dataRank>();
896 
897  const int dim_i6 = view.extent_int(6);
898 
899  Kokkos::parallel_for("apply operator in-place", policy6,
900  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
901  for (int i6=0; i6<dim_i6; i6++)
902  {
903  view(i0,i1,i2,i3,i4,i5,i6) = unaryOperator(view(i0,i1,i2,i3,i4,i5,i6));
904  }
905  });
906  }
907  break;
908  default:
909  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unsupported data rank");
910  }
911  }
912 
914  template<class ...IntArgs>
915  KOKKOS_INLINE_FUNCTION
916  reference_type getWritableEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs... intArgs) const
917  {
918  #ifdef INTREPID2_HAVE_DEBUG
919  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(numArgs != rank_, std::invalid_argument, "getWritableEntry() should have the same number of arguments as the logical rank.");
920  #endif
921  constexpr int numArgs = sizeof...(intArgs);
922  if (underlyingMatchesLogical_)
923  {
924  // in this case, we require that numArgs == dataRank_
925  return getUnderlyingView<numArgs>()(intArgs...);
926  }
927 
928  // extract the type of the first argument; use that for the arrays below
929  using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
930 
931  const Kokkos::Array<int_type, numArgs+1> args {intArgs...,0}; // we pad with one extra entry (0) to avoid gcc compiler warnings about references beyond the bounds of the array (the [d+1]'s below)
932  Kokkos::Array<int_type, 7> refEntry;
933  for (int d=0; d<numArgs; d++)
934  {
935  switch (variationType_[d])
936  {
937  case CONSTANT: refEntry[d] = 0; break;
938  case GENERAL: refEntry[d] = args[d]; break;
939  case MODULAR: refEntry[d] = args[d] % variationModulus_[d]; break;
940  case BLOCK_PLUS_DIAGONAL:
941  {
942  if (passThroughBlockDiagonalArgs)
943  {
944  refEntry[d] = args[d];
945  refEntry[d+1] = args[d+1]; // this had better be == 0
946  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(args[d+1] != 0, std::invalid_argument, "getWritableEntry() called with passThroughBlockDiagonalArgs = true, but nonzero second matrix argument.");
947  }
948  else
949  {
950  const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
951 
952  const int_type &i = args[d];
953  if (d+1 >= numArgs)
954  {
955  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "BLOCK_PLUS_DIAGONAL must be present for two dimensions; here, encountered only one");
956  }
957  else
958  {
959  const int_type &j = args[d+1];
960 
961  if ((i > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)) || (j > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)))
962  {
963  if (i != j)
964  {
965  // off diagonal: zero
966  return zeroView_(0); // NOTE: this branches in an argument-dependent way; this is not great for CUDA performance. When using BLOCK_PLUS_DIAGONAL, should generally avoid calls to this getEntry() method. (Use methods that directly take advantage of the data packing instead.)
967  }
968  else
969  {
970  refEntry[d] = blockPlusDiagonalDiagonalEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i);
971  }
972  }
973  else
974  {
975  refEntry[d] = blockPlusDiagonalBlockEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i, j);
976  }
977 
978  // skip next d (this is required also to be BLOCK_PLUS_DIAGONAL, and we've consumed its arg as j above)
979  refEntry[d+1] = 0;
980  }
981  }
982  d++;
983  }
984  }
985  }
986  // refEntry should be zero-filled beyond numArgs, for cases when rank_ < dataRank_ (this only is allowed if the extra dimensions each has extent 1).
987  for (int d=numArgs; d<7; d++)
988  {
989  refEntry[d] = 0;
990  }
991 
992  if (dataRank_ == 1)
993  {
994  return data1_(refEntry[activeDims_[0]]);
995  }
996  else if (dataRank_ == 2)
997  {
998  return data2_(refEntry[activeDims_[0]],refEntry[activeDims_[1]]);
999  }
1000  else if (dataRank_ == 3)
1001  {
1002  return data3_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]]);
1003  }
1004  else if (dataRank_ == 4)
1005  {
1006  return data4_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]]);
1007  }
1008  else if (dataRank_ == 5)
1009  {
1010  return data5_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1011  refEntry[activeDims_[4]]);
1012  }
1013  else if (dataRank_ == 6)
1014  {
1015  return data6_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1016  refEntry[activeDims_[4]],refEntry[activeDims_[5]]);
1017  }
1018  else // dataRank_ == 7
1019  {
1020  return data7_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1021  refEntry[activeDims_[4]],refEntry[activeDims_[5]],refEntry[activeDims_[6]]);
1022  }
1023 
1024  }
1025 
1027  template<class ...IntArgs>
1028  KOKKOS_INLINE_FUNCTION
1029  reference_type getWritableEntry(const IntArgs... intArgs) const
1030  {
1031  return getWritableEntryWithPassThroughOption(false, intArgs...);
1032  }
1033  public:
1035  template<class ToContainer, class FromContainer>
1036  static void copyContainer(ToContainer to, FromContainer from)
1037  {
1038 // std::cout << "Entered copyContainer().\n";
1039  auto policy = Kokkos::MDRangePolicy<execution_space,Kokkos::Rank<6>>({0,0,0,0,0,0},{from.extent_int(0),from.extent_int(1),from.extent_int(2), from.extent_int(3), from.extent_int(4), from.extent_int(5)});
1040 
1041  Kokkos::parallel_for("copyContainer", policy,
1042  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
1043  for (int i6=0; i6<from.extent_int(6); i6++)
1044  {
1045  to.access(i0,i1,i2,i3,i4,i5,i6) = from.access(i0,i1,i2,i3,i4,i5,i6);
1046  }
1047  });
1048  }
1049 
1051  void allocateAndCopyFromDynRankView(ScalarView<DataScalar,DeviceType> data)
1052  {
1053 // std::cout << "Entered allocateAndCopyFromDynRankView().\n";
1054  switch (dataRank_)
1055  {
1056  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", data.extent_int(0)); break;
1057  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1)); break;
1058  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2)); break;
1059  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3)); break;
1060  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4)); break;
1061  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5)); break;
1062  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5), data.extent_int(6)); break;
1063  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1064  }
1065 
1066  switch (dataRank_)
1067  {
1068  case 1: copyContainer(data1_,data); break;
1069  case 2: copyContainer(data2_,data); break;
1070  case 3: copyContainer(data3_,data); break;
1071  case 4: copyContainer(data4_,data); break;
1072  case 5: copyContainer(data5_,data); break;
1073  case 6: copyContainer(data6_,data); break;
1074  case 7: copyContainer(data7_,data); break;
1075  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1076  }
1077  }
1078 
1080  Data(std::vector<DimensionInfo> dimInfoVector)
1081  :
1082  // initialize member variables as if default constructor; if dimInfoVector is empty, we want default constructor behavior.
1083  dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1084  {
1085  // If dimInfoVector is empty, the member initialization above is correct; otherwise, we set as below.
1086  // Either way, once members are initialized, we must call setActiveDims().
1087  if (dimInfoVector.size() != 0)
1088  {
1089  std::vector<int> dataExtents;
1090 
1091  bool blockPlusDiagonalEncountered = false;
1092  for (int d=0; d<rank_; d++)
1093  {
1094  const DimensionInfo & dimInfo = dimInfoVector[d];
1095  extents_[d] = dimInfo.logicalExtent;
1096  variationType_[d] = dimInfo.variationType;
1097  const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1098  const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1099  if (isBlockPlusDiagonal)
1100  {
1101  blockPlusDiagonalEncountered = true;
1102  blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1103  }
1104  if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1105  {
1106  dataExtents.push_back(dimInfo.dataExtent);
1107  }
1108  }
1109  if (dataExtents.size() == 0)
1110  {
1111  // constant data
1112  dataExtents.push_back(1);
1113  }
1114  dataRank_ = dataExtents.size();
1115  switch (dataRank_)
1116  {
1117  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", dataExtents[0]); break;
1118  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1]); break;
1119  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]); break;
1120  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]); break;
1121  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]); break;
1122  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]); break;
1123  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]); break;
1124  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1125  }
1126  }
1127  setActiveDims();
1128  }
1129 
1131  Data(const ScalarView<DataScalar,DeviceType> &data, int rank, Kokkos::Array<int,7> extents, Kokkos::Array<DataVariationType,7> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1132  :
1133  dataRank_(data.rank()), extents_(extents), variationType_(variationType), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1134  {
1136  setActiveDims();
1137  }
1138 
1140  template<typename OtherDeviceType, class = typename std::enable_if< std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type,
1141  class = typename std::enable_if<!std::is_same<DeviceType,OtherDeviceType>::value>::type>
1142  Data(const Data<DataScalar,OtherDeviceType> &data)
1143  :
1144  dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1145  {
1146 // std::cout << "Entered copy-like Data constructor.\n";
1147  if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1148  {
1149  const auto view = data.getUnderlyingView();
1150  switch (dataRank_)
1151  {
1152  case 1: data1_ = data.getUnderlyingView1(); break;
1153  case 2: data2_ = data.getUnderlyingView2(); break;
1154  case 3: data3_ = data.getUnderlyingView3(); break;
1155  case 4: data4_ = data.getUnderlyingView4(); break;
1156  case 5: data5_ = data.getUnderlyingView5(); break;
1157  case 6: data6_ = data.getUnderlyingView6(); break;
1158  case 7: data7_ = data.getUnderlyingView7(); break;
1159  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1160  }
1161  }
1162  setActiveDims();
1163  }
1164 
1166  template<typename OtherDeviceType, class = typename std::enable_if<!std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type>
1168  :
1169  dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1170  {
1171 // std::cout << "Entered copy-like Data constructor.\n";
1172  if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1173  {
1174  const auto view = data.getUnderlyingView();
1175  switch (dataRank_)
1176  {
1177  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", view.extent_int(0)); break;
1178  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1)); break;
1179  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1180  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1181  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1182  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1183  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1184  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1185  }
1186 
1187  // copy
1188  // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1189  // first, mirror and copy dataView; then copy to the appropriate data_ member
1190  using MemorySpace = typename DeviceType::memory_space;
1191  switch (dataRank_)
1192  {
1193  case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1194  case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1195  case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1196  case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1197  case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1198  case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1199  case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1200  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1201  }
1202  }
1203  setActiveDims();
1204  }
1205 
1207 // Data(const Data<DataScalar,ExecSpaceType> &data)
1208 // :
1209 // dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1210 // {
1211 // std::cout << "Entered Data copy constructor.\n";
1212 // if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1213 // {
1214 // const auto view = data.getUnderlyingView();
1215 // switch (dataRank_)
1216 // {
1217 // case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0)); break;
1218 // case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1)); break;
1219 // case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1220 // case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1221 // case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1222 // case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1223 // case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1224 // default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1225 // }
1226 //
1227 // // copy
1228 // // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1229 // // first, mirror and copy dataView; then copy to the appropriate data_ member
1230 // using MemorySpace = typename DeviceType::memory_space;
1231 // switch (dataRank_)
1232 // {
1233 // case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1234 // case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1235 // case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1236 // case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1237 // case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1238 // case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1239 // case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1240 // default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1241 // }
1242 // }
1243 //
1244 // setActiveDims();
1245 // }
1246 
1248  Data(ScalarView<DataScalar,DeviceType> data)
1249  :
1250  Data(data,
1251  data.rank(),
1252  Kokkos::Array<int,7> {data.extent_int(0),data.extent_int(1),data.extent_int(2),data.extent_int(3),data.extent_int(4),data.extent_int(5),data.extent_int(6)},
1253  Kokkos::Array<DataVariationType,7> {GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL}, -1)
1254  {}
1255 
1257  template<size_t rank, class ...DynRankViewProperties>
1258  Data(const Kokkos::DynRankView<DataScalar,DeviceType, DynRankViewProperties...> &data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1259  :
1260  dataRank_(data.rank()), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1261  {
1262 // std::cout << "Entered a DynRankView Data() constructor.\n";
1264  for (unsigned d=0; d<rank; d++)
1265  {
1266  extents_[d] = extents[d];
1267  variationType_[d] = variationType[d];
1268  }
1269  setActiveDims();
1270  }
1271 
1272  template<size_t rank, class ...ViewProperties>
1273  Data(Kokkos::View<DataScalar*,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1274  :
1275  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1276  {
1277  data1_ = data;
1278  for (unsigned d=0; d<rank; d++)
1279  {
1280  extents_[d] = extents[d];
1281  variationType_[d] = variationType[d];
1282  }
1283  setActiveDims();
1284  }
1285 
1286  template<size_t rank, class ...ViewProperties>
1287  Data(Kokkos::View<DataScalar**,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1288  :
1289  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1290  {
1291  data2_ = data;
1292  for (unsigned d=0; d<rank; d++)
1293  {
1294  extents_[d] = extents[d];
1295  variationType_[d] = variationType[d];
1296  }
1297  setActiveDims();
1298  }
1299 
1300  template<size_t rank, class ...ViewProperties>
1301  Data(Kokkos::View<DataScalar***,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1302  :
1303  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1304  {
1305  data3_ = data;
1306  for (unsigned d=0; d<rank; d++)
1307  {
1308  extents_[d] = extents[d];
1309  variationType_[d] = variationType[d];
1310  }
1311  setActiveDims();
1312  }
1313 
1314  template<size_t rank, class ...ViewProperties>
1315  Data(Kokkos::View<DataScalar****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1316  :
1317  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1318  {
1319  data4_ = data;
1320  for (unsigned d=0; d<rank; d++)
1321  {
1322  extents_[d] = extents[d];
1323  variationType_[d] = variationType[d];
1324  }
1325  setActiveDims();
1326  }
1327 
1328  template<size_t rank, class ...ViewProperties>
1329  Data(Kokkos::View<DataScalar*****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1330  :
1331  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1332  {
1333  data5_ = data;
1334  for (unsigned d=0; d<rank; d++)
1335  {
1336  extents_[d] = extents[d];
1337  variationType_[d] = variationType[d];
1338  }
1339  setActiveDims();
1340  }
1341 
1342  template<size_t rank, class ...ViewProperties>
1343  Data(Kokkos::View<DataScalar******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1344  :
1345  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1346  {
1347  data6_ = data;
1348  for (unsigned d=0; d<rank; d++)
1349  {
1350  extents_[d] = extents[d];
1351  variationType_[d] = variationType[d];
1352  }
1353  setActiveDims();
1354  }
1355 
1356  template<size_t rank, class ...ViewProperties>
1357  Data(Kokkos::View<DataScalar*******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1358  :
1359  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1360  {
1361  data7_ = data;
1362  for (unsigned d=0; d<rank; d++)
1363  {
1364  extents_[d] = extents[d];
1365  variationType_[d] = variationType[d];
1366  }
1367  setActiveDims();
1368  }
1369 
1371  template<size_t rank>
1372  Data(DataScalar constantValue, Kokkos::Array<int,rank> extents)
1373  :
1374  dataRank_(1), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(rank)
1375  {
1376  data1_ = Kokkos::View<DataScalar*,DeviceType>("Constant Data",1);
1377  Kokkos::deep_copy(data1_, constantValue);
1378  for (unsigned d=0; d<rank; d++)
1379  {
1380  extents_[d] = extents[d];
1381  }
1382  setActiveDims();
1383  }
1384 
1387  :
1388  dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(0)
1389  {
1390  setActiveDims();
1391  }
1392 
1394  KOKKOS_INLINE_FUNCTION
1396  {
1397  return blockPlusDiagonalLastNonDiagonal_;
1398  }
1399 
1401  KOKKOS_INLINE_FUNCTION
1402  Kokkos::Array<int,7> getExtents() const
1403  {
1404  return extents_;
1405  }
1406 
1408  KOKKOS_INLINE_FUNCTION
1409  DimensionInfo getDimensionInfo(const int &dim) const
1410  {
1411  DimensionInfo dimInfo;
1412 
1413  dimInfo.logicalExtent = extent_int(dim);
1414  dimInfo.variationType = variationType_[dim];
1415  dimInfo.dataExtent = getDataExtent(dim);
1416  dimInfo.variationModulus = variationModulus_[dim];
1417 
1418  if (dimInfo.variationType == BLOCK_PLUS_DIAGONAL)
1419  {
1420  dimInfo.blockPlusDiagonalLastNonDiagonal = blockPlusDiagonalLastNonDiagonal_;
1421  }
1422  return dimInfo;
1423  }
1424 
1426  KOKKOS_INLINE_FUNCTION
1427  DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
1428  {
1429  const DimensionInfo myDimInfo = getDimensionInfo(dim);
1430  const DimensionInfo otherDimInfo = otherData.getDimensionInfo(dim);
1431 
1432  return combinedDimensionInfo(myDimInfo, otherDimInfo);
1433  }
1434 
1436  template<int rank>
1437  KOKKOS_INLINE_FUNCTION
1438  enable_if_t<rank==1, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1440  {
1441  #ifdef HAVE_INTREPID2_DEBUG
1442  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1443  #endif
1444  return data1_;
1445  }
1446 
1448  template<int rank>
1449  KOKKOS_INLINE_FUNCTION
1450  enable_if_t<rank==2, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1452  {
1453  #ifdef HAVE_INTREPID2_DEBUG
1454  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1455  #endif
1456  return data2_;
1457  }
1458 
1460  template<int rank>
1461  KOKKOS_INLINE_FUNCTION
1462  enable_if_t<rank==3, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1464  {
1465  #ifdef HAVE_INTREPID2_DEBUG
1466  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1467  #endif
1468  return data3_;
1469  }
1470 
1472  template<int rank>
1473  KOKKOS_INLINE_FUNCTION
1474  enable_if_t<rank==4, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1476  {
1477  #ifdef HAVE_INTREPID2_DEBUG
1478  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1479  #endif
1480  return data4_;
1481  }
1482 
1484  template<int rank>
1485  KOKKOS_INLINE_FUNCTION
1486  enable_if_t<rank==5, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1488  {
1489  #ifdef HAVE_INTREPID2_DEBUG
1490  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1491  #endif
1492  return data5_;
1493  }
1494 
1496  template<int rank>
1497  KOKKOS_INLINE_FUNCTION
1498  enable_if_t<rank==6, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1500  {
1501  #ifdef HAVE_INTREPID2_DEBUG
1502  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1503  #endif
1504  return data6_;
1505  }
1506 
1508  template<int rank>
1509  KOKKOS_INLINE_FUNCTION
1510  enable_if_t<rank==7, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1512  {
1513  #ifdef HAVE_INTREPID2_DEBUG
1514  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1515  #endif
1516  return data7_;
1517  }
1518 
1520  KOKKOS_INLINE_FUNCTION
1521  const Kokkos::View<DataScalar*, DeviceType> & getUnderlyingView1() const
1522  {
1523  return getUnderlyingView<1>();
1524  }
1525 
1527  KOKKOS_INLINE_FUNCTION
1528  const Kokkos::View<DataScalar**, DeviceType> & getUnderlyingView2() const
1529  {
1530  return getUnderlyingView<2>();
1531  }
1532 
1534  KOKKOS_INLINE_FUNCTION
1535  const Kokkos::View<DataScalar***, DeviceType> & getUnderlyingView3() const
1536  {
1537  return getUnderlyingView<3>();
1538  }
1539 
1541  KOKKOS_INLINE_FUNCTION
1542  const Kokkos::View<DataScalar****, DeviceType> & getUnderlyingView4() const
1543  {
1544  return getUnderlyingView<4>();
1545  }
1546 
1548  KOKKOS_INLINE_FUNCTION
1549  const Kokkos::View<DataScalar*****, DeviceType> & getUnderlyingView5() const
1550  {
1551  return getUnderlyingView<5>();
1552  }
1553 
1555  KOKKOS_INLINE_FUNCTION
1556  const Kokkos::View<DataScalar******, DeviceType> & getUnderlyingView6() const
1557  {
1558  return getUnderlyingView<6>();
1559  }
1560 
1562  KOKKOS_INLINE_FUNCTION
1563  const Kokkos::View<DataScalar*******, DeviceType> & getUnderlyingView7() const
1564  {
1565  return getUnderlyingView<7>();
1566  }
1567 
1569  KOKKOS_INLINE_FUNCTION
1570  void setUnderlyingView1(Kokkos::View<DataScalar*, DeviceType> & view) const
1571  {
1572  data1_ = view;
1573  }
1574 
1576  KOKKOS_INLINE_FUNCTION
1577  void setUnderlyingView2(Kokkos::View<DataScalar**, DeviceType> & view) const
1578  {
1579  data2_ = view;
1580  }
1581 
1583  KOKKOS_INLINE_FUNCTION
1584  void setUnderlyingView3(Kokkos::View<DataScalar***, DeviceType> & view) const
1585  {
1586  data3_ = view;
1587  }
1588 
1590  KOKKOS_INLINE_FUNCTION
1591  void setUnderlyingView4(Kokkos::View<DataScalar****, DeviceType> & view) const
1592  {
1593  data4_ = view;
1594  }
1595 
1597  KOKKOS_INLINE_FUNCTION
1598  void setUnderlyingView5(Kokkos::View<DataScalar*****, DeviceType> & view) const
1599  {
1600  data5_ = view;
1601  }
1602 
1604  KOKKOS_INLINE_FUNCTION
1605  void setUnderlyingView6(Kokkos::View<DataScalar******, DeviceType> & view) const
1606  {
1607  data6_ = view;
1608  }
1609 
1611  KOKKOS_INLINE_FUNCTION
1612  void setUnderlyingView7(Kokkos::View<DataScalar*******, DeviceType> & view) const
1613  {
1614  data7_ = view;
1615  }
1616 
1618  ScalarView<DataScalar,DeviceType> getUnderlyingView() const
1619  {
1620  switch (dataRank_)
1621  {
1622  case 1: return data1_;
1623  case 2: return data2_;
1624  case 3: return data3_;
1625  case 4: return data4_;
1626  case 5: return data5_;
1627  case 6: return data6_;
1628  case 7: return data7_;
1629  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1630  }
1631  }
1632 
1634  KOKKOS_INLINE_FUNCTION
1635  ordinal_type getUnderlyingViewRank() const
1636  {
1637  return dataRank_;
1638  }
1639 
1641  KOKKOS_INLINE_FUNCTION
1642  ordinal_type getUnderlyingViewSize() const
1643  {
1644  ordinal_type size = 1;
1645  for (ordinal_type r=0; r<dataRank_; r++)
1646  {
1647  size *= getUnderlyingViewExtent(r);
1648  }
1649  return size;
1650  }
1651 
1653  ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying() const
1654  {
1655  switch (dataRank_)
1656  {
1657  case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", data1_.extent_int(0));
1658  case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", data2_.extent_int(0), data2_.extent_int(1));
1659  case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", data3_.extent_int(0), data3_.extent_int(1), data3_.extent_int(2));
1660  case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", data4_.extent_int(0), data4_.extent_int(1), data4_.extent_int(2), data4_.extent_int(3));
1661  case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", data5_.extent_int(0), data5_.extent_int(1), data5_.extent_int(2), data5_.extent_int(3), data5_.extent_int(4));
1662  case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", data6_.extent_int(0), data6_.extent_int(1), data6_.extent_int(2), data6_.extent_int(3), data6_.extent_int(4), data6_.extent_int(5));
1663  case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", data7_.extent_int(0), data7_.extent_int(1), data7_.extent_int(2), data7_.extent_int(3), data7_.extent_int(4), data7_.extent_int(5), data7_.extent_int(6));
1664  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1665  }
1666  }
1667 
1669  template<class ... DimArgs>
1670  ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
1671  {
1672  switch (dataRank_)
1673  {
1674  case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", dims...);
1675  case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", dims...);
1676  case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", dims...);
1677  case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", dims...);
1678  case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", dims...);
1679  case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", dims...);
1680  case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", dims...);
1681  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1682  }
1683  }
1684 
1686  void clear() const
1687  {
1688 #ifdef KOKKOS_COMPILER_INTEL
1689 // Workaround intel internal compiler errors
1690  DataScalar zero = DataScalar(0);
1691  switch (dataRank_)
1692  {
1693  case 1: {Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, data1_.extent_int(0)), KOKKOS_LAMBDA(int i) {data1_(i) = zero;}); break; }
1694  case 2: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<2>, execution_space>({0,0},{data2_.extent_int(0),data2_.extent_int(1)}), KOKKOS_LAMBDA(int i0, int i1) {data2_(i0, i1) = zero;}); break; }
1695  case 3: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>, execution_space>({0,0,0},{data3_.extent_int(0),data3_.extent_int(1),data3_.extent_int(2)}), KOKKOS_LAMBDA(int i0, int i1, int i2) {data3_(i0, i1, i2) = zero;}); break; }
1696  case 4: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<4>, execution_space>({0,0,0,0},{data4_.extent_int(0),data4_.extent_int(1),data4_.extent_int(2),data4_.extent_int(3)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {data4_(i0, i1, i2, i3) = zero;}); break; }
1697  case 5: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<5>, execution_space>({0,0,0,0,0},{data5_.extent_int(0),data5_.extent_int(1),data5_.extent_int(2),data5_.extent_int(3),data5_.extent_int(4)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {data5_(i0, i1, i2, i3, i4) = zero;}); break; }
1698  case 6: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data6_.extent_int(0),data6_.extent_int(1),data6_.extent_int(2),data6_.extent_int(3),data6_.extent_int(4),data6_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {data6_(i0, i1, i2, i3, i4, i5) = zero;}); break; }
1699  case 7: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data7_.extent_int(0),data7_.extent_int(1),data7_.extent_int(2),data7_.extent_int(3),data7_.extent_int(4),data7_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5 ) {for (int i6 = 0; i6 < data7_.extent_int(6); ++i6) data7_(i0, i1, i2, i3, i4, i5, i6) = zero;}); break; }
1700  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1701  }
1702 #else
1703  switch (dataRank_)
1704  {
1705  case 1: Kokkos::deep_copy(data1_, 0.0); break;
1706  case 2: Kokkos::deep_copy(data2_, 0.0); break;
1707  case 3: Kokkos::deep_copy(data3_, 0.0); break;
1708  case 4: Kokkos::deep_copy(data4_, 0.0); break;
1709  case 5: Kokkos::deep_copy(data5_, 0.0); break;
1710  case 6: Kokkos::deep_copy(data6_, 0.0); break;
1711  case 7: Kokkos::deep_copy(data7_, 0.0); break;
1712  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1713  }
1714 #endif
1715  }
1716 
1718  void copyDataFromDynRankViewMatchingUnderlying(const ScalarView<DataScalar,DeviceType> &dynRankView) const
1719  {
1720 // std::cout << "Entered copyDataFromDynRankViewMatchingUnderlying().\n";
1721  switch (dataRank_)
1722  {
1723  case 1: copyContainer(data1_,dynRankView); break;
1724  case 2: copyContainer(data2_,dynRankView); break;
1725  case 3: copyContainer(data3_,dynRankView); break;
1726  case 4: copyContainer(data4_,dynRankView); break;
1727  case 5: copyContainer(data5_,dynRankView); break;
1728  case 6: copyContainer(data6_,dynRankView); break;
1729  case 7: copyContainer(data7_,dynRankView); break;
1730  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1731  }
1732  }
1733 
1735  KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
1736  {
1737  for (int i=0; i<numActiveDims_; i++)
1738  {
1739  if (activeDims_[i] == d)
1740  {
1741  return getUnderlyingViewExtent(i);
1742  }
1743  else if (activeDims_[i] > d)
1744  {
1745  return 1; // data does not vary in the specified dimension
1746  }
1747  }
1748  return 1; // data does not vary in the specified dimension
1749  }
1750 
1762  KOKKOS_INLINE_FUNCTION
1763  int getVariationModulus(const int &d) const
1764  {
1765  return variationModulus_[d];
1766  }
1767 
1769  KOKKOS_INLINE_FUNCTION
1770  const Kokkos::Array<DataVariationType,7> & getVariationTypes() const
1771  {
1772  return variationType_;
1773  }
1774 
1776  template<class ...IntArgs>
1777  KOKKOS_INLINE_FUNCTION
1778  return_type getEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs&... intArgs) const
1779  {
1780  return getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
1781  }
1782 
1784  template<class ...IntArgs>
1785  KOKKOS_INLINE_FUNCTION
1786  return_type getEntry(const IntArgs&... intArgs) const
1787  {
1788  return getEntryWithPassThroughOption(false, intArgs...);
1789  }
1790 
1791  template <bool...> struct bool_pack;
1792 
1793  template <bool... v>
1794  using all_true = std::is_same<bool_pack<true, v...>, bool_pack<v..., true>>;
1795 
1796  template <class ...IntArgs>
1797  using valid_args = all_true<std::is_integral<IntArgs>{}...>;
1798 
1799  static_assert(valid_args<int,long,unsigned>::value, "valid args works");
1800 
1802  template <class ...IntArgs>
1803  KOKKOS_INLINE_FUNCTION
1804 #ifndef __INTEL_COMPILER
1805  // icc has a bug that prevents compilation with this enable_if_t
1806  // (possibly the same as https://community.intel.com/t5/Intel-C-Compiler/Intel-Compiler-bug-while-deducing-template-arguments-inside/m-p/1164358)
1807  // so with icc we'll just skip the argument type/count check
1808  enable_if_t<valid_args<IntArgs...>::value && (sizeof...(IntArgs) <= 7),return_type>
1809 #else
1810  return_type
1811 #endif
1812  operator()(const IntArgs&... intArgs) const {
1813  return getEntry(intArgs...);
1814  }
1815 
1817  KOKKOS_INLINE_FUNCTION
1818  int extent_int(const int& r) const
1819  {
1820  return extents_[r];
1821  }
1822 
1823  template <typename iType>
1824  KOKKOS_INLINE_FUNCTION constexpr
1825  typename std::enable_if<std::is_integral<iType>::value, size_t>::type
1826  extent(const iType& r) const {
1827  return extents_[r];
1828  }
1829 
1831  KOKKOS_INLINE_FUNCTION bool isDiagonal() const
1832  {
1833  if (blockPlusDiagonalLastNonDiagonal_ >= 1) return false;
1834  int numBlockPlusDiagonalTypes = 0;
1835  for (unsigned r = 0; r<variationType_.size(); r++)
1836  {
1837  const auto &entryType = variationType_[r];
1838  if (entryType == BLOCK_PLUS_DIAGONAL) numBlockPlusDiagonalTypes++;
1839  }
1840  // 2 BLOCK_PLUS_DIAGONAL entries, combined with blockPlusDiagonalLastNonDiagonal being -1 or 0 indicates diagonal
1841  if (numBlockPlusDiagonalTypes == 2) return true;
1842  else if (numBlockPlusDiagonalTypes == 0) return false; // no BLOCK_PLUS_DIAGONAL --> not a diagonal matrix
1843  else INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "Unexpected number of ranks marked as BLOCK_PLUS_DIAGONAL (should be 0 or 2)");
1844  return false; // statement should be unreachable; included because compilers don't necessarily recognize that fact...
1845  }
1846 
1851  {
1852  return Data<DataScalar,DeviceType>(value, this->getExtents());
1853  }
1854 
1861  {
1862  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape");
1863  const int rank = A.rank();
1864  std::vector<DimensionInfo> dimInfo(rank);
1865  for (int d=0; d<rank; d++)
1866  {
1867  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(d) != B.extent_int(d), std::invalid_argument, "A and B must have the same logical shape");
1868  dimInfo[d] = A.combinedDataDimensionInfo(B, d);
1869  }
1870  Data<DataScalar,DeviceType> result(dimInfo);
1871  return result;
1872  }
1873 
1880  static Data<DataScalar,DeviceType> allocateMatMatResult( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
1881  {
1882  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
1883  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
1884 
1885  const int D1_DIM = A_MatData.rank() - 2;
1886  const int D2_DIM = A_MatData.rank() - 1;
1887 
1888  const int A_rows = A_MatData.extent_int(D1_DIM);
1889  const int A_cols = A_MatData.extent_int(D2_DIM);
1890  const int B_rows = B_MatData.extent_int(D1_DIM);
1891  const int B_cols = B_MatData.extent_int(D2_DIM);
1892 
1893  const int leftRows = transposeA ? A_cols : A_rows;
1894  const int leftCols = transposeA ? A_rows : A_cols;
1895  const int rightRows = transposeB ? B_cols : B_rows;
1896  const int rightCols = transposeB ? B_rows : B_cols;
1897 
1898  INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument, "incompatible matrix dimensions");
1899 
1900  Kokkos::Array<int,7> resultExtents; // logical extents
1901  Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
1902 
1903  resultExtents[D1_DIM] = leftRows;
1904  resultExtents[D2_DIM] = rightCols;
1905  int resultBlockPlusDiagonalLastNonDiagonal = -1;
1906  if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1907  {
1908  // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1909  resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1910  resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1911  resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1912  }
1913 
1914  const int resultRank = A_MatData.rank();
1915 
1916  auto A_VariationTypes = A_MatData.getVariationTypes();
1917  auto B_VariationTypes = B_MatData.getVariationTypes();
1918 
1919  Kokkos::Array<int,7> resultActiveDims;
1920  Kokkos::Array<int,7> resultDataDims;
1921  int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
1922  // the following loop is over the dimensions *prior* to matrix dimensions
1923  for (int i=0; i<resultRank-2; i++)
1924  {
1925  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.extent_int(i) != B_MatData.extent_int(i), std::invalid_argument, "A and B extents must match in each non-matrix dimension");
1926 
1927  resultExtents[i] = A_MatData.extent_int(i);
1928 
1929  const DataVariationType &A_VariationType = A_VariationTypes[i];
1930  const DataVariationType &B_VariationType = B_VariationTypes[i];
1931 
1932  // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
1933  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1934  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(B_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1935 
1936  int dataSize = 0;
1937  DataVariationType resultVariationType;
1938  if ((A_VariationType == GENERAL) || (B_VariationType == GENERAL))
1939  {
1940  resultVariationType = GENERAL;
1941  dataSize = resultExtents[i];
1942  }
1943  else if ((B_VariationType == CONSTANT) && (A_VariationType == CONSTANT))
1944  {
1945  resultVariationType = CONSTANT;
1946  dataSize = 1;
1947  }
1948  else if ((B_VariationType == MODULAR) && (A_VariationType == CONSTANT))
1949  {
1950  resultVariationType = MODULAR;
1951  dataSize = B_MatData.getVariationModulus(i);
1952  }
1953  else if ((B_VariationType == CONSTANT) && (A_VariationType == MODULAR))
1954  {
1955  resultVariationType = MODULAR;
1956  dataSize = A_MatData.getVariationModulus(i);
1957  }
1958  else
1959  {
1960  // both are modular. We allow this if they agree on the modulus
1961  auto A_Modulus = A_MatData.getVariationModulus(i);
1962  auto B_Modulus = B_MatData.getVariationModulus(i);
1963  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_Modulus != B_Modulus, std::invalid_argument, "If both matrices have variation type MODULAR, they must agree on the modulus");
1964  resultVariationType = MODULAR;
1965  dataSize = A_Modulus;
1966  }
1967  resultVariationTypes[i] = resultVariationType;
1968 
1969  if (resultVariationType != CONSTANT)
1970  {
1971  resultActiveDims[resultNumActiveDims] = i;
1972  resultDataDims[resultNumActiveDims] = dataSize;
1973  resultNumActiveDims++;
1974  }
1975  }
1976 
1977  // set things for final dimensions:
1978  resultExtents[D1_DIM] = leftRows;
1979  resultExtents[D2_DIM] = rightCols;
1980 
1981  if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1982  {
1983  // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1984  resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1985  resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1986  resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1987 
1988  resultActiveDims[resultNumActiveDims] = resultRank - 2;
1989 
1990  const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1991  const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1992 
1993  resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1994  resultNumActiveDims++;
1995  }
1996  else
1997  {
1998  // pretty much the only variation types that make sense for matrix dims are GENERAL and BLOCK_PLUS_DIAGONAL
1999  resultVariationTypes[D1_DIM] = GENERAL;
2000  resultVariationTypes[D2_DIM] = GENERAL;
2001 
2002  resultActiveDims[resultNumActiveDims] = resultRank - 2;
2003  resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
2004 
2005  resultDataDims[resultNumActiveDims] = leftRows;
2006  resultDataDims[resultNumActiveDims+1] = rightCols;
2007  resultNumActiveDims += 2;
2008  }
2009 
2010  for (int i=resultRank; i<7; i++)
2011  {
2012  resultVariationTypes[i] = CONSTANT;
2013  resultExtents[i] = 1;
2014  }
2015 
2016  ScalarView<DataScalar,DeviceType> data;
2017  if (resultNumActiveDims == 1)
2018  {
2019  auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any
2020  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]);
2021  }
2022  else if (resultNumActiveDims == 2)
2023  {
2024  auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any
2025  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]);
2026  }
2027  else if (resultNumActiveDims == 3)
2028  {
2029  auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any
2030  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2031  }
2032  else if (resultNumActiveDims == 4)
2033  {
2034  auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any
2035  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2036  resultDataDims[3]);
2037  }
2038  else if (resultNumActiveDims == 5)
2039  {
2040  auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any
2041  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2042  resultDataDims[3], resultDataDims[4]);
2043  }
2044  else if (resultNumActiveDims == 6)
2045  {
2046  auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any
2047  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2048  resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2049  }
2050  else // resultNumActiveDims == 7
2051  {
2052  auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any
2053  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
2054  resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2055  }
2056 
2057  return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal);
2058  }
2059 
2063  {
2064  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2065  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matData.rank() != vecData.rank() + 1, std::invalid_argument, "matData and vecData have incompatible ranks");
2066  const int vecDim = vecData.extent_int(vecData.rank() - 1);
2067  const int matRows = matData.extent_int(matData.rank() - 2);
2068  const int matCols = matData.extent_int(matData.rank() - 1);
2069 
2070  const int resultRank = vecData.rank();
2071 
2072  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matCols != vecDim, std::invalid_argument, "matData column count != vecData dimension");
2073 
2074  Kokkos::Array<int,7> resultExtents; // logical extents
2075  Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
2076  auto vecVariationTypes = vecData.getVariationTypes();
2077  auto matVariationTypes = matData.getVariationTypes();
2078 
2079  Kokkos::Array<int,7> resultActiveDims;
2080  Kokkos::Array<int,7> resultDataDims;
2081  int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
2082  // the following loop is over the dimensions *prior* to matrix/vector dimensions
2083  for (int i=0; i<resultRank-1; i++)
2084  {
2085  resultExtents[i] = vecData.extent_int(i);
2086  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecData.extent_int(i) != matData.extent_int(i), std::invalid_argument, "matData and vecData extents must match in each non-matrix/vector dimension");
2087 
2088  const DataVariationType &vecVariationType = vecVariationTypes[i];
2089  const DataVariationType &matVariationType = matVariationTypes[i];
2090 
2091  // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
2092  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2093  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2094 
2095  int dataSize = 0;
2096  DataVariationType resultVariationType;
2097  if ((vecVariationType == GENERAL) || (matVariationType == GENERAL))
2098  {
2099  resultVariationType = GENERAL;
2100  dataSize = resultExtents[i];
2101  }
2102  else if ((matVariationType == CONSTANT) && (vecVariationType == CONSTANT))
2103  {
2104  resultVariationType = CONSTANT;
2105  dataSize = 1;
2106  }
2107  else if ((matVariationType == MODULAR) && (vecVariationType == CONSTANT))
2108  {
2109  resultVariationType = MODULAR;
2110  dataSize = matData.getVariationModulus(i);
2111  }
2112  else if ((matVariationType == CONSTANT) && (vecVariationType == MODULAR))
2113  {
2114  resultVariationType = MODULAR;
2115  dataSize = matData.getVariationModulus(i);
2116  }
2117  else
2118  {
2119  // both are modular. We allow this if they agree on the modulus
2120  auto matModulus = matData.getVariationModulus(i);
2121  auto vecModulus = vecData.getVariationModulus(i);
2122  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matModulus != vecModulus, std::invalid_argument, "If matrix and vector both have variation type MODULAR, they must agree on the modulus");
2123  resultVariationType = MODULAR;
2124  dataSize = matModulus;
2125  }
2126  resultVariationTypes[i] = resultVariationType;
2127 
2128  if (resultVariationType != CONSTANT)
2129  {
2130  resultActiveDims[resultNumActiveDims] = i;
2131  resultDataDims[resultNumActiveDims] = dataSize;
2132  resultNumActiveDims++;
2133  }
2134  }
2135  // for the final dimension, the variation type is always GENERAL
2136  // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…)
2137  resultVariationTypes[resultNumActiveDims] = GENERAL;
2138  resultActiveDims[resultNumActiveDims] = resultRank - 1;
2139  resultDataDims[resultNumActiveDims] = matRows;
2140  resultExtents[resultRank-1] = matRows;
2141  resultNumActiveDims++;
2142 
2143  for (int i=resultRank; i<7; i++)
2144  {
2145  resultVariationTypes[i] = CONSTANT;
2146  resultExtents[i] = 1;
2147  }
2148 
2149  ScalarView<DataScalar,DeviceType> data;
2150  if (resultNumActiveDims == 1)
2151  {
2152  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0]);
2153  }
2154  else if (resultNumActiveDims == 2)
2155  {
2156  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1]);
2157  }
2158  else if (resultNumActiveDims == 3)
2159  {
2160  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2161  }
2162  else if (resultNumActiveDims == 4)
2163  {
2164  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2165  resultDataDims[3]);
2166  }
2167  else if (resultNumActiveDims == 5)
2168  {
2169  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2170  resultDataDims[3], resultDataDims[4]);
2171  }
2172  else if (resultNumActiveDims == 6)
2173  {
2174  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2175  resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2176  }
2177  else // resultNumActiveDims == 7
2178  {
2179  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2180  resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2181  }
2182 
2183  return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes);
2184  }
2185 
2187  template<int rank>
2188  enable_if_t<(rank!=1) && (rank!=7), Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<rank>> >
2190  {
2191  using ExecutionSpace = typename DeviceType::execution_space;
2192  Kokkos::Array<int,rank> startingOrdinals;
2193  Kokkos::Array<int,rank> extents;
2194 
2195  for (int d=0; d<rank; d++)
2196  {
2197  startingOrdinals[d] = 0;
2198  extents[d] = getDataExtent(d);
2199  }
2200  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<rank>>(startingOrdinals,extents);
2201  return policy;
2202  }
2203 
2205  template<int rank>
2206  enable_if_t<rank==7, Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<6>> >
2208  {
2209  using ExecutionSpace = typename DeviceType::execution_space;
2210  Kokkos::Array<int,6> startingOrdinals;
2211  Kokkos::Array<int,6> extents;
2212 
2213  for (int d=0; d<6; d++)
2214  {
2215  startingOrdinals[d] = 0;
2216  extents[d] = getDataExtent(d);
2217  }
2218  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<6>>(startingOrdinals,extents);
2219  return policy;
2220  }
2221 
2222  template<int rank>
2223  inline
2224  enable_if_t<rank==1, Kokkos::RangePolicy<typename DeviceType::execution_space> >
2226  {
2227  using ExecutionSpace = typename DeviceType::execution_space;
2228  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,getDataExtent(0));
2229  return policy;
2230  }
2231 
2233  Data shallowCopy(const int rank, const Kokkos::Array<int,7> &extents, const Kokkos::Array<DataVariationType,7> &variationTypes)
2234  {
2235  switch (dataRank_)
2236  {
2237  case 1: return Data(data1_, extents, variationTypes);
2238  case 2: return Data(data2_, extents, variationTypes);
2239  case 3: return Data(data3_, extents, variationTypes);
2240  case 4: return Data(data4_, extents, variationTypes);
2241  case 5: return Data(data5_, extents, variationTypes);
2242  case 6: return Data(data6_, extents, variationTypes);
2243  case 7: return Data(data7_, extents, variationTypes);
2244  default:
2245  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unhandled dataRank_");
2246  }
2247  }
2248 
2250  template<class BinaryOperator>
2251  void storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
2252  {
2253  using ExecutionSpace = typename DeviceType::execution_space;
2254 
2255 #ifdef INTREPID2_HAVE_DEBUG
2256  // check logical extents
2257  for (int d=0; d<rank_; d++)
2258  {
2259  INTREPID2_TEST_FOR_EXCEPTION(A.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2260  INTREPID2_TEST_FOR_EXCEPTION(B.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2261  }
2262  // TODO: add some checks that data extent of this suffices to accept combined A + B data.
2263 #endif
2264 
2265  const bool this_constant = (this->getUnderlyingViewRank() == 1) && (this->getUnderlyingViewSize() == 1);
2266 
2267  // we special-case for constant output here; since the constant case is essentially all overhead, we want to avoid as much of the overhead of storeInPlaceCombination() as possible…
2268  if (this_constant)
2269  {
2270  // constant data
2271  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1); // just 1 entry
2272  auto this_underlying = this->getUnderlyingView<1>();
2273  auto A_underlying = A.getUnderlyingView<1>();
2274  auto B_underlying = B.getUnderlyingView<1>();
2275  Kokkos::parallel_for("compute in-place", policy,
2276  KOKKOS_LAMBDA (const int &i0) {
2277  auto & result = this_underlying(0);
2278  const auto & A_val = A_underlying(0);
2279  const auto & B_val = B_underlying(0);
2280 
2281  result = binaryOperator(A_val,B_val);
2282  });
2283  }
2284  else
2285  {
2286  switch (rank_)
2287  {
2288  case 1: storeInPlaceCombination<BinaryOperator, 1>(A, B, binaryOperator); break;
2289  case 2: storeInPlaceCombination<BinaryOperator, 2>(A, B, binaryOperator); break;
2290  case 3: storeInPlaceCombination<BinaryOperator, 3>(A, B, binaryOperator); break;
2291  case 4: storeInPlaceCombination<BinaryOperator, 4>(A, B, binaryOperator); break;
2292  case 5: storeInPlaceCombination<BinaryOperator, 5>(A, B, binaryOperator); break;
2293  case 6: storeInPlaceCombination<BinaryOperator, 6>(A, B, binaryOperator); break;
2294  case 7: storeInPlaceCombination<BinaryOperator, 7>(A, B, binaryOperator); break;
2295  default:
2296  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "unhandled rank in switch");
2297  }
2298  }
2299  }
2300 
2303  {
2304  auto sum = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2305  {
2306  return a + b;
2307  };
2308  storeInPlaceCombination(A, B, sum);
2309  }
2310 
2313  {
2314  auto product = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2315  {
2316  return a * b;
2317  };
2318  storeInPlaceCombination(A, B, product);
2319  }
2320 
2323  {
2324  auto difference = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2325  {
2326  return a - b;
2327  };
2328  storeInPlaceCombination(A, B, difference);
2329  }
2330 
2333  {
2334  auto quotient = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2335  {
2336  return a / b;
2337  };
2338  storeInPlaceCombination(A, B, quotient);
2339  }
2340 
2343  {
2344  // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2345  // TODO: check for invalidly shaped containers.
2346 
2347  const int matRows = matData.extent_int(matData.rank() - 2);
2348  const int matCols = matData.extent_int(matData.rank() - 1);
2349 
2350  // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2351  Data<DataScalar,DeviceType> thisData = *this;
2352 
2353  using ExecutionSpace = typename DeviceType::execution_space;
2354  // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2355  if (rank_ == 3)
2356  {
2357  // typical case for e.g. gradient data: (C,P,D)
2358  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{getDataExtent(0),getDataExtent(1),matRows});
2359  Kokkos::parallel_for("compute mat-vec", policy,
2360  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &i) {
2361  auto & val_i = thisData.getWritableEntry(cellOrdinal, pointOrdinal, i);
2362  val_i = 0;
2363  for (int j=0; j<matCols; j++)
2364  {
2365  val_i += matData(cellOrdinal,pointOrdinal,i,j) * vecData(cellOrdinal,pointOrdinal,j);
2366  }
2367  });
2368  }
2369  else if (rank_ == 2)
2370  {
2371  //
2372  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{getDataExtent(0),matRows});
2373  Kokkos::parallel_for("compute mat-vec", policy,
2374  KOKKOS_LAMBDA (const int &vectorOrdinal, const int &i) {
2375  auto & val_i = thisData.getWritableEntry(vectorOrdinal, i);
2376  val_i = 0;
2377  for (int j=0; j<matCols; j++)
2378  {
2379  val_i += matData(vectorOrdinal,i,j) * vecData(vectorOrdinal,j);
2380  }
2381  });
2382  }
2383  else if (rank_ == 1)
2384  {
2385  // single-vector case
2386  Kokkos::RangePolicy<ExecutionSpace> policy(0,matRows);
2387  Kokkos::parallel_for("compute mat-vec", policy,
2388  KOKKOS_LAMBDA (const int &i) {
2389  auto & val_i = thisData.getWritableEntry(i);
2390  val_i = 0;
2391  for (int j=0; j<matCols; j++)
2392  {
2393  val_i += matData(i,j) * vecData(j);
2394  }
2395  });
2396  }
2397  else
2398  {
2399  // TODO: handle other cases
2400  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2401  }
2402  }
2403 
2410  void storeMatMat( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
2411  {
2412  // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2413  // TODO: check for invalidly shaped containers.
2414 
2415  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2416  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
2417 
2418  const int D1_DIM = A_MatData.rank() - 2;
2419  const int D2_DIM = A_MatData.rank() - 1;
2420 
2421  const int A_rows = A_MatData.extent_int(D1_DIM);
2422  const int A_cols = A_MatData.extent_int(D2_DIM);
2423  const int B_rows = B_MatData.extent_int(D1_DIM);
2424  const int B_cols = B_MatData.extent_int(D2_DIM);
2425 
2426  const int leftRows = transposeA ? A_cols : A_rows;
2427  const int leftCols = transposeA ? A_rows : A_cols;
2428  const int rightCols = transposeB ? B_rows : B_cols;
2429 
2430 #ifdef INTREPID2_HAVE_DEBUG
2431  const int rightRows = transposeB ? B_cols : B_rows;
2432  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(leftCols != rightRows, std::invalid_argument, "inner dimensions do not match");
2433 #endif
2434 
2435  // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2436  Data<DataScalar,DeviceType> thisData = *this;
2437 
2438  using ExecutionSpace = typename DeviceType::execution_space;
2439 
2440  const int diagonalStart = (variationType_[D1_DIM] == BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2441  // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2442  if (rank_ == 3)
2443  {
2444  // (C,D,D), say
2445  auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,getDataExtent(0));
2446  Kokkos::parallel_for("compute mat-mat", policy,
2447  KOKKOS_LAMBDA (const int &matrixOrdinal) {
2448  for (int i=0; i<diagonalStart; i++)
2449  {
2450  for (int j=0; j<rightCols; j++)
2451  {
2452  auto & val_ij = thisData.getWritableEntry(matrixOrdinal, i, j);
2453  val_ij = 0;
2454  for (int k=0; k<leftCols; k++)
2455  {
2456  const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2457  const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2458  val_ij += left * right;
2459  }
2460  }
2461  }
2462  for (int i=diagonalStart; i<leftRows; i++)
2463  {
2464  auto & val_ii = thisData.getWritableEntry(matrixOrdinal, i, i);
2465  const auto & left = A_MatData(matrixOrdinal,i,i);
2466  const auto & right = B_MatData(matrixOrdinal,i,i);
2467  val_ii = left * right;
2468  }
2469  });
2470  }
2471  else if (rank_ == 4)
2472  {
2473  // (C,P,D,D), perhaps
2474  auto policy = Kokkos::MDRangePolicy<ExecutionSpace, Kokkos::Rank<2> >({0,0},{getDataExtent(0),getDataExtent(1)});
2475  if (underlyingMatchesLogical_) // receiving data object is completely expanded
2476  {
2477  Kokkos::parallel_for("compute mat-mat", policy,
2478  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2479  for (int i=0; i<leftCols; i++)
2480  {
2481  for (int j=0; j<rightCols; j++)
2482  {
2483  auto & val_ij = thisData.getUnderlyingView4()(cellOrdinal,pointOrdinal, i, j);
2484  val_ij = 0;
2485  for (int k=0; k<leftCols; k++)
2486  {
2487  const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2488  const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2489  val_ij += left * right;
2490  }
2491  }
2492  }
2493  });
2494  }
2495  else
2496  {
2497  Kokkos::parallel_for("compute mat-mat", policy,
2498  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2499  for (int i=0; i<diagonalStart; i++)
2500  {
2501  for (int j=0; j<rightCols; j++)
2502  {
2503  auto & val_ij = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, j);
2504  val_ij = 0;
2505  for (int k=0; k<leftCols; k++)
2506  {
2507  const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2508  const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2509  val_ij += left * right;
2510  }
2511  }
2512  }
2513  for (int i=diagonalStart; i<leftRows; i++)
2514  {
2515  auto & val_ii = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, i);
2516  const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2517  const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2518  val_ii = left * right;
2519  }
2520  });
2521  }
2522  }
2523  else
2524  {
2525  // TODO: handle other cases
2526  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2527  }
2528  }
2529 
2531  KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
2532  {
2533  return extents_[0] > 0;
2534  }
2535 
2537  KOKKOS_INLINE_FUNCTION
2538  unsigned rank() const
2539  {
2540  return rank_;
2541  }
2542 
2549  void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
2550  {
2551  INTREPID2_TEST_FOR_EXCEPTION(variationType_[d] == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "setExtent is not supported for BLOCK_PLUS_DIAGONAL dimensions");
2552 
2553  if (variationType_[d] == MODULAR)
2554  {
2555  bool dividesEvenly = ((newExtent / variationModulus_[d]) * variationModulus_[d] == newExtent);
2556  INTREPID2_TEST_FOR_EXCEPTION(!dividesEvenly, std::invalid_argument, "when setExtent is called on dimenisions with MODULAR variation, the modulus must divide the new extent evenly");
2557  }
2558 
2559  if ((newExtent != extents_[d]) && (variationType_[d] == GENERAL))
2560  {
2561  // then we need to resize; let's determine the full set of new extents
2562  std::vector<ordinal_type> newExtents(dataRank_,-1);
2563  for (int r=0; r<dataRank_; r++)
2564  {
2565  if (activeDims_[r] == d)
2566  {
2567  // this is the changed dimension
2568  newExtents[r] = newExtent;
2569  }
2570  else
2571  {
2572  // unchanged; copy from existing
2573  newExtents[r] = getUnderlyingViewExtent(r);
2574  }
2575  }
2576 
2577  switch (dataRank_)
2578  {
2579  case 1: Kokkos::resize(data1_,newExtents[0]);
2580  break;
2581  case 2: Kokkos::resize(data2_,newExtents[0],newExtents[1]);
2582  break;
2583  case 3: Kokkos::resize(data3_,newExtents[0],newExtents[1],newExtents[2]);
2584  break;
2585  case 4: Kokkos::resize(data4_,newExtents[0],newExtents[1],newExtents[2],newExtents[3]);
2586  break;
2587  case 5: Kokkos::resize(data5_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4]);
2588  break;
2589  case 6: Kokkos::resize(data6_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5]);
2590  break;
2591  case 7: Kokkos::resize(data7_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5],newExtents[6]);
2592  break;
2593  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "Unexpected dataRank_ value");
2594  }
2595  }
2596 
2597  extents_[d] = newExtent;
2598  }
2599 
2601  KOKKOS_INLINE_FUNCTION
2603  {
2604  return underlyingMatchesLogical_;
2605  }
2606  };
2607 }
2608 
2609 #endif /* Intrepid2_Data_h */
enable_if_t< rank==7, Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< 6 > > > dataExtentRangePolicy()
returns an MDRangePolicy over the first six underlying data extents (but with the logical shape)...
KOKKOS_INLINE_FUNCTION void setUnderlyingView4(Kokkos::View< DataScalar ****, DeviceType > &view) const
sets the View that stores the unique data. For rank-4 underlying containers.
Header file with various static argument-extractor classes. These are useful for writing efficient...
arbitrary variation
KOKKOS_INLINE_FUNCTION bool isDiagonal() const
returns true for containers that have two dimensions marked as BLOCK_PLUS_DIAGONAL for which the non-...
void applyOperator(UnaryOperator unaryOperator)
applies the specified unary operator to each entry
KOKKOS_INLINE_FUNCTION DimensionInfo getDimensionInfo(const int &dim) const
Returns an object fully specifying the indicated dimension. This is used in determining appropriate s...
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
Returns flattened index of the specified (i,i) matrix entry, assuming that i > lastNondiagonal. Only applicable for BLOCK_PLUS_DIAGONAL DataVariationType.
ScalarView< DataScalar, DeviceType > getUnderlyingView() const
Returns a DynRankView constructed atop the same underlying data as the fixed-rank Kokkos::View used i...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==4, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 4...
KOKKOS_INLINE_FUNCTION void setUnderlyingView6(Kokkos::View< DataScalar ******, DeviceType > &view) const
sets the View that stores the unique data. For rank-6 underlying containers.
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
//! Returns flattened index of the specified (i,j) matrix entry, assuming that i,j ≤ lastNondiagonal...
static Data< DataScalar, DeviceType > allocateInPlaceCombinationResult(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
KOKKOS_INLINE_FUNCTION reference_type getWritableEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view. Note that for variation types other than GENERAL, multiple valid argument sets will refer to the same memory location. Intended for Intrepid2 developers and expert users only. If passThroughBlockDiagonalArgs is TRUE, the corresponding arguments are interpreted as entries in the 1D packed matrix rather than as logical 2D matrix row and column.
Argument extractor class which passes all arguments to the provided container.
KOKKOS_INLINE_FUNCTION Kokkos::Array< int, 7 > getExtents() const
Returns an array containing the logical extents in each dimension.
static void copyContainer(ToContainer to, FromContainer from)
Generic data copying method to allow construction of Data object from DynRankViews for which deep_cop...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 3...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ******, DeviceType > & getUnderlyingView6() const
returns the View that stores the unique data. For rank-6 underlying containers.
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
KOKKOS_INLINE_FUNCTION void setUnderlyingView7(Kokkos::View< DataScalar *******, DeviceType > &view) const
sets the View that stores the unique data. For rank-7 underlying containers.
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
enable_if_t< rank !=7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION void setUnderlyingView2(Kokkos::View< DataScalar **, DeviceType > &view) const
sets the View that stores the unique data. For rank-2 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView3(Kokkos::View< DataScalar ***, DeviceType > &view) const
sets the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
Returns (DataVariationType, data extent) in the specified dimension for a Data container that combine...
void storeMatVec(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
Places the result of a matrix-vector multiply corresponding to the two provided containers into this ...
Argument extractor class which ignores the input arguments in favor of passing a single 0 argument to...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==7, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 7...
KOKKOS_INLINE_FUNCTION const int & blockPlusDiagonalLastNonDiagonal() const
For a Data object containing data with variation type BLOCK_PLUS_DIAGONAL, returns the row and column...
void clear() const
Copies 0.0 to the underlying View.
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
returns true for containers that have data; false for those that don&#39;t (namely, those that have been ...
one of two dimensions in a matrix; bottom-right part of matrix is diagonal
KOKKOS_INLINE_FUNCTION enable_if_t< rank==5, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 5...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
varies according to modulus of the index
void storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
Places the result of an in-place combination (e.g., entrywise sum) into this data container...
Header function for Intrepid2::Util class and other utility functions.
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *, DeviceType > & getUnderlyingView1() const
returns the View that stores the unique data. For rank-1 underlying containers.
enable_if_t< rank==7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expec...
Data(std::vector< DimensionInfo > dimInfoVector)
Constructor in terms of DimensionInfo for each logical dimension; does not require a View to be speci...
Struct expressing all variation information about a Data object in a single dimension, including its logical extent and storage extent.
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
void storeInPlaceDifference(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) difference, A .- B, into this container.
KOKKOS_INLINE_FUNCTION enable_if_t< valid_args< IntArgs... >::value &&(sizeof...(IntArgs)<=7), return_type > operator()(const IntArgs &... intArgs) const
Returns a value corresponding to the specified logical data location.
Data(DataScalar constantValue, Kokkos::Array< int, rank > extents)
constructor for everywhere-constant data
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 2...
enable_if_t<(rank!=1) &&(rank!=7), Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< rank > > > dataExtentRangePolicy()
returns an MDRangePolicy over the underlying data extents (but with the logical shape).
Data< DataScalar, DeviceType > allocateConstantData(const DataScalar &value)
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
void copyDataFromDynRankViewMatchingUnderlying(const ScalarView< DataScalar, DeviceType > &dynRankView) const
Copies from the provided DynRankView into the underlying Kokkos::View container storing the unique da...
For use with Data object into which a value will be stored. We use passThroughBlockDiagonalArgs = tru...
KOKKOS_INLINE_FUNCTION return_type getEntry(const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location. ...
static Data< DataScalar, DeviceType > allocateMatVecResult(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
void storeInPlaceQuotient(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) quotient, A ./ B, into this container.
void storeInPlaceSum(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) sum, A .+ B, into this container.
For use with Data object into which a value will be stored. We use passThroughBlockDiagonalArgs = tru...
Data(const ScalarView< DataScalar, DeviceType > &data, int rank, Kokkos::Array< int, 7 > extents, Kokkos::Array< DataVariationType, 7 > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
DynRankView constructor. Will copy to a View of appropriate rank.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ****, DeviceType > & getUnderlyingView4() const
returns the View that stores the unique data. For rank-4 underlying containers.
void storeMatMat(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
void allocateAndCopyFromDynRankView(ScalarView< DataScalar, DeviceType > data)
allocate an underlying View that matches the provided DynRankView in dimensions, and copy...
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
Returns DimensionInfo for a Data container that combines (through multiplication, say...
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
Returns the number of non-diagonal entries based on the last non-diagonal. Only applicable for BLOCK_...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==6, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 6...
void setActiveDims()
class initialization method. Called by constructors.
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
Data()
default constructor (empty data)
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying() const
Returns a DynRankView that matches the underlying Kokkos::View object in value_type, layout, and dimension.
void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
sets the logical extent in the specified dimension. If needed, the underlying data container is resiz...
A singleton class for a DynRankView containing exactly one zero entry. (Technically, the entry is DataScalar(), the default value for the scalar type.) This allows View-wrapping classes to return a reference to zero, even when that zero is not explicitly stored in the wrapped views.
void storeInPlaceProduct(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) product, A .* B, into this container.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *******, DeviceType > & getUnderlyingView7() const
returns the View that stores the unique data. For rank-7 underlying containers.
KOKKOS_INLINE_FUNCTION reference_type getWritableEntry(const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view. Note that for variation types other than GENERAL, multiple valid argument sets will refer to the same memory location. Intended for Intrepid2 developers and expert users only. If passThroughBlockDiagonalArgs is TRUE, the corresponding arguments are interpreted as entries in the 1D packed matrix rather than as logical 2D matrix row and column.
KOKKOS_INLINE_FUNCTION void setUnderlyingView1(Kokkos::View< DataScalar *, DeviceType > &view) const
sets the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView5(Kokkos::View< DataScalar *****, DeviceType > &view) const
sets the View that stores the unique data. For rank-5 underlying containers.
void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
Data shallowCopy(const int rank, const Kokkos::Array< int, 7 > &extents, const Kokkos::Array< DataVariationType, 7 > &variationTypes)
Creates a new Data object with the same underlying view, but with the specified logical rank...
KOKKOS_INLINE_FUNCTION int getVariationModulus(const int &d) const
Variation modulus accessor.
Data(ScalarView< DataScalar, DeviceType > data)
copy constructor modeled after the copy-like constructor above. Not as efficient as the implicit copy...
Data(const Kokkos::DynRankView< DataScalar, DeviceType, DynRankViewProperties... > &data, Kokkos::Array< int, rank > extents, Kokkos::Array< DataVariationType, rank > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
Constructor that accepts a DynRankView as an argument. The data belonging to the DynRankView will be ...
KOKKOS_INLINE_FUNCTION return_type getEntryWithPassThroughOption(const bool &passThroughBlockDiagonalArgs, const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location. If passThroughBlock...
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar **, DeviceType > & getUnderlyingView2() const
returns the View that stores the unique data. For rank-2 underlying containers.
Argument extractor class which passes a single argument, indicated by the template parameter whichArg...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *****, DeviceType > & getUnderlyingView5() const
returns the View that stores the unique data. For rank-5 underlying containers.
DataVariationType
Enumeration to indicate how data varies in a particular dimension of an Intrepid2::Data object...
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
Returns a DynRankView that matches the underlying Kokkos::View object value_type and layout...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ***, DeviceType > & getUnderlyingView3() const
returns the View that stores the unique data. For rank-3 underlying containers.
static Data< DataScalar, DeviceType > allocateMatMatResult(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
Data(const Data< DataScalar, OtherDeviceType > &data)
copy-like constructor for differing execution spaces. This does a deep_copy of the underlying view...
KOKKOS_INLINE_FUNCTION int getUnderlyingViewExtent(const int &dim) const
Returns the extent of the underlying view in the specified dimension.
KOKKOS_INLINE_FUNCTION const Kokkos::Array< DataVariationType, 7 > & getVariationTypes() const
Returns an array with the variation types in each logical dimension.