49 #ifndef __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ 50 #define __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ 55 #include "Teuchos_TimeMonitor.hpp" 64 template<
class Scalar,
class DeviceType,
int integralViewRank>
67 using ExecutionSpace =
typename DeviceType::execution_space;
68 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
69 using TeamMember =
typename TeamPolicy::member_type;
71 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
72 IntegralViewType integralView_;
79 int leftComponentSpan_;
80 int rightComponentSpan_;
81 int numTensorComponents_;
82 int leftFieldOrdinalOffset_;
83 int rightFieldOrdinalOffset_;
84 bool forceNonSpecialized_;
86 size_t fad_size_output_ = 0;
88 Kokkos::Array<int, 7> offsetsForComponentOrdinal_;
92 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
93 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
94 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
96 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldRelativeEnumerationSpans_;
97 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldRelativeEnumerationSpans_;
110 int leftFieldOrdinalOffset,
111 int rightFieldOrdinalOffset,
112 bool forceNonSpecialized)
114 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
115 leftComponent_(leftComponent),
116 composedTransform_(composedTransform),
117 rightComponent_(rightComponent),
118 cellMeasures_(cellMeasures),
121 leftComponentSpan_(leftComponent.
extent_int(2)),
122 rightComponentSpan_(rightComponent.
extent_int(2)),
124 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
125 rightFieldOrdinalOffset_(rightFieldOrdinalOffset),
126 forceNonSpecialized_(forceNonSpecialized)
128 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.
numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
131 const int FIELD_DIM = 0;
132 const int POINT_DIM = 1;
136 for (
int r=0; r<numTensorComponents_; r++)
139 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
141 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
143 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
147 int leftRelativeEnumerationSpan = 1;
148 int rightRelativeEnumerationSpan = 1;
149 for (
int startingComponent=numTensorComponents_-2; startingComponent>=0; startingComponent--)
151 leftRelativeEnumerationSpan *= leftFieldBounds_[startingComponent];
152 rightRelativeEnumerationSpan *= rightFieldBounds_[startingComponent];
153 leftFieldRelativeEnumerationSpans_ [startingComponent] = leftRelativeEnumerationSpan;
154 rightFieldRelativeEnumerationSpans_[startingComponent] = rightRelativeEnumerationSpan;
160 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
161 if (allocateFadStorage)
166 const int R = numTensorComponents_ - 1;
169 int allocationSoFar = 0;
170 offsetsForComponentOrdinal_[R] = allocationSoFar;
173 for (
int r=R-1; r>0; r--)
178 num_ij *= leftFields * rightFields;
180 offsetsForComponentOrdinal_[r] = allocationSoFar;
181 allocationSoFar += num_ij;
183 offsetsForComponentOrdinal_[0] = allocationSoFar;
186 template<
size_t maxComponents,
size_t numComponents = maxComponents>
187 KOKKOS_INLINE_FUNCTION
188 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
189 const Kokkos::Array<int,maxComponents> &bounds)
const 191 if (numComponents == 0)
197 int r =
static_cast<int>(numComponents - 1);
198 while (arguments[r] + 1 >= bounds[r])
204 if (r >= 0) ++arguments[r];
210 KOKKOS_INLINE_FUNCTION
212 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
213 const int &numComponents)
const 215 if (numComponents == 0)
return -1;
216 int r =
static_cast<int>(numComponents - 1);
217 while (arguments[r] + 1 >= bounds[r])
223 if (r >= 0) ++arguments[r];
227 template<
size_t maxComponents,
size_t numComponents = maxComponents>
228 KOKKOS_INLINE_FUNCTION
229 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
230 const Kokkos::Array<int,maxComponents> &bounds)
const 232 if (numComponents == 0)
238 int r =
static_cast<int>(numComponents - 1);
239 while (arguments[r] + 1 >= bounds[r])
249 KOKKOS_INLINE_FUNCTION
251 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
252 const int &numComponents)
const 254 if (numComponents == 0)
return -1;
255 int r = numComponents - 1;
256 while (arguments[r] + 1 >= bounds[r])
264 template<
size_t maxComponents,
size_t numComponents = maxComponents>
265 KOKKOS_INLINE_FUNCTION
266 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
267 const Kokkos::Array<int,maxComponents> &bounds,
268 const int startIndex)
const 271 if (numComponents == 0)
277 int enumerationIndex = 0;
278 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
280 enumerationIndex += arguments[r];
281 enumerationIndex *= bounds[r-1];
283 enumerationIndex += arguments[startIndex];
284 return enumerationIndex;
298 KOKKOS_INLINE_FUNCTION
301 constexpr
int numTensorComponents = 3;
303 Kokkos::Array<int,numTensorComponents> pointBounds;
304 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
305 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
306 for (
unsigned r=0; r<numTensorComponents; r++)
308 pointBounds[r] = pointBounds_[r];
309 leftFieldBounds[r] = leftFieldBounds_[r];
310 rightFieldBounds[r] = rightFieldBounds_[r];
313 const int cellDataOrdinal = teamMember.league_rank();
314 const int numThreads = teamMember.team_size();
315 const int scratchViewSize = offsetsForComponentOrdinal_[0];
317 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
318 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
319 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, leftFields_y, leftFields_z, rightFields_x, rightFields_y, rightFields_z;
320 if (fad_size_output_ > 0) {
321 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
322 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1), fad_size_output_);
323 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0], fad_size_output_);
324 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0], fad_size_output_);
325 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1], fad_size_output_);
326 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1], fad_size_output_);
327 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2], fad_size_output_);
328 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2], fad_size_output_);
331 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads);
332 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1));
333 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0]);
334 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0]);
335 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1]);
336 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1]);
337 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2]);
338 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2]);
344 constexpr
int R = numTensorComponents - 1;
346 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
348 const int a = a_offset_ + a_component;
349 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
351 const int b = b_offset_ + b_component;
356 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
357 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
366 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
367 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields * maxPointCount_), [&] (
const int& fieldOrdinalPointOrdinal) {
368 const int fieldOrdinal = fieldOrdinalPointOrdinal % maxFields;
369 const int pointOrdinal = fieldOrdinalPointOrdinal / maxFields;
370 if ((fieldOrdinal < leftTensorComponent_x.
extent_int(0)) && (pointOrdinal < leftTensorComponent_x.
extent_int(1)))
372 const int leftRank = leftTensorComponent_x.
rank();
373 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
375 if ((fieldOrdinal < leftTensorComponent_y.
extent_int(0)) && (pointOrdinal < leftTensorComponent_y.
extent_int(1)))
377 const int leftRank = leftTensorComponent_y.
rank();
378 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
380 if ((fieldOrdinal < leftTensorComponent_z.
extent_int(0)) && (pointOrdinal < leftTensorComponent_z.
extent_int(1)))
382 const int leftRank = leftTensorComponent_z.
rank();
383 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
385 if ((fieldOrdinal < rightTensorComponent_x.
extent_int(0)) && (pointOrdinal < rightTensorComponent_x.
extent_int(1)))
387 const int rightRank = rightTensorComponent_x.
rank();
388 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,b_component);
390 if ((fieldOrdinal < rightTensorComponent_y.
extent_int(0)) && (pointOrdinal < rightTensorComponent_y.
extent_int(1)))
392 const int rightRank = rightTensorComponent_y.
rank();
393 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,b_component);
395 if ((fieldOrdinal < rightTensorComponent_z.
extent_int(0)) && (pointOrdinal < rightTensorComponent_z.
extent_int(1)))
397 const int rightRank = rightTensorComponent_z.
rank();
398 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,b_component);
405 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransformView.extent_int(1)), [&] (
const int& pointOrdinal) {
406 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
411 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.
extent_int(1)), [&] (
const int& pointOrdinal) {
412 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
419 teamMember.team_barrier();
422 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
423 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
425 scratchView(i) = 0.0;
429 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
430 const int iz = leftRightFieldEnumeration % numLeftFieldsFinal;
431 const int jz = leftRightFieldEnumeration / numLeftFieldsFinal;
435 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
436 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
437 rightFieldArguments[R] = jz;
438 leftFieldArguments[R] = iz;
440 Kokkos::Array<int,numTensorComponents> pointArguments;
441 for (
int i=0; i<numTensorComponents; i++)
443 pointArguments[i] = 0;
446 for (
int lx=0; lx<pointBounds[0]; lx++)
448 pointArguments[0] = lx;
452 const int Gy_start_index = scratchOffsetForThread + offsetsForComponentOrdinal_[1];
453 const int Gy_end_index = scratchOffsetForThread + offsetsForComponentOrdinal_[0];
455 for (
int Gy_index=Gy_start_index; Gy_index < Gy_end_index; Gy_index++)
457 scratchView(Gy_index) = 0;
460 for (
int ly=0; ly<pointBounds[1]; ly++)
462 pointArguments[1] = ly;
464 Scalar * Gz = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
467 for (
int lz=0; lz < pointBounds[2]; lz++)
469 const Scalar & leftValue = leftFields_z(iz,lz);
470 const Scalar & rightValue = rightFields_z(jz,lz);
472 pointArguments[2] = lz;
473 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
475 *Gz += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
480 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
482 leftFieldArguments[1] = iy;
483 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
485 const Scalar & leftValue = leftFields_y(iy,ly);
487 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
489 rightFieldArguments[1] = jy;
491 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
492 const Scalar & rightValue = rightFields_y(jy,ly);
494 const int & rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
495 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
497 const int Gz_index = 0;
498 const Scalar & Gz = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[2] + Gz_index);
500 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
502 Gy += leftValue * Gz * rightValue;
507 for (
int ix=0; ix<leftFieldBounds_[0]; ix++)
509 leftFieldArguments[0] = ix;
510 const Scalar & leftValue = leftFields_x(ix,lx);
512 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
514 leftFieldArguments[1] = iy;
516 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
518 for (
int jx=0; jx<rightFieldBounds_[0]; jx++)
520 rightFieldArguments[0] = jx;
521 const Scalar & rightValue = rightFields_x(jx,lx);
523 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
525 rightFieldArguments[1] = jy;
526 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
528 const int rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
530 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
531 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
534 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
535 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
536 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
537 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
539 if (integralViewRank == 3)
561 integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex) += leftValue * Gy * rightValue;
566 integralView_.access(leftFieldIndex,rightFieldIndex,0) += leftValue * Gy * rightValue;
580 template<
size_t numTensorComponents>
581 KOKKOS_INLINE_FUNCTION
582 void run(
const TeamMember & teamMember )
const 584 Kokkos::Array<int,numTensorComponents> pointBounds;
585 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
586 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
587 for (
unsigned r=0; r<numTensorComponents; r++)
589 pointBounds[r] = pointBounds_[r];
590 leftFieldBounds[r] = leftFieldBounds_[r];
591 rightFieldBounds[r] = rightFieldBounds_[r];
594 const int cellDataOrdinal = teamMember.league_rank();
595 const int numThreads = teamMember.team_size();
596 const int scratchViewSize = offsetsForComponentOrdinal_[0];
597 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
598 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
599 Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged> leftFields, rightFields;
600 if (fad_size_output_ > 0) {
601 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
602 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1), fad_size_output_);
603 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_, fad_size_output_);
604 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_, fad_size_output_);
607 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize*numThreads);
608 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1));
609 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_);
610 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_);
616 constexpr
int R = numTensorComponents - 1;
618 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
620 const int a = a_offset_ + a_component;
621 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
623 const int b = b_offset_ + b_component;
625 const Data<Scalar,DeviceType> & leftFinalComponent = leftComponent_.
getTensorComponent(R);
626 const Data<Scalar,DeviceType> & rightFinalComponent = rightComponent_.
getTensorComponent(R);
628 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
629 const int numRightFieldsFinal = rightFinalComponent.extent_int(0);
631 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numTensorComponents), [&] (
const int& r) {
632 const Data<Scalar,DeviceType> & leftTensorComponent = leftComponent_.
getTensorComponent(r);
633 const Data<Scalar,DeviceType> & rightTensorComponent = rightComponent_.
getTensorComponent(r);
634 const int leftFieldCount = leftTensorComponent.
extent_int(0);
635 const int pointCount = leftTensorComponent.extent_int(1);
636 const int leftRank = leftTensorComponent.rank();
637 const int rightFieldCount = rightTensorComponent.extent_int(0);
638 const int rightRank = rightTensorComponent.rank();
639 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsLeft_; fieldOrdinal++)
642 const int fieldAddress = (fieldOrdinal < leftFieldCount) ? fieldOrdinal : leftFieldCount - 1;
643 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
645 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
646 leftFields(r,fieldAddress,pointAddress) = (leftRank == 2) ? leftTensorComponent(fieldAddress,pointAddress) : leftTensorComponent(fieldAddress,pointAddress,a_component);
649 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsRight_; fieldOrdinal++)
652 const int fieldAddress = (fieldOrdinal < rightFieldCount) ? fieldOrdinal : rightFieldCount - 1;
653 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
655 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
656 rightFields(r,fieldAddress,pointAddress) = (rightRank == 2) ? rightTensorComponent(fieldAddress,pointAddress) : rightTensorComponent(fieldAddress,pointAddress,b_component);
661 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.
extent_int(1)), [&] (
const int& pointOrdinal) {
662 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
667 teamMember.team_barrier();
669 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
670 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
671 const int i_R = leftRightFieldEnumeration % numLeftFieldsFinal;
672 const int j_R = leftRightFieldEnumeration / numLeftFieldsFinal;
676 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
677 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
678 rightFieldArguments[R] = j_R;
679 leftFieldArguments[R] = i_R;
682 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
684 scratchView(i) = 0.0;
686 Kokkos::Array<int,numTensorComponents> pointArguments;
687 for (
unsigned i=0; i<numTensorComponents; i++)
689 pointArguments[i] = 0;
696 const int pointBounds_R = pointBounds[R];
697 int & pointArgument_R = pointArguments[R];
698 for (pointArgument_R=0; pointArgument_R < pointBounds_R; pointArgument_R++)
703 G_R = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
707 const int leftFieldIndex = i_R + leftFieldOrdinalOffset_;
708 const int rightFieldIndex = j_R + rightFieldOrdinalOffset_;
710 if (integralViewRank == 3)
713 G_R = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
718 G_R = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
722 const int & pointIndex_R = pointArguments[R];
724 const Scalar & leftValue = leftFields(R,i_R,pointIndex_R);
725 const Scalar & rightValue = rightFields(R,j_R,pointIndex_R);
727 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
729 *G_R += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
734 const int r_next = nextIncrementResult<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
735 const int r_min = (r_next >= 0) ? r_next : 0;
737 for (
int s=R-1; s>=r_min; s--)
739 const int & pointIndex_s = pointArguments[s];
742 for (
int s1=s; s1<R; s1++)
744 leftFieldArguments[s1] = 0;
748 const int & i_s = leftFieldArguments[s];
749 const int & j_s = rightFieldArguments[s];
752 const int & rightEnumerationSpan_s = rightFieldRelativeEnumerationSpans_[s];
753 const int & rightEnumerationSpan_sp = rightFieldRelativeEnumerationSpans_[s+1];
757 const int leftEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s);
760 const int leftEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s+1);
762 const Scalar & leftValue = leftFields(s,i_s,pointIndex_s);
764 for (
int s1=s; s1<R; s1++)
766 rightFieldArguments[s1] = 0;
771 const int rightEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s);
774 const int rightEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s+1);
776 const Scalar & rightValue = rightFields(s,j_s,pointIndex_s);
778 const int G_s_index = leftEnumerationIndex_s * rightEnumerationSpan_s + rightEnumerationIndex_s;
784 const int G_sp_index = leftEnumerationIndex_sp * rightEnumerationSpan_sp + rightEnumerationIndex_sp;
786 const Scalar & G_sp = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s+1] + G_sp_index);
791 G_s = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s] + G_s_index);
796 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
797 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
798 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
799 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
801 if (integralViewRank == 3)
804 G_s = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
809 G_s = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
813 *G_s += leftValue * G_sp * rightValue;
818 sRight = incrementArgument<numTensorComponents,R>(rightFieldArguments, rightFieldBounds);
822 sLeft = incrementArgument<numTensorComponents,R>(leftFieldArguments, leftFieldBounds);
829 const int endIndex = scratchOffsetForThread + offsetsForComponentOrdinal_[r_min];
830 for (
int i=scratchOffsetForThread; i<endIndex; i++)
832 scratchView(i) = 0.0;
839 r = incrementArgument<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
847 KOKKOS_INLINE_FUNCTION
848 void operator()(
const TeamMember & teamMember )
const 850 switch (numTensorComponents_)
852 case 1: run<1>(teamMember);
break;
853 case 2: run<2>(teamMember);
break;
855 if (forceNonSpecialized_)
860 case 4: run<4>(teamMember);
break;
861 case 5: run<5>(teamMember);
break;
862 case 6: run<6>(teamMember);
break;
863 case 7: run<7>(teamMember);
break;
864 #ifdef INTREPID2_HAVE_DEBUG 877 const int R = numTensorComponents_ - 1;
883 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
885 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
890 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
891 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
893 flopCount += flopsPerPoint_ab * cellMeasures_.
extent_int(1);
895 int flopsPer_i_R_j_R = 0;
899 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldArguments;
900 leftFieldArguments[R] = 0;
902 Kokkos::Array<int,Parameters::MaxTensorComponents> pointArguments;
903 for (
int i=0; i<numTensorComponents_; i++)
905 pointArguments[i] = 0;
910 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldArguments;
911 rightFieldArguments[R] = 0;
917 for (pointArguments[R]=0; pointArguments[R] < pointBounds_[R]; pointArguments[R]++)
919 flopsPer_i_R_j_R += 4;
927 const int r_next = nextIncrementResult(pointArguments, pointBounds_, numTensorComponents_);
928 const int r_min = (r_next >= 0) ? r_next : 0;
930 for (
int s=R-1; s>=r_min; s--)
933 int numLeftIterations = leftFieldRelativeEnumerationSpans_[s];
934 int numRightIterations = rightFieldRelativeEnumerationSpans_[s];
936 flopsPer_i_R_j_R += numLeftIterations * numRightIterations * 3;
940 r = incrementArgument(pointArguments, pointBounds_, numTensorComponents_);
943 flopCount += flopsPer_i_R_j_R * numLeftFieldsFinal * numRightFieldsFinal;
951 int teamSize(
const int &maxTeamSizeFromKokkos)
const 953 const int R = numTensorComponents_ - 1;
954 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
955 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
962 size_t shmem_size = 0;
964 if (fad_size_output_ > 0)
966 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(offsetsForComponentOrdinal_[0] * team_size, fad_size_output_);
967 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.
extent_int(1), fad_size_output_);
968 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_, fad_size_output_);
969 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_, fad_size_output_);
973 shmem_size += Kokkos::View<Scalar*, DeviceType>::shmem_size(offsetsForComponentOrdinal_[0] * team_size);
974 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.
extent_int(1));
975 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_);
976 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_);
992 template<
class Scalar,
class DeviceType,
int integralViewRank>
995 using ExecutionSpace =
typename DeviceType::execution_space;
996 using TeamPolicy = Kokkos::TeamPolicy<DeviceType>;
997 using TeamMember =
typename TeamPolicy::member_type;
999 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
1000 IntegralViewType integralView_;
1007 int leftComponentSpan_;
1008 int rightComponentSpan_;
1009 int numTensorComponents_;
1010 int leftFieldOrdinalOffset_;
1011 int rightFieldOrdinalOffset_;
1013 size_t fad_size_output_ = 0;
1017 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
1018 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
1019 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
1022 int maxFieldsRight_;
1032 int leftFieldOrdinalOffset,
1033 int rightFieldOrdinalOffset)
1035 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
1036 leftComponent_(leftComponent),
1037 composedTransform_(composedTransform),
1038 rightComponent_(rightComponent),
1039 cellMeasures_(cellMeasures),
1040 a_offset_(a_offset),
1041 b_offset_(b_offset),
1042 leftComponentSpan_(leftComponent.
extent_int(2)),
1043 rightComponentSpan_(rightComponent.
extent_int(2)),
1045 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
1046 rightFieldOrdinalOffset_(rightFieldOrdinalOffset)
1048 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.
numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
1050 const int FIELD_DIM = 0;
1051 const int POINT_DIM = 1;
1053 maxFieldsRight_ = 0;
1055 for (
int r=0; r<numTensorComponents_; r++)
1058 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
1060 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
1062 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
1068 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
1069 if (allocateFadStorage)
1075 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1076 KOKKOS_INLINE_FUNCTION
1077 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
1078 const Kokkos::Array<int,maxComponents> &bounds)
const 1080 if (numComponents == 0)
return -1;
1081 int r =
static_cast<int>(numComponents - 1);
1082 while (arguments[r] + 1 >= bounds[r])
1088 if (r >= 0) ++arguments[r];
1093 KOKKOS_INLINE_FUNCTION
1095 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1096 const int &numComponents)
const 1098 if (numComponents == 0)
return -1;
1099 int r =
static_cast<int>(numComponents - 1);
1100 while (arguments[r] + 1 >= bounds[r])
1106 if (r >= 0) ++arguments[r];
1110 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1111 KOKKOS_INLINE_FUNCTION
1112 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
1113 const Kokkos::Array<int,maxComponents> &bounds)
const 1115 if (numComponents == 0)
return -1;
1116 int r =
static_cast<int>(numComponents - 1);
1117 while (arguments[r] + 1 >= bounds[r])
1126 KOKKOS_INLINE_FUNCTION
1128 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1129 const int &numComponents)
const 1131 if (numComponents == 0)
return -1;
1132 int r = numComponents - 1;
1133 while (arguments[r] + 1 >= bounds[r])
1141 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1142 KOKKOS_INLINE_FUNCTION
1143 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
1144 const Kokkos::Array<int,maxComponents> &bounds,
1145 const int startIndex)
const 1148 if (numComponents == 0)
1152 int enumerationIndex = 0;
1153 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
1155 enumerationIndex += arguments[r];
1156 enumerationIndex *= bounds[r-1];
1158 enumerationIndex += arguments[startIndex];
1159 return enumerationIndex;
1163 KOKKOS_INLINE_FUNCTION
1164 enable_if_t<rank==3 && rank==integralViewRank, Scalar &>
1165 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const 1167 return integralView(cellDataOrdinal,i,j);
1171 KOKKOS_INLINE_FUNCTION
1172 enable_if_t<rank==2 && rank==integralViewRank, Scalar &>
1173 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const 1175 return integralView(i,j);
1179 KOKKOS_INLINE_FUNCTION
1182 constexpr
int numTensorComponents = 3;
1184 const int pointBounds_x = pointBounds_[0];
1185 const int pointBounds_y = pointBounds_[1];
1186 const int pointBounds_z = pointBounds_[2];
1187 const int pointsInNonzeroComponentDimensions = pointBounds_y * pointBounds_z;
1189 const int leftFieldBounds_x = leftFieldBounds_[0];
1190 const int rightFieldBounds_x = rightFieldBounds_[0];
1191 const int leftFieldBounds_y = leftFieldBounds_[1];
1192 const int rightFieldBounds_y = rightFieldBounds_[1];
1193 const int leftFieldBounds_z = leftFieldBounds_[2];
1194 const int rightFieldBounds_z = rightFieldBounds_[2];
1196 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1197 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1198 for (
unsigned r=0; r<numTensorComponents; r++)
1200 leftFieldBounds[r] = leftFieldBounds_[r];
1201 rightFieldBounds[r] = rightFieldBounds_[r];
1204 const auto integralView = integralView_;
1205 const auto leftFieldOrdinalOffset = leftFieldOrdinalOffset_;
1206 const auto rightFieldOrdinalOffset = rightFieldOrdinalOffset_;
1208 const int cellDataOrdinal = teamMember.league_rank();
1209 const int threadNumber = teamMember.team_rank();
1211 const int numThreads = teamMember.team_size();
1212 const int GyEntryCount = pointBounds_z;
1213 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GxIntegrals;
1214 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GyIntegrals;
1215 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GzIntegral;
1216 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
1218 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, rightFields_x;
1219 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_y, rightFields_y;
1220 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_z, rightFields_z;
1221 if (fad_size_output_ > 0) {
1222 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions, fad_size_output_);
1223 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads, fad_size_output_);
1224 GzIntegral = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numThreads, fad_size_output_);
1225 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1), fad_size_output_);
1227 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x, fad_size_output_);
1228 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x, fad_size_output_);
1229 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y, fad_size_output_);
1230 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y, fad_size_output_);
1231 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z, fad_size_output_);
1232 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z, fad_size_output_);
1235 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions);
1236 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads);
1237 GzIntegral = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numThreads);
1238 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1));
1240 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x);
1241 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x);
1242 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y);
1243 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y);
1244 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z);
1245 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z);
1254 teamMember.team_barrier();
1256 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1258 const int a = a_offset_ + a_component;
1259 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1261 const int b = b_offset_ + b_component;
1270 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
1271 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields), [&] (
const int& fieldOrdinal) {
1272 if (fieldOrdinal < leftTensorComponent_x.
extent_int(0))
1274 const int pointCount = leftTensorComponent_x.
extent_int(1);
1275 const int leftRank = leftTensorComponent_x.
rank();
1276 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1278 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1281 if (fieldOrdinal < leftTensorComponent_y.
extent_int(0))
1283 const int pointCount = leftTensorComponent_y.
extent_int(1);
1284 const int leftRank = leftTensorComponent_y.
rank();
1285 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1287 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1290 if (fieldOrdinal < leftTensorComponent_z.
extent_int(0))
1292 const int pointCount = leftTensorComponent_z.
extent_int(1);
1293 const int leftRank = leftTensorComponent_z.
rank();
1294 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1296 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1299 if (fieldOrdinal < rightTensorComponent_x.
extent_int(0))
1301 const int pointCount = rightTensorComponent_x.
extent_int(1);
1302 const int rightRank = rightTensorComponent_x.
rank();
1303 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1305 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1308 if (fieldOrdinal < rightTensorComponent_y.
extent_int(0))
1310 const int pointCount = rightTensorComponent_y.
extent_int(1);
1311 const int rightRank = rightTensorComponent_y.
rank();
1312 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1314 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1317 if (fieldOrdinal < rightTensorComponent_z.
extent_int(0))
1319 const int pointCount = rightTensorComponent_z.
extent_int(1);
1320 const int rightRank = rightTensorComponent_z.
rank();
1321 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1323 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1328 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.
extent_int(1)), [&] (
const int& pointOrdinal) {
1329 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
1333 teamMember.team_barrier();
1335 for (
int i0=0; i0<leftFieldBounds_x; i0++)
1337 for (
int j0=0; j0<rightFieldBounds_x; j0++)
1339 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,pointsInNonzeroComponentDimensions), [&] (
const int& pointEnumerationIndexLaterDimensions) {
1341 const int tensorPointEnumerationOffset = pointBounds_x * pointEnumerationIndexLaterDimensions;
1343 Scalar & Gx = GxIntegrals(pointEnumerationIndexLaterDimensions);
1346 if (fad_size_output_ == 0)
1349 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, pointBounds_x), [&] (
const int &x_pointOrdinal, Scalar &integralThusFar)
1351 integralThusFar += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1356 for (
int x_pointOrdinal=0; x_pointOrdinal<pointBounds_x; x_pointOrdinal++)
1358 Gx += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1364 teamMember.team_barrier();
1366 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,leftFieldBounds_y * rightFieldBounds_y), [&] (
const int& i1j1) {
1367 const int i1 = i1j1 % leftFieldBounds_y;
1368 const int j1 = i1j1 / leftFieldBounds_y;
1370 int Gy_index = GyEntryCount * threadNumber;
1372 int pointEnumerationIndex = 0;
1373 for (
int lz=0; lz<pointBounds_z; lz++)
1375 Scalar & Gy = GyIntegrals(Gy_index);
1378 for (
int ly=0; ly<pointBounds_y; ly++)
1380 const Scalar & leftValue = leftFields_y(i1,ly);
1381 const Scalar & rightValue = rightFields_y(j1,ly);
1383 Gy += leftValue * rightValue * GxIntegrals(pointEnumerationIndex);
1385 pointEnumerationIndex++;
1390 Scalar & Gz = GzIntegral(threadNumber);
1391 for (
int i2=0; i2<leftFieldBounds_z; i2++)
1393 for (
int j2=0; j2<rightFieldBounds_z; j2++)
1397 int Gy_index = GyEntryCount * threadNumber;
1399 for (
int lz=0; lz<pointBounds_z; lz++)
1401 const Scalar & leftValue = leftFields_z(i2,lz);
1402 const Scalar & rightValue = rightFields_z(j2,lz);
1404 Gz += leftValue * rightValue * GyIntegrals(Gy_index);
1409 const int i = leftFieldOrdinalOffset + i0 + (i1 + i2 * leftFieldBounds_y) * leftFieldBounds_x;
1410 const int j = rightFieldOrdinalOffset + j0 + (j1 + j2 * rightFieldBounds_y) * rightFieldBounds_x;
1415 integralViewEntry<integralViewRank>(integralView, cellDataOrdinal, i, j) += Gz;
1420 teamMember.team_barrier();
1427 template<
size_t numTensorComponents>
1428 KOKKOS_INLINE_FUNCTION
1429 void run(
const TeamMember & teamMember )
const 1567 KOKKOS_INLINE_FUNCTION
1568 void operator()(
const TeamMember & teamMember )
const 1570 switch (numTensorComponents_)
1572 case 1: run<1>(teamMember);
break;
1573 case 2: run<2>(teamMember);
break;
1575 case 4: run<4>(teamMember);
break;
1576 case 5: run<5>(teamMember);
break;
1577 case 6: run<6>(teamMember);
break;
1578 case 7: run<7>(teamMember);
break;
1579 #ifdef INTREPID2_HAVE_DEBUG 1592 constexpr
int numTensorComponents = 3;
1593 Kokkos::Array<int,numTensorComponents> pointBounds;
1594 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1595 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1597 int pointsInNonzeroComponentDimensions = 1;
1598 for (
unsigned r=0; r<numTensorComponents; r++)
1600 pointBounds[r] = pointBounds_[r];
1601 if (r > 0) pointsInNonzeroComponentDimensions *= pointBounds[r];
1602 leftFieldBounds[r] = leftFieldBounds_[r];
1603 rightFieldBounds[r] = rightFieldBounds_[r];
1606 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1608 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1615 for (
int i0=0; i0<leftFieldBounds[0]; i0++)
1617 for (
int j0=0; j0<rightFieldBounds[0]; j0++)
1619 flopCount += pointsInNonzeroComponentDimensions * pointBounds[0] * 3;
1659 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * pointBounds[1] * pointBounds[2] * 3;
1660 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * pointBounds[2] * 3;
1661 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * 1;
1737 const int R = numTensorComponents_ - 1;
1738 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
1739 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
1746 size_t shmem_size = 0;
1748 const int GyEntryCount = pointBounds_[2];
1750 int pointsInNonzeroComponentDimensions = 1;
1751 for (
int d=1; d<numTensorComponents_; d++)
1753 pointsInNonzeroComponentDimensions *= pointBounds_[d];
1756 if (fad_size_output_ > 0)
1758 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions, fad_size_output_);
1759 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads, fad_size_output_);
1760 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( 1 * numThreads, fad_size_output_);
1761 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.
extent_int(1), fad_size_output_);
1763 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0], fad_size_output_);
1764 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0], fad_size_output_);
1765 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1], fad_size_output_);
1766 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1], fad_size_output_);
1767 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2], fad_size_output_);
1768 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2], fad_size_output_);
1772 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions);
1773 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads);
1774 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( 1 * numThreads);
1775 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.
extent_int(1));
1777 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0]);
1778 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0]);
1779 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1]);
1780 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1]);
1781 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2]);
1782 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2]);
1790 template<
typename DeviceType>
1791 template<
class Scalar>
1800 const bool leftHasOrdinalFilter = vectorDataLeft.basisValues().ordinalFilter().extent_int(0) > 0;
1801 const bool rightHasOrdinalFilter = vectorDataRight.basisValues().ordinalFilter().extent_int(0) > 0;
1802 TEUCHOS_TEST_FOR_EXCEPTION(leftHasOrdinalFilter || rightHasOrdinalFilter, std::invalid_argument,
"Ordinal filters for BasisValues are not yet supported by IntegrationTools");
1808 const int CELL_DIM = 0;
1810 const auto leftTransform = vectorDataLeft.
transform();
1812 DimensionInfo combinedCellDimInfo = cellMeasureData.getDimensionInfo(CELL_DIM);
1817 int cellDataExtent = combinedCellDimInfo.dataExtent;
1819 const int numCells = vectorDataLeft.
numCells();
1820 const int numFieldsLeft = vectorDataLeft.
numFields();
1821 const int numFieldsRight = vectorDataRight.
numFields();
1823 Kokkos::Array<int,3> extents {numCells, numFieldsLeft, numFieldsRight};
1824 Kokkos::Array<DataVariationType,3> variationTypes {cellVariationType,
GENERAL,
GENERAL};
1828 Kokkos::View<Scalar***,DeviceType> data(
"Intrepid2 integral data",cellDataExtent,numFieldsLeft,numFieldsRight);
1833 Kokkos::View<Scalar**,DeviceType> data(
"Intrepid2 integral data",numFieldsLeft,numFieldsRight);
1841 template<
typename DeviceType>
1842 template<
class Scalar>
1846 double* approximateFlops)
1848 using ExecutionSpace =
typename DeviceType::execution_space;
1852 const bool leftHasOrdinalFilter = basisValuesLeft.basisValues().ordinalFilter().extent_int(0) > 0;
1853 const bool rightHasOrdinalFilter = basisValuesRight.basisValues().ordinalFilter().extent_int(0) > 0;
1854 TEUCHOS_TEST_FOR_EXCEPTION(leftHasOrdinalFilter || rightHasOrdinalFilter, std::invalid_argument,
"Ordinal filters for BasisValues are not yet supported by IntegrationTools");
1856 if (approximateFlops != NULL)
1858 *approximateFlops = 0;
1870 const int numFieldsLeft = basisValuesLeft.
numFields();
1871 const int numFieldsRight = basisValuesRight.
numFields();
1872 const int spaceDim = basisValuesLeft.
spaceDim();
1874 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.
spaceDim() != basisValuesRight.
spaceDim(), std::invalid_argument,
"vectorDataLeft and vectorDataRight must agree on the space dimension");
1876 const int leftFamilyCount = basisValuesLeft.basisValues().
numFamilies();
1877 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
1881 int numTensorComponentsLeft = -1;
1882 const bool isVectorValued = basisValuesLeft.
vectorData().isValid();
1885 const bool rightIsVectorValued = basisValuesRight.
vectorData().isValid();
1886 INTREPID2_TEST_FOR_EXCEPTION(!rightIsVectorValued, std::invalid_argument,
"left and right must either both be vector-valued, or both scalar-valued");
1887 const auto &refVectorLeft = basisValuesLeft.
vectorData();
1888 int numFamiliesLeft = refVectorLeft.numFamilies();
1889 int numVectorComponentsLeft = refVectorLeft.numComponents();
1890 Kokkos::Array<int,7> maxFieldsForComponentLeft {0,0,0,0,0,0,0};
1891 Kokkos::Array<int,7> maxFieldsForComponentRight {0,0,0,0,0,0,0};
1892 for (
int familyOrdinal=0; familyOrdinal<numFamiliesLeft; familyOrdinal++)
1894 for (
int vectorComponent=0; vectorComponent<numVectorComponentsLeft; vectorComponent++)
1899 if (numTensorComponentsLeft == -1)
1903 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != tensorData.
numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataLeft must have the same number of tensor components as every other");
1904 for (
int r=0; r<numTensorComponentsLeft; r++)
1911 int numTensorComponentsRight = -1;
1912 const auto &refVectorRight = basisValuesRight.
vectorData();
1913 int numFamiliesRight = refVectorRight.numFamilies();
1914 int numVectorComponentsRight = refVectorRight.numComponents();
1915 for (
int familyOrdinal=0; familyOrdinal<numFamiliesRight; familyOrdinal++)
1917 for (
int vectorComponent=0; vectorComponent<numVectorComponentsRight; vectorComponent++)
1919 const auto &tensorData = refVectorRight.getComponent(familyOrdinal,vectorComponent);
1920 if (tensorData.numTensorComponents() > 0)
1922 if (numTensorComponentsRight == -1)
1924 numTensorComponentsRight = tensorData.numTensorComponents();
1926 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsRight != tensorData.numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataRight must have the same number of tensor components as every other");
1927 for (
int r=0; r<numTensorComponentsRight; r++)
1929 maxFieldsForComponentRight[r] = std::max(tensorData.getTensorComponent(r).extent_int(0), maxFieldsForComponentRight[r]);
1934 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != numVectorComponentsRight, std::invalid_argument,
"Left and right vector entries must have the same number of tensorial components");
1939 for (
int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++)
1941 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"All families must match in the number of tensor components");
1945 for (
int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++)
1947 INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"Right families must match left in the number of tensor components");
1952 if ((numPointTensorComponents == numTensorComponentsLeft) && basisValuesLeft.
axisAligned() && basisValuesRight.
axisAligned())
1961 Kokkos::Array<int,Parameters::MaxTensorComponents> pointDimensions;
1962 for (
int r=0; r<numPointTensorComponents; r++)
1969 Kokkos::View<Scalar**, DeviceType> integralView2;
1970 Kokkos::View<Scalar***, DeviceType> integralView3;
1971 if (integralViewRank == 3)
1979 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
1984 const int leftVectorComponentCount = isVectorValued ? basisValuesLeft.
vectorData().numComponents() : 1;
1985 for (
int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++)
1988 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
1994 const int leftDimSpan = leftComponent.
extent_int(2);
1996 const int leftComponentFieldCount = leftComponent.
extent_int(0);
1998 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2001 int rightFieldOffset = basisValuesRight.
vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal);
2003 const int rightVectorComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2004 for (
int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++)
2007 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2008 if (!rightComponent.
isValid())
2013 const int rightDimSpan = rightComponent.
extent_int(2);
2015 const int rightComponentFieldCount = rightComponent.
extent_int(0);
2018 if ((a_offset + leftDimSpan <= b_offset) || (b_offset + rightDimSpan <= a_offset))
2020 b_offset += rightDimSpan;
2026 INTREPID2_TEST_FOR_EXCEPTION(( a_offset != b_offset), std::logic_error,
"left and right dimension offsets should match.");
2027 INTREPID2_TEST_FOR_EXCEPTION(( leftDimSpan != rightDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2029 const int d_start = a_offset;
2030 const int d_end = d_start + leftDimSpan;
2033 ComponentIntegralsArray componentIntegrals;
2034 for (
int r=0; r<numPointTensorComponents; r++)
2051 const int numPoints = pointDimensions[r];
2058 const int leftTensorComponentRank = leftTensorComponent.
rank();
2059 const int leftTensorComponentDimSpan = leftTensorComponent.
extent_int(2);
2060 const int leftTensorComponentFields = leftTensorComponent.
extent_int(0);
2061 const int rightTensorComponentRank = rightTensorComponent.
rank();
2062 const int rightTensorComponentDimSpan = rightTensorComponent.
extent_int(2);
2063 const int rightTensorComponentFields = rightTensorComponent.
extent_int(0);
2065 INTREPID2_TEST_FOR_EXCEPTION(( leftTensorComponentDimSpan != rightTensorComponentDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2067 for (
int d=d_start; d<d_end; d++)
2069 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
2070 if (allocateFadStorage)
2073 componentIntegrals[r][d] = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields, fad_size_output);
2077 componentIntegrals[r][d] = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields);
2080 auto componentIntegralView = componentIntegrals[r][d];
2082 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{leftTensorComponentFields,rightTensorComponentFields});
2084 for (
int a=0; a<leftTensorComponentDimSpan; a++)
2086 Kokkos::parallel_for(
"compute componentIntegrals", policy,
2087 KOKKOS_LAMBDA (
const int &i,
const int &j) {
2088 Scalar refSpaceIntegral = 0.0;
2089 for (
int ptOrdinal=0; ptOrdinal<numPoints; ptOrdinal++)
2091 const Scalar & leftValue = ( leftTensorComponentRank == 2) ? leftTensorComponent(i,ptOrdinal) : leftTensorComponent(i,ptOrdinal,a);
2092 const Scalar & rightValue = (rightTensorComponentRank == 2) ? rightTensorComponent(j,ptOrdinal) : rightTensorComponent(j,ptOrdinal,a);
2093 refSpaceIntegral += leftValue * rightValue * quadratureWeights(ptOrdinal);
2095 componentIntegralView(i,j) = refSpaceIntegral;
2099 if (approximateFlops != NULL)
2101 *approximateFlops += leftTensorComponentFields*rightTensorComponentFields*numPoints*(3);
2106 ExecutionSpace().fence();
2108 Kokkos::Array<int,3> upperBounds {cellDataExtent,leftComponentFieldCount,rightComponentFieldCount};
2109 Kokkos::Array<int,3> lowerBounds {0,0,0};
2110 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>(lowerBounds, upperBounds);
2112 Kokkos::parallel_for(
"compute field integrals", policy,
2113 KOKKOS_LAMBDA (
const int &cellDataOrdinal,
const int &leftFieldOrdinal,
const int &rightFieldOrdinal) {
2114 const Scalar & cellMeasureWeight = cellMeasures.
getTensorComponent(0)(cellDataOrdinal);
2122 Scalar integralSum = 0.0;
2123 for (
int d=d_start; d<d_end; d++)
2125 const Scalar & transformLeft_d = basisValuesLeft.
transformWeight(cellDataOrdinal,0,d,d);
2126 const Scalar & transformRight_d = basisValuesRight.
transformWeight(cellDataOrdinal,0,d,d);
2128 const Scalar & leftRightTransform_d = transformLeft_d * transformRight_d;
2131 Scalar integral_d = 1.0;
2133 for (
int r=0; r<numPointTensorComponents; r++)
2135 integral_d *= componentIntegrals[r][d](leftTensorIterator.
argument(r),rightTensorIterator.
argument(r));
2138 integralSum += leftRightTransform_d * integral_d;
2141 const int i = leftFieldOrdinal + leftFieldOffset;
2142 const int j = rightFieldOrdinal + rightFieldOffset;
2144 if (integralViewRank == 3)
2146 integralView3(cellDataOrdinal,i,j) += cellMeasureWeight * integralSum;
2150 integralView2(i,j) += cellMeasureWeight * integralSum;
2154 b_offset += rightDimSpan;
2157 a_offset += leftDimSpan;
2161 if (approximateFlops != NULL)
2164 *approximateFlops += (2 + spaceDim * (3 + numPointTensorComponents)) * cellDataExtent * numFieldsLeft * numFieldsRight;
2172 const bool transposeLeft =
true;
2173 const bool transposeRight =
false;
2177 const bool matrixTransform = (leftTransform.
rank() == 4) || (rightTransform.
rank() == 4);
2182 if (matrixTransform)
2184 composedTransform = leftTransform.
allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform);
2185 composedTransform.
storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform);
2188 if (approximateFlops != NULL)
2199 const int newRank = 4;
2200 auto extents = composedTransform.
getExtents();
2202 composedTransform = composedTransform.
shallowCopy(newRank, extents, variationTypes);
2203 if (approximateFlops != NULL)
2209 else if (leftTransform.
isValid())
2212 composedTransform = leftTransform;
2214 else if (rightTransform.
isValid())
2217 composedTransform = rightTransform;
2222 Kokkos::Array<ordinal_type,4> extents {basisValuesLeft.
numCells(),basisValuesLeft.
numPoints(),spaceDim,spaceDim};
2225 Kokkos::View<Scalar*,DeviceType> identityUnderlyingView(
"Intrepid2::FST::integrate() - identity view",spaceDim);
2226 Kokkos::deep_copy(identityUnderlyingView, 1.0);
2234 const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies();
2235 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
2236 const int leftComponentCount = isVectorValued ? basisValuesLeft. vectorData().numComponents() : 1;
2237 const int rightComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2239 int leftFieldOrdinalOffset = 0;
2240 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2245 bool haveLaunchedContributionToCurrentFamilyLeft =
false;
2246 for (
int leftComponentOrdinal=0; leftComponentOrdinal<leftComponentCount; leftComponentOrdinal++)
2249 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
2253 a_offset += basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal);
2257 int rightFieldOrdinalOffset = 0;
2258 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2262 bool haveLaunchedContributionToCurrentFamilyRight =
false;
2264 for (
int rightComponentOrdinal=0; rightComponentOrdinal<rightComponentCount; rightComponentOrdinal++)
2267 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2268 if (!rightComponent.
isValid())
2271 b_offset += basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal);
2277 const int vectorSize = getVectorSizeForHierarchicalParallelism<Scalar>();
2278 Kokkos::TeamPolicy<ExecutionSpace> policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,Kokkos::AUTO(),vectorSize);
2287 bool forceNonSpecialized =
false;
2288 bool usePointCacheForRank3Tensor =
true;
2292 if (haveLaunchedContributionToCurrentFamilyLeft && haveLaunchedContributionToCurrentFamilyRight)
2294 ExecutionSpace().fence();
2296 haveLaunchedContributionToCurrentFamilyLeft =
true;
2297 haveLaunchedContributionToCurrentFamilyRight =
true;
2299 if (integralViewRank == 2)
2303 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2305 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2306 const int teamSize = functor.
teamSize(recommendedTeamSize);
2308 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2310 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 2", policy, functor);
2312 if (approximateFlops != NULL)
2314 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.getDataExtent(0);
2319 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2321 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2322 const int teamSize = functor.
teamSize(recommendedTeamSize);
2324 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2326 Kokkos::parallel_for(
"F_Integrate rank 2", policy, functor);
2328 if (approximateFlops != NULL)
2330 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.getDataExtent(0);
2334 else if (integralViewRank == 3)
2338 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2340 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2341 const int teamSize = functor.
teamSize(recommendedTeamSize);
2343 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2345 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 3", policy, functor);
2347 if (approximateFlops != NULL)
2349 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.getDataExtent(0);
2354 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2356 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2357 const int teamSize = functor.
teamSize(recommendedTeamSize);
2359 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2361 Kokkos::parallel_for(
"F_Integrate rank 3", policy, functor);
2363 if (approximateFlops != NULL)
2365 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.getDataExtent(0);
2369 b_offset += isVectorValued ? basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal) : 1;
2371 rightFieldOrdinalOffset += isVectorValued ? basisValuesRight.
vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal);
2373 a_offset += isVectorValued ? basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal) : 1;
2375 leftFieldOrdinalOffset += isVectorValued ? basisValuesLeft.
vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal);
2382 ExecutionSpace().fence();
KOKKOS_INLINE_FUNCTION void runSpecialized3(const TeamMember &teamMember) const
Hand-coded 3-component version.
KOKKOS_INLINE_FUNCTION DimensionInfo getDimensionInfo(const int &dim) const
Returns an object fully specifying the indicated dimension. This is used in determining appropriate s...
long approximateFlopCountPerCell() const
returns an estimate of the number of floating point operations per cell (counting sums...
static Data< DataScalar, DeviceType > allocateInPlaceCombinationResult(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
KOKKOS_INLINE_FUNCTION void runSpecialized3(const TeamMember &teamMember) const
runSpecialized implementations are hand-coded variants of run() for a particular number of components...
KOKKOS_INLINE_FUNCTION Kokkos::Array< int, 7 > getExtents() const
Returns an array containing the logical extents in each dimension.
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
Implementation of a general sum factorization algorithm, using a novel approach developed by Roberts...
size_t team_shmem_size(int team_size) const
Provide the shared memory capacity.
KOKKOS_INLINE_FUNCTION int familyFieldOrdinalOffset(const int &familyOrdinal) const
Returns the field ordinal offset for the specified family.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1...
void clear() const
Copies 0.0 to the underlying View.
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
returns true for containers that have data; false for those that don't (namely, those that have been ...
one of two dimensions in a matrix; bottom-right part of matrix is diagonal
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
Returns true for containers that have data; false for those that don't (e.g., those that have been co...
KOKKOS_INLINE_FUNCTION int incrementArgument(Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
runtime-sized variant of incrementArgument; gets used by approximate flop count.
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
Defines TensorArgumentIterator, which allows systematic enumeration of a TensorData object...
KOKKOS_INLINE_FUNCTION int nextIncrementResult(const Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
runtime-sized variant of nextIncrementResult; gets used by approximate flop count.
Struct expressing all variation information about a Data object in a single dimension, including its logical extent and storage extent.
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_pod< T >::value, unsigned >::type dimension_scalar(const Kokkos::DynRankView< T, P... >)
specialization of functions for pod types, returning the scalar dimension (1 for pod types) of a view...
size_t team_shmem_size(int numThreads) const
Provide the shared memory capacity.
int teamSize(const int &maxTeamSizeFromKokkos) const
returns the team size that should be provided to the policy constructor, based on the Kokkos maximum ...
KOKKOS_INLINE_FUNCTION void setEnumerationIndex(const ordinal_type &enumerationIndex)
Sets the enumeration index; this refers to a 1D enumeration of the possible in-bound arguments...
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
KOKKOS_INLINE_FUNCTION const ordinal_type & argument(const ordinal_type &r) const
KOKKOS_INLINE_FUNCTION int numFamilies() const
For valid vectorData, returns the number of families in vectorData; otherwise, returns number of Tens...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ****, DeviceType > & getUnderlyingView4() const
returns the View that stores the unique data. For rank-4 underlying containers.
void storeMatMat(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION std::enable_if< std::is_integral< iType >::value, ordinal_type >::type extent_int(const iType &d) const
Returns the logical extent in the requested dimension.
KOKKOS_INLINE_FUNCTION int incrementArgument(Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
runtime-sized variant of incrementArgument; gets used by approximate flop count.
KOKKOS_INLINE_FUNCTION const Data< Scalar, DeviceType > & getTensorComponent(const ordinal_type &r) const
Returns the requested tensor component.
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
Returns DimensionInfo for a Data container that combines (through multiplication, say...
Defines the Intrepid2::FunctorIterator class, as well as the Intrepid2::functor_returns_ref SFINAE he...
void storeInPlaceProduct(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) product, A .* B, into this container.
TensorDataType & tensorData()
TensorData accessor for single-family scalar data.
KOKKOS_INLINE_FUNCTION ordinal_type numTensorComponents() const
Return the number of tensorial components.
KOKKOS_INLINE_FUNCTION int nextIncrementResult(const Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
runtime-sized variant of nextIncrementResult; gets used by approximate flop count.
Data shallowCopy(const int rank, const Kokkos::Array< int, 7 > &extents, const Kokkos::Array< DataVariationType, 7 > &variationTypes)
Creates a new Data object with the same underlying view, but with the specified logical rank...
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar **, DeviceType > & getUnderlyingView2() const
returns the View that stores the unique data. For rank-2 underlying containers.
View-like interface to tensor data; tensor components are stored separately and multiplied together a...
Allows systematic enumeration of all entries in a TensorData object, tracking indices for each tensor...
DataVariationType
Enumeration to indicate how data varies in a particular dimension of an Intrepid2::Data object...
long approximateFlopCountPerCell() const
returns an estimate of the number of floating point operations per cell (counting sums...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ***, DeviceType > & getUnderlyingView3() const
returns the View that stores the unique data. For rank-3 underlying containers.
int teamSize(const int &maxTeamSizeFromKokkos) const
returns the team size that should be provided to the policy constructor, based on the Kokkos maximum ...
Implementation of a general sum factorization algorithm, abstracted from the algorithm described by M...
static Data< DataScalar, DeviceType > allocateMatMatResult(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
static constexpr ordinal_type MaxTensorComponents
Maximum number of tensor/Cartesian products that can be taken: this allows hypercube basis in 7D to b...
KOKKOS_INLINE_FUNCTION const Kokkos::Array< DataVariationType, 7 > & getVariationTypes() const
Returns an array with the variation types in each logical dimension.