17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 18 #include <Kokkos_Macros.hpp> 20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_EXECPOLICY_HPP 23 #define KOKKOS_EXECPOLICY_HPP 25 #include <Kokkos_Core_fwd.hpp> 26 #include <impl/Kokkos_Traits.hpp> 27 #include <impl/Kokkos_Error.hpp> 28 #include <impl/Kokkos_AnalyzePolicy.hpp> 29 #include <Kokkos_Concepts.hpp> 36 struct ParallelForTag {};
// Empty tag types naming the scan and reduce parallel patterns. Further down
// in this file PatternImplSpecializationFromTag maps them to ParallelScan<...>
// and ParallelReduce<...> respectively.
37 struct ParallelScanTag {};
38 struct ParallelReduceTag {};
42 ChunkSize(
int value_) : value(value_) {}
66 template <
class... Properties>
67 class RangePolicy :
public Impl::PolicyTraits<Properties...> {
69 using traits = Impl::PolicyTraits<Properties...>;
72 typename traits::execution_space m_space;
73 typename traits::index_type m_begin;
74 typename traits::index_type m_end;
75 typename traits::index_type m_granularity;
76 typename traits::index_type m_granularity_mask;
78 template <
class... OtherProperties>
84 using member_type =
typename traits::index_type;
85 using index_type =
typename traits::index_type;
87 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
90 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
91 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
98 void operator()(
const int&)
const {}
100 template <
class... OtherProperties>
101 RangePolicy(
const RangePolicy<OtherProperties...>& p)
106 m_granularity(p.m_granularity),
107 m_granularity_mask(p.m_granularity_mask) {}
114 m_granularity_mask(0) {}
117 inline RangePolicy(
const typename traits::execution_space& work_space,
118 const member_type work_begin,
const member_type work_end)
119 : m_space(work_space),
120 m_begin(work_begin < work_end ? work_begin : 0),
121 m_end(work_begin < work_end ? work_end : 0),
123 m_granularity_mask(0) {
124 set_auto_chunk_size();
128 inline RangePolicy(
const member_type work_begin,
const member_type work_end)
129 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
130 set_auto_chunk_size();
134 template <
class... Args>
135 inline RangePolicy(
const typename traits::execution_space& work_space,
136 const member_type work_begin,
const member_type work_end,
138 : m_space(work_space),
139 m_begin(work_begin < work_end ? work_begin : 0),
140 m_end(work_begin < work_end ? work_end : 0),
142 m_granularity_mask(0) {
143 set_auto_chunk_size();
148 template <
class... Args>
149 inline RangePolicy(
const member_type work_begin,
const member_type work_end,
151 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
152 set_auto_chunk_size();
160 template <
class... Args>
161 inline void set(Args...) {
163 0 ==
sizeof...(Args),
164 "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
167 template <
class... Args>
168 inline void set(
const ChunkSize& chunksize, Args... args) {
169 m_granularity = chunksize.value;
170 m_granularity_mask = m_granularity - 1;
176 inline member_type
chunk_size()
const {
return m_granularity; }
181 m_granularity_mask = m_granularity - 1;
187 inline void set_auto_chunk_size() {
188 #ifdef KOKKOS_ENABLE_SYCL 189 if (std::is_same_v<
typename traits::execution_space,
190 Kokkos::Experimental::SYCL>) {
194 m_granularity_mask = 0;
198 auto concurrency =
static_cast<int64_t
>(m_space.concurrency());
199 if (concurrency == 0) concurrency = 1;
201 if (m_granularity > 0) {
202 if (!Impl::is_integral_power_of_two(m_granularity))
203 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
206 int64_t new_chunk_size = 1;
207 while (new_chunk_size * 100 * concurrency <
208 static_cast<int64_t>(m_end - m_begin))
210 if (new_chunk_size < 128) {
212 while ((new_chunk_size * 40 * concurrency <
213 static_cast<int64_t>(m_end - m_begin)) &&
214 (new_chunk_size < 128))
217 m_granularity = new_chunk_size;
218 m_granularity_mask = m_granularity - 1;
227 using work_tag =
typename RangePolicy<Properties...>::work_tag;
228 using member_type =
typename RangePolicy<Properties...>::member_type;
230 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
231 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
237 KOKKOS_INLINE_FUNCTION
240 : m_begin(0), m_end(0) {
243 const member_type work_part =
244 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
245 range.m_granularity_mask) &
246 ~member_type(range.m_granularity_mask);
248 m_begin = range.begin() + work_part * part_rank;
249 m_end = m_begin + work_part;
251 if (range.end() < m_begin) m_begin = range.end();
252 if (range.end() < m_end) m_end = range.end();
273 template <
class ExecSpace,
class... Properties>
274 class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
276 using traits = Impl::PolicyTraits<Properties...>;
279 using index_type =
typename traits::index_type;
292 template <
class FunctorType>
293 static int team_size_max(
const FunctorType&);
305 template <
class FunctorType>
306 static int team_size_recommended(
const FunctorType&);
308 template <
class FunctorType>
309 static int team_size_recommended(
const FunctorType&,
const int&);
311 template <
class FunctorType>
312 int team_size_recommended(
const FunctorType& functor,
313 const int vector_length);
317 TeamPolicyInternal(
const typename traits::execution_space&,
318 int league_size_request,
int team_size_request,
319 int vector_length_request = 1);
321 TeamPolicyInternal(
const typename traits::execution_space&,
322 int league_size_request,
const Kokkos::AUTO_t&,
323 int vector_length_request = 1);
327 TeamPolicyInternal(
int league_size_request,
int team_size_request,
328 int vector_length_request = 1);
330 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
331 int vector_length_request = 1);
342 KOKKOS_INLINE_FUNCTION
int league_size()
const;
349 KOKKOS_INLINE_FUNCTION
int team_size()
const;
353 inline bool impl_auto_team_size()
const;
356 inline bool impl_auto_vector_length()
const;
358 static int vector_length_max();
360 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
362 inline typename traits::index_type chunk_size()
const;
364 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
371 KOKKOS_INLINE_FUNCTION
372 typename traits::execution_space::scratch_memory_space
team_shmem()
const;
381 KOKKOS_INLINE_FUNCTION
int team_rank()
const;
384 KOKKOS_INLINE_FUNCTION
int team_size()
const;
391 template <
class JoinOp>
392 KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type
team_reduce(
393 const typename JoinOp::value_type,
const JoinOp&)
const;
400 template <
typename Type>
401 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value)
const;
412 template <
typename Type>
413 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value,
414 Type*
const global_accum)
const;
418 struct PerTeamValue {
420 PerTeamValue(
size_t arg);
423 struct PerThreadValue {
425 PerThreadValue(
size_t arg);
428 template <
class iType,
class... Args>
429 struct ExtractVectorLength {
430 static inline iType value(
431 std::enable_if_t<std::is_integral<iType>::value, iType> val, Args...) {
434 static inline std::enable_if_t<!std::is_integral<iType>::value,
int> value(
435 std::enable_if_t<!std::is_integral<iType>::value, iType>, Args...) {
440 template <
class iType,
class... Args>
441 inline std::enable_if_t<std::is_integral<iType>::value, iType>
442 extract_vector_length(iType val, Args...) {
446 template <
class iType,
class... Args>
447 inline std::enable_if_t<!std::is_integral<iType>::value,
int>
448 extract_vector_length(iType, Args...) {
// Declarations only (no definition in this part of the file): each takes a
// size_t and returns the Impl::PerTeamValue / Impl::PerThreadValue wrapper.
// Those wrapper types are the argument types accepted by ScratchRequest and
// by the set_scratch_size overloads.
454 Impl::PerTeamValue PerTeam(
const size_t& arg);
455 Impl::PerThreadValue PerThread(
const size_t& arg);
457 struct ScratchRequest {
463 inline ScratchRequest(
const int& level_,
464 const Impl::PerTeamValue& team_value) {
466 per_team = team_value.value;
470 inline ScratchRequest(
const int& level_,
471 const Impl::PerThreadValue& thread_value) {
474 per_thread = thread_value.value;
477 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
478 const Impl::PerThreadValue& thread_value) {
480 per_team = team_value.value;
481 per_thread = thread_value.value;
484 inline ScratchRequest(
const int& level_,
485 const Impl::PerThreadValue& thread_value,
486 const Impl::PerTeamValue& team_value) {
488 per_team = team_value.value;
489 per_thread = thread_value.value;
// Declaration only. The TeamPolicy::set_scratch_size overloads call this with
// their `level` argument before forwarding to the internal policy. What it
// does for a level it rejects is not visible in this part of the file.
494 void team_policy_check_valid_storage_level_argument(
int level);
522 template <
class... Properties>
524 :
public Impl::TeamPolicyInternal<
525 typename Impl::PolicyTraits<Properties...>::execution_space,
527 using internal_policy = Impl::TeamPolicyInternal<
528 typename Impl::PolicyTraits<Properties...>::execution_space,
531 template <
class... OtherProperties>
535 using traits = Impl::PolicyTraits<Properties...>;
543 int league_size_request,
int team_size_request,
544 int vector_length_request = 1)
545 : internal_policy(space_, league_size_request, team_size_request,
546 vector_length_request) {}
548 TeamPolicy(
const typename traits::execution_space& space_,
549 int league_size_request,
const Kokkos::AUTO_t&,
550 int vector_length_request = 1)
551 : internal_policy(space_, league_size_request,
Kokkos::AUTO(),
552 vector_length_request) {}
554 TeamPolicy(
const typename traits::execution_space& space_,
555 int league_size_request,
const Kokkos::AUTO_t&,
556 const Kokkos::AUTO_t&)
557 : internal_policy(space_, league_size_request,
Kokkos::AUTO(),
559 TeamPolicy(
const typename traits::execution_space& space_,
560 int league_size_request,
const int team_size_request,
561 const Kokkos::AUTO_t&)
562 : internal_policy(space_, league_size_request, team_size_request,
567 int vector_length_request = 1)
568 : internal_policy(league_size_request, team_size_request,
569 vector_length_request) {}
571 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
572 int vector_length_request = 1)
573 : internal_policy(league_size_request,
Kokkos::AUTO(),
574 vector_length_request) {}
576 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
577 const Kokkos::AUTO_t&)
578 : internal_policy(league_size_request,
Kokkos::AUTO(),
Kokkos::AUTO()) {}
579 TeamPolicy(
int league_size_request,
const int team_size_request,
580 const Kokkos::AUTO_t&)
581 : internal_policy(league_size_request, team_size_request,
584 template <
class... OtherProperties>
585 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
588 internal_policy::traits::operator=(p);
592 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
// Forwards `chunk` to internal_policy::set_chunk_size and returns that call's
// result cast to TeamPolicy&, so the public type is preserved for callers.
// The static_assert pins the assumption the cast relies on: the internal
// setter must return internal_policy& (a reference, not a copy).
595 inline TeamPolicy& set_chunk_size(
int chunk) {
596 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
597 internal_policy&>::value,
598 "internal set_chunk_size should return a reference");
599 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
602 inline TeamPolicy& set_scratch_size(
const int& level,
603 const Impl::PerTeamValue& per_team) {
604 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
606 internal_policy&>::value,
607 "internal set_chunk_size should return a reference");
609 team_policy_check_valid_storage_level_argument(level);
610 return static_cast<TeamPolicy&
>(
611 internal_policy::set_scratch_size(level, per_team));
// Per-thread scratch request for scratch `level`.
// First passes `level` to team_policy_check_valid_storage_level_argument, then
// forwards (level, per_thread) to the internal policy and returns that call's
// result cast to TeamPolicy&.
613 inline TeamPolicy& set_scratch_size(
const int& level,
614 const Impl::PerThreadValue& per_thread) {
615 team_policy_check_valid_storage_level_argument(level);
616 return static_cast<TeamPolicy&
>(
617 internal_policy::set_scratch_size(level, per_thread));
// Combined per-team and per-thread scratch request for scratch `level`.
// First passes `level` to team_policy_check_valid_storage_level_argument, then
// forwards (level, per_team, per_thread) to the internal policy and returns
// that call's result cast to TeamPolicy&.
619 inline TeamPolicy& set_scratch_size(
const int& level,
620 const Impl::PerTeamValue& per_team,
621 const Impl::PerThreadValue& per_thread) {
622 team_policy_check_valid_storage_level_argument(level);
623 return static_cast<TeamPolicy&
>(
624 internal_policy::set_scratch_size(level, per_team, per_thread));
// Same request as the (level, per_team, per_thread) overload, but accepting
// the two size arguments in the opposite order for caller convenience.
// Note the forwarding call deliberately swaps them back: the internal policy
// is always called as (level, per_team, per_thread).
626 inline TeamPolicy& set_scratch_size(
const int& level,
627 const Impl::PerThreadValue& per_thread,
628 const Impl::PerTeamValue& per_team) {
629 team_policy_check_valid_storage_level_argument(level);
630 return static_cast<TeamPolicy&
>(
631 internal_policy::set_scratch_size(level, per_team, per_thread));
637 template <
typename iType,
class TeamMemberType>
638 struct TeamThreadRangeBoundariesStruct {
640 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
641 const iType& arg_end,
642 const iType& arg_rank,
643 const iType& arg_size) {
645 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
648 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
649 const iType& arg_end,
650 const iType& arg_rank,
651 const iType& arg_size) {
654 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
655 return end_ < arg_end ? end_ : arg_end;
659 using index_type = iType;
662 enum { increment = 1 };
663 const TeamMemberType& thread;
665 KOKKOS_INLINE_FUNCTION
666 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
667 const iType& arg_end)
669 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
670 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
671 thread(arg_thread) {}
673 KOKKOS_INLINE_FUNCTION
674 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
675 const iType& arg_begin,
const iType& arg_end)
676 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
677 arg_thread.team_size())),
678 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
679 arg_thread.team_size())),
680 thread(arg_thread) {}
683 template <
typename iType,
class TeamMemberType>
684 struct TeamVectorRangeBoundariesStruct {
686 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
687 const iType& arg_end,
688 const iType& arg_rank,
689 const iType& arg_size) {
691 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
694 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
695 const iType& arg_end,
696 const iType& arg_rank,
697 const iType& arg_size) {
700 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
701 return end_ < arg_end ? end_ : arg_end;
705 using index_type = iType;
708 enum { increment = 1 };
709 const TeamMemberType& thread;
711 KOKKOS_INLINE_FUNCTION
712 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
713 const iType& arg_end)
715 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
716 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
717 thread(arg_thread) {}
719 KOKKOS_INLINE_FUNCTION
720 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
721 const iType& arg_begin,
const iType& arg_end)
722 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
723 arg_thread.team_size())),
724 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
725 arg_thread.team_size())),
726 thread(arg_thread) {}
// Holds an immutable `start` / `end` index pair of type iType together with a
// compile-time `increment` of 1. All four constructors are constexpr and
// noexcept and only store their arguments.
729 template <
typename iType,
class TeamMemberType>
730 struct ThreadVectorRangeBoundariesStruct {
731 using index_type = iType;
732 const index_type start;
733 const index_type end;
734 enum { increment = 1 };
// (team member, count): start is 0, end is `count`. The team-member argument
// is unnamed, so its value is never read here.
736 KOKKOS_INLINE_FUNCTION
737 constexpr ThreadVectorRangeBoundariesStruct(
const TeamMemberType,
738 const index_type& count) noexcept
739 : start(static_cast<index_type>(0)), end(count) {}
// (count): start is 0, end is `count`.
741 KOKKOS_INLINE_FUNCTION
742 constexpr ThreadVectorRangeBoundariesStruct(
const index_type& count) noexcept
743 : start(static_cast<index_type>(0)), end(count) {}
// (team member, begin, end): stores both bounds as given; the team-member
// argument is again unnamed and unread.
745 KOKKOS_INLINE_FUNCTION
746 constexpr ThreadVectorRangeBoundariesStruct(
747 const TeamMemberType,
const index_type& arg_begin,
748 const index_type& arg_end) noexcept
749 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
// (begin, end): stores both bounds as given.
751 KOKKOS_INLINE_FUNCTION
752 constexpr ThreadVectorRangeBoundariesStruct(
753 const index_type& arg_begin,
const index_type& arg_end) noexcept
754 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
// Thin wrapper around a team-member handle.
// It stores a *reference* (not a copy), so the team member passed to the
// constructor must outlive this object.
757 template <
class TeamMemberType>
758 struct ThreadSingleStruct {
759 const TeamMemberType& team_member;
760 KOKKOS_INLINE_FUNCTION
761 ThreadSingleStruct(
const TeamMemberType& team_member_)
762 : team_member(team_member_) {}
// Thin wrapper around a team-member handle, structurally identical to
// ThreadSingleStruct but a distinct type.
// It stores a *reference* (not a copy), so the team member passed to the
// constructor must outlive this object.
765 template <
class TeamMemberType>
766 struct VectorSingleStruct {
767 const TeamMemberType& team_member;
768 KOKKOS_INLINE_FUNCTION
769 VectorSingleStruct(
const TeamMemberType& team_member_)
770 : team_member(team_member_) {}
// Deleted generic overload of TeamThreadRange(team, count).
// `_never_use_this_overload` does not appear in the parameter list, so it can
// never be deduced from a call, and the function is `= delete` in any case:
// this declaration can never be the one that is actually invoked.
// NOTE(review): presumably a placeholder that records the generic signature
// while the usable overloads are supplied per team-member type elsewhere —
// confirm.
782 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
783 KOKKOS_INLINE_FUNCTION_DELETED
784 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
785 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
// Deleted generic overload of TeamThreadRange(team, begin, end).
// The two bounds may have different integral types; the declared result uses
// their std::common_type_t as the index type.
// `_never_use_this_overload` cannot be deduced from the arguments and the
// function is `= delete`, so this declaration can never be invoked.
// NOTE(review): presumably a signature placeholder; usable overloads are
// expected to live elsewhere — confirm.
794 template <
typename iType1,
typename iType2,
class TeamMemberType,
795 class _never_use_this_overload>
796 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
797 std::common_type_t<iType1, iType2>, TeamMemberType>
798 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
799 const iType2& end) =
delete;
// Deleted generic overload of TeamVectorRange(team, count).
// `_never_use_this_overload` cannot be deduced from the arguments and the
// function is `= delete`, so this declaration can never be invoked.
// NOTE(review): the declared return type is TeamThreadRangeBoundariesStruct,
// although this file also defines Impl::TeamVectorRangeBoundariesStruct. It is
// harmless for a deleted function, but looks like a copy of the
// TeamThreadRange declaration — confirm which type is intended.
808 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
809 KOKKOS_INLINE_FUNCTION_DELETED
810 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
811 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
// Deleted generic overload of TeamVectorRange(team, begin, end); the index
// type of the declared result is std::common_type_t of the two bound types.
// `_never_use_this_overload` cannot be deduced from the arguments and the
// function is `= delete`, so this declaration can never be invoked.
// NOTE(review): the declared return type is TeamThreadRangeBoundariesStruct,
// although this file also defines Impl::TeamVectorRangeBoundariesStruct —
// confirm which type is intended.
820 template <
typename iType1,
typename iType2,
class TeamMemberType,
821 class _never_use_this_overload>
822 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
823 std::common_type_t<iType1, iType2>, TeamMemberType>
824 TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
825 const iType2& end) =
delete;
// Deleted generic overload of ThreadVectorRange(team, count); the declared
// result type is Impl::ThreadVectorRangeBoundariesStruct.
// `_never_use_this_overload` cannot be deduced from the arguments and the
// function is `= delete`, so this declaration can never be invoked.
// NOTE(review): presumably a signature placeholder; usable overloads are
// expected to live elsewhere — confirm.
834 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
835 KOKKOS_INLINE_FUNCTION_DELETED
836 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
837 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
// Deleted generic overload of ThreadVectorRange(team, begin, end); the index
// type of the declared result is std::common_type_t of the two bound types.
// `_never_use_this_overload` cannot be deduced from the arguments and the
// function is `= delete`, so this declaration can never be invoked.
// NOTE(review): presumably a signature placeholder; usable overloads are
// expected to live elsewhere — confirm.
839 template <
typename iType1,
typename iType2,
class TeamMemberType,
840 class _never_use_this_overload>
841 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
842 std::common_type_t<iType1, iType2>, TeamMemberType>
843 ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
844 const iType2& arg_end) =
delete;
// Two-valued, bool-backed flags. In each enum the first enumerator has the
// underlying value false and the second true.
// TeamMDRangeParThread / TeamMDRangeParVector values are published as the
// `par_thread` / `par_vector` constants of the *MDRange policy structs later
// in this file; TeamMDRangeThreadAndVector is a template parameter of the
// *NestLevel templates declared right after these enums.
848 enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
849 enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
850 enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
851 enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
// Forward declarations only; none of these three templates is defined in this
// part of the file. Each is parameterized on a Rank type and a
// TeamMDRangeThreadAndVector flag.
853 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
854 struct HostBasedNestLevel;
856 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
857 struct AcceleratorBasedNestLevel;
// Same parameters plus the execution space.
// NOTE(review): presumably selects between the host-based and
// accelerator-based variants above per execution space — confirm at the
// definition.
867 template <
typename Rank,
typename ExecSpace,
868 TeamMDRangeThreadAndVector ThreadAndVector>
869 struct ThreadAndVectorNestLevel;
// Empty tag. The MD-range parallel_for overloads later in this file pass a
// default-constructed NoReductionTag as the `val` argument of
// md_parallel_impl, where the parallel_reduce overloads pass the caller's
// reduction variable instead.
871 struct NoReductionTag {};
// Forward declaration of the shared implementation behind the MD-range
// parallel_for / parallel_reduce overloads. Callers in this file supply Rank
// explicitly (md_parallel_impl<Rank>(...)); the remaining template parameters
// are deduced from the arguments. `val` is a forwarding reference, so it binds
// both to a caller's lvalue and to a temporary NoReductionTag.
873 template <
typename Rank,
typename TeamMDPolicy,
typename Lambda,
874 typename ReductionValueType>
875 KOKKOS_INLINE_FUNCTION
void md_parallel_impl(TeamMDPolicy
const& policy,
876 Lambda
const& lambda,
877 ReductionValueType&& val);
// Primary template, declared but not defined. The usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
880 template <
typename Rank,
typename TeamHandle>
881 struct TeamThreadMDRange;
883 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
884 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
885 using NestLevelType = int;
886 using BoundaryType = int;
887 using TeamHandleType = TeamHandle;
888 using ExecutionSpace =
typename TeamHandleType::execution_space;
889 using ArrayLayout =
typename ExecutionSpace::array_layout;
891 static constexpr NestLevelType total_nest_level =
892 Rank<N, OuterDir, InnerDir>::rank;
893 static constexpr Iterate iter = OuterDir;
894 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
895 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
897 static constexpr Iterate direction =
898 OuterDir == Iterate::Default
899 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
902 template <
class... Args>
903 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType
const& team_, Args&&... args)
904 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
905 static_assert(
sizeof...(Args) == total_nest_level);
908 TeamHandleType
const& team;
909 BoundaryType boundaries[total_nest_level];
// Class template argument deduction guide: constructing a TeamThreadMDRange
// from a team handle followed by N boundary arguments deduces
// TeamThreadMDRange<Rank<N, Iterate::Default>, TeamHandle>, where
// N = sizeof...(Args).
912 template <
typename TeamHandle,
typename... Args>
913 TeamThreadMDRange(TeamHandle
const&, Args&&...)
914 ->TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared but not defined. The usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
916 template <
typename Rank,
typename TeamHandle>
917 struct ThreadVectorMDRange;
919 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
920 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
921 using NestLevelType = int;
922 using BoundaryType = int;
923 using TeamHandleType = TeamHandle;
924 using ExecutionSpace =
typename TeamHandleType::execution_space;
925 using ArrayLayout =
typename ExecutionSpace::array_layout;
927 static constexpr NestLevelType total_nest_level =
928 Rank<N, OuterDir, InnerDir>::rank;
929 static constexpr Iterate iter = OuterDir;
930 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
931 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
933 static constexpr Iterate direction =
934 OuterDir == Iterate::Default
935 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
938 template <
class... Args>
939 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType
const& team_,
941 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
942 static_assert(
sizeof...(Args) == total_nest_level);
945 TeamHandleType
const& team;
946 BoundaryType boundaries[total_nest_level];
// Class template argument deduction guide: constructing a ThreadVectorMDRange
// from a team handle followed by N boundary arguments deduces
// ThreadVectorMDRange<Rank<N, Iterate::Default>, TeamHandle>, where
// N = sizeof...(Args).
949 template <
typename TeamHandle,
typename... Args>
950 ThreadVectorMDRange(TeamHandle
const&, Args&&...)
951 ->ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared but not defined. The usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
953 template <
typename Rank,
typename TeamHandle>
954 struct TeamVectorMDRange;
956 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
957 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
958 using NestLevelType = int;
959 using BoundaryType = int;
960 using TeamHandleType = TeamHandle;
961 using ExecutionSpace =
typename TeamHandleType::execution_space;
962 using ArrayLayout =
typename ExecutionSpace::array_layout;
964 static constexpr NestLevelType total_nest_level =
965 Rank<N, OuterDir, InnerDir>::rank;
966 static constexpr Iterate iter = OuterDir;
967 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
968 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
970 static constexpr Iterate direction =
971 iter == Iterate::Default
972 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
975 template <
class... Args>
976 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType
const& team_,
978 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
979 static_assert(
sizeof...(Args) == total_nest_level);
982 TeamHandleType
const& team;
983 BoundaryType boundaries[total_nest_level];
// Class template argument deduction guide: constructing a TeamVectorMDRange
// from a team handle followed by N boundary arguments deduces
// TeamVectorMDRange<Rank<N, Iterate::Default>, TeamHandle>, where
// N = sizeof...(Args).
986 template <
typename TeamHandle,
typename... Args>
987 TeamVectorMDRange(TeamHandle
const&, Args&&...)
988 ->TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// parallel_reduce overload for a TeamThreadMDRange policy.
// Pure forwarder: passes the policy, the lambda and the caller's `val` (taken
// by non-const reference) to Impl::md_parallel_impl<Rank>. Returns nothing;
// any result is delivered through `val`.
990 template <
typename Rank,
typename TeamHandle,
typename Lambda,
991 typename ReducerValueType>
992 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
993 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
994 ReducerValueType& val) {
995 Impl::md_parallel_impl<Rank>(policy, lambda, val);
// parallel_for overload for a TeamThreadMDRange policy.
// Pure forwarder to Impl::md_parallel_impl<Rank>; a temporary
// Impl::NoReductionTag is passed in the reduction-value slot because there is
// no reduction variable.
998 template <
typename Rank,
typename TeamHandle,
typename Lambda>
999 KOKKOS_INLINE_FUNCTION
void parallel_for(
1000 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1001 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
// parallel_reduce overload for a ThreadVectorMDRange policy.
// Pure forwarder: passes the policy, the lambda and the caller's `val` (taken
// by non-const reference) to Impl::md_parallel_impl<Rank>. Returns nothing;
// any result is delivered through `val`.
1004 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1005 typename ReducerValueType>
1006 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1007 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1008 ReducerValueType& val) {
1009 Impl::md_parallel_impl<Rank>(policy, lambda, val);
// parallel_for overload for a ThreadVectorMDRange policy.
// Pure forwarder to Impl::md_parallel_impl<Rank>; a temporary
// Impl::NoReductionTag is passed in the reduction-value slot because there is
// no reduction variable.
1012 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1013 KOKKOS_INLINE_FUNCTION
void parallel_for(
1014 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1015 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
// parallel_reduce overload for a TeamVectorMDRange policy.
// Pure forwarder: passes the policy, the lambda and the caller's `val` (taken
// by non-const reference) to Impl::md_parallel_impl<Rank>. Returns nothing;
// any result is delivered through `val`.
1018 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1019 typename ReducerValueType>
1020 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1021 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1022 ReducerValueType& val) {
1023 Impl::md_parallel_impl<Rank>(policy, lambda, val);
// parallel_for overload for a TeamVectorMDRange policy.
// Pure forwarder to Impl::md_parallel_impl<Rank>; a temporary
// Impl::NoReductionTag is passed in the reduction-value slot because there is
// no reduction variable.
1026 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1027 KOKKOS_INLINE_FUNCTION
void parallel_for(
1028 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1029 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
// Resolves the name to report for a parallel construct.
//
// If the caller supplied a non-empty label, get() returns that label.
// Otherwise get() returns a default built from typeid(...).name():
//   - TagType is not void : "<functor type name>/<tag type name>"
//   - TagType is void     : "<functor type name>"
// The exact spelling of typeid(...).name() is implementation-defined.
//
// HasTag is derived from TagType and selects one of the two specializations
// below; callers are not expected to pass it explicitly.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

// Specialization for a non-void work tag: the default name includes the tag.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  // `label` is stored by reference, not copied: the string passed in must
  // outlive this object (do not pass a temporary).
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for building the default name when it will actually be used.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }

  // Returns the user label if non-empty, else the default name. The returned
  // reference is valid only while this object (and the label) are alive.
  // const-qualified so the name can be read through a const object/reference.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;
};

// Specialization for a void work tag: the default name is the functor only.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  // `label` is stored by reference, not copied: the string passed in must
  // outlive this object (do not pass a temporary).
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for building the default name when it will actually be used.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }

  // Returns the user label if non-empty, else the default name. The returned
  // reference is valid only while this object (and the label) are alive.
  // const-qualified so the name can be read through a const object/reference.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;
};
// Maps a pattern tag plus a pack of template arguments to the matching
// implementation class template:
//   Kokkos::ParallelForTag    -> ParallelFor<Args...>
//   Kokkos::ParallelReduceTag -> ParallelReduce<Args...>
//   Kokkos::ParallelScanTag   -> ParallelScan<Args...>
// The result is published by inheriting from type_identity<...>
// (presumably exposing it as a nested `type` — confirm against the
// type_identity definition, which is not in this part of the file).
// The primary template is declared but not defined, so any other tag is a
// compile-time error on use.
1075 template <
class PatternTag,
class... Args>
1076 struct PatternImplSpecializationFromTag;
1078 template <
class... Args>
1079 struct PatternImplSpecializationFromTag<
Kokkos::ParallelForTag, Args...>
1080 : type_identity<ParallelFor<Args...>> {};
1082 template <
class... Args>
1083 struct PatternImplSpecializationFromTag<
Kokkos::ParallelReduceTag, Args...>
1084 : type_identity<ParallelReduce<Args...>> {};
1086 template <
class... Args>
1087 struct PatternImplSpecializationFromTag<
Kokkos::ParallelScanTag, Args...>
1088 : type_identity<ParallelScan<Args...>> {};
// Inverse of PatternImplSpecializationFromTag: given an implementation
// specialization, recovers the pattern tag:
//   ParallelFor<Args...>    -> ParallelForTag
//   ParallelReduce<Args...> -> ParallelReduceTag
//   ParallelScan<Args...>   -> ParallelScanTag
// The result is published by inheriting from type_identity<...>
// (presumably exposing it as a nested `type` — confirm against the
// type_identity definition, which is not in this part of the file).
// The primary template is declared but not defined, so any other type is a
// compile-time error on use.
1090 template <
class PatternImpl>
1091 struct PatternTagFromImplSpecialization;
1093 template <
class... Args>
1094 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1095 : type_identity<ParallelForTag> {};
1097 template <
class... Args>
1098 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1099 : type_identity<ParallelReduceTag> {};
1101 template <
class... Args>
1102 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1103 : type_identity<ParallelScanTag> {};
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
member_type chunk_size() const
Returns the chunk size.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
RangePolicy & set_chunk_size(int chunk_size)
Sets chunk_size to a discrete value.
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.