Kokkos Core Kernels Package  Version of the Day
Kokkos_ExecPolicy.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
24 
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_Concepts.hpp>
30 #include <typeinfo>
31 
32 //----------------------------------------------------------------------------
33 
34 namespace Kokkos {
35 
// Empty tag types naming the three parallel dispatch patterns. Further down
// in this file they select a specialization of
// Impl::PatternImplSpecializationFromTag and are the result type of
// Impl::PatternTagFromImplSpecialization.
36 struct ParallelForTag {};
37 struct ParallelScanTag {};
38 struct ParallelReduceTag {};
39 
/// Wrapper that marks an integer as a chunk-size request, so that it can be
/// recognised among the trailing arguments of a RangePolicy constructor.
struct ChunkSize {
  /// Requested chunk size, stored exactly as passed in (no validation here).
  int value;

  /// Converting constructor (not `explicit`): a plain int converts implicitly.
  ChunkSize(int requested) : value{requested} {}
};
44 
66 template <class... Properties>
67 class RangePolicy : public Impl::PolicyTraits<Properties...> {
68  public:
69  using traits = Impl::PolicyTraits<Properties...>;
70 
71  private:
72  typename traits::execution_space m_space;
73  typename traits::index_type m_begin;
74  typename traits::index_type m_end;
75  typename traits::index_type m_granularity;
76  typename traits::index_type m_granularity_mask;
77 
78  template <class... OtherProperties>
79  friend class RangePolicy;
80 
81  public:
83  using execution_policy = RangePolicy<Properties...>;
84  using member_type = typename traits::index_type;
85  using index_type = typename traits::index_type;
86 
87  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
88  return m_space;
89  }
90  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
91  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
92 
93  // TODO: find a better workaround for Clangs weird instantiation order
94  // This thing is here because of an instantiation error, where the RangePolicy
95  // is inserted into FunctorValue Traits, which tries decltype on the operator.
96  // It tries to do this even though the first argument of parallel for clearly
97  // doesn't match.
98  void operator()(const int&) const {}
99 
100  template <class... OtherProperties>
101  RangePolicy(const RangePolicy<OtherProperties...>& p)
102  : traits(p), // base class may contain data such as desired occupancy
103  m_space(p.m_space),
104  m_begin(p.m_begin),
105  m_end(p.m_end),
106  m_granularity(p.m_granularity),
107  m_granularity_mask(p.m_granularity_mask) {}
108 
109  inline RangePolicy()
110  : m_space(),
111  m_begin(0),
112  m_end(0),
113  m_granularity(0),
114  m_granularity_mask(0) {}
115 
117  inline RangePolicy(const typename traits::execution_space& work_space,
118  const member_type work_begin, const member_type work_end)
119  : m_space(work_space),
120  m_begin(work_begin < work_end ? work_begin : 0),
121  m_end(work_begin < work_end ? work_end : 0),
122  m_granularity(0),
123  m_granularity_mask(0) {
124  set_auto_chunk_size();
125  }
126 
128  inline RangePolicy(const member_type work_begin, const member_type work_end)
129  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
130  set_auto_chunk_size();
131  }
132 
134  template <class... Args>
135  inline RangePolicy(const typename traits::execution_space& work_space,
136  const member_type work_begin, const member_type work_end,
137  Args... args)
138  : m_space(work_space),
139  m_begin(work_begin < work_end ? work_begin : 0),
140  m_end(work_begin < work_end ? work_end : 0),
141  m_granularity(0),
142  m_granularity_mask(0) {
143  set_auto_chunk_size();
144  set(args...);
145  }
146 
148  template <class... Args>
149  inline RangePolicy(const member_type work_begin, const member_type work_end,
150  Args... args)
151  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
152  set_auto_chunk_size();
153  set(args...);
154  }
155 
156  private:
157  inline void set() {}
158 
159  public:
160  template <class... Args>
161  inline void set(Args...) {
162  static_assert(
163  0 == sizeof...(Args),
164  "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
165  }
166 
167  template <class... Args>
168  inline void set(const ChunkSize& chunksize, Args... args) {
169  m_granularity = chunksize.value;
170  m_granularity_mask = m_granularity - 1;
171  set(args...);
172  }
173 
174  public:
176  inline member_type chunk_size() const { return m_granularity; }
177 
180  m_granularity = chunk_size;
181  m_granularity_mask = m_granularity - 1;
182  return *this;
183  }
184 
185  private:
187  inline void set_auto_chunk_size() {
188 #ifdef KOKKOS_ENABLE_SYCL
189  if (std::is_same_v<typename traits::execution_space,
190  Kokkos::Experimental::SYCL>) {
191  // chunk_size <=1 lets the compiler choose the workgroup size when
192  // launching kernels
193  m_granularity = 1;
194  m_granularity_mask = 0;
195  return;
196  }
197 #endif
198  auto concurrency = static_cast<int64_t>(m_space.concurrency());
199  if (concurrency == 0) concurrency = 1;
200 
201  if (m_granularity > 0) {
202  if (!Impl::is_integral_power_of_two(m_granularity))
203  Kokkos::abort("RangePolicy blocking granularity must be power of two");
204  }
205 
206  int64_t new_chunk_size = 1;
207  while (new_chunk_size * 100 * concurrency <
208  static_cast<int64_t>(m_end - m_begin))
209  new_chunk_size *= 2;
210  if (new_chunk_size < 128) {
211  new_chunk_size = 1;
212  while ((new_chunk_size * 40 * concurrency <
213  static_cast<int64_t>(m_end - m_begin)) &&
214  (new_chunk_size < 128))
215  new_chunk_size *= 2;
216  }
217  m_granularity = new_chunk_size;
218  m_granularity_mask = m_granularity - 1;
219  }
220 
221  public:
226  struct WorkRange {
227  using work_tag = typename RangePolicy<Properties...>::work_tag;
228  using member_type = typename RangePolicy<Properties...>::member_type;
229 
230  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
231  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
232 
237  KOKKOS_INLINE_FUNCTION
238  WorkRange(const RangePolicy& range, const int part_rank,
239  const int part_size)
240  : m_begin(0), m_end(0) {
241  if (part_size) {
242  // Split evenly among partitions, then round up to the granularity.
243  const member_type work_part =
244  ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
245  range.m_granularity_mask) &
246  ~member_type(range.m_granularity_mask);
247 
248  m_begin = range.begin() + work_part * part_rank;
249  m_end = m_begin + work_part;
250 
251  if (range.end() < m_begin) m_begin = range.end();
252  if (range.end() < m_end) m_end = range.end();
253  }
254  }
255 
256  private:
257  member_type m_begin;
258  member_type m_end;
259  WorkRange();
260  WorkRange& operator=(const WorkRange&);
261  };
262 };
263 
264 } // namespace Kokkos
265 
266 //----------------------------------------------------------------------------
267 //----------------------------------------------------------------------------
268 
269 namespace Kokkos {
270 
271 namespace Impl {
272 
// Primary template of the team-policy implementation class. Only member
// declarations are visible here; no member is defined in this block.
// NOTE(review): the definitions are presumably provided by specializations
// for each execution space in other headers -- confirm.
273 template <class ExecSpace, class... Properties>
274 class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
275  private:
276  using traits = Impl::PolicyTraits<Properties...>;
277 
278  public:
279  using index_type = typename traits::index_type;
280 
281  //----------------------------------------
 // Queries for the largest and the recommended team size for a functor.
 // NOTE(review): the exact meaning of the returned values is not visible in
 // this file -- consult the execution space specializations.
292  template <class FunctorType>
293  static int team_size_max(const FunctorType&);
294 
305  template <class FunctorType>
306  static int team_size_recommended(const FunctorType&);
307 
308  template <class FunctorType>
309  static int team_size_recommended(const FunctorType&, const int&);
310 
 // Non-static overload taking the vector length by value.
311  template <class FunctorType>
312  int team_size_recommended(const FunctorType& functor,
313  const int vector_length);
314 
315  //----------------------------------------
 // Constructors: with or without an explicit execution space instance, and
 // with either an integer team size or Kokkos::AUTO_t in its place. The
 // vector length request defaults to 1.
317  TeamPolicyInternal(const typename traits::execution_space&,
318  int league_size_request, int team_size_request,
319  int vector_length_request = 1);
320 
321  TeamPolicyInternal(const typename traits::execution_space&,
322  int league_size_request, const Kokkos::AUTO_t&,
323  int vector_length_request = 1);
324 
327  TeamPolicyInternal(int league_size_request, int team_size_request,
328  int vector_length_request = 1);
329 
330  TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
331  int vector_length_request = 1);
332 
333  /* TeamPolicyInternal( int league_size_request , int team_size_request );
334 
335  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
336 
 // Accessors for the league size and team size of this policy.
342  KOKKOS_INLINE_FUNCTION int league_size() const;
343 
349  KOKKOS_INLINE_FUNCTION int team_size() const;
350 
 // NOTE(review): presumably report whether team size / vector length were
 // requested as AUTO -- confirm against an implementation.
353  inline bool impl_auto_team_size() const;
356  inline bool impl_auto_vector_length() const;
357 
358  static int vector_length_max();
359 
360  KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
361 
362  inline typename traits::index_type chunk_size() const;
363 
 // Returns a reference to the policy; TeamPolicy::set_chunk_size checks
 // this with a static_assert and casts the result to TeamPolicy&.
364  inline TeamPolicyInternal& set_chunk_size(int chunk_size);
365 
 // Handle type describing one team member: scratch memory access, rank and
 // size queries, a team barrier, and team-wide reduce/scan operations.
369  struct member_type {
371  KOKKOS_INLINE_FUNCTION
372  typename traits::execution_space::scratch_memory_space team_shmem() const;
373 
375  KOKKOS_INLINE_FUNCTION int league_rank() const;
376 
378  KOKKOS_INLINE_FUNCTION int league_size() const;
379 
381  KOKKOS_INLINE_FUNCTION int team_rank() const;
382 
384  KOKKOS_INLINE_FUNCTION int team_size() const;
385 
387  KOKKOS_INLINE_FUNCTION void team_barrier() const;
388 
 // Team-wide reduction; the value type is taken from JoinOp::value_type.
391  template <class JoinOp>
392  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
393  const typename JoinOp::value_type, const JoinOp&) const;
394 
 // Team-wide scan of `value`.
400  template <typename Type>
401  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
402 
 // Team-wide scan that additionally takes a pointer to a global accumulator.
412  template <typename Type>
413  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
414  Type* const global_accum) const;
415  };
416 };
417 
// Wraps a size_t so that a per-team quantity can be told apart from a
// per-thread one by type; ScratchRequest copies `value` into per_team.
// The constructor is only declared here (converting, not explicit).
418 struct PerTeamValue {
419  size_t value;
420  PerTeamValue(size_t arg);
421 };
422 
// Wraps a size_t so that a per-thread quantity can be told apart from a
// per-team one by type; ScratchRequest copies `value` into per_thread.
// The constructor is only declared here (converting, not explicit).
423 struct PerThreadValue {
424  size_t value;
425  PerThreadValue(size_t arg);
426 };
427 
/// \brief Pick a vector length out of a leading argument.
///
/// `value(first, rest...)` returns `first` (as `iType`) when `iType` is an
/// integral type, and the `int` 1 otherwise. The remaining arguments are
/// accepted and ignored.
///
/// The selection is done with `if constexpr` inside a single member. Two
/// members constrained with `enable_if_t` on the *class* template parameter
/// would not work: such declarations are not subject to SFINAE, so the
/// disabled one makes every instantiation of the class ill-formed.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline std::conditional_t<std::is_integral<iType>::value, iType, int>
  value([[maybe_unused]] iType val, Args...) {
    if constexpr (std::is_integral<iType>::value) {
      return val;
    } else {
      return 1;
    }
  }
};
439 
/// Integral overload: the leading argument is returned unchanged as the
/// vector length; any further arguments are accepted and ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType val, Args...)
    -> std::enable_if_t<std::is_integral_v<iType>, iType> {
  return val;
}
445 
/// Non-integral overload: the leading argument is not a vector length, so
/// the result is always 1. All arguments are ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType, Args...)
    -> std::enable_if_t<!std::is_integral_v<iType>, int> {
  return 1;
}
451 
452 } // namespace Impl
453 
// Factory functions returning the Impl wrapper types accepted by
// ScratchRequest and TeamPolicy::set_scratch_size. Declared only; the
// definitions are not part of this file.
454 Impl::PerTeamValue PerTeam(const size_t& arg);
455 Impl::PerThreadValue PerThread(const size_t& arg);
456 
457 struct ScratchRequest {
458  int level;
459 
460  size_t per_team;
461  size_t per_thread;
462 
463  inline ScratchRequest(const int& level_,
464  const Impl::PerTeamValue& team_value) {
465  level = level_;
466  per_team = team_value.value;
467  per_thread = 0;
468  }
469 
470  inline ScratchRequest(const int& level_,
471  const Impl::PerThreadValue& thread_value) {
472  level = level_;
473  per_team = 0;
474  per_thread = thread_value.value;
475  }
476 
477  inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
478  const Impl::PerThreadValue& thread_value) {
479  level = level_;
480  per_team = team_value.value;
481  per_thread = thread_value.value;
482  }
483 
484  inline ScratchRequest(const int& level_,
485  const Impl::PerThreadValue& thread_value,
486  const Impl::PerTeamValue& team_value) {
487  level = level_;
488  per_team = team_value.value;
489  per_thread = thread_value.value;
490  }
491 };
492 
// Declared only; TeamPolicy::set_scratch_size calls it on `level` before
// forwarding to the internal policy.
493 // Throws a runtime exception if level is not `0` or `1`
494 void team_policy_check_valid_storage_level_argument(int level);
495 
522 template <class... Properties>
524  : public Impl::TeamPolicyInternal<
525  typename Impl::PolicyTraits<Properties...>::execution_space,
526  Properties...> {
527  using internal_policy = Impl::TeamPolicyInternal<
528  typename Impl::PolicyTraits<Properties...>::execution_space,
529  Properties...>;
530 
531  template <class... OtherProperties>
532  friend class TeamPolicy;
533 
534  public:
535  using traits = Impl::PolicyTraits<Properties...>;
536 
537  using execution_policy = TeamPolicy<Properties...>;
538 
539  TeamPolicy() : internal_policy(0, AUTO) {}
540 
542  TeamPolicy(const typename traits::execution_space& space_,
543  int league_size_request, int team_size_request,
544  int vector_length_request = 1)
545  : internal_policy(space_, league_size_request, team_size_request,
546  vector_length_request) {}
547 
548  TeamPolicy(const typename traits::execution_space& space_,
549  int league_size_request, const Kokkos::AUTO_t&,
550  int vector_length_request = 1)
551  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
552  vector_length_request) {}
553 
554  TeamPolicy(const typename traits::execution_space& space_,
555  int league_size_request, const Kokkos::AUTO_t&,
556  const Kokkos::AUTO_t&)
557  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
558  Kokkos::AUTO()) {}
559  TeamPolicy(const typename traits::execution_space& space_,
560  int league_size_request, const int team_size_request,
561  const Kokkos::AUTO_t&)
562  : internal_policy(space_, league_size_request, team_size_request,
563  Kokkos::AUTO()) {}
566  TeamPolicy(int league_size_request, int team_size_request,
567  int vector_length_request = 1)
568  : internal_policy(league_size_request, team_size_request,
569  vector_length_request) {}
570 
571  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
572  int vector_length_request = 1)
573  : internal_policy(league_size_request, Kokkos::AUTO(),
574  vector_length_request) {}
575 
576  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
577  const Kokkos::AUTO_t&)
578  : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
579  TeamPolicy(int league_size_request, const int team_size_request,
580  const Kokkos::AUTO_t&)
581  : internal_policy(league_size_request, team_size_request,
582  Kokkos::AUTO()) {}
583 
584  template <class... OtherProperties>
585  TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
586  // Cannot call converting constructor in the member initializer list because
587  // it is not a direct base.
588  internal_policy::traits::operator=(p);
589  }
590 
591  private:
592  TeamPolicy(const internal_policy& p) : internal_policy(p) {}
593 
594  public:
595  inline TeamPolicy& set_chunk_size(int chunk) {
596  static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
597  internal_policy&>::value,
598  "internal set_chunk_size should return a reference");
599  return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
600  }
601 
602  inline TeamPolicy& set_scratch_size(const int& level,
603  const Impl::PerTeamValue& per_team) {
604  static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
605  level, per_team)),
606  internal_policy&>::value,
607  "internal set_chunk_size should return a reference");
608 
609  team_policy_check_valid_storage_level_argument(level);
610  return static_cast<TeamPolicy&>(
611  internal_policy::set_scratch_size(level, per_team));
612  }
613  inline TeamPolicy& set_scratch_size(const int& level,
614  const Impl::PerThreadValue& per_thread) {
615  team_policy_check_valid_storage_level_argument(level);
616  return static_cast<TeamPolicy&>(
617  internal_policy::set_scratch_size(level, per_thread));
618  }
619  inline TeamPolicy& set_scratch_size(const int& level,
620  const Impl::PerTeamValue& per_team,
621  const Impl::PerThreadValue& per_thread) {
622  team_policy_check_valid_storage_level_argument(level);
623  return static_cast<TeamPolicy&>(
624  internal_policy::set_scratch_size(level, per_team, per_thread));
625  }
626  inline TeamPolicy& set_scratch_size(const int& level,
627  const Impl::PerThreadValue& per_thread,
628  const Impl::PerTeamValue& per_team) {
629  team_policy_check_valid_storage_level_argument(level);
630  return static_cast<TeamPolicy&>(
631  internal_policy::set_scratch_size(level, per_team, per_thread));
632  }
633 };
634 
635 namespace Impl {
636 
637 template <typename iType, class TeamMemberType>
638 struct TeamThreadRangeBoundariesStruct {
639  private:
640  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
641  const iType& arg_end,
642  const iType& arg_rank,
643  const iType& arg_size) {
644  return arg_begin +
645  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
646  }
647 
648  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
649  const iType& arg_end,
650  const iType& arg_rank,
651  const iType& arg_size) {
652  const iType end_ =
653  arg_begin +
654  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
655  return end_ < arg_end ? end_ : arg_end;
656  }
657 
658  public:
659  using index_type = iType;
660  const iType start;
661  const iType end;
662  enum { increment = 1 };
663  const TeamMemberType& thread;
664 
665  KOKKOS_INLINE_FUNCTION
666  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
667  const iType& arg_end)
668  : start(
669  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
670  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
671  thread(arg_thread) {}
672 
673  KOKKOS_INLINE_FUNCTION
674  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
675  const iType& arg_begin, const iType& arg_end)
676  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
677  arg_thread.team_size())),
678  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
679  arg_thread.team_size())),
680  thread(arg_thread) {}
681 };
682 
683 template <typename iType, class TeamMemberType>
684 struct TeamVectorRangeBoundariesStruct {
685  private:
686  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
687  const iType& arg_end,
688  const iType& arg_rank,
689  const iType& arg_size) {
690  return arg_begin +
691  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
692  }
693 
694  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
695  const iType& arg_end,
696  const iType& arg_rank,
697  const iType& arg_size) {
698  const iType end_ =
699  arg_begin +
700  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
701  return end_ < arg_end ? end_ : arg_end;
702  }
703 
704  public:
705  using index_type = iType;
706  const iType start;
707  const iType end;
708  enum { increment = 1 };
709  const TeamMemberType& thread;
710 
711  KOKKOS_INLINE_FUNCTION
712  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
713  const iType& arg_end)
714  : start(
715  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
716  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
717  thread(arg_thread) {}
718 
719  KOKKOS_INLINE_FUNCTION
720  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
721  const iType& arg_begin, const iType& arg_end)
722  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
723  arg_thread.team_size())),
724  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
725  arg_thread.team_size())),
726  thread(arg_thread) {}
727 };
728 
729 template <typename iType, class TeamMemberType>
730 struct ThreadVectorRangeBoundariesStruct {
731  using index_type = iType;
732  const index_type start;
733  const index_type end;
734  enum { increment = 1 };
735 
736  KOKKOS_INLINE_FUNCTION
737  constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
738  const index_type& count) noexcept
739  : start(static_cast<index_type>(0)), end(count) {}
740 
741  KOKKOS_INLINE_FUNCTION
742  constexpr ThreadVectorRangeBoundariesStruct(const index_type& count) noexcept
743  : start(static_cast<index_type>(0)), end(count) {}
744 
745  KOKKOS_INLINE_FUNCTION
746  constexpr ThreadVectorRangeBoundariesStruct(
747  const TeamMemberType, const index_type& arg_begin,
748  const index_type& arg_end) noexcept
749  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
750 
751  KOKKOS_INLINE_FUNCTION
752  constexpr ThreadVectorRangeBoundariesStruct(
753  const index_type& arg_begin, const index_type& arg_end) noexcept
754  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
755 };
756 
757 template <class TeamMemberType>
758 struct ThreadSingleStruct {
759  const TeamMemberType& team_member;
760  KOKKOS_INLINE_FUNCTION
761  ThreadSingleStruct(const TeamMemberType& team_member_)
762  : team_member(team_member_) {}
763 };
764 
765 template <class TeamMemberType>
766 struct VectorSingleStruct {
767  const TeamMemberType& team_member;
768  KOKKOS_INLINE_FUNCTION
769  VectorSingleStruct(const TeamMemberType& team_member_)
770  : team_member(team_member_) {}
771 };
772 
773 } // namespace Impl
774 
// Deleted declaration of TeamThreadRange(team, count). The template
// parameter _never_use_this_overload does not appear in the function
// parameters, so it cannot be deduced from a call.
// NOTE(review): callable overloads are presumably declared per team-member
// type in other headers -- confirm.
782 template <typename iType, class TeamMemberType, class _never_use_this_overload>
783 KOKKOS_INLINE_FUNCTION_DELETED
784  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
785  TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
786 
// Deleted declaration of TeamThreadRange(team, begin, end). The index type of
// the returned bounds is the common type of the two bound types. The template
// parameter _never_use_this_overload cannot be deduced from a call.
794 template <typename iType1, typename iType2, class TeamMemberType,
795  class _never_use_this_overload>
796 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
797  std::common_type_t<iType1, iType2>, TeamMemberType>
798 TeamThreadRange(const TeamMemberType&, const iType1& begin,
799  const iType2& end) = delete;
800 
808 template <typename iType, class TeamMemberType, class _never_use_this_overload>
809 KOKKOS_INLINE_FUNCTION_DELETED
810  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
811  TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
812 
820 template <typename iType1, typename iType2, class TeamMemberType,
821  class _never_use_this_overload>
822 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
823  std::common_type_t<iType1, iType2>, TeamMemberType>
824 TeamVectorRange(const TeamMemberType&, const iType1& begin,
825  const iType2& end) = delete;
826 
// Deleted declaration of ThreadVectorRange(team, count). The template
// parameter _never_use_this_overload does not appear in the function
// parameters, so it cannot be deduced from a call.
834 template <typename iType, class TeamMemberType, class _never_use_this_overload>
835 KOKKOS_INLINE_FUNCTION_DELETED
836  Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
837  ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
838 
// Deleted declaration of ThreadVectorRange(team, begin, end). The index type
// of the returned bounds is the common type of the two bound types. The
// template parameter _never_use_this_overload cannot be deduced from a call.
839 template <typename iType1, typename iType2, class TeamMemberType,
840  class _never_use_this_overload>
841 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
842  std::common_type_t<iType1, iType2>, TeamMemberType>
843 ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
844  const iType2& arg_end) = delete;
845 
846 namespace Impl {
847 
// Two-valued flags (underlying type bool) used by the team-level MD range
// policies below; par_thread / par_vector in those policies are values of
// TeamMDRangeParThread / TeamMDRangeParVector.
848 enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
849 enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
850 enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
851 enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
852 
// Forward declarations only; the definitions are not part of this file.
853 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
854 struct HostBasedNestLevel;
855 
856 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
857 struct AcceleratorBasedNestLevel;
858 
859 // ThreadAndVectorNestLevel determines on which nested level parallelization
860 // happens.
861 // - Rank is Kokkos::Rank<TotalNestLevel, Iter>
862 // - TotalNestLevel is the total number of loop nests
863 // - Iter is whether to go forward or backward through ranks (i.e. the
864 // iteration order for MDRangePolicy)
865 // - ThreadAndVector determines whether both vector and thread parallelism is
866 // in use
867 template <typename Rank, typename ExecSpace,
868  TeamMDRangeThreadAndVector ThreadAndVector>
869 struct ThreadAndVectorNestLevel;
870 
// Passed as the third argument of md_parallel_impl by the parallel_for
// overloads below, in place of a reduction variable.
871 struct NoReductionTag {};
872 
// Common entry point of the team-level MD parallel_for / parallel_reduce
// overloads below. Declared only; the definition is not part of this file.
873 template <typename Rank, typename TeamMDPolicy, typename Lambda,
874  typename ReductionValueType>
875 KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
876  Lambda const& lambda,
877  ReductionValueType&& val);
878 } // namespace Impl
879 
880 template <typename Rank, typename TeamHandle>
881 struct TeamThreadMDRange;
882 
883 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
884 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
885  using NestLevelType = int;
886  using BoundaryType = int;
887  using TeamHandleType = TeamHandle;
888  using ExecutionSpace = typename TeamHandleType::execution_space;
889  using ArrayLayout = typename ExecutionSpace::array_layout;
890 
891  static constexpr NestLevelType total_nest_level =
892  Rank<N, OuterDir, InnerDir>::rank;
893  static constexpr Iterate iter = OuterDir;
894  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
895  static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
896 
897  static constexpr Iterate direction =
898  OuterDir == Iterate::Default
899  ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
900  : iter;
901 
902  template <class... Args>
903  KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
904  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
905  static_assert(sizeof...(Args) == total_nest_level);
906  }
907 
908  TeamHandleType const& team;
909  BoundaryType boundaries[total_nest_level];
910 };
911 
912 template <typename TeamHandle, typename... Args>
913 TeamThreadMDRange(TeamHandle const&, Args&&...)
914  ->TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
915 
916 template <typename Rank, typename TeamHandle>
917 struct ThreadVectorMDRange;
918 
919 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
920 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
921  using NestLevelType = int;
922  using BoundaryType = int;
923  using TeamHandleType = TeamHandle;
924  using ExecutionSpace = typename TeamHandleType::execution_space;
925  using ArrayLayout = typename ExecutionSpace::array_layout;
926 
927  static constexpr NestLevelType total_nest_level =
928  Rank<N, OuterDir, InnerDir>::rank;
929  static constexpr Iterate iter = OuterDir;
930  static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
931  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
932 
933  static constexpr Iterate direction =
934  OuterDir == Iterate::Default
935  ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
936  : iter;
937 
938  template <class... Args>
939  KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
940  Args&&... args)
941  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
942  static_assert(sizeof...(Args) == total_nest_level);
943  }
944 
945  TeamHandleType const& team;
946  BoundaryType boundaries[total_nest_level];
947 };
948 
949 template <typename TeamHandle, typename... Args>
950 ThreadVectorMDRange(TeamHandle const&, Args&&...)
951  ->ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
952 
953 template <typename Rank, typename TeamHandle>
954 struct TeamVectorMDRange;
955 
956 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
957 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
958  using NestLevelType = int;
959  using BoundaryType = int;
960  using TeamHandleType = TeamHandle;
961  using ExecutionSpace = typename TeamHandleType::execution_space;
962  using ArrayLayout = typename ExecutionSpace::array_layout;
963 
964  static constexpr NestLevelType total_nest_level =
965  Rank<N, OuterDir, InnerDir>::rank;
966  static constexpr Iterate iter = OuterDir;
967  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
968  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
969 
970  static constexpr Iterate direction =
971  iter == Iterate::Default
972  ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
973  : iter;
974 
975  template <class... Args>
976  KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
977  Args&&... args)
978  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
979  static_assert(sizeof...(Args) == total_nest_level);
980  }
981 
982  TeamHandleType const& team;
983  BoundaryType boundaries[total_nest_level];
984 };
985 
986 template <typename TeamHandle, typename... Args>
987 TeamVectorMDRange(TeamHandle const&, Args&&...)
988  ->TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
989 
// parallel_reduce over a TeamThreadMDRange: forwards the policy, the functor
// and the reduction variable unchanged to Impl::md_parallel_impl<Rank>.
990 template <typename Rank, typename TeamHandle, typename Lambda,
991  typename ReducerValueType>
992 KOKKOS_INLINE_FUNCTION void parallel_reduce(
993  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
994  ReducerValueType& val) {
995  Impl::md_parallel_impl<Rank>(policy, lambda, val);
996 }
997 
// parallel_for over a TeamThreadMDRange: forwards to
// Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag in place of a
// reduction variable.
998 template <typename Rank, typename TeamHandle, typename Lambda>
999 KOKKOS_INLINE_FUNCTION void parallel_for(
1000  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1001  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1002 }
1003 
// parallel_reduce over a ThreadVectorMDRange: forwards the policy, the
// functor and the reduction variable unchanged to
// Impl::md_parallel_impl<Rank>.
1004 template <typename Rank, typename TeamHandle, typename Lambda,
1005  typename ReducerValueType>
1006 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1007  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1008  ReducerValueType& val) {
1009  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1010 }
1011 
// parallel_for over a ThreadVectorMDRange: forwards to
// Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag in place of a
// reduction variable.
1012 template <typename Rank, typename TeamHandle, typename Lambda>
1013 KOKKOS_INLINE_FUNCTION void parallel_for(
1014  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1015  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1016 }
1017 
// parallel_reduce over a TeamVectorMDRange: forwards the policy, the functor
// and the reduction variable unchanged to Impl::md_parallel_impl<Rank>.
1018 template <typename Rank, typename TeamHandle, typename Lambda,
1019  typename ReducerValueType>
1020 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1021  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1022  ReducerValueType& val) {
1023  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1024 }
1025 
// parallel_for over a TeamVectorMDRange: forwards to
// Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag in place of a
// reduction variable.
1026 template <typename Rank, typename TeamHandle, typename Lambda>
1027 KOKKOS_INLINE_FUNCTION void parallel_for(
1028  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1029  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1030 }
1031 
1032 namespace Impl {
1033 
/// \brief Resolve the name under which a parallel construct is reported.
///
/// If the caller supplied a non-empty label, get() returns that label.
/// Otherwise a default name is built from typeid(FunctorType).name(),
/// followed by "/" and typeid(TagType).name() when TagType is not void.
///
/// The label is held by reference, not copied: the string passed to the
/// constructor must stay alive for as long as this object is used. Do not
/// construct it from a temporary.
///
/// HasTag selects between the two partial specializations below; it defaults
/// to "TagType is not void".
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

/// Specialization for a non-void work tag: default name is
/// "<functor type name>/<tag type name>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for building the default name when it will actually be used.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }
  /// User label if non-empty, otherwise the generated default name.
  /// Const-qualified: reading the name does not modify the object.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

/// Specialization for a void work tag: default name is the functor type
/// name alone.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for building the default name when it will actually be used.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }
  /// User label if non-empty, otherwise the generated default name.
  /// Const-qualified: reading the name does not modify the object.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};
1066 
1067 } // namespace Impl
1068 
1069 } // namespace Kokkos
1070 
1071 namespace Kokkos {
1072 
1073 namespace Impl {
1074 
1075 template <class PatternTag, class... Args>
1076 struct PatternImplSpecializationFromTag;
1077 
1078 template <class... Args>
1079 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1080  : type_identity<ParallelFor<Args...>> {};
1081 
1082 template <class... Args>
1083 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1084  : type_identity<ParallelReduce<Args...>> {};
1085 
1086 template <class... Args>
1087 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1088  : type_identity<ParallelScan<Args...>> {};
1089 
1090 template <class PatternImpl>
1091 struct PatternTagFromImplSpecialization;
1092 
1093 template <class... Args>
1094 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1095  : type_identity<ParallelForTag> {};
1096 
1097 template <class... Args>
1098 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1099  : type_identity<ParallelReduceTag> {};
1100 
1101 template <class... Args>
1102 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1103  : type_identity<ParallelScanTag> {};
1104 
1105 } // end namespace Impl
1106 
1107 } // namespace Kokkos
1108 #endif /* #define KOKKOS_EXECPOLICY_HPP */
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
member_type chunk_size() const
return chunk_size
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
RangePolicy & set_chunk_size(int chunk_size)
set chunk_size to a discrete value
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
Definition: dummy.cpp:17
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.