20 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 21 #include <Kokkos_Macros.hpp> 23 "Including non-public Kokkos header files is not allowed.");
25 #ifndef KOKKOS_PARALLEL_HPP 26 #define KOKKOS_PARALLEL_HPP 28 #include <Kokkos_Core_fwd.hpp> 29 #include <Kokkos_DetectionIdiom.hpp> 30 #include <Kokkos_ExecPolicy.hpp> 31 #include <Kokkos_View.hpp> 33 #include <impl/Kokkos_Tools.hpp> 34 #include <impl/Kokkos_Tools_Generic.hpp> 36 #include <impl/Kokkos_Traits.hpp> 37 #include <impl/Kokkos_FunctorAnalysis.hpp> 40 #include <type_traits> 50 using execution_space_t =
typename T::execution_space;
/// Detection archetype: names `T::device_type`; ill-formed (and therefore
/// "not detected") when T has no such member type.
template <class T>
using device_type_t = typename T::device_type;
64 template <
class Functor,
class Policy>
65 struct FunctorPolicyExecutionSpace {
66 using policy_execution_space = detected_t<execution_space_t, Policy>;
67 using functor_execution_space = detected_t<execution_space_t, Functor>;
68 using functor_device_type = detected_t<device_type_t, Functor>;
69 using functor_device_type_execution_space =
70 detected_t<execution_space_t, functor_device_type>;
73 !is_detected<execution_space_t, Policy>::value ||
74 !is_detected<execution_space_t, Functor>::value ||
75 std::is_same<policy_execution_space, functor_execution_space>::value,
76 "A policy with an execution space and a functor with an execution space " 77 "are given but the execution space types do not match!");
78 static_assert(!is_detected<execution_space_t, Policy>::value ||
79 !is_detected<device_type_t, Functor>::value ||
80 std::is_same<policy_execution_space,
81 functor_device_type_execution_space>::value,
82 "A policy with an execution space and a functor with a device " 83 "type are given but the execution space types do not match!");
84 static_assert(!is_detected<device_type_t, Functor>::value ||
85 !is_detected<execution_space_t, Functor>::value ||
86 std::is_same<functor_device_type_execution_space,
87 functor_execution_space>::value,
88 "A functor with both an execution space and device type is " 89 "given but their execution space types do not match!");
91 using execution_space = detected_or_t<
94 is_detected<device_type_t, Functor>::value,
95 detected_t<execution_space_t, detected_t<device_type_t, Functor>>,
96 Kokkos::DefaultExecutionSpace>,
97 execution_space_t, Functor>,
98 execution_space_t, Policy>;
// Labeled parallel_for taking an explicit execution policy.
//   str     - label passed to the Kokkos::Tools begin/end hooks
//   policy  - execution policy; copied into inner_policy before use
//   functor - the functor handed to the tools hooks
// The defaulted `Enable` template parameter removes this overload unless
// is_execution_policy<ExecPolicy>::value is true.
//
// NOTE(review): this text comes from a numbered listing; the leading integers
// are the original file's line numbers, and the numbering jumps (134 -> 137,
// 140 -> 142, 142 -> 146). The `template <` opener, the declaration of kpID,
// whatever runs between the tracking disable/enable pair and before the end
// hook, and the closing brace are not visible here - restore them from the
// upstream header rather than from this listing.
131 class ExecPolicy,
class FunctorType,
132 class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>>
133 inline void parallel_for(
const std::string& str,
const ExecPolicy& policy,
134 const FunctorType& functor) {
// Local, mutable copy of the (const) policy; both tools hooks receive this
// copy. Presumably the begin hook may adjust it - confirm.
137 ExecPolicy inner_policy = policy;
138 Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
// Shared-allocation tracking is switched off and back on around a statement
// that is missing from this listing (original line 141).
140 Kokkos::Impl::shared_allocation_tracking_disable();
142 Kokkos::Impl::shared_allocation_tracking_enable();
// Matching end hook, given the same policy copy, functor, label and kpID.
146 Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
149 template <
class ExecPolicy,
class FunctorType>
150 inline void parallel_for(
151 const ExecPolicy& policy,
const FunctorType& functor,
152 std::enable_if_t<is_execution_policy<ExecPolicy>::value>* =
nullptr) {
153 Kokkos::parallel_for(
"", policy, functor);
156 template <
class FunctorType>
157 inline void parallel_for(
const std::string& str,
const size_t work_count,
158 const FunctorType& functor) {
159 using execution_space =
160 typename Impl::FunctorPolicyExecutionSpace<FunctorType,
161 void>::execution_space;
162 using policy = RangePolicy<execution_space>;
164 policy execution_policy = policy(0, work_count);
165 ::Kokkos::parallel_for(str, execution_policy, functor);
168 template <
class FunctorType>
169 inline void parallel_for(
const size_t work_count,
const FunctorType& functor) {
170 ::Kokkos::parallel_for(
"", work_count, functor);
// NOTE(review): the first line below fuses the include of
// <Kokkos_Parallel_Reduce.hpp> with the opening of the parallel_scan template
// header. The leading integers throughout are line numbers from the listing
// this text was recovered from, and the numbering has gaps (346 -> 348,
// 350 -> 352, 356 -> 358, 358 -> 362).
//
// Labeled parallel_scan taking an explicit execution policy.
//   str     - label passed to the Kokkos::Tools begin/end hooks
//   policy  - execution policy; copied into inner_policy before use
//   functor - the functor handed to Impl::ParallelScan and to the hooks
// The unnamed std::enable_if_t template argument removes this overload unless
// is_execution_policy<ExecutionPolicy>::value is true (the parameter name and
// `=` that presumably precede it sit on a line that is not visible).
175 #include <Kokkos_Parallel_Reduce.hpp> 346 template <
class ExecutionPolicy,
class FunctorType,
348 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
349 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
350 const FunctorType& functor) {
// Local, mutable copy of the (const) policy; both tools hooks receive this
// copy. kpID is used here but its declaration is not visible in this listing.
352 ExecutionPolicy inner_policy = policy;
353 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// The closure is constructed while shared-allocation tracking is disabled.
// The constructor call is cut off after `functor,` - its remaining arguments
// are on a line missing from this listing.
355 Kokkos::Impl::shared_allocation_tracking_disable();
356 Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
358 Kokkos::Impl::shared_allocation_tracking_enable();
// Matching end hook; whatever runs between re-enabling tracking and this
// call (original lines 359-361) and the closing brace are not visible.
362 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
365 template <
class ExecutionPolicy,
class FunctorType>
366 inline void parallel_scan(
367 const ExecutionPolicy& policy,
const FunctorType& functor,
368 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* =
nullptr) {
369 ::Kokkos::parallel_scan(
"", policy, functor);
372 template <
class FunctorType>
373 inline void parallel_scan(
const std::string& str,
const size_t work_count,
374 const FunctorType& functor) {
375 using execution_space =
377 void>::execution_space;
381 policy execution_policy(0, work_count);
382 parallel_scan(str, execution_policy, functor);
385 template <
class FunctorType>
386 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor) {
387 ::Kokkos::parallel_scan(
"", work_count, functor);
// Labeled parallel_scan with an explicit execution policy that also delivers
// a result through `return_value`.
//   str     - label passed to the Kokkos::Tools begin/end hooks
//   policy  - execution policy; copied into inner_policy, and its space() is
//             fenced at the end when the result is not a view
//   functor - the functor handed to Impl::ParallelScanWithTotal
// The unnamed std::enable_if_t template argument removes this overload unless
// is_execution_policy<ExecutionPolicy>::value is true.
//
// NOTE(review): the leading integers are line numbers from the listing this
// text was recovered from, and several original lines are absent: the
// parameter that declares `return_value` (after line 394), the declaration of
// kpID, the `else` opener and the declaration of `view` (around 405-408),
// whatever follows each tracking_enable() call, and the closing braces.
390 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType,
392 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
393 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
394 const FunctorType& functor,
// Local, mutable copy of the (const) policy used for the hooks and closures.
397 ExecutionPolicy inner_policy = policy;
398 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// Compile-time branch: when ReturnType is a Kokkos view the closure is built
// directly on return_value, with shared-allocation tracking disabled while
// the closure is constructed.
400 if constexpr (Kokkos::is_view<ReturnType>::value) {
401 Kokkos::Impl::shared_allocation_tracking_disable();
402 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType>
403 closure(functor, inner_policy, return_value);
404 Kokkos::Impl::shared_allocation_tracking_enable();
// Second construction site, presumably the non-view branch: the closure is
// built on `view` instead of return_value. How `view` is created is not
// visible here - presumably it wraps return_value; confirm upstream.
407 Kokkos::Impl::shared_allocation_tracking_disable();
409 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType>
410 closure(functor, inner_policy, view);
411 Kokkos::Impl::shared_allocation_tracking_enable();
// Matching end hook for the begin hook above.
415 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
// A result that is a plain value rather than a view triggers a fence on the
// policy's execution space before returning.
417 if (!Kokkos::is_view<ReturnType>::value)
418 policy.space().fence(
419 "Kokkos::parallel_scan: fence due to result being a value, not a view");
422 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
423 inline void parallel_scan(
424 const ExecutionPolicy& policy,
const FunctorType& functor,
426 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* =
nullptr) {
427 ::Kokkos::parallel_scan(
"", policy, functor, return_value);
430 template <
class FunctorType,
class ReturnType>
431 inline void parallel_scan(
const std::string& str,
const size_t work_count,
432 const FunctorType& functor,
434 using execution_space =
436 void>::execution_space;
440 policy execution_policy(0, work_count);
441 parallel_scan(str, execution_policy, functor, return_value);
444 template <
class FunctorType,
class ReturnType>
445 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
447 ::Kokkos::parallel_scan(
"", work_count, functor, return_value);
// Primary template of FunctorTeamShmemSize. The two bool parameters default
// to whether FunctorType has a `team_shmem_size` member and whether it has a
// `shmem_size` member (via has_member_team_shmem_size / has_member_shmem_size).
// Partial specializations exist in this file for <true,false>, <false,true>
// and <true,true>, which leaves this primary definition for functors that
// have neither member.
//
// NOTE(review): the leading integers are line numbers from the listing this
// text was recovered from. The body of value() and the closing braces are not
// visible; both parameters are unnamed, so the result cannot depend on them -
// presumably it returns 0; confirm against the upstream header.
458 template <
class FunctorType,
459 bool HasTeamShmemSize =
460 has_member_team_shmem_size<FunctorType>::value,
461 bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
462 struct FunctorTeamShmemSize {
// value(functor, team_size): both arguments are ignored in this primary
// template.
463 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType&,
int) {
468 template <
class FunctorType>
469 struct FunctorTeamShmemSize<FunctorType, true, false> {
470 static inline size_t value(
const FunctorType& f,
int team_size) {
471 return f.team_shmem_size(team_size);
475 template <
class FunctorType>
476 struct FunctorTeamShmemSize<FunctorType, false, true> {
477 static inline size_t value(
const FunctorType& f,
int team_size) {
478 return f.shmem_size(team_size);
// Specialization for functors that define BOTH team_shmem_size and
// shmem_size. value() names neither of its parameters, so it does not query
// the functor; the visible string fragment indicates this combination is
// reported as an error rather than resolved.
//
// NOTE(review): the leading integers are line numbers from the listing this
// text was recovered from. The statement that consumes the message (original
// line 484), the rest of the message, the end of the body and the closing
// braces are not visible, and the last line below has unrelated tooltip text
// ("View to an array of data.") fused onto it - restore this block from the
// upstream header.
481 template <
class FunctorType>
482 struct FunctorTeamShmemSize<FunctorType, true, true> {
483 static inline size_t value(
const FunctorType& ,
int ) {
485 "Functor with both team_shmem_size and shmem_size defined is " View to an array of data.
Implementation of the ParallelFor operator that has a partial specialization for the device...
Given a Functor and Execution Policy query an execution space.
Execution policy for work over a range of an integral type.