// Kokkos Core Kernels Package  Version of the Day
// Kokkos_Tuners.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_KOKKOS_TUNERS_HPP
23 #define KOKKOS_KOKKOS_TUNERS_HPP
24 
25 #include <Kokkos_Macros.hpp>
26 #include <Kokkos_Core_fwd.hpp>
27 #include <Kokkos_ExecPolicy.hpp>
28 #include <KokkosExp_MDRangePolicy.hpp>
29 #include <impl/Kokkos_Profiling_Interface.hpp>
30 
31 #include <array>
32 #include <utility>
33 #include <tuple>
34 #include <string>
35 #include <vector>
36 #include <map>
37 #include <cassert>
38 
39 namespace Kokkos {
40 namespace Tools {
41 
42 namespace Experimental {
43 
44 // forward declarations
45 SetOrRange make_candidate_set(size_t size, int64_t* data);
46 bool have_tuning_tool();
47 size_t declare_output_type(const std::string&,
48  Kokkos::Tools::Experimental::VariableInfo);
49 void request_output_values(size_t, size_t,
50  Kokkos::Tools::Experimental::VariableValue*);
51 VariableValue make_variable_value(size_t, int64_t);
52 VariableValue make_variable_value(size_t, double);
53 SetOrRange make_candidate_range(double lower, double upper, double step,
54  bool openLower, bool openUpper);
55 size_t get_new_context_id();
56 void begin_context(size_t context_id);
57 void end_context(size_t context_id);
58 namespace Impl {
59 
// Forward declaration. A node stores the values available at one level of a
// search space; `ContainedType` is the type stored for the nested level, or
// `void` for the lowest level (see the specialization below).
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

// General (non-leaf) node: `root_values` holds this level's values and
// `sub_values` holds the nested containers. Both vectors are filled by
// appending and read back by position.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;

  // Appends a copy of `in` to this level's values.
  // NOTE(review): declared noexcept although push_back may throw (allocation
  // or copy failure); such a failure terminates the program.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }

  // Appends a copy of `in` to the nested containers.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }

  // Returns the value stored at position `index`; no bounds check is made.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }

  // Returns the nested container stored at position `index`; no bounds check
  // is made.
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

// Leaf node (`ContainedType` is `void`): values only, no nested containers.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;

  // Takes ownership of the initial list of values.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}

  // Appends a copy of `in` to this level's values.
  // NOTE(review): declared noexcept although push_back may throw; such a
  // failure terminates the program.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }

  // Returns the value stored at position `index`; no bounds check is made.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
96 
102 template <class NestedMap>
104 
105 // Vectors are our lowest-level, no nested values
106 template <class T>
107 struct MapTypeConverter<std::vector<T>> {
108  using type = ValueHierarchyNode<T, void>;
109 };
110 
111 // Maps contain both the "root" types and sub-vectors
112 template <class K, class V>
113 struct MapTypeConverter<std::map<K, V>> {
115 };
116 
122 template <class NestedMap>
124 
125 // Vectors are our lowest-level, no nested values. Just fill in the fundamental
126 // values
127 template <class T>
128 struct ValueHierarchyConstructor<std::vector<T>> {
129  using return_type = typename MapTypeConverter<std::vector<T>>::type;
130  static return_type build(const std::vector<T>& in) { return return_type{in}; }
131 };
132 
133 // For maps, we need to fill in the fundamental values, and construct child
134 // nodes
135 template <class K, class V>
136 struct ValueHierarchyConstructor<std::map<K, V>> {
137  using return_type = typename MapTypeConverter<std::map<K, V>>::type;
138  static return_type build(const std::map<K, V>& in) {
139  return_type node_to_build;
140  for (auto& entry : in) {
141  node_to_build.add_root_value(entry.first);
142  node_to_build.add_sub_container(
143  ValueHierarchyConstructor<V>::build(entry.second));
144  }
145  return node_to_build;
146  }
147 };
148 
// Compile-time depth of a nested std::map / std::vector description,
// exposed as `value`. Only the specializations below are defined.
template <class InspectForDepth>
struct get_space_dimensionality;

// The dimensionality of a vector is 1
template <class T>
struct get_space_dimensionality<std::vector<T>> {
  static constexpr int value = 1;
};

// The dimensionality of a map is 1 (the map) plus the dimensionality
// of the map's value type
template <class K, class V>
struct get_space_dimensionality<std::map<K, V>> {
  static constexpr int value = 1 + get_space_dimensionality<V>::value;
};
172 
// Builds the nested container type for an N-level sparse space over `Value`:
// N - 1 layers of std::map keyed by `Value`, wrapped around a std::vector.
template <typename Value, int Depth>
struct n_dimensional_sparse_structure {
  using type = std::map<
      Value, typename n_dimensional_sparse_structure<Value, Depth - 1>::type>;
};

// Recursion end: a single level is a plain vector.
template <typename Value>
struct n_dimensional_sparse_structure<Value, 1> {
  using type = std::vector<Value>;
};
186 
193 // First, a helper to get the value in one dimension
194 template <class Container>
196 
197 // At any given level, just return your value at that level
198 template <class RootType, class Subtype>
199 struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
200  static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
201  double fraction_to_traverse) {
202  size_t index = dimension.root_values.size() * fraction_to_traverse;
203  return dimension.get_root_value(index);
204  }
205 };
206 
212 // At the bottom level, we have one double and a base-level ValueHierarchyNode
213 
214 template <class HierarchyNode, class... InterpolationIndices>
216 
217 template <class ValueType>
218 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
219  using node_type = ValueHierarchyNode<ValueType, void>;
220  using return_type = std::tuple<ValueType>;
221  static return_type build(const node_type& in, double index) {
222  return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
223  }
224 };
225 
226 // At levels above the bottom, we tuple_cat the result of our child on the end
227 // of our own tuple
228 template <class ValueType, class Subtype, class... Indices>
229 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
230  Indices...> {
231  using node_type = ValueHierarchyNode<ValueType, Subtype>;
232  using sub_tuple =
233  typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
234  using return_type = decltype(std::tuple_cat(
235  std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
236  static return_type build(const node_type& in, double fraction_to_traverse,
237  Indices... indices) {
238  size_t index = in.sub_values.size() * fraction_to_traverse;
239  auto dimension_value = std::make_tuple(
240  DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
241  return std::tuple_cat(dimension_value,
242  GetMultidimensionalPoint<Subtype, Indices...>::build(
243  in.get_sub_value(index), indices...));
244  }
245 };
246 
247 template <typename PointType, class ArrayType, size_t... Is>
248 auto get_point_helper(const PointType& in, const ArrayType& indices,
249  std::index_sequence<Is...>) {
250  using helper = GetMultidimensionalPoint<
251  PointType,
252  decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
253  return helper::build(in, std::get<Is>(indices).value.double_value...);
254 }
255 
256 template <typename PointType, typename ArrayType>
257 struct GetPoint;
258 
259 template <typename PointType, size_t X>
260 struct GetPoint<PointType,
261  std::array<Kokkos::Tools::Experimental::VariableValue, X>> {
262  using index_set_type =
263  std::array<Kokkos::Tools::Experimental::VariableValue, X>;
264  static auto build(const PointType& in, const index_set_type& indices) {
265  return get_point_helper(in, indices, std::make_index_sequence<X>{});
266  }
267 };
268 
269 template <typename PointType, typename ArrayType>
270 auto get_point(const PointType& point, const ArrayType& indices) {
271  return GetPoint<PointType, ArrayType>::build(point, indices);
272 }
273 
274 } // namespace Impl
275 
276 template <template <class...> class Container, size_t MaxDimensionSize = 100,
277  class... TemplateArguments>
278 class MultidimensionalSparseTuningProblem {
279  public:
280  using ProblemSpaceInput = Container<TemplateArguments...>;
281  static constexpr int space_dimensionality =
282  Impl::get_space_dimensionality<ProblemSpaceInput>::value;
283  static constexpr size_t max_space_dimension_size = MaxDimensionSize;
284  static constexpr double tuning_min = 0.0;
285  static constexpr double tuning_max = 0.999;
286 
287  // Not declared as static constexpr to work around the following compiler bug
288  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96862
289  // where a floating-point expression cannot be constexpr under -frounding-math
290  double tuning_step = tuning_max / max_space_dimension_size;
291 
292  using StoredProblemSpace =
293  typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
294  using HierarchyConstructor =
295  typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
296 
297  using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
298  space_dimensionality>;
299  template <class Key, class Value>
300  using extended_map = std::map<Key, Value>;
301  template <typename Key>
302  using extended_problem =
303  MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
304  ProblemSpaceInput>;
305  template <typename Key, typename Value>
306  using ExtendedProblemSpace =
307  typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
308 
309  template <typename Key>
310  auto extend(const std::string& axis_name,
311  const std::vector<Key>& new_tuning_axis) const
312  -> extended_problem<Key> {
313  ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
314  for (auto& key : new_tuning_axis) {
315  extended_space.add_root_value(key);
316  extended_space.add_sub_container(m_space);
317  }
318  std::vector<std::string> extended_names;
319  extended_names.reserve(m_variable_names.size() + 1);
320  extended_names.push_back(axis_name);
321  extended_names.insert(extended_names.end(), m_variable_names.begin(),
322  m_variable_names.end());
323  return extended_problem<Key>(extended_space, extended_names);
324  }
325 
326  private:
327  StoredProblemSpace m_space;
328  std::array<size_t, space_dimensionality> variable_ids;
329  std::vector<std::string> m_variable_names;
330  size_t context;
331 
332  public:
333  MultidimensionalSparseTuningProblem() = default;
334 
335  MultidimensionalSparseTuningProblem(StoredProblemSpace space,
336  const std::vector<std::string>& names)
337  : m_space(std::move(space)), m_variable_names(names) {
338  assert(names.size() == space_dimensionality);
339  for (unsigned long x = 0; x < names.size(); ++x) {
340  VariableInfo info;
341  info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
342  info.category = Kokkos::Tools::Experimental::StatisticalCategory::
343  kokkos_value_interval;
344  info.valueQuantity =
345  Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
346  info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
347  tuning_min, tuning_max, tuning_step, true, true);
348  variable_ids[x] = declare_output_type(names[x], info);
349  }
350  }
351 
352  MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
353  const std::vector<std::string>& names)
354  : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
355  names) {}
356 
357  template <typename... Coordinates>
358  auto get_point(Coordinates... coordinates) {
359  using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
360  sizeof...(coordinates)>;
361  return Impl::get_point(
362  m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
363  0, static_cast<double>(coordinates))...}));
364  }
365 
366  auto begin() {
367  context = Kokkos::Tools::Experimental::get_new_context_id();
368  ValueArray values;
369  for (int x = 0; x < space_dimensionality; ++x) {
370  values[x] = Kokkos::Tools::Experimental::make_variable_value(
371  variable_ids[x], 0.0);
372  }
373  begin_context(context);
374  request_output_values(context, space_dimensionality, values.data());
375  return Impl::get_point(m_space, values);
376  }
377 
378  auto end() { end_context(context); }
379 };
380 
// CRTP mixin for tuner classes. `Tuner` must provide `get_tuner() const`
// returning its underlying tuning problem; the mixin forwards `combine` and
// `get_point` to that problem.
template <typename Tuner>
struct ExtendableTunerMixin {
  // Returns the underlying problem extended by a new axis `axis_name` whose
  // candidate keys are `new_axis` (forwards to the problem's `extend`).
  template <typename Key>
  auto combine(const std::string& axis_name,
               const std::vector<Key>& new_axis) const {
    const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.extend(axis_name, new_axis);
  }

  // Forwards `coordinates...` to the underlying problem's `get_point`.
  // The problem is held as a non-const local so this compiles whether or not
  // that `get_point` is const-qualified (binding it to `const auto&` made the
  // call ill-formed for a non-const `get_point`).
  template <typename... Coordinates>
  auto get_point(Coordinates... coordinates) {
    auto sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.get_point(coordinates...);
  }
};
396 
397 template <size_t MaxDimensionSize = 100, template <class...> class Container,
398  class... TemplateArguments>
399 auto make_multidimensional_sparse_tuning_problem(
400  const Container<TemplateArguments...>& in, std::vector<std::string> names) {
401  return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
402  TemplateArguments...>(in, names);
403 }
404 
405 class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
406  private:
407  using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
408  using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
409  std::declval<SpaceDescription>(),
410  std::declval<std::vector<std::string>>()));
411  TunerType tuner;
412 
413  public:
414  TeamSizeTuner() = default;
415  TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
416  TeamSizeTuner(const TeamSizeTuner& other) = default;
417  TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
418  TeamSizeTuner(TeamSizeTuner&& other) = default;
419  template <typename ViableConfigurationCalculator, typename Functor,
420  typename TagType, typename... Properties>
421  TeamSizeTuner(const std::string& name,
423  const Functor& functor, const TagType& tag,
424  ViableConfigurationCalculator calc) {
425  using PolicyType = Kokkos::TeamPolicy<Properties...>;
426  auto initial_vector_length = policy.impl_vector_length();
427  if (initial_vector_length < 1) {
428  policy.impl_set_vector_length(1);
429  }
455  SpaceDescription space_description;
456 
457  auto max_vector_length = PolicyType::vector_length_max();
458  std::vector<int64_t> allowed_vector_lengths;
459 
460  if (policy.impl_auto_vector_length()) { // case 1 or 2
461  for (int vector_length = max_vector_length; vector_length >= 1;
462  vector_length /= 2) {
463  policy.impl_set_vector_length(vector_length);
476  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
477  if ((policy.impl_auto_team_size()) ||
478  (policy.team_size() <= max_team_size)) {
479  allowed_vector_lengths.push_back(vector_length);
480  }
481  }
482  } else { // case 3, there's only one vector length to care about
483  allowed_vector_lengths.push_back(policy.impl_vector_length());
484  }
485 
486  for (const auto vector_length : allowed_vector_lengths) {
487  std::vector<int64_t> allowed_team_sizes;
488  policy.impl_set_vector_length(vector_length);
489  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
490  if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
491  // sizes
492  for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
493  allowed_team_sizes.push_back(team_size);
494  }
495  } else { // case 2, just try the provided team size
496  allowed_team_sizes.push_back(policy.team_size());
497  }
498  space_description[vector_length] = allowed_team_sizes;
499  }
500  tuner = make_multidimensional_sparse_tuning_problem<20>(
501  space_description, {std::string(name + "_vector_length"),
502  std::string(name + "_team_size")});
503  policy.impl_set_vector_length(initial_vector_length);
504  }
505 
506  template <typename... Properties>
507  void tune(Kokkos::TeamPolicy<Properties...>& policy) {
508  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
509  auto configuration = tuner.begin();
510  auto team_size = std::get<1>(configuration);
511  auto vector_length = std::get<0>(configuration);
512  if (vector_length > 0) {
513  policy.impl_set_team_size(team_size);
514  policy.impl_set_vector_length(vector_length);
515  }
516  }
517  }
518  void end() {
519  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
520  tuner.end();
521  }
522  }
523 
524  TunerType get_tuner() const { return tuner; }
525 };
526 
namespace Impl {

// Appends the powers of two 1, 2, 4, ... that are strictly smaller than
// `tile_size` to `cont`. Nothing is appended when `tile_size` is 1 or less.
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  int candidate = 1;
  while (candidate < tile_size) {
    cont.push_back(candidate);
    candidate *= 2;
  }
}

// For every power of two `p` strictly smaller than `tile_size`, fills the
// entry keyed by `p` (created on first access) using the reduced budget
// `tile_size / p`.
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  int candidate = 1;
  while (candidate < tile_size) {
    fill_tile(cont[candidate], tile_size / candidate);
    candidate *= 2;
  }
}

}  // namespace Impl
542 
543 template <int MDRangeRank>
544 struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
545  private:
546  static constexpr int rank = MDRangeRank;
547  static constexpr int max_slices = 15;
548  using SpaceDescription =
549  typename Impl::n_dimensional_sparse_structure<int, rank>::type;
550  using TunerType =
551  decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
552  std::declval<SpaceDescription>(),
553  std::declval<std::vector<std::string>>()));
554  TunerType tuner;
555 
556  public:
557  MDRangeTuner() = default;
558  template <typename Functor, typename TagType, typename Calculator,
559  typename... Properties>
560  MDRangeTuner(const std::string& name,
561  const Kokkos::MDRangePolicy<Properties...>& policy,
562  const Functor& functor, const TagType& tag, Calculator calc) {
563  SpaceDescription desc;
564  int max_tile_size =
565  calc.get_mdrange_max_tile_size_product(policy, functor, tag);
566  Impl::fill_tile(desc, max_tile_size);
567  std::vector<std::string> feature_names;
568  for (int x = 0; x < rank; ++x) {
569  feature_names.push_back(name + "_tile_size_" + std::to_string(x));
570  }
571  tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
572  desc, feature_names);
573  }
574  template <typename Policy, typename Tuple, size_t... Indices>
575  void set_policy_tile(Policy& policy, const Tuple& tuple,
576  const std::index_sequence<Indices...>&) {
577  policy.impl_change_tile_size({std::get<Indices>(tuple)...});
578  }
579  template <typename... Properties>
580  void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
581  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
582  auto configuration = tuner.begin();
583  set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
584  }
585  }
586  void end() {
587  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
588  tuner.end();
589  }
590  }
591 
592  TunerType get_tuner() const { return tuner; }
593 };
594 
595 template <class Choice>
596 struct CategoricalTuner {
597  using choice_list = std::vector<Choice>;
598  choice_list choices;
599  size_t context;
600  size_t tuning_variable_id;
601  CategoricalTuner(std::string name, choice_list m_choices)
602  : choices(m_choices) {
603  std::vector<int64_t> indices;
604  for (typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
605  indices.push_back(x);
606  }
607  VariableInfo info;
608  info.category = StatisticalCategory::kokkos_value_categorical;
609  info.valueQuantity = CandidateValueType::kokkos_value_set;
610  info.type = ValueType::kokkos_value_int64;
611  info.candidates = make_candidate_set(indices.size(), indices.data());
612  tuning_variable_id = declare_output_type(name, info);
613  }
614  const Choice& begin() {
615  context = get_new_context_id();
616  begin_context(context);
617  VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
618  request_output_values(context, 1, &value);
619  return choices[value.value.int_value];
620  }
621  void end() { end_context(context); }
622 };
623 
624 template <typename Choice>
625 auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
626  -> CategoricalTuner<Choice> {
627  return CategoricalTuner<Choice>(name, choices);
628 }
629 
630 } // namespace Experimental
631 } // namespace Tools
632 } // namespace Kokkos
633 
634 #endif
// Execution policy for parallel work over a league of teams of threads.
// Definition: dummy.cpp:17