MueLu  Version of the Day
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_Aggregates_kokkos.hpp"
57 #include "MueLu_Exceptions.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 namespace MueLu {
62 
63  // Try to stick unaggregated nodes into a neighboring aggregate if they are
64  // not already too big
65  template <class LocalOrdinal, class GlobalOrdinal, class Node>
67  BuildAggregates(const ParameterList& params,
68  const LWGraph_kokkos& graph,
69  Aggregates_kokkos& aggregates,
70  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71  LO& numNonAggregatedNodes) const {
72 
73  if(params.get<bool>("aggregation: deterministic")) {
74  Monitor m(*this, "BuildAggregatesDeterministic");
75  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76  } else {
77  Monitor m(*this, "BuildAggregatesRandom");
78  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79  }
80 
81  } // BuildAggregates
82 
83  template <class LO, class GO, class Node>
85  BuildAggregatesRandom(const ParameterList& params,
86  const LWGraph_kokkos& graph,
87  Aggregates_kokkos& aggregates,
88  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
89  LO& numNonAggregatedNodes) const {
90 
91  const LO numRows = graph.GetNodeNumVertices();
92  const int myRank = graph.GetComm()->getRank();
93 
94  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
95  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
96  auto colors = aggregates.GetGraphColors();
97  const LO numColors = aggregates.GetGraphNumColors();
98  const LO numLocalAggregates = aggregates.GetNumAggregates();
99 
100  auto lclLWGraph = graph.getLocalLWGraph();
101 
102  const LO defaultConnectWeight = 100;
103  const LO penaltyConnectWeight = 10;
104 
105  Kokkos::View<LO*, device_type> aggWeight ("aggWeight", numLocalAggregates);
106  Kokkos::View<LO*, device_type> connectWeight("connectWeight", numRows);
107  Kokkos::View<LO*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
108 
109  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
110 
111  // taw: by running the aggregation routine more than once there is a chance that also
112  // non-aggregated nodes with a node distance of two are added to existing aggregates.
113  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
114  // should be sufficient.
115  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
116  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
117  // is needed to reach distance 2 neighbors.
118  int maxIters = 2;
119  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
120  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121  for (int iter = 0; iter < maxIters; ++iter) {
122  for(LO color = 1; color <= numColors; ++color) {
123  Kokkos::deep_copy(aggWeight, 0);
124 
125  //the reduce counts how many nodes are aggregated by this phase,
126  //which will then be subtracted from numNonAggregatedNodes
127  LO numAggregated = 0;
128  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
129  Kokkos::RangePolicy<execution_space>(0, numRows),
130  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) {
131  if (aggStat(i) != READY || colors(i) != color)
132  return;
133 
134  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
135  for (int j = 0; j < neighOfINode.length; j++) {
136  LO neigh = neighOfINode(j);
137 
138  // We don't check (neigh != i), as it is covered by checking
139  // (aggStat[neigh] == AGGREGATED)
140  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
141  aggStat(neigh) == AGGREGATED)
142  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143  connectWeight(neigh));
144  }
145 
146  int bestScore = -100000;
147  int bestAggId = -1;
148  int bestConnect = -1;
149 
150  for (int j = 0; j < neighOfINode.length; j++) {
151  LO neigh = neighOfINode(j);
152 
153  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
154  aggStat(neigh) == AGGREGATED) {
155  auto aggId = vertex2AggId(neigh, 0);
156  int score = aggWeight(aggId) - aggPenalties(aggId);
157 
158  if (score > bestScore) {
159  bestAggId = aggId;
160  bestScore = score;
161  bestConnect = connectWeight(neigh);
162 
163  } else if (aggId == bestAggId &&
164  connectWeight(neigh) > bestConnect) {
165  bestConnect = connectWeight(neigh);
166  }
167  }
168  }
169  if (bestScore >= 0) {
170  aggStat(i) = AGGREGATED;
171  vertex2AggId(i, 0) = bestAggId;
172  procWinner(i, 0) = myRank;
173 
174  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
175  connectWeight(i) = bestConnect - penaltyConnectWeight;
176  tmpNumAggregated++;
177  }
178  }, numAggregated); //parallel_for
179  numNonAggregatedNodes -= numAggregated;
180  }
181  } // loop over maxIters
182 
183  } // BuildAggregatesRandom
184 
185 
186 
187  template <class LO, class GO, class Node>
189  BuildAggregatesDeterministic(const ParameterList& params,
190  const LWGraph_kokkos& graph,
191  Aggregates_kokkos& aggregates,
192  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
193  LO& numNonAggregatedNodes) const {
194 
195  const LO numRows = graph.GetNodeNumVertices();
196  const int myRank = graph.GetComm()->getRank();
197 
198  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
199  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
200  auto colors = aggregates.GetGraphColors();
201  const LO numColors = aggregates.GetGraphNumColors();
202  LO numLocalAggregates = aggregates.GetNumAggregates();
203 
204  auto lclLWGraph = graph.getLocalLWGraph();
205 
206  const int defaultConnectWeight = 100;
207  const int penaltyConnectWeight = 10;
208 
209  Kokkos::View<int*, device_type> connectWeight ("connectWeight", numRows);
210  Kokkos::View<int*, device_type> aggWeight ("aggWeight", numLocalAggregates);
211  Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
212  Kokkos::View<int*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
213 
214  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
215 
216  // We do this cycle twice.
217  // I don't know why, but ML does it too
218  // taw: by running the aggregation routine more than once there is a chance that also
219  // non-aggregated nodes with a node distance of two are added to existing aggregates.
220  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
221  // should be sufficient.
222  int maxIters = 2;
223  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
224  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
225  for (int iter = 0; iter < maxIters; ++iter) {
226  for(LO color = 1; color <= numColors; color++) {
227  Kokkos::deep_copy(aggWeight, 0);
228 
229  //the reduce counts how many nodes are aggregated by this phase,
230  //which will then be subtracted from numNonAggregatedNodes
231  LO numAggregated = 0;
232  Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights",
233  Kokkos::RangePolicy<execution_space>(0, numRows),
234  KOKKOS_LAMBDA (const LO i)
235  {
236  if (aggStat(i) != READY || colors(i) != color)
237  return;
238  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
239  for (int j = 0; j < neighOfINode.length; j++) {
240  LO neigh = neighOfINode(j);
241  // We don't check (neigh != i), as it is covered by checking
242  // (aggStat[neigh] == AGGREGATED)
243  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
244  aggStat(neigh) == AGGREGATED)
245  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
246  connectWeight(neigh));
247  }
248  });
249 
250  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
251  Kokkos::RangePolicy<execution_space>(0, numRows),
252  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated)
253  {
254  if (aggStat(i) != READY || colors(i) != color)
255  return;
256  int bestScore = -100000;
257  int bestAggId = -1;
258  int bestConnect = -1;
259 
260  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
261  for (int j = 0; j < neighOfINode.length; j++) {
262  LO neigh = neighOfINode(j);
263 
264  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
265  aggStat(neigh) == AGGREGATED) {
266  auto aggId = vertex2AggId(neigh, 0);
267  int score = aggWeight(aggId) - aggPenalties(aggId);
268 
269  if (score > bestScore) {
270  bestAggId = aggId;
271  bestScore = score;
272  bestConnect = connectWeight(neigh);
273 
274  } else if (aggId == bestAggId &&
275  connectWeight(neigh) > bestConnect) {
276  bestConnect = connectWeight(neigh);
277  }
278  }
279  }
280  if (bestScore >= 0) {
281  aggStat(i) = AGGREGATED;
282  vertex2AggId(i, 0) = bestAggId;
283  procWinner(i, 0) = myRank;
284 
285  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
286  connectWeight(i) = bestConnect - penaltyConnectWeight;
287  tmpNumAggregated++;
288  }
289  }, numAggregated); //parallel_reduce
290 
291  Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties",
292  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
293  KOKKOS_LAMBDA (const LO agg)
294  {
295  aggPenalties(agg) += aggPenaltyUpdates(agg);
296  aggPenaltyUpdates(agg) = 0;
297  });
298  numNonAggregatedNodes -= numAggregated;
299  }
300  } // loop over k
301  } // BuildAggregatesDeterministic
302 } // end namespace
303 
304 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesRandom(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Namespace for MueLu classes and methods.
void BuildAggregatesDeterministic(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Timer to be used in non-factories.