MueLu  Version of the Day
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_Aggregates_kokkos.hpp"
57 #include "MueLu_Exceptions.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 #include "Kokkos_Sort.hpp"
62 
63 namespace MueLu {
64 
65  template <class LocalOrdinal, class GlobalOrdinal, class Node>
67  BuildAggregates(const ParameterList& params,
68  const LWGraph_kokkos& graph,
69  Aggregates_kokkos& aggregates,
70  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71  LO& numNonAggregatedNodes) const {
72 
73  if(params.get<bool>("aggregation: deterministic")) {
74  Monitor m(*this, "BuildAggregatesDeterministic");
75  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76  } else {
77  Monitor m(*this, "BuildAggregatesRandom");
78  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79  }
80 
81  } // BuildAggregates
82 
83  template <class LO, class GO, class Node>
85  BuildAggregatesRandom(const ParameterList& params,
86  const LWGraph_kokkos& graph,
87  Aggregates_kokkos& aggregates,
88  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
89  LO& numNonAggregatedNodes) const
90  {
91  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
92  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
93  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
94 
95  const LO numRows = graph.GetNodeNumVertices();
96  const int myRank = graph.GetComm()->getRank();
97 
98  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
99  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
100  auto colors = aggregates.GetGraphColors();
101  const LO numColors = aggregates.GetGraphNumColors();
102 
103  auto lclLWGraph = graph.getLocalLWGraph();
104 
105  LO numLocalNodes = numRows;
106  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
107 
108  const double aggFactor = 0.5;
109  double factor = static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110  factor = pow(factor, aggFactor);
111 
112  // LBV on Sept 12, 2019: this looks a little heavy handed,
113  // I'm not sure a view is needed to perform atomic updates.
114  // If we can avoid this and use a simple LO that would be
115  // simpler for later maintenance.
116  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
117  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
118  Kokkos::create_mirror_view(numLocalAggregates);
119  h_numLocalAggregates() = aggregates.GetNumAggregates();
120  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
121 
122  // Now we create new aggregates using root nodes in all colors other than the first color,
123  // as the first color was already exhausted in Phase 1.
124  for(int color = 2; color < numColors + 1; ++color) {
125  LO tmpNumNonAggregatedNodes = 0;
126  Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color",
127  Kokkos::RangePolicy<execution_space>(0, numRows),
128  KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) {
129  if(aggStat(rootCandidate) == READY &&
130  colors(rootCandidate) == color) {
131 
132  LO numNeighbors = 0;
133  LO aggSize = 0;
134  if (matchMLbehavior) {
135  aggSize += 1;
136  numNeighbors +=1;
137  }
138 
139  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
140 
141  // Loop over neighbors to count how many nodes could join
142  // the new aggregate
143 
144  for(int j = 0; j < neighbors.length; ++j) {
145  LO neigh = neighbors(j);
146  if(neigh != rootCandidate) {
147  if(lclLWGraph.isLocalNeighborVertex(neigh) &&
148  (aggStat(neigh) == READY) &&
149  (aggSize < maxNodesPerAggregate)) {
150  ++aggSize;
151  }
152  ++numNeighbors;
153  }
154  }
155 
156  // If a sufficient number of nodes can join the new aggregate
157  // then we actually create the aggregate.
158  if(aggSize > minNodesPerAggregate &&
159  (aggSize > factor*numNeighbors)) {
160 
161  // aggregates.SetIsRoot(rootCandidate);
162  LO aggIndex = Kokkos::
163  atomic_fetch_add(&numLocalAggregates(), 1);
164 
165  LO numAggregated = 0;
166 
167  if (matchMLbehavior) {
168  // Add the root.
169  aggStat(rootCandidate) = AGGREGATED;
170  vertex2AggId(rootCandidate, 0) = aggIndex;
171  procWinner(rootCandidate, 0) = myRank;
172  ++numAggregated;
173  --lNumNonAggregatedNodes;
174  }
175 
176  for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177  LO neigh = neighbors(neighIdx);
178  if(neigh != rootCandidate) {
179  if(lclLWGraph.isLocalNeighborVertex(neigh) &&
180  (aggStat(neigh) == READY) &&
181  (numAggregated < aggSize)) {
182  aggStat(neigh) = AGGREGATED;
183  vertex2AggId(neigh, 0) = aggIndex;
184  procWinner(neigh, 0) = myRank;
185 
186  ++numAggregated;
187  --lNumNonAggregatedNodes;
188  }
189  }
190  }
191  }
192  }
193  }, tmpNumNonAggregatedNodes);
194  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
195  }
196 
197  // update aggregate object
198  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
199  aggregates.SetNumAggregates(h_numLocalAggregates());
200  } // BuildAggregatesRandom
201 
202  template <class LO, class GO, class Node>
204  BuildAggregatesDeterministic(const ParameterList& params,
205  const LWGraph_kokkos& graph,
206  Aggregates_kokkos& aggregates,
207  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
208  LO& numNonAggregatedNodes) const
209  {
210  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
211  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
212 
213  const LO numRows = graph.GetNodeNumVertices();
214  const int myRank = graph.GetComm()->getRank();
215 
216  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218  auto colors = aggregates.GetGraphColors();
219  const LO numColors = aggregates.GetGraphNumColors();
220 
221  auto lclLWGraph = graph.getLocalLWGraph();
222 
223  LO numLocalNodes = procWinner.size();
224  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
225 
226  const double aggFactor = 0.5;
227  double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
228  factor = pow(factor, aggFactor);
229 
230  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
231  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
232  Kokkos::create_mirror_view(numLocalAggregates);
233  h_numLocalAggregates() = aggregates.GetNumAggregates();
234  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
235 
236  // Now we create new aggregates using root nodes in all colors other than the first color,
237  // as the first color was already exhausted in Phase 1.
238  //
239  // In the deterministic version, exactly the same set of aggregates will be created
240  // (as the nondeterministic version)
241  // because no vertex V can be a neighbor of two vertices of the same color, so two root
242  // candidates can't fight over V
243  //
244  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
245  // roots of each color before assigning aggregate IDs
246 
247  //numNonAggregatedNodes is the best available upper bound for the number of aggregates
248  //which may be created in this phase, so use it for the size of newRoots
249  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
250  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
251  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
252  for(int color = 1; color < numColors + 1; ++color) {
253  h_numNewRoots() = 0;
254  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
255  Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color",
256  Kokkos::RangePolicy<execution_space>(0, numRows),
257  KOKKOS_LAMBDA(const LO rootCandidate) {
258  if(aggStat(rootCandidate) == READY &&
259  colors(rootCandidate) == color) {
260  LO aggSize = 0;
261  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
262  // Loop over neighbors to count how many nodes could join
263  // the new aggregate
264  LO numNeighbors = 0;
265  for(int j = 0; j < neighbors.length; ++j) {
266  LO neigh = neighbors(j);
267  if(neigh != rootCandidate)
268  {
269  if(lclLWGraph.isLocalNeighborVertex(neigh) &&
270  aggStat(neigh) == READY &&
271  aggSize < maxNodesPerAggregate)
272  {
273  ++aggSize;
274  }
275  ++numNeighbors;
276  }
277  }
278  // If a sufficient number of nodes can join the new aggregate
279  // then we mark rootCandidate as a future root.
280  if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
281  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
282  newRoots(newRootIndex) = rootCandidate;
283  }
284  }
285  });
286  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
287 
288  if(h_numNewRoots() > 0) {
289  //sort the new root indices
290  Kokkos::sort(newRoots, 0, h_numNewRoots());
291  //now, loop over all new roots again and actually create the aggregates
292  LO tmpNumNonAggregatedNodes = 0;
293  //First, just find the set of color vertices which will become aggregate roots
294  Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates",
295  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
296  KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) {
297  LO root = newRoots(newRootIndex);
298  LO newAggID = numLocalAggregates() + newRootIndex;
299  auto neighbors = lclLWGraph.getNeighborVertices(root);
300  // Loop over neighbors and add them to new aggregate
301  aggStat(root) = AGGREGATED;
302  vertex2AggId(root, 0) = newAggID;
303  LO aggSize = 1;
304  for(int j = 0; j < neighbors.length; ++j) {
305  LO neigh = neighbors(j);
306  if(neigh != root) {
307  if(lclLWGraph.isLocalNeighborVertex(neigh) &&
308  aggStat(neigh) == READY &&
309  aggSize < maxNodesPerAggregate) {
310  aggStat(neigh) = AGGREGATED;
311  vertex2AggId(neigh, 0) = newAggID;
312  procWinner(neigh, 0) = myRank;
313  aggSize++;
314  }
315  }
316  }
317  lNumNonAggregatedNodes -= aggSize;
318  }, tmpNumNonAggregatedNodes);
319  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
320  h_numLocalAggregates() += h_numNewRoots();
321  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
322  }
323  }
324  aggregates.SetNumAggregates(h_numLocalAggregates());
325  }
326 
327 } // end namespace
328 
329 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
Namespace for MueLu classes and methods.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.