MueLu  Version of the Day
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <queue>
50 #include <vector>
51 
52 #include <Teuchos_Comm.hpp>
53 #include <Teuchos_CommHelpers.hpp>
54 
55 #include <Xpetra_Vector.hpp>
56 
58 
59 #include "MueLu_Aggregates_kokkos.hpp"
60 #include "MueLu_Exceptions.hpp"
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Monitor.hpp"
63 
64 #include "Kokkos_Sort.hpp"
65 #include <Kokkos_ScatterView.hpp>
66 
67 namespace MueLu {
68 
69  template <class LocalOrdinal, class GlobalOrdinal, class Node>
71  BuildAggregates(const Teuchos::ParameterList& params,
72  const LWGraph_kokkos& graph,
73  Aggregates_kokkos& aggregates,
74  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
75  LO& numNonAggregatedNodes) const {
76 
77  int minNodesPerAggregate = params.get<int> ("aggregation: min agg size");
78  int maxNodesPerAggregate = params.get<int> ("aggregation: max agg size");
79 
80  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
82  "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
83 
84  // Distance-2 gives less control than serial uncoupled phase 1
85  // no custom row reordering because would require making deep copy
86  // of local matrix entries and permuting it can only enforce
87  // max aggregate size
88  {
89  if(params.get<bool>("aggregation: deterministic"))
90  {
91  Monitor m(*this, "BuildAggregatesDeterministic");
92  BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
93  aggregates, aggStat, numNonAggregatedNodes);
94  } else {
95  Monitor m(*this, "BuildAggregatesRandom");
96  BuildAggregatesRandom(maxNodesPerAggregate, graph,
97  aggregates, aggStat, numNonAggregatedNodes);
98  }
99  }
100  }
101 
102  template <class LocalOrdinal, class GlobalOrdinal, class Node>
104  BuildAggregatesRandom(const LO maxAggSize,
105  const LWGraph_kokkos& graph,
106  Aggregates_kokkos& aggregates,
107  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
108  LO& numNonAggregatedNodes) const
109  {
110  const LO numRows = graph.GetNodeNumVertices();
111  const int myRank = graph.GetComm()->getRank();
112 
113  // Extract data from aggregates
114  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
115  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
116  auto colors = aggregates.GetGraphColors();
117 
118  auto lclLWGraph = graph.getLocalLWGraph();
119 
120  LO numAggregatedNodes = 0;
121  LO numLocalAggregates = aggregates.GetNumAggregates();
122  Kokkos::View<LO, device_type> aggCount("aggCount");
123  Kokkos::deep_copy(aggCount, numLocalAggregates);
124  Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1",
125  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
126  KOKKOS_LAMBDA (const LO nodeIdx) {
127  if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
128  const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129  vertex2AggId(nodeIdx, 0) = aggIdx;
130  aggStat(nodeIdx) = AGGREGATED;
131  procWinner(nodeIdx, 0) = myRank;
132  }
133  });
134  // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
135  // before updating the value of numLocalAggregates.
136  // But since we also do not want to create a host mirror of aggCount we do some trickery...
137  numAggregatedNodes -= numLocalAggregates;
138  Kokkos::deep_copy(numLocalAggregates, aggCount);
139  numAggregatedNodes += numLocalAggregates;
140 
141  // Compute the initial size of the aggregates.
142  // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
143  // at this point so we could simplify the code below a lot if this
144  // assumption is correct...
145  Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
146  {
147  // Here there is a possibility that two vertices assigned to two different threads contribute
148  // to the same aggregate if somethings happened before phase 1?
149  auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
150  Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size",
151  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
152  KOKKOS_LAMBDA (const LO nodeIdx) {
153  auto aggSizesScatterViewAccess = aggSizesScatterView.access();
154  if(vertex2AggId(nodeIdx, 0) >= 0)
155  aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
156  });
157  Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
158  }
159 
160  LO tmpNumAggregatedNodes = 0;
161  Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes",
162  Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
163  KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) {
164  if(colors(nodeIdx) != 1
165  && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
166  // Get neighbors of vertex i and look for local, aggregated,
167  // color 1 neighbor (valid root).
168  auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
169  for(LO j = 0; j < neighbors.length; ++j) {
170  auto nei = neighbors.colidx(j);
171  if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1
172  && aggStat(nei) == AGGREGATED) {
173 
174  // This atomic guarentees that any other node trying to
175  // join aggregate agg has the correct size.
176  LO agg = vertex2AggId(nei, 0);
177  const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
178  1);
179  if(aggSize < maxAggSize) {
180  //assign vertex i to aggregate with root j
181  vertex2AggId(nodeIdx, 0) = agg;
182  procWinner(nodeIdx, 0) = myRank;
183  aggStat(nodeIdx) = AGGREGATED;
184  ++lNumAggregatedNodes;
185  break;
186  } else {
187  // Decrement back the value of aggSizesView(agg)
188  Kokkos::atomic_decrement(&aggSizesView(agg));
189  }
190  }
191  }
192  }
193  // if(aggStat(nodeIdx) != AGGREGATED) {
194  // lNumNonAggregatedNodes++;
195  if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; }
196  // }
197  }, tmpNumAggregatedNodes);
198  numAggregatedNodes += tmpNumAggregatedNodes;
199  numNonAggregatedNodes -= numAggregatedNodes;
200 
201  // update aggregate object
202  aggregates.SetNumAggregates(numLocalAggregates);
203  }
204 
205  template <class LocalOrdinal, class GlobalOrdinal, class Node>
207  BuildAggregatesDeterministic(const LO maxAggSize,
208  const LWGraph_kokkos& graph,
209  Aggregates_kokkos& aggregates,
210  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
211  LO& numNonAggregatedNodes) const
212  {
213  const LO numRows = graph.GetNodeNumVertices();
214  const int myRank = graph.GetComm()->getRank();
215 
216  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217  auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218  auto colors = aggregates.GetGraphColors();
219 
220  auto lclLWGraph = graph.getLocalLWGraph();
221 
222  LO numLocalAggregates = aggregates.GetNumAggregates();
223  Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
224  {
225  auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
226  h_nla() = numLocalAggregates;
227  Kokkos::deep_copy(numLocalAggregatesView, h_nla);
228  }
229 
230  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
231  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
232  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
233 
234  //first loop build the set of new roots
235  Kokkos::parallel_for("Aggregation Phase 1: building list of new roots",
236  Kokkos::RangePolicy<execution_space>(0, numRows),
237  KOKKOS_LAMBDA(const LO i)
238  {
239  if(colors(i) == 1 && aggStat(i) == READY)
240  {
241  //i will become a root
242  newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
243  }
244  });
245  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
246  //sort new roots by LID to guarantee determinism in agg IDs
247  Kokkos::sort(newRoots, 0, h_numNewRoots());
248  LO numAggregated = 0;
249  Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes",
250  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
251  KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated)
252  {
253  LO root = newRoots(rootIndex);
254  LO aggID = numLocalAggregatesView() + rootIndex;
255  LO aggSize = 1;
256  vertex2AggId(root, 0) = aggID;
257  procWinner(root, 0) = myRank;
258  aggStat(root) = AGGREGATED;
259  auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
260  for(LO n = 0; n < neighOfRoot.length; n++)
261  {
262  LO neigh = neighOfRoot(n);
263  if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY)
264  {
265  //add neigh to aggregate
266  vertex2AggId(neigh, 0) = aggID;
267  procWinner(neigh, 0) = myRank;
268  aggStat(neigh) = AGGREGATED;
269  aggSize++;
270  if(aggSize == maxAggSize)
271  {
272  //can't add any more nodes
273  break;
274  }
275  }
276  }
277  lnumAggregated += aggSize;
278  }, numAggregated);
279  numNonAggregatedNodes -= numAggregated;
280  // update aggregate object
281  aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
282  }
283 
284 } // end namespace
285 
286 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Namespace for MueLu classes and methods.
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
Exception throws to report errors in the internal logical of the program.