MueLu  Version of the Day
MueLu_PerfUtils_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NodeT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DIScalarLAIMED. IN Node EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NodeT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GlobalOrdinalODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_PERFUTILS_DEF_HPP
47 #define MUELU_PERFUTILS_DEF_HPP
48 
49 #include <algorithm>
50 #include <string>
51 
52 #ifdef HAVE_MPI
53 #include <Teuchos_CommHelpers.hpp>
54 #endif
55 
56 #include <Xpetra_Export.hpp>
57 #include <Xpetra_Import.hpp>
58 #include <Xpetra_Matrix.hpp>
59 #include <Xpetra_CrsMatrixWrap.hpp>
60 
61 #include "MueLu_PerfUtils_decl.hpp"
62 
63 //#include "MueLu_Utilities.hpp"
64 
65 namespace MueLu {
66 
67  template<class Type>
68  void calculateStats(Type& minVal, Type& maxVal, double& avgVal, double& devVal, int& minProc, int& maxProc, const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v) {
69 
70  Type sumVal, sum2Val, v2 = v*v;
71  using MT = typename Teuchos::ScalarTraits<Type>::magnitudeType;
72  double zero = Teuchos::ScalarTraits<double>::zero();
73 
74  MueLu_sumAll(comm, v, sumVal);
75  MueLu_sumAll(comm, v2, sum2Val);
76  MueLu_minAll(comm, v, minVal);
77  MueLu_maxAll(comm, v, maxVal);
78 
79  int w;
80  w = (minVal == v) ? comm->getRank() : -1;
81  MueLu_maxAll(comm, w, maxProc);
82  w = (maxVal == v) ? comm->getRank() : -1;
83  MueLu_maxAll(comm, w, minProc);
84 
85  avgVal = (numActiveProcs > 0 ? (as<double>(Teuchos::ScalarTraits<Type>::real(sumVal)) / numActiveProcs) : zero);
86  MT avgVal_MT = Teuchos::as<MT>(avgVal);
87  devVal = (numActiveProcs > 1 ? sqrt((as<double>(Teuchos::ScalarTraits<Type>::real(sum2Val - sumVal*avgVal_MT)))/(numActiveProcs-1)) : zero);
88  }
89 
90  template<class Type>
91  std::string stringStats(const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v, RCP<ParameterList> paramList = Teuchos::null) {
92  Type minVal, maxVal;
93  double avgVal, devVal;
94  int minProc, maxProc;
95  calculateStats<Type>(minVal, maxVal, avgVal, devVal, minProc, maxProc, comm, numActiveProcs, v);
96 
97  const double zero = Teuchos::ScalarTraits<double>::zero();
98  const double one = Teuchos::ScalarTraits<double>::one();
99  std::ostringstream buf;
100  buf << std::fixed;
101  if ((avgVal != zero) && (paramList.is_null() || !paramList->isParameter("print abs") || paramList->get<bool>("print abs") == false)) {
102  double relDev = (devVal/avgVal)*100;
103  double relMin = (as<double>(Teuchos::ScalarTraits<Type>::real(minVal))/avgVal-one)*100;
104  double relMax = (as<double>(Teuchos::ScalarTraits<Type>::real(maxVal))/avgVal-one)*100;
105  buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
106  << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
107  << "min = " << std::fixed << std::setw(7) << std::setprecision(1) << std::setw(7) << relMin << "%"
108  << " (" << std::scientific << std::setw(10) << std::setprecision(2) << minVal << " on " << std::fixed << std::setw(4) << minProc << "), "
109  << "max = " << std::fixed << std::setw(7) << std::setprecision(1) << relMax << "%"
110  << " (" << std::scientific << std::setw(10) << std::setprecision(2) << maxVal << " on " << std::fixed << std::setw(4) << maxProc << ")";
111  } else {
112  double relDev = (avgVal != zero ? (devVal/avgVal)*100 : zero);
113  buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
114  << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
115  << "min = " << std::scientific << std::setw(10) << std::setprecision(2) << minVal
116  << " (on " << std::fixed << std::setw(4) << minProc << "), "
117  << "max = " << std::scientific << std::setw(10) << std::setprecision(2) << maxVal
118  << " (on " << std::fixed << std::setw(4) << maxProc << ")";
119  }
120  return buf.str();
121  }
122 
123  template<class Map>
124  bool cmp_less(typename Map::value_type& v1, typename Map::value_type& v2) {
125  return v1.second < v2.second;
126  }
127 
128  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
129  std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::PrintMatrixInfo(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> params) {
130  if (!CheckMatrix(A))
131  return "";
132 
133  typedef Xpetra::global_size_t global_size_t;
134 
135  std::ostringstream ss;
136 
137  ss << msgTag << " size = " << A.getGlobalNumRows() << " x " << A.getGlobalNumCols();
138  if(A.haveGlobalConstants())
139  ss << ", nnz = " << A.getGlobalNumEntries();
140  ss << std::endl;
141 
142  if (params.is_null())
143  return ss.str();
144 
145  bool printLoadBalanceInfo = false, printCommInfo = false, printEntryStats = false;
146  if (params->isParameter("printLoadBalancingInfo") && params->get<bool>("printLoadBalancingInfo"))
147  printLoadBalanceInfo = true;
148  if (params->isParameter("printCommInfo") && params->get<bool>("printCommInfo"))
149  printCommInfo = true;
150  if (params->isParameter("printEntryStats") && params->get<bool>("printEntryStats"))
151  printEntryStats = true;
152 
153  if (!printLoadBalanceInfo && !printCommInfo && !printEntryStats)
154  return ss.str();
155 
156  RCP<const Import> importer = A.getCrsGraph()->getImporter();
157  RCP<const Export> exporter = A.getCrsGraph()->getExporter();
158 
159  size_t numMyNnz = A.getLocalNumEntries(), numMyRows = A.getLocalNumRows();
160 
161  // Create communicator only for active processes
162  RCP<const Teuchos::Comm<int> > origComm = A.getRowMap()->getComm();
163  bool activeProc = true;
164  int numProc = origComm->getSize();
165  int numActiveProcs = 0;
166 #ifdef HAVE_MPI
167  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
168  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
169 
170  std::vector<size_t> numRowsPerProc(numProc);
171  Teuchos::gatherAll(*origComm, 1, &numMyRows, numProc, &numRowsPerProc[0]);
172 
173  int root = 0;
174  bool rootFlag = true;
175  for (int i = 0; i < numProc; i++) {
176  if (numRowsPerProc[i]) {
177  ++numActiveProcs;
178  if(rootFlag) {
179  root = i;
180  rootFlag = false;
181  }
182  }
183  }
184 
185  if(numMyRows == 0) {activeProc = false; numMyNnz = 0;} // Reset numMyNnz to avoid adding it up in reduceAll
186 #else
187  if(numMyRows == 0) {
188  //FIXME JJH 10-May-2017 Is there any case in serial where numMyRows would be zero?
189  // Reset numMyNnz to avoid adding it up in reduceAll
190  numActiveProcs = 0;
191  activeProc = false;
192  numMyNnz = 0;
193  } else {
194  numActiveProcs = 1;
195  }
196 #endif
197 
198  std::string outstr;
199  ParameterList absList;
200  absList.set("print abs", true);
201 
202  RCP<const Matrix> rcpA = rcpFromRef(A);
203  RCP<const CrsMatrixWrap> crsWrapA = rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> >(rcpA);
204  RCP<const CrsMatrix> crsA;
205  if (!crsWrapA.is_null())
206  crsA = crsWrapA->getCrsMatrix();
207  if (printEntryStats && !crsA.is_null()) {
208  typedef Teuchos::ScalarTraits<Scalar> STS;
209  typedef typename STS::magnitudeType magnitudeType;
210  typedef Teuchos::ScalarTraits<magnitudeType> MTS;
211  ArrayRCP<const size_t> rowptr_RCP;
212  ArrayRCP<const LocalOrdinal> colind_RCP;
213  ArrayRCP<const Scalar> vals_RCP;
214  ArrayRCP<size_t> offsets_RCP;
215  ArrayView<const size_t> rowptr;
216  ArrayView<const Scalar> vals;
217  ArrayView<size_t> offsets;
218 
219  crsA->getAllValues(rowptr_RCP, colind_RCP, vals_RCP);
220  crsA->getLocalDiagOffsets(offsets_RCP);
221  rowptr = rowptr_RCP();
222  vals = vals_RCP();
223  offsets = offsets_RCP();
224 
225  Scalar val, minVal, maxVal;
226  magnitudeType absVal, minAbsVal, maxAbsVal;
227  {
228  minVal = STS::rmax();
229  maxVal = STS::rmin();
230  minAbsVal = MTS::rmax();
231  maxAbsVal = MTS::zero();
232 
233  for (int i = 0; i < offsets.size(); i++) {
234  val = vals[rowptr[i]+offsets[i]];
235  if (STS::real(val) < STS::real(minVal))
236  minVal = val;
237  if (STS::real(val) > STS::real(maxVal))
238  maxVal = val;
239  absVal = STS::magnitude(val);
240  minAbsVal = std::min(minAbsVal, absVal);
241  maxAbsVal = std::max(maxAbsVal, absVal);
242  }
243 
244  ss << msgTag << " diag min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
245  ss << msgTag << " diag max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
246  ss << msgTag << " abs(diag) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
247  ss << msgTag << " abs(diag) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
248  }
249 
250  {
251  minVal = STS::rmax();
252  maxVal = STS::rmin();
253  minAbsVal = MTS::rmax();
254  maxAbsVal = MTS::zero();
255 
256  for (int i = 0; i < vals.size(); i++) {
257  val = vals[i];
258  if (STS::real(val) < STS::real(minVal))
259  minVal = val;
260  if (STS::real(val) > STS::real(maxVal))
261  maxVal = val;
262  absVal = STS::magnitude(val);
263  minAbsVal = std::min(minAbsVal, absVal);
264  maxAbsVal = std::max(maxAbsVal, absVal);
265  }
266 
267  ss << msgTag << " entry min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
268  ss << msgTag << " entry max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
269  ss << msgTag << " abs(entry) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
270  ss << msgTag << " abs(entry) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
271  }
272  }
273 
274 
275  if (printLoadBalanceInfo) {
276  ss << msgTag << " Load balancing info" << std::endl;
277  ss << msgTag << " # active processes: " << numActiveProcs << "/" << numProc << std::endl;
278  ss << msgTag << " # rows per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyRows) << std::endl;
279  ss << msgTag << " # nnz per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyNnz) << std::endl;
280  }
281 
282  if (printCommInfo && numActiveProcs != 1) {
283  typedef std::map<int,size_t> map_type;
284  map_type neighMap;
285  if (!importer.is_null()) {
286  ArrayView<const int> exportPIDs = importer->getExportPIDs();
287  if (exportPIDs.size())
288  for (int i = 0; i < exportPIDs.size(); i++)
289  neighMap[exportPIDs[i]]++;
290  }
291 
292  // Communication volume
293  size_t numExportSend = 0;
294  size_t numImportSend = 0;
295  size_t numMsgs = 0;
296  size_t minMsg = 0;
297  size_t maxMsg = 0;
298 
299  if(activeProc) {
300  numExportSend = (!exporter.is_null() ? exporter->getNumExportIDs() : 0);
301  numImportSend = (!importer.is_null() ? importer->getNumExportIDs() : 0);
302  numMsgs = neighMap.size();
303  map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
304  minMsg = (it != neighMap.end() ? it->second : 0);
305  it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
306  maxMsg = (it != neighMap.end() ? it->second : 0);
307  }
308 
309  ss << msgTag << " Communication info" << std::endl;
310  ss << msgTag << " # num export send : " << stringStats<global_size_t>(origComm, numActiveProcs, numExportSend) << std::endl;
311  ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
312  ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
313  ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
314  ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
315  }
316 
317  outstr = ss.str();
318 
319 #ifdef HAVE_MPI
320  int strLength = outstr.size();
321  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
322  if (origComm->getRank() != root)
323  outstr.resize(strLength);
324  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
325 #endif
326 
327  return outstr;
328  }
329 
330  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
331  std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::PrintImporterInfo(RCP<const Import> importer, const std::string& msgTag) {
332 
333  typedef Xpetra::global_size_t global_size_t;
334 
335  std::ostringstream ss;
336 
337  // Create communicator only for active processes
338  RCP<const Teuchos::Comm<int> > origComm = importer->getSourceMap()->getComm();
339  bool activeProc = true;
340  int numActiveProcs = origComm->getSize();
341 #ifdef HAVE_MPI
342  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
343  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
344  int root = 0;
345 #endif
346 
347  std::string outstr;
348  ParameterList absList;
349  absList.set("print abs", true);
350 
351  typedef std::map<int,size_t> map_type;
352  map_type neighMap;
353  ArrayView<const int> exportPIDs = importer->getExportPIDs();
354  if (exportPIDs.size())
355  for (int i = 0; i < exportPIDs.size(); i++)
356  neighMap[exportPIDs[i]]++;
357 
358  // Communication volume
359  size_t numImportSend = 0;
360  size_t numMsgs = 0;
361  size_t minMsg = 0;
362  size_t maxMsg = 0;
363 
364  if(activeProc) {
365  numImportSend = importer->getNumExportIDs();
366  numMsgs = neighMap.size();
367  map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
368  minMsg = (it != neighMap.end() ? it->second : 0);
369  it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
370  maxMsg = (it != neighMap.end() ? it->second : 0);
371  }
372 
373  ss << msgTag << " Communication info" << std::endl;
374  ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
375  ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
376  ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
377  ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
378 
379 
380  outstr = ss.str();
381 
382 #ifdef HAVE_MPI
383  int strLength = outstr.size();
384  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
385  if (origComm->getRank() != root)
386  outstr.resize(strLength);
387  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
388 #endif
389 
390  return outstr;
391  }
392 
393  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
394  std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CommPattern(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> /* params */) {
395  if (!CheckMatrix(A))
396  return "";
397 
398  std::ostringstream out;
399 
400  RCP<const Teuchos::Comm<int> > comm = A.getRowMap()->getComm();
401  int myRank = comm->getRank();
402 
403  out << msgTag << " " << myRank << ":";
404 
405  RCP<const Import> importer = (A.getCrsGraph() != Teuchos::null ? A.getCrsGraph()->getImporter() : Teuchos::null);
406  if (importer.is_null()) {
407  out << std::endl;
408  return out.str();
409  }
410 
411  ArrayView<const int> exportPIDs = importer->getExportPIDs();
412 
413  if (exportPIDs.size()) {
414  // NodeTE: exportPIDs is sorted but not unique ( 1 1 1 2 2 3 4 4 4 )
415  int neigh = exportPIDs[0];
416  GO weight = 1;
417  for (int i = 1; i < exportPIDs.size(); i++) {
418  if (exportPIDs[i] != exportPIDs[i-1]) {
419  out << " " << neigh << "(" << weight << ")";
420 
421  neigh = exportPIDs[i];
422  weight = 1;
423 
424  } else {
425  weight += 1;
426  }
427  }
428  out << " " << neigh << "(" << weight << ")" << std::endl;
429  }
430 
431  return out.str();
432  }
433 
434  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
436  // We can only print statistics for matrices that have a crs graph. A
437  // potential issue is regarding Xpetra::TpetraBlockCrsMatrix which has no
438  // CrsGraph. It is held as a private data member by Xpetra::CrsMatrix,
439  // which itself is an Xpetra::Matrix. So we check directly whether the
440  // request for the graph throws.
441  bool hasCrsGraph = true;
442  try {
443  A.getCrsGraph();
444 
445  } catch (...) {
446  hasCrsGraph = false;
447  }
448 
449  return hasCrsGraph;
450  }
451 
452 } //namespace MueLu
453 
454 #endif // MUELU_PERFUTILS_DEF_HPP
static bool CheckMatrix(const Matrix &A)
std::string stringStats(const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v, RCP< ParameterList > paramList=Teuchos::null)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
Namespace for MueLu classes and methods.
#define MueLu_minAll(rcpComm, in, out)
static std::string CommPattern(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static std::string PrintImporterInfo(RCP< const Import > importer, const std::string &msgTag)
MueLu::DefaultScalar Scalar
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void calculateStats(Type &minVal, Type &maxVal, double &avgVal, double &devVal, int &minProc, int &maxProc, const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v)
bool cmp_less(typename Map::value_type &v1, typename Map::value_type &v2)