MueLu  Version of the Day
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_AlgebraicPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 20, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <queue>
12 
13 #include <Teuchos_ScalarTraits.hpp>
14 
15 #include <Xpetra_MultiVector.hpp>
16 #include <Xpetra_Matrix.hpp>
17 #include <Xpetra_CrsGraph.hpp>
18 #include <Xpetra_Vector.hpp>
19 #include <Xpetra_MultiVectorFactory.hpp>
20 #include <Xpetra_VectorFactory.hpp>
21 #include <Xpetra_CrsMatrixWrap.hpp>
22 #include <Xpetra_Export.hpp>
23 #include <Xpetra_ExportFactory.hpp>
24 #include <Xpetra_Import.hpp>
25 #include <Xpetra_ImportFactory.hpp>
26 #include <Xpetra_MatrixMatrix.hpp>
27 
28 #include "MueLu_Utilities.hpp"
30 
31 namespace MueLu {
32 
33  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
35  BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map>& permRowMap,
36  Level & currentLevel, const FactoryBase* genFactory) const {
37 
38  const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
39  const Scalar SC_ONE = Teuchos::ScalarTraits<Scalar>::one();
40 
41  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
42  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
43  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
44 
45  const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
46  int numProcs = comm->getSize();
47  int myRank = comm->getRank();
48 
49  size_t nDofsPerNode = 1;
50  if (A->IsView("stridedMaps")) {
51  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
52  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
53  }
54 
55  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
56  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
57  std::vector<MT> Weights;
58 
59  // loop over all local rows in matrix A and keep diagonal entries if corresponding
60  // matrix rows are not contained in permRowMap
61  for (size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
62  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
63 
64  if(permRowMap->isNodeGlobalElement(grow) == true) continue;
65 
66  size_t nnz = A->getNumEntriesInLocalRow(row);
67 
68  // extract local row information from matrix
69  Teuchos::ArrayView<const LocalOrdinal> indices;
70  Teuchos::ArrayView<const Scalar> vals;
71  A->getLocalRowView(row, indices, vals);
72 
73  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
74  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
75 
76  // find column entry with max absolute value
77  GlobalOrdinal gMaxValIdx = 0;
78  MT norm1 = MT_ZERO;
79  MT maxVal = MT_ZERO;
80  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
81  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
82  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
83  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
84  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
85  }
86  }
87 
88  if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
89  keepDiagonalEntries.push_back(std::make_pair(grow,grow));
90  }
91 
93  // handle rows that are marked to be relevant for permutations
94  for (size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
95  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
96  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
97  size_t nnz = A->getNumEntriesInLocalRow(lArow);
98 
99  // extract local row information from matrix
100  Teuchos::ArrayView<const LocalOrdinal> indices;
101  Teuchos::ArrayView<const Scalar> vals;
102  A->getLocalRowView(lArow, indices, vals);
103 
104  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
105  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
106 
107  // find column entry with max absolute value
108  GlobalOrdinal gMaxValIdx = 0;
109  MT norm1 = MT_ZERO;
110  MT maxVal = MT_ZERO;
111  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
112  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
113  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
114  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
115  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
116  }
117  }
118 
119  if(maxVal > Teuchos::ScalarTraits<MT>::zero ()) { // keep only max Entries \neq 0.0
120  permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
121  Weights.push_back(maxVal/(norm1*Teuchos::as<MT>(nnz)));
122  } else {
123  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
124  }
125 
126  }
127 
128  // sort Weights in descending order
129  std::vector<int> permutation;
130  sortingPermutation(Weights,permutation);
131 
132  // create new vector with exactly one possible entry for each column
133 
134  // each processor which requests the global column id gcid adds 1 to gColVec
135  // gColVec will be summed up over all processors and communicated to gDomVec
136  // which is based on the non-overlapping domain map of A.
137 
138  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
139  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
140  gColVec->putScalar(SC_ZERO);
141  gDomVec->putScalar(SC_ZERO);
142 
143  // put in all keep diagonal entries
144  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
145  gColVec->sumIntoGlobalValue((*p).second,1.0);
146  }
147 
148  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
149  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs
150  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
151 
152  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
153  std::map<GlobalOrdinal, MT> gColId2Weight;
154 
155  {
156  Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
157  for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
158  // loop over all candidates
159  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
160  GlobalOrdinal grow = pp.first;
161  GlobalOrdinal gcol = pp.second;
162 
163  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
164  if(Teuchos::ScalarTraits<Scalar>::real (ddata[lcol]) > MT_ZERO){
165  continue; // skip lcol: column already handled by another row
166  }
167 
168  // mark column as already taken
169  ddata[lcol] += SC_ONE;
170 
171  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
172  gColId2Weight[gcol] = Weights[permutation[i]];
173  }
174  }
175 
176  // communicate how often each column index is requested by the different procs
177  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
178  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me
179 
180  //*****************************************************************************************
181  // first communicate ALL global ids of column indices which are requested by more
182  // than one proc to all other procs
183  // detect which global col indices are requested by more than one proc
184  // and store them in the multipleColRequests vector
185  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
186  // requested by another processor. This is possible, since they are stored
187  // in gDomVec which is based on the nonoverlapping domain map. That is, each
188  // global col id is handled by exactly one proc.
189  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
190 
191  for(size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
192  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
193  //
194  // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
195  // real part, because this is a count. (Shouldn't MueLu use
196  // mag_type instead of Scalar here to save space?)
197  //
198  if(Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) > MT_ONE) {
199  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
200  } else if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) == MT_ZERO) {
201  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
202  }
203  }
204 
205  // communicate the global number of column indices which are requested by more than one proc
206  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
207  LocalOrdinal globalMultColRequests = 0;
208 
209  // sum up all entries in multipleColRequests over all processors
210  //
211  // FIXME (lbv 11 Feb 2019) why cast localMultColRequests as LocalOrdinal
212  // when it was properly declared as such just a line above?
213  //
214  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
215 
216  if(globalMultColRequests > 0) {
217  // special handling: two processors request the same global column id.
218  // decide which processor gets it
219 
220  // distribute number of multipleColRequests to all processors
221  // each processor stores how many column ids for exchange are handled by the cur proc
222  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
223  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
224  numMyMultColRequests[myRank] = localMultColRequests;
225  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
226  numMyMultColRequests.data(),
227  numGlobalMultColRequests.data());
228 
229  // communicate multipleColRequests entries to all processors
230  int nMyOffset = 0;
231  for (int i=0; i<myRank-1; i++)
232  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
233 
234  const GlobalOrdinal zero = 0;
235  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
236  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
237 
238  // loop over all local column GIDs that are also requested by other procs
239  for(size_t i = 0; i < multipleColRequests.size(); i++) {
240  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
241  }
242 
243  // template ordinal, package (double)
244  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests),
245  procMultRequestedColIds.data(),
246  global_procMultRequestedColIds.data());
247 
248  // loop over global_procOverlappingWeights and eliminate wrong entries...
249  for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
250  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
251 
252  std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
253  std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
254 
255  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
256  MyWeightForColId[myRank] = gColId2Weight[globColId];
257  } else {
258  MyWeightForColId[myRank] = MT_ZERO;
259  }
260 
261  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
262  MyWeightForColId.data(),
263  GlobalWeightForColId.data());
264 
265  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
266  // note: 2 procs could have the same weight for a column index.
267  // pick the first one.
268  MT winnerValue = MT_ZERO;
269  int winnerProcRank = 0;
270  for (int proc = 0; proc < numProcs; proc++) {
271  if(GlobalWeightForColId[proc] > winnerValue) {
272  winnerValue = GlobalWeightForColId[proc];
273  winnerProcRank = proc;
274  }
275  }
276 
277  // winnerProcRank is the winner for handling globColId.
278  // winnerProcRank is unique (even if two procs have the same weight for a column index)
279 
280  if(myRank != winnerProcRank) {
281  // remove corresponding entry from permutedDiagCandidatesFiltered
282  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
283  while(p != permutedDiagCandidatesFiltered.end() )
284  {
285  if((*p).second == globColId)
286  p = permutedDiagCandidatesFiltered.erase(p);
287  else
288  p++;
289  }
290  }
291 
292  } // end if isNodeGlobalElement
293  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
294  } // end if globalMultColRequests > 0
295 
296  // put together all pairs:
297  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
298  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
299  RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
300  RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
301 
302 #ifdef DEBUG_OUTPUT
303  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
304  // plausibility check
305  gColVec->putScalar(SC_ZERO);
306  gDomVec->putScalar(SC_ZERO);
307  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
308  while(pl != RowColPairs.end() )
309  {
310  //GlobalOrdinal ik = (*pl).first;
311  GlobalOrdinal jk = (*pl).second;
312 
313  gColVec->sumIntoGlobalValue(jk,1.0);
314  pl++;
315  }
316  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
317  for(size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
318  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
319  if(arrDomVec[sz] > 1.0) {
320  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
321  } else if(arrDomVec[sz] == SC_ZERO) {
322  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
323  }
324  }
325  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
326 #endif
327 
329  // assumption: on each processor RowColPairs now contains
330  // a valid set of (row,column) pairs, where the row entries
331  // are a subset of the processor's rows and the column entries
332  // are unique throughout all processors.
333  // Note: the RowColPairs are only defined for a subset of all rows,
334  // so there might be rows without an entry in RowColPairs.
335  // It can be, that some rows seem to be missing in RowColPairs, since
336  // the entry in that row with maximum absolute value has been reserved
337  // by another row already (e.g. as already diagonal dominant row outside
338  // of perRowMap).
339  // In fact, the RowColPairs vector only defines the (row,column) pairs
340  // that will be definitely moved to the diagonal after permutation.
341 
342 #ifdef DEBUG_OUTPUT
343  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
344  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
345  // }
346  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
347  // {
349  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
350  // }
351  // std::cout << "\n";
352 #endif
353 
354  // vectors to store permutation information
355  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
356  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
357  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
358 
359  Pperm->putScalar(SC_ZERO);
360  Qperm->putScalar(SC_ZERO);
361  lQperm->putScalar(SC_ZERO);
362 
363  // setup exporter for Qperm
364  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
365 
366  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
367  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
368  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
369  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
370  RowIdStatus->putScalar(SC_ZERO);
371  ColIdStatus->putScalar(SC_ZERO);
372  lColIdStatus->putScalar(SC_ZERO);
373  ColIdUsed->putScalar(SC_ZERO); // no column ids are used
374 
375 
376  // count wide-range permutations
377  // a wide-range permutation is defined as a permutation of rows/columns which do not
378  // belong to the same node
379  LocalOrdinal lWideRangeRowPermutations = 0;
380  GlobalOrdinal gWideRangeRowPermutations = 0;
381  LocalOrdinal lWideRangeColPermutations = 0;
382  GlobalOrdinal gWideRangeColPermutations = 0;
383 
384  // run 1: mark all "identity" permutations
385  {
386  Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
387  Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
388 
389  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
390  while(p != RowColPairs.end() )
391  {
392  GlobalOrdinal ik = (*p).first;
393  GlobalOrdinal jk = (*p).second;
394 
395  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
396  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
397 
398  if(RowIdStatusArray[lik] == SC_ZERO) {
399  RowIdStatusArray[lik] = SC_ONE; // use this row id
400  lColIdStatusArray[ljk] = SC_ONE; // use this column id
401  Pperm->replaceLocalValue(lik, ik);
402  lQperm->replaceLocalValue(ljk, ik); // use column map
403  ColIdUsed->replaceGlobalValue(ik,SC_ONE); // ik is now used
404  p = RowColPairs.erase(p);
405 
406  // detect wide range permutations
407  if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
408  lWideRangeColPermutations++;
409  }
410  }
411  else
412  p++;
413  }
414 
415 
416  // communicate column map -> domain map
417  Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
418  ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
419 
420  // plausibility check
421  if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
422 
423  // close Pperm
424 
425  // count, how many row permutations are missing on current proc
426  size_t cntFreeRowIdx = 0;
427  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
428  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
429  if(RowIdStatusArray[lik] == SC_ZERO) {
430  cntFreeRowIdx++;
431  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
432  }
433  }
434 
435  // fix Pperm
436  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
437  if(RowIdStatusArray[lik] == SC_ZERO) {
438  RowIdStatusArray[lik] = SC_ONE; // use this row id
439  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
440  // detect wide range permutations
441  if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
442  lWideRangeRowPermutations++;
443  }
444  qFreeGRowIdx.pop();
445  }
446  }
447  }
448 
449  size_t cntUnusedColIdx = 0;
450  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
451  size_t cntFreeColIdx = 0;
452  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
453  {
454  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
455  Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
456 
457  // close Qperm (free permutation entries in Qperm)
458  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
459  if(ColIdStatusArray[ljk] == SC_ZERO) {
460  cntFreeColIdx++;
461  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
462  }
463  }
464 
465  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
466  if(ColIdUsedArray[ljk] == SC_ZERO) {
467  cntUnusedColIdx++;
468  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
469  }
470  }
471 
472 
473  // fix Qperm with local entries
474  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
475  // stop if no (local) unused column idx are left
476  if(cntUnusedColIdx == 0) break;
477 
478  if(ColIdStatusArray[ljk] == SC_ZERO) {
479  ColIdStatusArray[ljk] = SC_ONE; // use this row id
480  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
481  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),SC_ONE); // ljk is now used, too
482  // detect wide range permutations
483  if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
484  lWideRangeColPermutations++;
485  }
486  qUnusedGColIdx.pop();
487  cntUnusedColIdx--;
488  cntFreeColIdx--;
489  }
490  }
491 
492 
493  //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
494  //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
495 
496  // count, how many unused column idx are needed on current processor
497  // to complete Qperm
498  cntFreeColIdx = 0;
499  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
500  if(ColIdStatusArray[ljk] == SC_ZERO) {
501  cntFreeColIdx++;
502  }
503  }
504  }
505 
506  GlobalOrdinal global_cntFreeColIdx = 0;
507  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
508  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
509 #ifdef DEBUG_OUTPUT
510  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
511 #endif
512 
513  // avoid global communication if possible
514  if(global_cntFreeColIdx > 0) {
515 
516  // 1) count how many unused column ids are left
517  GlobalOrdinal global_cntUnusedColIdx = 0;
518  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
519  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
520 #ifdef DEBUG_OUTPUT
521  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
522 #endif
523 
524  // 2) communicate how many unused column ids are available on procs
525  std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
526  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
527  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
528  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
529  local_UnusedColIdxOnProc.data(),
530  global_UnusedColIdxOnProc.data());
531 
532 #ifdef DEBUG_OUTPUT
533  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
534  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
535  std::cout << " " << global_UnusedColIdxOnProc[ljk];
536  }
537  std::cout << std::endl;
538 #endif
539 
540  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
541  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
542  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
543  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
544  for(int proc=0; proc<myRank; proc++) {
545  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
546  }
547  for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
548  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
549  qUnusedGColIdx.pop();
550  }
551  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx),
552  local_UnusedColIdxVector.data(),
553  global_UnusedColIdxVector.data());
554 #ifdef DEBUG_OUTPUT
555  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
556  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
557  std::cout << " " << global_UnusedColIdxVector[ljk];
558  }
559  std::cout << std::endl;
560 #endif
561 
562 
563 
564  // 4) communicate, how many column idx are needed on each processor
565  // to complete Qperm
566  std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
567  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
568  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
569  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
570  local_EmptyColIdxOnProc.data(),
571  global_EmptyColIdxOnProc.data());
572 
573 #ifdef DEBUG_OUTPUT
574  std::cout << "PROC " << myRank << " global num of needed column indices: ";
575  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
576  std::cout << " " << global_EmptyColIdxOnProc[ljk];
577  }
578  std::cout << std::endl;
579 #endif
580 
581  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
582  // that are marked to be used by this processor
583  GlobalOrdinal global_UnusedColStartIdx = 0;
584  for(int proc=0; proc<myRank; proc++) {
585  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
586  }
587 
588 #ifdef DEBUG_OUTPUT
589  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
590  for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
591  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
592  }
593  GetOStream(Statistics0) << std::endl;
594 #endif
595 
596  // 6.) fix Qperm with global entries
597  {
598  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
599  GlobalOrdinal array_iter = 0;
600  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
601 
602  if(ColIdStatusArray[ljk] == SC_ZERO) {
603  ColIdStatusArray[ljk] = SC_ONE; // use this row id
604  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
605  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],SC_ONE);
606  // detect wide range permutations
607  if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
608  lWideRangeColPermutations++;
609  }
610  array_iter++;
611  //cntUnusedColIdx--; // check me
612  }
613  }
614  }
615  } // end if global_cntFreeColIdx > 0
617 
618 
619  // create new empty Matrix
620  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1));
621  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1));
622 
623  {
624  Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
625  Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);
626 
627  for(size_t row=0; row<A->getLocalNumRows(); row++) {
628  // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
629  // convert from complex Scalar to GO, so we have to take the real
630  // part first. I think that's the right thing to do in this case.
631  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
632  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
633  Teuchos::ArrayRCP<Scalar> valout(1,SC_ONE);
634  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
635  permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
636  }
637  }
638 
639  permPTmatrix->fillComplete();
640  permQTmatrix->fillComplete();
641 
642  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
643 
644  for(size_t row=0; row<permPTmatrix->getLocalNumRows(); row++) {
645  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
646  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
647  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
648  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
649  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
650  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
651  }
652 
653  // build permP * A * permQT
654  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
655  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
656 
657  /*
658  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
659  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
660  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
661  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
662  */
663  // build scaling matrix
664  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
665  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
666  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1));
667  {
668  permPApermQt->getLocalDiagCopy(*diagVec);
669  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
670  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
671  for(size_t i = 0; i<diagVec->getMap()->getLocalNumElements(); ++i) {
672  if(diagVecData[i] != SC_ZERO)
673  invDiagVecData[i] = SC_ONE / diagVecData[i];
674  else {
675  invDiagVecData[i] = SC_ONE;
676  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
677  }
678  }
679 
680  for(size_t row=0; row<A->getLocalNumRows(); row++) {
681  Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
682  Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
683  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
684  }
685  }
686  diagScalingOp->fillComplete();
687 
688  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
689  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);
690 
691  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!!
692  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
693  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
694  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);
695 
697  // count zeros on diagonal in P -> number of row permutations
698  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
699  permPmatrix->getLocalDiagCopy(*diagPVec);
700  LocalOrdinal lNumRowPermutations = 0;
701  GlobalOrdinal gNumRowPermutations = 0;
702  {
703  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
704  for(size_t i = 0; i<diagPVec->getMap()->getLocalNumElements(); ++i) {
705  if(diagPVecData[i] == SC_ZERO) {
706  lNumRowPermutations++;
707  }
708  }
709  }
710 
711  // sum up all entries in multipleColRequests over all processors
712  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
713 
715  // count zeros on diagonal in Q^T -> number of column permutations
716  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
717  permQTmatrix->getLocalDiagCopy(*diagQTVec);
718  LocalOrdinal lNumColPermutations = 0;
719  GlobalOrdinal gNumColPermutations = 0;
720  {
721  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
722  for(size_t i = 0; i<diagQTVec->getMap()->getLocalNumElements(); ++i) {
723  if(diagQTVecData[i] == SC_ZERO) {
724  lNumColPermutations++;
725  }
726  }
727  }
728 
729  // sum up all entries in multipleColRequests over all processors
730  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
731 
732  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
733  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
734  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
735  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);
736 
737  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
738  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
739  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
740  }
741 
742 } // namespace MueLu
743 
744 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators