Tpetra parallel linear algebra  Version of the Day
Tpetra_BlockCrsMatrix_Helpers_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
41 #define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
42 
44 
45 #include "Tpetra_BlockCrsMatrix.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_HashTable.hpp"
48 #include "Tpetra_Import.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Tpetra_MultiVector.hpp"
51 #include "Teuchos_ParameterList.hpp"
52 #include "Teuchos_ScalarTraits.hpp"
53 #include <ctime>
54 #include <fstream>
55 
56 namespace Tpetra {
57 
58  template<class Scalar, class LO, class GO, class Node>
59  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName) {
60  Teuchos::ParameterList pl;
61  std::ofstream out;
62  out.open(fileName.c_str());
63  blockCrsMatrixWriter(A, out, pl);
64  }
65 
66  template<class Scalar, class LO, class GO, class Node>
67  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName, Teuchos::ParameterList const &params) {
68  std::ofstream out;
69  out.open(fileName.c_str());
70  blockCrsMatrixWriter(A, out, params);
71  }
72 
73  template<class Scalar, class LO, class GO, class Node>
74  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os) {
75  Teuchos::ParameterList pl;
76  blockCrsMatrixWriter(A, os, pl);
77  }
78 
79  template<class Scalar, class LO, class GO, class Node>
80  void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
81 
82  using Teuchos::RCP;
83  using Teuchos::rcp;
84 
85  typedef Teuchos::OrdinalTraits<GO> TOT;
86  typedef BlockCrsMatrix<Scalar, LO, GO, Node> block_crs_matrix_type;
87  typedef Tpetra::Import<LO, GO, Node> import_type;
88  typedef Tpetra::Map<LO, GO, Node> map_type;
90  typedef Tpetra::CrsGraph<LO, GO, Node> crs_graph_type;
91 
92  RCP<const map_type> const rowMap = A.getRowMap(); //"mesh" map
93  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
94  const int myRank = comm->getRank();
95  const size_t numProcs = comm->getSize();
96 
97  // If true, force use of the import strip-mining infrastructure. This is useful for debugging on one process.
98  bool alwaysUseParallelAlgorithm = false;
99  if (params.isParameter("always use parallel algorithm"))
100  alwaysUseParallelAlgorithm = params.get<bool>("always use parallel algorithm");
101  bool printMatrixMarketHeader = true;
102  if (params.isParameter("print MatrixMarket header"))
103  printMatrixMarketHeader = params.get<bool>("print MatrixMarket header");
104 
105  if (printMatrixMarketHeader && myRank==0) {
106  std::time_t now = std::time(NULL);
107 
108  const std::string dataTypeStr =
109  Teuchos::ScalarTraits<Scalar>::isComplex ? "complex" : "real";
110 
111  // Explanation of first line of file:
112  // - "%%MatrixMarket" is the tag for Matrix Market format.
113  // - "matrix" is what we're printing.
114  // - "coordinate" means sparse (triplet format), rather than dense.
115  // - "real" / "complex" is the type (in an output format sense,
116  // not in a C++ sense) of each value of the matrix. (The
117  // other two possibilities are "integer" (not really necessary
118  // here) and "pattern" (no values, just graph).)
119  os << "%%MatrixMarket matrix coordinate " << dataTypeStr << " general" << std::endl;
120  os << "% time stamp: " << ctime(&now);
121  os << "% written from " << numProcs << " processes" << std::endl;
122  os << "% point representation of Tpetra::BlockCrsMatrix" << std::endl;
123  size_t numRows = A.getGlobalNumRows();
124  size_t numCols = A.getGlobalNumCols();
125  os << "% " << numRows << " block rows, " << numCols << " block columns" << std::endl;
126  const LO blockSize = A.getBlockSize();
127  os << "% block size " << blockSize << std::endl;
128  os << numRows*blockSize << " " << numCols*blockSize << " " << A.getGlobalNumEntries()*blockSize*blockSize << std::endl;
129  }
130 
131  if (numProcs==1 && !alwaysUseParallelAlgorithm) {
132  writeMatrixStrip(A,os,params);
133  } else {
134  size_t numRows = rowMap->getNodeNumElements();
135 
136  //Create source map
137  RCP<const map_type> allMeshGidsMap = rcp(new map_type(TOT::invalid(), numRows, A.getIndexBase(), comm));
138  //Create and populate vector of mesh GIDs corresponding to this pid's rows.
139  //This vector will be imported one pid's worth of information at a time to pid 0.
140  mv_type allMeshGids(allMeshGidsMap,1);
141  Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
142 
143  for (size_t i=0; i<numRows; i++)
144  allMeshGidsData[i] = rowMap->getGlobalElement(i);
145  allMeshGidsData = Teuchos::null;
146 
147  // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A.
148  size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
149  size_t remainder = allMeshGids.getGlobalLength() % numProcs;
150  size_t curStart = 0;
151  size_t curStripSize = 0;
152  Teuchos::Array<GO> importMeshGidList;
153  for (size_t i=0; i<numProcs; i++) {
154  if (myRank==0) { // Only PE 0 does this part
155  curStripSize = stripSize;
156  if (i<remainder) curStripSize++; // handle leftovers
157  importMeshGidList.resize(curStripSize); // Set size of vector to max needed
158  for (size_t j=0; j<curStripSize; j++) importMeshGidList[j] = j + curStart + A.getIndexBase();
159  curStart += curStripSize;
160  }
161  // The following import map should be non-trivial only on PE 0.
162  TEUCHOS_TEST_FOR_EXCEPTION(myRank>0 && curStripSize!=0,
163  std::runtime_error, "Tpetra::blockCrsMatrixWriter: (pid "
164  << myRank << ") map size should be zero, but is " << curStripSize);
165  RCP<map_type> importMeshGidMap = rcp(new map_type(TOT::invalid(), importMeshGidList(), A.getIndexBase(), comm));
166  import_type gidImporter(allMeshGidsMap, importMeshGidMap);
167  mv_type importMeshGids(importMeshGidMap, 1);
168  importMeshGids.doImport(allMeshGids, gidImporter, INSERT);
169 
170  // importMeshGids now has a list of GIDs for the current strip of matrix rows.
171  // Use these values to build another importer that will get rows of the matrix.
172 
173  // The following import map will be non-trivial only on PE 0.
174  Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
175  Teuchos::Array<GO> importMeshGidsGO;
176  importMeshGidsGO.reserve(importMeshGidsData.size());
177  for (typename Teuchos::ArrayRCP<const GO>::size_type j=0; j<importMeshGidsData.size(); ++j)
178  importMeshGidsGO.push_back(importMeshGidsData[j]);
179  RCP<const map_type> importMap = rcp(new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm) );
180 
181  import_type importer(rowMap,importMap );
182  size_t numEntriesPerRow = A.getCrsGraph().getGlobalMaxNumRowEntries();
183  RCP<crs_graph_type> graph = createCrsGraph(importMap,numEntriesPerRow);
184  RCP<const map_type> domainMap = A.getCrsGraph().getDomainMap();
185  graph->doImport(A.getCrsGraph(), importer, INSERT);
186  graph->fillComplete(domainMap, importMap);
187 
188  block_crs_matrix_type importA(*graph, A.getBlockSize());
189  importA.doImport(A, importer, INSERT);
190 
191  // Finally we are ready to write this strip of the matrix
192  writeMatrixStrip(importA, os, params);
193  }
194  }
195  }
196 
197  template<class Scalar, class LO, class GO, class Node>
198  void writeMatrixStrip(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
199  using Teuchos::RCP;
200  using map_type = Tpetra::Map<LO, GO, Node>;
201  using bcrs_type = BlockCrsMatrix<Scalar,LO,GO,Node>;
202  using bcrs_local_inds_host_view_type = typename bcrs_type::local_inds_host_view_type;
203  using bcrs_values_host_view_type = typename bcrs_type::values_host_view_type;
204  using impl_scalar_type = typename bcrs_type::impl_scalar_type;
205 
206  size_t numRows = A.getGlobalNumRows();
207  RCP<const map_type> rowMap = A.getRowMap();
208  RCP<const map_type> colMap = A.getColMap();
209  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
210  const int myRank = comm->getRank();
211 
212  const size_t meshRowOffset = rowMap->getIndexBase();
213  const size_t meshColOffset = colMap->getIndexBase();
214  TEUCHOS_TEST_FOR_EXCEPTION(meshRowOffset != meshColOffset,
215  std::runtime_error, "Tpetra::writeMatrixStrip: "
216  "mesh row index base != mesh column index base");
217 
218  if (myRank !=0) {
219 
220  TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumRows() != 0,
221  std::runtime_error, "Tpetra::writeMatrixStrip: pid "
222  << myRank << " should have 0 rows but has " << A.getNodeNumRows());
223  TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumCols() != 0,
224  std::runtime_error, "Tpetra::writeMatrixStrip: pid "
225  << myRank << " should have 0 columns but has " << A.getNodeNumCols());
226 
227  } else {
228 
229  TEUCHOS_TEST_FOR_EXCEPTION(numRows != A.getNodeNumRows(),
230  std::runtime_error, "Tpetra::writeMatrixStrip: "
231  "number of rows on pid 0 does not match global number of rows");
232 
233 
234  int err = 0;
235  const LO blockSize = A.getBlockSize();
236  const size_t numLocalRows = A.getNodeNumRows();
237  bool precisionChanged=false;
238  int oldPrecision = 0; // avoid "unused variable" warning
239  if (params.isParameter("precision")) {
240  oldPrecision = os.precision(params.get<int>("precision"));
241  precisionChanged=true;
242  }
243  int pointOffset = 1;
244  if (params.isParameter("zero-based indexing")) {
245  if (params.get<bool>("zero-based indexing") == true)
246  pointOffset = 0;
247  }
248 
249  size_t localRowInd;
250  for (localRowInd = 0; localRowInd < numLocalRows; ++localRowInd) {
251 
252  // Get a view of the current row.
253  bcrs_local_inds_host_view_type localColInds;
254  bcrs_values_host_view_type vals;
255  LO numEntries;
256  A.getLocalRowView (localRowInd, localColInds, vals); numEntries = localColInds.extent(0);
257  GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
258 
259  for (LO k = 0; k < numEntries; ++k) {
260  GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
261  const impl_scalar_type* curBlock = vals.data() + blockSize * blockSize * k;
262  // Blocks are stored in row-major format.
263  for (LO j = 0; j < blockSize; ++j) {
264  GO globalPointRowID = globalMeshRowID * blockSize + j + pointOffset;
265  for (LO i = 0; i < blockSize; ++i) {
266  GO globalPointColID = globalMeshColID * blockSize + i + pointOffset;
267  const impl_scalar_type curVal = curBlock[i + j * blockSize];
268 
269  os << globalPointRowID << " " << globalPointColID << " ";
270  if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
271  // Matrix Market format wants complex values to be
272  // written as space-delimited pairs. See Bug 6469.
273  os << Teuchos::ScalarTraits<impl_scalar_type>::real (curVal) << " "
274  << Teuchos::ScalarTraits<impl_scalar_type>::imag (curVal);
275  }
276  else {
277  os << curVal;
278  }
279  os << std::endl;
280  }
281  }
282  }
283  }
284  if (precisionChanged)
285  os.precision(oldPrecision);
286  TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
287  std::runtime_error, "Tpetra::writeMatrixStrip: "
288  "error getting view of local row " << localRowInd);
289 
290  }
291 
292  }
293 
294  template<class Scalar, class LO, class GO, class Node>
295  Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
296  convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
297  {
298 
299  /*
300  ASSUMPTIONS:
301 
302  1) In point matrix, all entries associated with a little block are present (even if they are zero).
303  2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
304  3) Point column map and block column map are ordered consistently.
305  */
306 
307  using Teuchos::Array;
308  using Teuchos::ArrayView;
309  using Teuchos::RCP;
310 
311  typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
312  typedef Tpetra::Map<LO,GO,Node> map_type;
313  typedef Tpetra::CrsGraph<LO,GO,Node> crs_graph_type;
314  typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
315 
316  const map_type &pointRowMap = *(pointMatrix.getRowMap());
317  RCP<const map_type> meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap);
318 
319  const map_type &pointColMap = *(pointMatrix.getColMap());
320  RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);
321 
322  const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
323  RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);
324 
325  const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
326  RCP<const map_type> meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap);
327 
328  // Use graph ctor that provides column map and upper bound on nonzeros per row.
329  // We can use static profile because the point graph should have at least as many entries per
330  // row as the mesh graph.
331  RCP<crs_graph_type> meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap,
332  pointMatrix.getGlobalMaxNumRowEntries(), Tpetra::StaticProfile));
333  // Fill the graph by walking through the matrix. For each mesh row, we query the collection of point
334  // rows associated with it. The point column ids are converted to mesh column ids and put into an array.
335  // As each point row collection is finished, the mesh column ids are sorted, made unique, and inserted
336  // into the mesh graph.
337  typename crs_matrix_type::local_inds_host_view_type pointColInds;
338  typename crs_matrix_type::values_host_view_type pointVals;
339  Array<GO> meshColGids;
340  meshColGids.reserve(pointMatrix.getGlobalMaxNumRowEntries());
341  //again, I assume that point GIDs associated with a mesh GID are consecutive.
342  //if they are not, this will break!!
343  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
344  for (int j=0; j<blockSize; ++j) {
345  LO rowLid = i*blockSize+j;
346  pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals); //TODO optimization: Since I don't care about values,
347  //TODO I should use the graph instead.
348  for (size_t k=0; k<pointColInds.size(); ++k) {
349  GO meshColInd = pointColMap.getGlobalElement(pointColInds[k]) / blockSize;
350  meshColGids.push_back(meshColInd);
351  }
352  }
353  //List of mesh GIDs probably contains duplicates because we looped over all point rows in the block.
354  //Sort and make unique.
355  std::sort(meshColGids.begin(), meshColGids.end());
356  meshColGids.erase( std::unique(meshColGids.begin(), meshColGids.end()), meshColGids.end() );
357  meshCrsGraph->insertGlobalIndices(meshRowMap->getGlobalElement(i), meshColGids());
358  meshColGids.clear();
359  }
360  meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
361 
362  //create and populate the block matrix
363  RCP<block_crs_matrix_type> blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockSize));
364 
365  //preallocate the maximum number of (dense) block entries needed by any row
366  int maxBlockEntries = blockMatrix->getNodeMaxNumRowEntries();
367  Array<Array<Scalar>> blocks(maxBlockEntries);
368  for (int i=0; i<maxBlockEntries; ++i)
369  blocks[i].reserve(blockSize*blockSize);
370  std::map<int,int> bcol2bentry; //maps block column index to dense block entries
371  std::map<int,int>::iterator iter;
372  //Fill the block matrix. We must do this in local index space.
373  //TODO: Optimization: We assume the blocks are fully populated in the point matrix. This means
374  //TODO: on the first point row in the block row, we know that we're hitting new block col indices.
375  //TODO: on other rows, we know the block col indices have all been seen before
376  //int offset;
377  //if (pointMatrix.getIndexBase()) offset = 0;
378  //else offset = 1;
379  for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
380  int blkCnt=0; //how many unique block entries encountered so far in current block row
381  for (int j=0; j<blockSize; ++j) {
382  LO rowLid = i*blockSize+j;
383  pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
384  for (size_t k=0; k<pointColInds.size(); ++k) {
385  //convert point column to block col
386  LO meshColInd = pointColInds[k] / blockSize;
387  iter = bcol2bentry.find(meshColInd);
388  if (iter == bcol2bentry.end()) {
389  //new block column
390  bcol2bentry[meshColInd] = blkCnt;
391  blocks[blkCnt].push_back(pointVals[k]);
392  blkCnt++;
393  } else {
394  //block column found previously
395  int littleBlock = iter->second;
396  blocks[littleBlock].push_back(pointVals[k]);
397  }
398  }
399  }
400  // TODO This inserts the blocks one block entry at a time. It is probably more efficient to
401  // TODO store all the blocks in a block row contiguously so they can be inserted with a single call.
402  for (iter=bcol2bentry.begin(); iter != bcol2bentry.end(); ++iter) {
403  LO localBlockCol = iter->first;
404  Scalar *vals = (blocks[iter->second]).getRawPtr();
405  blockMatrix->replaceLocalValues(i, &localBlockCol, vals, 1);
406  }
407 
408  //Done with block row. Zero everything out.
409  for (int j=0; j<maxBlockEntries; ++j)
410  blocks[j].clear();
411  blkCnt = 0;
412  bcol2bentry.clear();
413  }
414 
415  return blockMatrix;
416 
417  }
418 
419  template<class LO, class GO, class Node>
420  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
421  createMeshMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& pointMap)
422  {
423  typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
424  typedef Tpetra::Map<LO,GO,Node> map_type;
425 
426  //calculate mesh GIDs
427  Teuchos::ArrayView<const GO> pointGids = pointMap.getNodeElementList();
428  Teuchos::Array<GO> meshGids;
429  GO indexBase = pointMap.getIndexBase();
430 
431  // Use hash table to track whether we've encountered this GID previously. This will happen
432  // when striding through the point DOFs in a block. It should not happen otherwise.
433  // I don't use sort/make unique because I don't want to change the ordering.
434  meshGids.reserve(pointGids.size());
435  Tpetra::Details::HashTable<GO,int> hashTable(pointGids.size());
436  for (int i=0; i<pointGids.size(); ++i) {
437  GO meshGid = (pointGids[i]-indexBase) / blockSize + indexBase;
438  if (hashTable.get(meshGid) == -1) {
439  hashTable.add(meshGid,1); //(key,value)
440  meshGids.push_back(meshGid);
441  }
442  }
443 
444  Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) );
445  return meshMap;
446 
447  }
448 
449 
450  template<class LO, class GO, class Node>
451  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
452  createPointMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& blockMap)
453  {
454  typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
455  typedef Tpetra::Map<LO,GO,Node> map_type;
456 
457  //calculate mesh GIDs
458  Teuchos::ArrayView<const GO> blockGids = blockMap.getNodeElementList();
459  Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
460  GO indexBase = blockMap.getIndexBase();
461 
462  for(LO i=0, ct=0; i<(LO)blockGids.size(); i++) {
463  GO base = (blockGids[i] - indexBase)* blockSize + indexBase;
464  for(LO j=0; j<blockSize; j++, ct++)
465  pointGids[i*blockSize+j] = base+j;
466  }
467 
468  Teuchos::RCP<const map_type> pointMap = Teuchos::rcp( new map_type(TOT::invalid(), pointGids(), 0, blockMap.getComm()) );
469  return pointMap;
470 
471  }
472 
473 
474  template<class Scalar, class LO, class GO, class Node>
475  Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node> >
477 
478  using Teuchos::Array;
479  using Teuchos::ArrayView;
480  using Teuchos::RCP;
481 
482  typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
483  typedef Tpetra::Map<LO,GO,Node> map_type;
484  typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
485 
486  using crs_graph_type = typename block_crs_matrix_type::crs_graph_type;
487  using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
488  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
489  using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
490  using entries_type = typename local_graph_device_type::entries_type::non_const_type;
491  using values_type = typename local_matrix_device_type::values_type::non_const_type;
492 
493  using row_map_type_const = typename local_graph_device_type::row_map_type;
494  using entries_type_const = typename local_graph_device_type::entries_type;
495 
496  using little_block_type = typename block_crs_matrix_type::const_little_block_type;
497  using offset_type = typename row_map_type::non_const_value_type;
498  using column_type = typename entries_type::non_const_value_type;
499 
500  using execution_space = typename Node::execution_space;
501  using range_type = Kokkos::RangePolicy<execution_space, LO>;
502 
503 
504  LO blocksize = blockMatrix.getBlockSize();
505  const offset_type bs2 = blocksize * blocksize;
506  size_t block_nnz = blockMatrix.getNodeNumEntries();
507  size_t point_nnz = block_nnz * bs2;
508 
509  // We can get these from the blockMatrix directly
510  RCP<const map_type> pointDomainMap = blockMatrix.getDomainMap();
511  RCP<const map_type> pointRangeMap = blockMatrix.getRangeMap();
512 
513  // We need to generate the row/col Map ourselves.
514  RCP<const map_type> blockRowMap = blockMatrix.getRowMap();
515  RCP<const map_type> pointRowMap = createPointMap<LO,GO,Node>(blocksize, *blockRowMap);
516 
517  RCP<const map_type> blockColMap = blockMatrix.getColMap();
518  RCP<const map_type> pointColMap = createPointMap<LO,GO,Node>(blocksize, *blockColMap);
519 
520  // Get the last few things
521 
522  const crs_graph_type & blockGraph = blockMatrix.getCrsGraph();
523  LO point_rows = (LO) pointRowMap->getNodeNumElements();
524  LO block_rows = (LO) blockRowMap->getNodeNumElements();
525  auto blockValues = blockMatrix.getValuesDevice();
526  auto blockLocalGraph = blockGraph.getLocalGraphDevice();
527  row_map_type_const blockRowptr = blockLocalGraph.row_map;
528  entries_type_const blockColind = blockLocalGraph.entries;
529 
530  // Generate the point matrix rowptr / colind / values
531  row_map_type rowptr("row_map", point_rows+1);
532  entries_type colind("entries", point_nnz);
533  values_type values("values", point_nnz);
534 
535  // Pre-fill the rowptr
536  Kokkos::parallel_for("fillRowPtr",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
537  offset_type base = blockRowptr[i];
538  offset_type diff = blockRowptr[i+1] - base;
539  for(LO j=0; j<blocksize; j++) {
540  rowptr[i*blocksize +j] = base*bs2 + j*diff*blocksize;
541  }
542 
543  // Fill the last guy, if we're on the final entry
544  if(i==block_rows-1) {
545  rowptr[block_rows*blocksize] = blockRowptr[i+1] * bs2;
546  }
547  });
548 
549 
550  Kokkos::parallel_for("copyEntriesAndValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
551  const offset_type blkBeg = blockRowptr[i];
552  const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
553 
554  // For each block in the row...
555  for (offset_type block=0; block < numBlocks; block++) {
556  column_type point_col_base = blockColind[blkBeg + block] * blocksize;
557  little_block_type my_block(blockValues.data () + (blkBeg+block) * bs2, blocksize, blocksize);
558 
559  // For each entry in the block...
560  for(LO little_row=0; little_row<blocksize; little_row++) {
561  offset_type point_row_offset = rowptr[i*blocksize + little_row];
562  for(LO little_col=0; little_col<blocksize; little_col++) {
563  colind[point_row_offset + block*blocksize + little_col] = point_col_base + little_col;
564  values[point_row_offset + block*blocksize + little_col] = my_block(little_row,little_col);
565  }
566  }
567 
568  }
569  });
570 
571  // Generate the matrix
572  RCP<crs_matrix_type> pointCrsMatrix = rcp(new crs_matrix_type(pointRowMap, pointColMap, 0));
573  pointCrsMatrix->setAllValues(rowptr,colind,values);
574 
575  // FUTURE OPTIMIZATION: Directly compute import/export, rather than letting ESFC do it
576  pointCrsMatrix->expertStaticFillComplete(pointDomainMap,pointRangeMap);
577 
578  return pointCrsMatrix;
579  }
580 
581 
582 } // namespace Tpetra
583 
584 //
585 // Explicit instantiation macro for blockCrsMatrixWriter (various
586 // overloads), writeMatrixStrip, and convertToBlockCrsMatrix.
587 //
588 // Must be expanded from within the Tpetra namespace!
589 //
590 #define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S,LO,GO,NODE) \
591  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName); \
592  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName, Teuchos::ParameterList const &params); \
593  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
594  template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
595  template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
596  template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
597  template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix);
598 
599 //
600 // Explicit instantiation macro for createMeshMap / createPointMap
601 //
602 #define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \
603  template Teuchos::RCP<const Map<LO,GO,NODE> > createMeshMap (const LO& blockSize, const Map<LO,GO,NODE>& pointMap); \
604  template Teuchos::RCP<const Map<LO,GO,NODE> > createPointMap (const LO& blockSize, const Map<LO,GO,NODE>& blockMap);
605 
606 #endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
virtual size_t getNodeNumCols() const override
The number of columns needed to apply the forward operator on this node.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
One or more distributed dense vectors.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph&#39;s communicator...
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix&#39;s communicator...
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const &params)
Helper function called by blockCrsMatrixWriter.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap)
Helper function to generate a mesh map from a point map. Important! It&#39;s assumed that point GIDs asso...
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
size_t getNodeNumRows() const override
get the local number of block rows
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
Insert new values that don&#39;t currently exist.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getIndexBase() const
The index base for this Map.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual size_t getNodeNumEntries() const override
The local number of stored (structurally nonzero) entries.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.