Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53 #define AMESOS2_SUPERLUDIST_DEF_HPP
54 
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_StandardParameterEntryValidators.hpp>
57 #include <Teuchos_DefaultMpiComm.hpp>
58 
61 #include "Amesos2_Util.hpp"
62 
63 
64 namespace Amesos2 {
65 
66 
67  template <class Matrix, class Vector>
68  Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
69  Teuchos::RCP<Vector> X,
70  Teuchos::RCP<const Vector> B)
71  : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
72  , bvals_()
73  , xvals_()
74  , in_grid_(false)
75  , is_contiguous_(true)
76  {
77  using Teuchos::Comm;
78  // It's OK to depend on MpiComm explicitly here, because
79  // SuperLU_DIST requires MPI anyway.
80  using Teuchos::MpiComm;
81  using Teuchos::outArg;
82  using Teuchos::ParameterList;
83  using Teuchos::parameterList;
84  using Teuchos::RCP;
85  using Teuchos::rcp;
86  using Teuchos::rcp_dynamic_cast;
87  using Teuchos::REDUCE_SUM;
88  using Teuchos::reduceAll;
89  typedef global_ordinal_type GO;
90  typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
91 
93  // Set up the SuperLU_DIST processor grid //
95 
96  RCP<const Comm<int> > comm = this->getComm ();
97  const int myRank = comm->getRank ();
98  const int numProcs = comm->getSize ();
99 
100  SLUD::int_t nprow, npcol;
101  get_default_grid_size (numProcs, nprow, npcol);
102 
103  {
104  // FIXME (mfh 16 Dec 2014) getComm() just returns
105  // matrixA_->getComm(), so it's not clear why we need to ask for
106  // the matrix's communicator separately here.
107  RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
108  TEUCHOS_TEST_FOR_EXCEPTION(
109  matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
110  "constructor: The matrix's communicator is null!");
111  RCP<const MpiComm<int> > matMpiComm =
112  rcp_dynamic_cast<const MpiComm<int> > (matComm);
113  // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
114  // SerialComm, we probably could just use MPI_COMM_SELF here.
115  // I'm not sure if SuperLU_DIST is smart enough to handle that
116  // case, though.
117  TEUCHOS_TEST_FOR_EXCEPTION(
118  matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
119  "constructor: The matrix's communicator is not an MpiComm!");
120  TEUCHOS_TEST_FOR_EXCEPTION(
121  matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
122  "Superlustdist constructor: The matrix's communicator claims to be a "
123  "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
124  "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
125  "exist, which likely implies that the Teuchos::MpiComm was constructed "
126  "incorrectly. It means something different than if the MPI_Comm were "
127  "MPI_COMM_NULL.");
128  MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
129  data_.mat_comm = rawMpiComm;
130  // This looks a bit like ScaLAPACK's grid initialization (which
131  // technically takes place in the BLACS, not in ScaLAPACK
132  // proper). See http://netlib.org/scalapack/slug/node34.html.
133  // The main difference is that SuperLU_DIST depends explicitly
134  // on MPI, while the BLACS hides its communication protocol.
135  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
136  }
137 
139  // Set some default parameters. //
140  // //
141  // Must do this after grid has been created in //
142  // case user specifies the nprow and npcol parameters //
144  SLUD::set_default_options_dist(&data_.options);
145 
146  RCP<ParameterList> default_params =
147  parameterList (* (this->getValidParameters ()));
148  this->setParameters (default_params);
149 
150  // Set some internal options
151  data_.options.Fact = SLUD::DOFACT;
152  data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
153  data_.options.SolveInitialized = SLUD::NO;
154  data_.options.RefineInitialized = SLUD::NO;
155  data_.rowequ = false;
156  data_.colequ = false;
157  data_.perm_r.resize(this->globalNumRows_);
158  data_.perm_c.resize(this->globalNumCols_);
159 
161  // Set up a communicator for the parallel column ordering and //
162  // parallel symbolic factorization. //
164  data_.symb_comm = MPI_COMM_NULL;
165 
166  // domains is the next power of 2 less than nprow*npcol. This
167  // value will be used for creating an MPI communicator for the
168  // pre-ordering and symbolic factorization methods.
169  data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
170 
171  const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
172  MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
173 
175  // Set up a row Map that only includes processes that are in the
176  // SuperLU process grid. This will be used for redistributing A.
178 
179  // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
180  // with myProcParticipates as the weight, but that costs an extra
181  // all-reduce.
182 
183  // Set to 1 if I am in the grid, and I get some of the matrix rows.
184  int myProcParticipates = 0;
185  if (myRank < nprow * npcol) {
186  in_grid_ = true;
187  myProcParticipates = 1;
188  }
189 
190  // Compute how many processes in the communicator belong to the
191  // process grid.
192  int numParticipatingProcs = 0;
193  reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
194  outArg (numParticipatingProcs));
195  TEUCHOS_TEST_FOR_EXCEPTION(
196  this->globalNumRows_ != 0 && numParticipatingProcs == 0,
197  std::logic_error, "Amesos2::Superludist constructor: The matrix has "
198  << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
199  "communicator want to participate in its factorization! nprow = "
200  << nprow << " and npcol = " << npcol << ".");
201 
202  // Divide up the rows among the participating processes.
203  size_t myNumRows = 0;
204  {
205  const GO GNR = static_cast<GO> (this->globalNumRows_);
206  const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
207  GNR / static_cast<GO> (numParticipatingProcs);
208  const GO remainder =
209  GNR - quotient * static_cast<GO> (numParticipatingProcs);
210  const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
211  (quotient + static_cast<GO> (1)) : quotient;
212  myNumRows = static_cast<size_t> (lclNumRows);
213  }
214 
215  // TODO: might only need to initialize if parallel symbolic factorization is requested.
216  const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
218  rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
219 
221  // Do some other initialization //
223 
224  data_.A.Store = NULL;
225  function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
226  SLUD::PStatInit(&(data_.stat));
227  // We do not use ScalePermstructInit because we will use our own
228  // arrays for storing perm_r and perm_c
229  data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
230  data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
231  }
232 
233 
234  template <class Matrix, class Vector>
236  {
237  /* Free SuperLU_DIST data_types
238  * - Matrices
239  * - Vectors
240  * - Stat object
241  * - ScalePerm, LUstruct, grid, and solve objects
242  *
243  * Note: the function definitions are the same regardless whether
244  * complex or real, so we arbitrarily use the D namespace
245  */
246  if ( this->status_.getNumPreOrder() > 0 ){
248 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
249  SUPERLU_FREE( data_.sizes );
250  SUPERLU_FREE( data_.fstVtxSep );
251 #else
252  free( data_.sizes );
253  free( data_.fstVtxSep );
254 #endif
255  }
256 
257  // Cleanup old matrix store memory if it's non-NULL. Our
258  // Teuchos::Array's will destroy rowind, colptr, and nzval for us
259  if( data_.A.Store != NULL ){
260  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
261  }
262 
263  // LU data is initialized in numericFactorization_impl()
264  if ( this->status_.getNumNumericFact() > 0 ){
265  function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
266  }
267  function_map::LUstructFree(&(data_.lu));
268 
269  // If a symbolic factorization is ever performed without a
270  // follow-up numericfactorization, there are some arrays in the
271  // Pslu_freeable struct which will never be free'd by
272  // SuperLU_DIST.
273  if ( this->status_.symbolicFactorizationDone() &&
274  !this->status_.numericFactorizationDone() ){
275  if ( data_.pslu_freeable.xlsub != NULL ){
276 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
277  SUPERLU_FREE( data_.pslu_freeable.xlsub );
278  SUPERLU_FREE( data_.pslu_freeable.lsub );
279 #else
280  free( data_.pslu_freeable.xlsub );
281  free( data_.pslu_freeable.lsub );
282 #endif
283  }
284  if ( data_.pslu_freeable.xusub != NULL ){
285 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
286  SUPERLU_FREE( data_.pslu_freeable.xusub );
287  SUPERLU_FREE( data_.pslu_freeable.usub );
288 #else
289  free( data_.pslu_freeable.xusub );
290  free( data_.pslu_freeable.usub );
291 #endif
292  }
293  if ( data_.pslu_freeable.supno_loc != NULL ){
294 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
295  SUPERLU_FREE( data_.pslu_freeable.supno_loc );
296  SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
297  SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
298 #else
299  free( data_.pslu_freeable.supno_loc );
300  free( data_.pslu_freeable.xsup_beg_loc );
301  free( data_.pslu_freeable.xsup_end_loc );
302 #endif
303  }
304 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
305  SUPERLU_FREE( data_.pslu_freeable.globToLoc );
306 #else
307  free( data_.pslu_freeable.globToLoc );
308 #endif
309  }
310 
311  SLUD::PStatFree( &(data_.stat) ) ;
312 
313  // Teuchos::Arrays will free R, C, perm_r, and perm_c
314  // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
315 
316  if ( data_.options.SolveInitialized == SLUD::YES )
317  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
318 
319  SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
320  // cases where grid
321  // wouldn't be initialized?
322 
323  if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
324  }
325 
326  template<class Matrix, class Vector>
327  int
329  {
330  // We will always use the NATURAL row ordering to avoid the
331  // sequential bottleneck present when doing any other row
332  // ordering scheme from SuperLU_DIST
333  //
334  // Set perm_r to be the natural ordering
335  SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
336  for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
337 
338  // loadA_impl(); // Refresh matrix values
339 
340  if( in_grid_ ){
341  // If this function has been called at least once, then the
342  // sizes, and fstVtxSep arrays were allocated in
343  // get_perm_c_parmetis. Delete them before calling that
344  // function again. These arrays will also be dealloc'd in the
345  // deconstructor.
346  if( this->status_.getNumPreOrder() > 0 ){
347 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
348  SUPERLU_FREE( data_.sizes );
349  SUPERLU_FREE( data_.fstVtxSep );
350 #else
351  free( data_.sizes );
352  free( data_.fstVtxSep );
353 #endif
354  }
355  float info = 0.0;
356  {
357 #ifdef HAVE_AMESOS2_TIMERS
358  Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
359 #endif
360  info = SLUD::get_perm_c_parmetis( &(data_.A),
361  data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
362  data_.grid.nprow * data_.grid.npcol, data_.domains,
363  &(data_.sizes), &(data_.fstVtxSep),
364  &(data_.grid), &(data_.symb_comm) );
365  }
366  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
367  std::runtime_error,
368  "SuperLU_DIST pre-ordering ran out of memory after allocating "
369  << info << " bytes of memory" );
370  }
371 
372  // Ordering will be applied directly before numeric factorization,
373  // after we have a chance to get updated coefficients from the
374  // matrix
375 
376  return EXIT_SUCCESS;
377  }
378 
379 
380 
381  template <class Matrix, class Vector>
382  int
384  {
385  // loadA_impl(); // Refresh matrix values
386 
387  if( in_grid_ ){
388 
389  float info = 0.0;
390  {
391 #ifdef HAVE_AMESOS2_TIMERS
392  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
393 #endif
394 
395 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
396  info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
397  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
398  data_.perm_r.getRawPtr(), data_.sizes,
399  data_.fstVtxSep, &(data_.pslu_freeable),
400  &(data_.grid.comm), &(data_.symb_comm),
401  &(data_.mem_usage));
402 
403 #else
404  info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
405  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
406  data_.perm_r.getRawPtr(), data_.sizes,
407  data_.fstVtxSep, &(data_.pslu_freeable),
408  &(data_.grid.comm), &(data_.symb_comm),
409  &(data_.mem_usage));
410 #endif
411  }
412  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
413  std::runtime_error,
414  "SuperLU_DIST symbolic factorization ran out of memory after"
415  " allocating " << info << " bytes of memory" );
416  }
417  same_symbolic_ = false;
418  same_solve_struct_ = false;
419 
420  return EXIT_SUCCESS;
421  }
422 
423 
424  template <class Matrix, class Vector>
425  int
427  using Teuchos::as;
428 
429  // loadA_impl(); // Refresh the matrix values
430 
431  if( in_grid_ ) {
432  if( data_.options.Equil == SLUD::YES ) {
433  SLUD::int_t info = 0;
434 
435  // Compute scaling
436  data_.R.resize(this->globalNumRows_);
437  data_.C.resize(this->globalNumCols_);
438  function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
439  &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
440 
441  // Apply the scalings
442  function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
443  data_.rowcnd, data_.colcnd, data_.amax,
444  &(data_.equed));
445 
446  data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
447  data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
448  }
449 
450  // Apply the column ordering, so that AC is the column-permuted A, and compute etree
451  size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
452  for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
453 
454  // Distribute data from the symbolic factorization
455  if( same_symbolic_ ){
456  // Note: with the SamePattern_SameRowPerm options, it does not
457  // matter that the glu_freeable member has never been
458  // initialized, because it is never accessed. It is a
459  // placeholder arg. The real work is done in data_.lu
460 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
461  data_.options.Fact = SLUD::SamePattern_SameRowPerm;
462  function_map::pdistribute(&(data_.options),
463  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
464  &(data_.A), &(data_.scale_perm),
465  &(data_.glu_freeable), &(data_.lu),
466  &(data_.grid));
467 #else
468  function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
469  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
470  &(data_.A), &(data_.scale_perm),
471  &(data_.glu_freeable), &(data_.lu),
472  &(data_.grid));
473 #endif
474  } else {
475 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
476  data_.options.Fact = SLUD::DOFACT;
477  function_map::dist_psymbtonum(&(data_.options),
478  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
479  &(data_.A), &(data_.scale_perm),
480  &(data_.pslu_freeable), &(data_.lu),
481  &(data_.grid));
482 #else
483  function_map::dist_psymbtonum(SLUD::DOFACT,
484  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
485  &(data_.A), &(data_.scale_perm),
486  &(data_.pslu_freeable), &(data_.lu),
487  &(data_.grid));
488 #endif
489  }
490 
491  // Retrieve the normI of A (required by gstrf).
492  bool notran = (data_.options.Trans == SLUD::NOTRANS);
493  double anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
494 
495  int info = 0;
496  {
497 #ifdef HAVE_AMESOS2_TIMERS
498  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
499 #endif
500  function_map::gstrf(&(data_.options), this->globalNumRows_,
501  this->globalNumCols_, anorm, &(data_.lu),
502  &(data_.grid), &(data_.stat), &info);
503  }
504 
505  // Check output
506  TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
507  std::runtime_error,
508  "L and U factors have been computed but U("
509  << info << "," << info << ") is exactly zero "
510  "(i.e. U is singular)");
511  }
512 
513  // The other option, that info_st < 0, denotes invalid parameters
514  // to the function, but we'll assume for now that that won't
515  // happen.
516 
517  data_.options.Fact = SLUD::FACTORED;
518  same_symbolic_ = true;
519 
520  return EXIT_SUCCESS;
521  }
522 
523 
524  template <class Matrix, class Vector>
525  int
527  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
528  {
529  using Teuchos::as;
530 
531  // local_len_rhs is how many of the multivector rows belong to
532  // this processor in the SuperLU_DIST processor grid.
533  const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
534  const global_size_type nrhs = X->getGlobalNumVectors();
535  const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
536 
537  // make sure our multivector storage is sized appropriately
538  bvals_.resize(nrhs * local_len_rhs);
539  xvals_.resize(nrhs * local_len_rhs);
540 
541  // We assume the global length of the two vectors have already been
542  // checked for compatibility
543 
544  { // get the values from B
545 #ifdef HAVE_AMESOS2_TIMERS
546  Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
547 #endif
548  {
549  // The input dense matrix for B should be distributed in the
550  // same manner as the superlu_dist matrix. That is, if a
551  // processor has m_loc rows of A, then it should also have
552  // m_loc rows of B (and the same rows). We accomplish this by
553  // distributing the multivector rows with the same Map that
554  // the matrix A's rows are distributed.
555 #ifdef HAVE_AMESOS2_TIMERS
556  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
557 #endif
558  // get grid-distributed mv data. The multivector data will be
559  // distributed across the processes in the SuperLU_DIST grid.
560  typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
561  copy_helper::do_get(B,
562  bvals_(),
563  local_len_rhs,
564  Teuchos::ptrInArg(*superlu_rowmap_));
565  }
566  } // end block for conversion time
567 
568  if( in_grid_ ){
569  // if( data_.options.trans == SLUD::NOTRANS ){
570  // if( data_.rowequ ){ // row equilibration has been done on AC
571  // // scale bxvals_ by diag(R)
572  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
573  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
574  // }
575  // } else if( data_.colequ ){ // column equilibration has been done on AC
576  // // scale bxvals_ by diag(C)
577  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
578  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
579  // }
580 
581  // Initialize the SOLVEstruct_t.
582  //
583  // We are able to reuse the solve struct if we have not changed
584  // the sparsity pattern of L and U since the last solve
585  if( !same_solve_struct_ ){
586  if( data_.options.SolveInitialized == SLUD::YES ){
587  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
588  }
589  function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
590  data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
591  &(data_.grid), &(data_.solve_struct));
592  // Flag that we can reuse this solve_struct unless another
593  // symbolicFactorization is called between here and the next
594  // solve.
595  same_solve_struct_ = true;
596  }
597 
598  // Apply row-scaling if requested
599  if (data_.options.Equil == SLUD::YES && data_.rowequ) {
600  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
601  for(global_size_type j = 0; j < nrhs; ++j) {
602  for(size_t i = 0; i < local_len_rhs; ++i) {
603  bvals_[i + j*ld] *= data_.R[first_global_row_b + i];
604  }
605  }
606  }
607 
608  // Solve
609  int ierr = 0; // returned error code
610  {
611 #ifdef HAVE_AMESOS2_TIMERS
612  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
613 #endif
614 
615 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
616  function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
617  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
618  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
619  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
620  &(data_.solve_struct), &(data_.stat), &ierr);
621 #else
622  function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
623  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
624  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
625  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
626  &(data_.solve_struct), &(data_.stat), &ierr);
627 #endif
628  } // end block for solve time
629 
630  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
631  std::runtime_error,
632  "Argument " << -ierr << " to gstrs had an illegal value" );
633 
634  // "Un-scale" the solution so that it is a solution of the original system
635  // if( data_.options.trans == SLUD::NOTRANS ){
636  // if( data_.colequ ){ // column equilibration has been done on AC
637  // // scale bxvals_ by diag(C)
638  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
639  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
640  // }
641  // } else if( data_.rowequ ){ // row equilibration has been done on AC
642  // // scale bxvals_ by diag(R)
643  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
644  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
645  // }
646  { // permute B to a solution of the original system
647 #ifdef HAVE_AMESOS2_TIMERS
648  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
649 #endif
650  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
651  function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
652  as<SLUD::int_t>(local_len_rhs),
653  data_.solve_struct.row_to_proc,
654  data_.solve_struct.inv_perm_c,
655  bvals_.getRawPtr(), ld,
656  xvals_.getRawPtr(), ld,
657  as<int>(nrhs),
658  &(data_.grid));
659  }
660 
661  // Apply col-scaling if requested
662  if (data_.options.Equil == SLUD::YES && data_.colequ) {
663  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
664  for(global_size_type j = 0; j < nrhs; ++j) {
665  for(size_t i = 0; i < local_len_rhs; ++i) {
666  xvals_[i + j*ld] *= data_.C[first_global_row_b + i];
667  }
668  }
669  }
670  }
671 
672  /* Update X's global values */
673  {
674 #ifdef HAVE_AMESOS2_TIMERS
675  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
676 #endif
677  typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
678  put_helper::do_put(X,
679  xvals_(),
680  local_len_rhs,
681  Teuchos::ptrInArg(*superlu_rowmap_));
682  }
683 
684  return EXIT_SUCCESS;
685  }
686 
687 
688  template <class Matrix, class Vector>
689  bool
691  {
692  // SuperLU_DIST requires square matrices
693  return( this->globalNumRows_ == this->globalNumCols_ );
694  }
695 
696 
697  template <class Matrix, class Vector>
698  void
699  Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
700  {
701  using Teuchos::as;
702  using Teuchos::RCP;
703  using Teuchos::getIntegralValue;
704  using Teuchos::ParameterEntryValidator;
705 
706  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
707 
708  if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
709  TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
710  parameterList->isParameter("npcol")),
711  std::invalid_argument,
712  "nprow and npcol must be set together" );
713 
714  SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
715  SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
716 
717  TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
718  std::invalid_argument,
719  "nprow and npcol combination invalid" );
720 
721  if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
722  // De-allocate the default grid that was initialized in the constructor
723  SLUD::superlu_gridexit(&(data_.grid));
724  // Create a new grid
725  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
726  } // else our grid has not changed size since the last initialization
727  }
728 
729  TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
730  std::invalid_argument,
731  "SuperLU_DIST does not support solving the tranpose system" );
732 
733  data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
734 
735  // Equilbration option
736  bool equil = parameterList->get<bool>("Equil", false);
737  data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
738 
739  if( parameterList->isParameter("ColPerm") ){
740  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
741  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
742 
743  data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
744  }
745 
746  // Always use the "NOROWPERM" option to avoid a serial bottleneck
747  // with the weighted bipartite matching algorithm used for the
748  // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU
749  // User guide (which states that the value should be "NATURAL").
750  data_.options.RowPerm = SLUD::NOROWPERM;
751 
752  // TODO: Uncomment when supported
753  // if( parameterList->isParameter("IterRefine") ){
754  // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
755  // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
756 
757  // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
758  // }
759  data_.options.IterRefine = SLUD::NOREFINE;
760 
761  bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
762  data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
763 
764  if( parameterList->isParameter("IsContiguous") ){
765  is_contiguous_ = parameterList->get<bool>("IsContiguous");
766  }
767  }
768 
769 
770  template <class Matrix, class Vector>
771  Teuchos::RCP<const Teuchos::ParameterList>
773  {
774  using std::string;
775  using Teuchos::tuple;
776  using Teuchos::ParameterList;
777  using Teuchos::EnhancedNumberValidator;
778  using Teuchos::setStringToIntegralParameter;
779  using Teuchos::stringToIntegralParameterEntryValidator;
780 
781  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
782 
783  if( is_null(valid_params) ){
784  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
785 
786  Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
787  = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
788  col_row_validator->setMin(1);
789 
790  pl->set("npcol", data_.grid.npcol,
791  "Number of columns in the processor grid. "
792  "Must be set with nprow", col_row_validator);
793  pl->set("nprow", data_.grid.nprow,
794  "Number of rows in the SuperLU_DIST processor grid. "
795  "Must be set together with npcol", col_row_validator);
796 
797  // validator will catch any value besides NOTRANS
798  setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
799  "Solve for the transpose system or not",
800  tuple<string>("NOTRANS"),
801  tuple<string>("Do not solve with transpose"),
802  tuple<SLUD::trans_t>(SLUD::NOTRANS),
803  pl.getRawPtr());
804 
805  // Equillbration
806  pl->set("Equil", false, "Whether to equilibrate the system before solve");
807 
808  // TODO: uncomment when supported
809  // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
810  // "Type of iterative refinement to use",
811  // tuple<string>("NOREFINE", "DOUBLE"),
812  // tuple<string>("Do not use iterative refinement",
813  // "Do double iterative refinement"),
814  // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
815  // SLUD::DOUBLE),
816  // pl.getRawPtr());
817 
818  pl->set("ReplaceTinyPivot", true,
819  "Specifies whether to replace tiny diagonals during LU factorization");
820 
821  setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
822  "Specifies how to permute the columns of the "
823  "matrix for sparsity preservation",
824  tuple<string>("NATURAL", "PARMETIS"),
825  tuple<string>("Natural ordering",
826  "ParMETIS ordering on A^T + A"),
827  tuple<SLUD::colperm_t>(SLUD::NATURAL,
828  SLUD::PARMETIS),
829  pl.getRawPtr());
830 
831  pl->set("IsContiguous", true, "Whether GIDs contiguous");
832 
833  valid_params = pl;
834  }
835 
836  return valid_params;
837  }
838 
839 
840  template <class Matrix, class Vector>
841  void
843  SLUD::int_t& nprow,
844  SLUD::int_t& npcol) const {
845  TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
846  std::invalid_argument,
847  "Number of MPI processes must be at least 1" );
848  SLUD::int_t c, r = 1;
849  while( r*r <= nprocs ) r++;
850  nprow = npcol = --r; // fall back to square grid
851  c = nprocs / r;
852  while( (r--)*c != nprocs ){
853  c = nprocs / r; // note integer division
854  }
855  ++r;
856  // prefer the square grid over a single row (which will only happen
857  // in the case of a prime nprocs
858  if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
859  nprow = r;
860  npcol = c;
861  }
862  }
863 
864 
865  template <class Matrix, class Vector>
866  bool
868  // Extract the necessary information from mat and call SLU function
869  using Teuchos::Array;
870  using Teuchos::ArrayView;
871  using Teuchos::ptrInArg;
872  using Teuchos::as;
873 
874  using SLUD::int_t;
875 
876 #ifdef HAVE_AMESOS2_TIMERS
877  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
878 #endif
879 
880  // Cleanup old store memory if it's non-NULL
881  if( data_.A.Store != NULL ){
882  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
883  data_.A.Store = NULL;
884  }
885 
886  Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
887  = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
888 
889  int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
890  l_nnz = as<int_t>(redist_mat->getLocalNNZ());
891  l_rows = as<int_t>(redist_mat->getLocalNumRows());
892  g_rows = as<int_t>(redist_mat->getGlobalNumRows());
893  g_cols = g_rows; // we deal with square matrices
894  fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
895 
896  Kokkos::resize(nzvals_view_, l_nnz);
897  Kokkos::resize(colind_view_, l_nnz);
898  Kokkos::resize(rowptr_view_, l_rows + 1);
899  int_t nnz_ret = 0;
900  {
901 #ifdef HAVE_AMESOS2_TIMERS
902  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
903 #endif
904 
906  host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
907  redist_mat.ptr(),
908  nzvals_view_, colind_view_, rowptr_view_,
909  nnz_ret,
910  ptrInArg(*superlu_rowmap_),
911  ROOTED,
912  ARBITRARY);
913  }
914 
915  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
916  std::runtime_error,
917  "Did not get the expected number of non-zero vals");
918 
919  // Get the SLU data type for this type of matrix
920  SLUD::Dtype_t dtype = type_map::dtype;
921 
922  if( in_grid_ ){
923  function_map::create_CompRowLoc_Matrix(&(data_.A),
924  g_rows, g_cols,
925  l_nnz, l_rows, fst_global_row,
926  nzvals_view_.data(),
927  colind_view_.data(),
928  rowptr_view_.data(),
929  SLUD::SLU_NR_loc,
930  dtype, SLUD::SLU_GE);
931  }
932 
933  return true;
934 }
935 
936 
937  template<class Matrix, class Vector>
938  const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
939 
940 
941 } // end namespace Amesos2
942 
943 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:90
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:842
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:699
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:772
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:267
Definition: Amesos2_TypeDecl.hpp:143
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:550
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:363
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:526
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
Definition: Amesos2_AbstractConcreteMatrixAdapter.hpp:48
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:328
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:690
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:662
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:333
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:235
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:518
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:383
Definition: Amesos2_TypeDecl.hpp:127
bool in_grid_
true if this processor is in SuperLU_DISTS&#39;s 2D process grid
Definition: Amesos2_Superludist_decl.hpp:326
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:373
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:867
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:455
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:426