Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Sacado_Fad_Exp_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef SACADO_FAD_EXP_MP_VECTOR_HPP
43 #define SACADO_FAD_EXP_MP_VECTOR_HPP
44 
45 #include "Sacado_MP_Vector.hpp"
46 
47 namespace Sacado {
48 
49  namespace Fad {
50  namespace Exp {
51 
53  class ExprSpecMPVector {};
54 
56 
61  template <typename T>
62  class Extender<
63  T,
64  typename std::enable_if<
65  Sacado::is_mp_vector<typename T::value_type>::value >::type
66  > : public T
67  {
68  public:
69 
70  typedef typename T::value_type value_type;
71  typedef typename value_type::value_type val_type;
72 
73  // Define expression template specialization
75 
76  // Bring in constructors
77  using T::T;
78 
79  // Bring in methods we are overloading
80  using T::val;
81  using T::dx;
82  using T::fastAccessDx;
83 
85  KOKKOS_INLINE_FUNCTION
86  const val_type& val(int j) const { return T::val().fastAccessCoeff(j); }
87 
89  KOKKOS_INLINE_FUNCTION
90  val_type& val(int j) { return T::val().fastAccessCoeff(j); }
91 
93  KOKKOS_INLINE_FUNCTION
94  val_type dx(int i, int j) const {
95  return this->size() ? this->dx_[i].fastAccessCoeff(j) : val_type(0.0);
96  }
97 
99  KOKKOS_INLINE_FUNCTION
100  val_type& fastAccessDx(int i, int j) {
101  return this->dx_[i].fastAccessCoeff(j);
102  }
103 
105  KOKKOS_INLINE_FUNCTION
106  const val_type& fastAccessDx(int i, int j) const {
107  return this->dx_[i].fastAccessCoeff(j);
108  }
109 
110  };
111 
113  template <typename DstType>
114  class ExprAssign<
115  DstType,
116  typename std::enable_if<
117  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
118  >::type
119  > {
120  public:
121 
123  typedef typename DstType::value_type value_type;
124 
125  // MP::Vector size (assuming static, because that's all we care about)
126  static const int VecNum = Sacado::StaticSize<value_type>::value;
127 
129  template <typename SrcType>
130  KOKKOS_INLINE_FUNCTION
131  static void assign_equal(DstType& dst, const SrcType& x)
132  {
133  const int xsz = x.size();
134 
135  if (xsz != dst.size())
136  dst.resizeAndZero(xsz);
137 
138  const int sz = dst.size();
139 
140  // For ViewStorage, the resize above may not in fact resize the
141  // derivative array, so it is possible that sz != xsz at this point.
142  // The only valid use case here is sz > xsz == 0, so we use sz in the
143  // assignment below
144 
145  if (sz) {
146  if (x.hasFastAccess()) {
147  SACADO_FAD_DERIV_LOOP(i,sz)
148  for (int j=0; j<VecNum; ++j)
149  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
150  }
151  else
152  SACADO_FAD_DERIV_LOOP(i,sz)
153  for (int j=0; j<VecNum; ++j)
154  dst.fastAccessDx(i,j) = x.dx(i,j);
155  }
156 
157  for (int j=0; j<VecNum; ++j)
158  dst.val(j) = x.val(j);
159  }
160 
162  template <typename SrcType>
163  KOKKOS_INLINE_FUNCTION
164  static void assign_plus_equal(DstType& dst, const SrcType& x)
165  {
166  const int xsz = x.size(), sz = dst.size();
167 
168 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
169  if ((xsz != sz) && (xsz != 0) && (sz != 0))
170  throw "Fad Error: Attempt to assign with incompatible sizes";
171 #endif
172 
173  if (xsz) {
174  if (sz) {
175  if (x.hasFastAccess())
176  SACADO_FAD_DERIV_LOOP(i,sz)
177  for (int j=0; j<VecNum; ++j)
178  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
179  else
180  for (int i=0; i<sz; ++i)
181  for (int j=0; j<VecNum; ++j)
182  dst.fastAccessDx(i,j) += x.dx(i,j);
183  }
184  else {
185  dst.resizeAndZero(xsz);
186  if (x.hasFastAccess())
187  SACADO_FAD_DERIV_LOOP(i,xsz)
188  for (int j=0; j<VecNum; ++j)
189  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
190  else
191  SACADO_FAD_DERIV_LOOP(i,xsz)
192  for (int j=0; j<VecNum; ++j)
193  dst.fastAccessDx(i,j) = x.dx(i,j);
194  }
195  }
196 
197  for (int j=0; j<VecNum; ++j)
198  dst.val(j) += x.val(j);
199  }
200 
202  template <typename SrcType>
203  KOKKOS_INLINE_FUNCTION
204  static void assign_minus_equal(DstType& dst, const SrcType& x)
205  {
206  const int xsz = x.size(), sz = dst.size();
207 
208 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
209  if ((xsz != sz) && (xsz != 0) && (sz != 0))
210  throw "Fad Error: Attempt to assign with incompatible sizes";
211 #endif
212 
213  if (xsz) {
214  if (sz) {
215  if (x.hasFastAccess())
216  SACADO_FAD_DERIV_LOOP(i,sz)
217  for (int j=0; j<VecNum; ++j)
218  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
219  else
220  SACADO_FAD_DERIV_LOOP(i,sz)
221  for (int j=0; j<VecNum; ++j)
222  dst.fastAccessDx(i,j) -= x.dx(i,j);
223  }
224  else {
225  dst.resizeAndZero(xsz);
226  if (x.hasFastAccess())
227  SACADO_FAD_DERIV_LOOP(i,xsz)
228  for (int j=0; j<VecNum; ++j)
229  dst.fastAccessDx(i,j) = -x.fastAccessDx(i,j);
230  else
231  SACADO_FAD_DERIV_LOOP(i,xsz)
232  for (int j=0; j<VecNum; ++j)
233  dst.fastAccessDx(i,j) = -x.dx(i,j);
234  }
235  }
236 
237  for (int j=0; j<VecNum; ++j)
238  dst.val(j) -= x.val(j);
239  }
240 
242  template <typename SrcType>
243  KOKKOS_INLINE_FUNCTION
244  static void assign_times_equal(DstType& dst, const SrcType& x)
245  {
246  const int xsz = x.size(), sz = dst.size();
247  const value_type xval = x.val();
248  const value_type v = dst.val();
249 
250 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
251  if ((xsz != sz) && (xsz != 0) && (sz != 0))
252  throw "Fad Error: Attempt to assign with incompatible sizes";
253 #endif
254 
255  if (xsz) {
256  if (sz) {
257  if (x.hasFastAccess())
258  SACADO_FAD_DERIV_LOOP(i,sz)
259  for (int j=0; j<VecNum; ++j)
260  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
261  else
262  SACADO_FAD_DERIV_LOOP(i,sz)
263  for (int j=0; j<VecNum; ++j)
264  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.dx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
265  }
266  else {
267  dst.resizeAndZero(xsz);
268  if (x.hasFastAccess())
269  SACADO_FAD_DERIV_LOOP(i,xsz)
270  for (int j=0; j<VecNum; ++j)
271  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j);
272  else
273  SACADO_FAD_DERIV_LOOP(i,xsz)
274  for (int j=0; j<VecNum; ++j)
275  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.dx(i,j);
276  }
277  }
278  else {
279  if (sz) {
280  SACADO_FAD_DERIV_LOOP(i,sz)
281  for (int j=0; j<VecNum; ++j)
282  dst.fastAccessDx(i,j) *= xval.fastAccessCoeff(j);
283  }
284  }
285 
286  for (int j=0; j<VecNum; ++j)
287  dst.val(j) *= xval.fastAccessCoeff(j);
288  }
289 
291  template <typename SrcType>
292  KOKKOS_INLINE_FUNCTION
293  static void assign_divide_equal(DstType& dst, const SrcType& x)
294  {
295  const int xsz = x.size(), sz = dst.size();
296  const value_type xval = x.val();
297  const value_type v = dst.val();
298 
299 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
300  if ((xsz != sz) && (xsz != 0) && (sz != 0))
301  throw "Fad Error: Attempt to assign with incompatible sizes";
302 #endif
303 
304  if (xsz) {
305  const value_type xval2 = xval*xval;
306  if (sz) {
307  if (x.hasFastAccess())
308  SACADO_FAD_DERIV_LOOP(i,sz)
309  for (int j=0; j<VecNum; ++j)
310  dst.fastAccessDx(i,j) =
311  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) ) / xval2.fastAccessCoeff(j);
312  else
313  SACADO_FAD_DERIV_LOOP(i,sz)
314  for (int j=0; j<VecNum; ++j)
315  dst.fastAccessDx(i,j) =
316  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.dx(i,j) ) / xval2.fastAccessCoeff(j);
317  }
318  else {
319  dst.resizeAndZero(xsz);
320  if (x.hasFastAccess())
321  SACADO_FAD_DERIV_LOOP(i,xsz)
322  for (int j=0; j<VecNum; ++j)
323  dst.fastAccessDx(i,j) = - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) / xval2.fastAccessCoeff(j);
324  else
325  SACADO_FAD_DERIV_LOOP(i,xsz)
326  for (int j=0; j<VecNum; ++j)
327  dst.fastAccessDx(i,j) = -v.fastAccessCoeff(j)*x.dx(i,j) / xval2.fastAccessCoeff(j);
328  }
329  }
330  else {
331  if (sz) {
332  SACADO_FAD_DERIV_LOOP(i,sz)
333  for (int j=0; j<VecNum; ++j)
334  dst.fastAccessDx(i,j) /= xval.fastAccessCoeff(j);
335  }
336  }
337 
338  for (int j=0; j<VecNum; ++j)
339  dst.val(j) /= xval.fastAccessCoeff(j);
340  }
341 
342  };
343 
348  template <typename DstType>
349  class ExprAssign<
350  DstType,
351  typename std::enable_if<
352  Sacado::IsStaticallySized<DstType>::value &&
353  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
354  >::type
355  > {
356  public:
357 
359  typedef typename DstType::value_type value_type;
360 
361  // MP::Vector size (assuming static, because that's all we care about)
362  static const int VecNum = Sacado::StaticSize<value_type>::value;
363 
365  template <typename SrcType>
366  KOKKOS_INLINE_FUNCTION
367  static void assign_equal(DstType& dst, const SrcType& x)
368  {
369  const int sz = dst.size();
370  SACADO_FAD_DERIV_LOOP(i,sz)
371  for (int j=0; j<VecNum; ++j)
372  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
373  for (int j=0; j<VecNum; ++j)
374  dst.val(j) = x.val(j);
375  }
376 
378  template <typename SrcType>
379  KOKKOS_INLINE_FUNCTION
380  static void assign_plus_equal(DstType& dst, const SrcType& x)
381  {
382  const int sz = dst.size();
383  SACADO_FAD_DERIV_LOOP(i,sz)
384  for (int j=0; j<VecNum; ++j)
385  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
386  for (int j=0; j<VecNum; ++j)
387  dst.val(j) += x.val(j);
388  }
389 
391  template <typename SrcType>
392  KOKKOS_INLINE_FUNCTION
393  static void assign_minus_equal(DstType& dst, const SrcType& x)
394  {
395  const int sz = dst.size();
396  SACADO_FAD_DERIV_LOOP(i,sz)
397  for (int j=0; j<VecNum; ++j)
398  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
399  for (int j=0; j<VecNum; ++j)
400  dst.val(j) -= x.val(j);
401  }
402 
404  template <typename SrcType>
405  KOKKOS_INLINE_FUNCTION
406  static void assign_times_equal(DstType& dst, const SrcType& x)
407  {
408  const int sz = dst.size();
409  const value_type xval = x.val();
410  const value_type v = dst.val();
411  SACADO_FAD_DERIV_LOOP(i,sz)
412  for (int j=0; j<VecNum; ++j)
413  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
414  for (int j=0; j<VecNum; ++j)
415  dst.val(j) *= xval.fastAccessCoeff(j);
416  }
417 
419  template <typename SrcType>
420  KOKKOS_INLINE_FUNCTION
421  static void assign_divide_equal(DstType& dst, const SrcType& x)
422  {
423  const int sz = dst.size();
424  const value_type xval = x.val();
425  const value_type xval2 = xval*xval;
426  const value_type v = dst.val();
427  SACADO_FAD_DERIV_LOOP(i,sz)
428  for (int j=0; j<VecNum; ++j)
429  dst.fastAccessDx(i,j) =
430  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) )/ xval2.fastAccessCoeff(j);
431  for (int j=0; j<VecNum; ++j)
432  dst.val(j) /= xval.fastAccessCoeff(j);
433  }
434 
435  };
436 
437  } // namespace Exp
438  } // namespace Fad
439 
440 } // namespace Sacado
441 
442 // Specialize expression template operators to add similar extensions
443 #include "Sacado_Fad_Exp_Ops.hpp"
444 
445 #define FAD_UNARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,FASTACCESSDX) \
446 namespace Sacado { \
447  namespace Fad { \
448  namespace Exp { \
449  \
450  template <typename T> \
451  class OP< T,ExprSpecMPVector > : \
452  public Expr< OP< T,ExprSpecMPVector > > { \
453  public: \
454  \
455  typedef typename std::remove_cv<T>::type ExprT; \
456  typedef typename ExprT::value_type value_type; \
457  typedef typename ExprT::scalar_type scalar_type; \
458  \
459  typedef typename value_type::value_type val_type; \
460  \
461  typedef ExprSpecMPVector expr_spec_type; \
462  \
463  KOKKOS_INLINE_FUNCTION \
464  OP(const T& expr_) : expr(expr_) {} \
465  \
466  KOKKOS_INLINE_FUNCTION \
467  int size() const { return expr.size(); } \
468  \
469  KOKKOS_INLINE_FUNCTION \
470  bool hasFastAccess() const { \
471  return expr.hasFastAccess(); \
472  } \
473  \
474  KOKKOS_INLINE_FUNCTION \
475  value_type val() const { \
476  USING \
477  return MPVALUE; \
478  } \
479  \
480  KOKKOS_INLINE_FUNCTION \
481  val_type val(int j) const { \
482  USING \
483  return VALUE; \
484  } \
485  \
486  KOKKOS_INLINE_FUNCTION \
487  val_type dx(int i, int j) const { \
488  USING \
489  return DX; \
490  } \
491  \
492  KOKKOS_INLINE_FUNCTION \
493  val_type fastAccessDx(int i, int j) const { \
494  USING \
495  return FASTACCESSDX; \
496  } \
497  \
498  protected: \
499  \
500  const T& expr; \
501  }; \
502  \
503  } \
504  } \
505  \
506 }
507 
509  UnaryPlusOp,
510  ;,
511  expr.val(),
512  expr.val(j),
513  expr.dx(i,j),
514  expr.fastAccessDx(i,j))
515 FAD_UNARYOP_MACRO(operator-,
517  ;,
518  -expr.val(),
519  -expr.val(j),
520  -expr.dx(i,j),
521  -expr.fastAccessDx(i,j))
524  using std::exp;,
525  exp(expr.val()),
526  exp(expr.val(j)),
527  exp(expr.val(j))*expr.dx(i,j),
528  exp(expr.val(j))*expr.fastAccessDx(i,j))
530  LogOp,
531  using std::log;,
532  log(expr.val()),
533  log(expr.val(j)),
534  expr.dx(i,j)/expr.val(j),
535  expr.fastAccessDx(i,j)/expr.val(j))
538  using std::log10; using std::log;,
539  log10(expr.val()),
540  log10(expr.val(j)),
541  expr.dx(i,j)/( log(value_type(10))*expr.val()),
542  expr.fastAccessDx(i,j) / ( log(value_type(10))*expr.val()))
545  using std::sqrt;,
546  sqrt(expr.val()),
547  sqrt(expr.val(j)),
548  expr.dx(i,j)/(value_type(2)* sqrt(expr.val())),
549  expr.fastAccessDx(i,j)/(value_type(2)* sqrt(expr.val())))
552  using std::cos; using std::sin;,
553  cos(expr.val()),
554  cos(expr.val(j)),
555  -expr.dx(i,j)* sin(expr.val()),
556  -expr.fastAccessDx(i,j)* sin(expr.val()))
559  using std::cos; using std::sin;,
560  sin(expr.val()),
561  sin(expr.val(j)),
562  expr.dx(i,j)* cos(expr.val()),
563  expr.fastAccessDx(i,j)* cos(expr.val()))
566  using std::tan;,
567  tan(expr.val()),
568  tan(expr.val(j)),
569  expr.dx(i,j)*
570  (value_type(1)+ tan(expr.val())* tan(expr.val())),
571  expr.fastAccessDx(i,j)*
572  (value_type(1)+ tan(expr.val())* tan(expr.val())))
575  using std::acos; using std::sqrt;,
576  acos(expr.val()),
577  acos(expr.val(j)),
578  -expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
579  -expr.fastAccessDx(i,j) /
580  sqrt(value_type(1)-expr.val()*expr.val()))
583  using std::asin; using std::sqrt;,
584  asin(expr.val()),
585  asin(expr.val(j)),
586  expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
587  expr.fastAccessDx(i,j) /
588  sqrt(value_type(1)-expr.val()*expr.val()))
591  using std::atan;,
592  atan(expr.val()),
593  atan(expr.val(j)),
594  expr.dx(i,j)/(value_type(1)+expr.val()*expr.val()),
595  expr.fastAccessDx(i,j)/(value_type(1)+expr.val()*expr.val()))
598  using std::cosh; using std::sinh;,
599  cosh(expr.val()),
600  cosh(expr.val(j)),
601  expr.dx(i,j)* sinh(expr.val()),
602  expr.fastAccessDx(i,j)* sinh(expr.val()))
605  using std::cosh; using std::sinh;,
606  sinh(expr.val()),
607  sinh(expr.val(j)),
608  expr.dx(i,j)* cosh(expr.val()),
609  expr.fastAccessDx(i,j)* cosh(expr.val()))
612  using std::tanh; using std::cosh;,
613  tanh(expr.val()),
614  tanh(expr.val(j)),
615  expr.dx(i,j)/( cosh(expr.val())* cosh(expr.val())),
616  expr.fastAccessDx(i,j) /
617  ( cosh(expr.val())* cosh(expr.val())))
620  using std::acosh; using std::sqrt;,
621  acosh(expr.val()),
622  acosh(expr.val(j)),
623  expr.dx(i,j)/ sqrt((expr.val()-value_type(1)) *
624  (expr.val()+value_type(1))),
625  expr.fastAccessDx(i,j)/ sqrt((expr.val()-value_type(1)) *
626  (expr.val()+value_type(1))))
629  using std::asinh; using std::sqrt;,
630  asinh(expr.val()),
631  asinh(expr.val(j)),
632  expr.dx(i,j)/ sqrt(value_type(1)+expr.val()*expr.val()),
633  expr.fastAccessDx(i,j)/ sqrt(value_type(1)+
634  expr.val()*expr.val()))
637  using std::atanh;,
638  atanh(expr.val()),
639  atanh(expr.val(j)),
640  expr.dx(i,j)/(value_type(1)-expr.val()*expr.val()),
641  expr.fastAccessDx(i,j)/(value_type(1)-
642  expr.val()*expr.val()))
645  using std::abs;,
646  abs(expr.val()),
647  abs(expr.val(j)),
648  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
649  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
652  using std::fabs;,
653  fabs(expr.val()),
654  fabs(expr.val(j)),
655  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
656  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
659  using std::cbrt;,
660  cbrt(expr.val()),
661  cbrt(expr.val(j)),
662  expr.dx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())),
663  expr.fastAccessDx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())))
664 
665 #undef FAD_UNARYOP_MACRO
666 
667 namespace Sacado {
668  namespace Fad {
669  namespace Exp {
670 
671  // For MP::Vector scalar type, promote constants up to expression's value
672  // type. If the constant type is the same as the value type, we can store
673  // the constant as a reference. If it isn't, we must copy it into a new
674  // value type object. We do this so that we can always access the constant
675  // as a value type.
676  template <typename ConstType, typename ValueType>
677  struct ConstTypeRef {
678  typedef ValueType type;
679  };
680 
681  template <typename ValueType>
682  struct ConstTypeRef<ValueType, ValueType> {
683  typedef ValueType& type;
684  };
685  }
686  }
687 }
688 
689 #define FAD_BINARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,CDX1,CDX2,FASTACCESSDX,MPVAL_CONST_DX_1,MPVAL_CONST_DX_2,VAL_CONST_DX_1,VAL_CONST_DX_2,CONST_DX_1,CONST_DX_2,CONST_FASTACCESSDX_1,CONST_FASTACCESSDX_2) \
690 namespace Sacado { \
691  namespace Fad { \
692  namespace Exp { \
693  \
694  template <typename T1, typename T2 > \
695  class OP< T1, T2, false, false, ExprSpecMPVector > : \
696  public Expr< OP< T1, T2, false, false, ExprSpecMPVector > > { \
697  public: \
698  \
699  typedef typename std::remove_cv<T1>::type ExprT1; \
700  typedef typename std::remove_cv<T2>::type ExprT2; \
701  typedef typename ExprT1::value_type value_type_1; \
702  typedef typename ExprT2::value_type value_type_2; \
703  typedef typename Sacado::Promote<value_type_1, \
704  value_type_2>::type value_type; \
705  \
706  typedef typename ExprT1::scalar_type scalar_type_1; \
707  typedef typename ExprT2::scalar_type scalar_type_2; \
708  typedef typename Sacado::Promote<scalar_type_1, \
709  scalar_type_2>::type scalar_type; \
710  \
711  typedef typename value_type::value_type val_type; \
712  \
713  typedef ExprSpecMPVector expr_spec_type; \
714  \
715  KOKKOS_INLINE_FUNCTION \
716  OP(const T1& expr1_, const T2& expr2_) : \
717  expr1(expr1_), expr2(expr2_) {} \
718  \
719  KOKKOS_INLINE_FUNCTION \
720  int size() const { \
721  const int sz1 = expr1.size(), sz2 = expr2.size(); \
722  return sz1 > sz2 ? sz1 : sz2; \
723  } \
724  \
725  KOKKOS_INLINE_FUNCTION \
726  bool hasFastAccess() const { \
727  return expr1.hasFastAccess() && expr2.hasFastAccess(); \
728  } \
729  \
730  KOKKOS_INLINE_FUNCTION \
731  value_type val() const { \
732  USING \
733  return MPVALUE; \
734  } \
735  \
736  KOKKOS_INLINE_FUNCTION \
737  val_type val(int j) const { \
738  USING \
739  return VALUE; \
740  } \
741  \
742  KOKKOS_INLINE_FUNCTION \
743  val_type dx(int i, int j) const { \
744  USING \
745  const int sz1 = expr1.size(), sz2 = expr2.size(); \
746  if (sz1 > 0 && sz2 > 0) \
747  return DX; \
748  else if (sz1 > 0) \
749  return CDX2; \
750  else \
751  return CDX1; \
752  } \
753  \
754  KOKKOS_INLINE_FUNCTION \
755  val_type fastAccessDx(int i, int j) const { \
756  USING \
757  return FASTACCESSDX; \
758  } \
759  \
760  protected: \
761  \
762  const T1& expr1; \
763  const T2& expr2; \
764  \
765  }; \
766  \
767  template <typename T1, typename T2> \
768  class OP< T1, T2, false, true, ExprSpecMPVector > : \
769  public Expr< OP< T1, T2, false, true, ExprSpecMPVector > > { \
770  public: \
771  \
772  typedef typename std::remove_cv<T1>::type ExprT1; \
773  typedef T2 ConstT; \
774  typedef typename ExprT1::value_type value_type; \
775  typedef typename ExprT1::scalar_type scalar_type; \
776  \
777  typedef typename value_type::value_type val_type; \
778  \
779  typedef ExprSpecMPVector expr_spec_type; \
780  \
781  KOKKOS_INLINE_FUNCTION \
782  OP(const T1& expr1_, const ConstT& c_) : \
783  expr1(expr1_), c(c_) {} \
784  \
785  KOKKOS_INLINE_FUNCTION \
786  int size() const { \
787  return expr1.size(); \
788  } \
789  \
790  KOKKOS_INLINE_FUNCTION \
791  bool hasFastAccess() const { \
792  return expr1.hasFastAccess(); \
793  } \
794  \
795  KOKKOS_INLINE_FUNCTION \
796  value_type val() const { \
797  USING \
798  return MPVAL_CONST_DX_2; \
799  } \
800  \
801  KOKKOS_INLINE_FUNCTION \
802  val_type val(int j) const { \
803  USING \
804  return VAL_CONST_DX_2; \
805  } \
806  \
807  KOKKOS_INLINE_FUNCTION \
808  val_type dx(int i, int j) const { \
809  USING \
810  return CONST_DX_2; \
811  } \
812  \
813  KOKKOS_INLINE_FUNCTION \
814  val_type fastAccessDx(int i, int j) const { \
815  USING \
816  return CONST_FASTACCESSDX_2; \
817  } \
818  \
819  protected: \
820  \
821  const T1& expr1; \
822  const typename ConstTypeRef<ConstT,value_type>::type c; \
823  }; \
824  \
825  template <typename T1, typename T2> \
826  class OP< T1, T2, true, false,ExprSpecMPVector > : \
827  public Expr< OP< T1, T2, true, false, ExprSpecMPVector > > { \
828  public: \
829  \
830  typedef typename std::remove_cv<T2>::type ExprT2; \
831  typedef T1 ConstT; \
832  typedef typename ExprT2::value_type value_type; \
833  typedef typename ExprT2::scalar_type scalar_type; \
834  \
835  typedef typename value_type::value_type val_type; \
836  \
837  typedef ExprSpecMPVector expr_spec_type; \
838  \
839  KOKKOS_INLINE_FUNCTION \
840  OP(const ConstT& c_, const T2& expr2_) : \
841  c(c_), expr2(expr2_) {} \
842  \
843  KOKKOS_INLINE_FUNCTION \
844  int size() const { \
845  return expr2.size(); \
846  } \
847  \
848  KOKKOS_INLINE_FUNCTION \
849  bool hasFastAccess() const { \
850  return expr2.hasFastAccess(); \
851  } \
852  \
853  KOKKOS_INLINE_FUNCTION \
854  value_type val() const { \
855  USING \
856  return MPVAL_CONST_DX_1; \
857  } \
858  \
859  KOKKOS_INLINE_FUNCTION \
860  val_type val(int j) const { \
861  USING \
862  return VAL_CONST_DX_1; \
863  } \
864  \
865  KOKKOS_INLINE_FUNCTION \
866  val_type dx(int i, int j) const { \
867  USING \
868  return CONST_DX_1; \
869  } \
870  \
871  KOKKOS_INLINE_FUNCTION \
872  val_type fastAccessDx(int i, int j) const { \
873  USING \
874  return CONST_FASTACCESSDX_1; \
875  } \
876  \
877  protected: \
878  \
879  const typename ConstTypeRef<ConstT,value_type>::type c; \
880  const T2& expr2; \
881  }; \
882  \
883  } \
884  } \
885  \
886 }
887 
888 
890  AdditionOp,
891  ;,
892  expr1.val() + expr2.val(),
893  expr1.val(j) + expr2.val(j),
894  expr1.dx(i,j) + expr2.dx(i,j),
895  expr2.dx(i,j),
896  expr1.dx(i,j),
897  expr1.fastAccessDx(i,j) + expr2.fastAccessDx(i,j),
898  c + expr2.val(),
899  expr1.val() + c,
900  c.fastAccessCoeff(j) + expr2.val(j),
901  expr1.val(j) + c.fastAccessCoeff(j),
902  expr2.dx(i,j),
903  expr1.dx(i,j),
904  expr2.fastAccessDx(i,j),
905  expr1.fastAccessDx(i,j))
906 FAD_BINARYOP_MACRO(operator-,
908  ;,
909  expr1.val() - expr2.val(),
910  expr1.val(j) - expr2.val(j),
911  expr1.dx(i,j) - expr2.dx(i,j),
912  -expr2.dx(i,j),
913  expr1.dx(i,j),
914  expr1.fastAccessDx(i,j) - expr2.fastAccessDx(i,j),
915  c - expr2.val(),
916  expr1.val() - c,
917  c.fastAccessCoeff(j) - expr2.val(j),
918  expr1.val(j) - c.fastAccessCoeff(j),
919  -expr2.dx(i,j),
920  expr1.dx(i,j),
921  -expr2.fastAccessDx(i,j),
922  expr1.fastAccessDx(i,j))
923 FAD_BINARYOP_MACRO(operator*,
925  ;,
926  expr1.val() * expr2.val(),
927  expr1.val(j) * expr2.val(j),
928  expr1.val(j)*expr2.dx(i,j) + expr1.dx(i,j)*expr2.val(j),
929  expr1.val(j)*expr2.dx(i,j),
930  expr1.dx(i,j)*expr2.val(j),
931  expr1.val(j)*expr2.fastAccessDx(i,j) +
932  expr1.fastAccessDx(i,j)*expr2.val(j),
933  c * expr2.val(),
934  expr1.val() * c,
935  c.fastAccessCoeff(j) * expr2.val(j),
936  expr1.val(j) * c.fastAccessCoeff(j),
937  c.fastAccessCoeff(j)*expr2.dx(i,j),
938  expr1.dx(i,j)*c.fastAccessCoeff(j),
939  c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j),
940  expr1.fastAccessDx(i,j)*c.fastAccessCoeff(j))
941 FAD_BINARYOP_MACRO(operator/,
943  ;,
944  expr1.val() / expr2.val(),
945  expr1.val(j) / expr2.val(j),
946  (expr1.dx(i,j)*expr2.val(j) - expr2.dx(i,j)*expr1.val(j)) /
947  (expr2.val(j)*expr2.val(j)),
948  -expr2.dx(i,j)*expr1.val(j) / (expr2.val(j)*expr2.val(j)),
949  expr1.dx(i,j)/expr2.val(j),
950  (expr1.fastAccessDx(i,j)*expr2.val(j) -
951  expr2.fastAccessDx(i,j)*expr1.val(j)) /
952  (expr2.val(j)*expr2.val(j)),
953  c / expr2.val(),
954  expr1.val() / c,
955  c.fastAccessCoeff(j) / expr2.val(j),
956  expr1.val(j) / c.fastAccessCoeff(j),
957  -expr2.dx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
958  expr1.dx(i,j)/c.fastAccessCoeff(j),
959  -expr2.fastAccessDx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
960  expr1.fastAccessDx(i,j)/c.fastAccessCoeff(j))
963  using std::atan2;,
964  atan2(expr1.val(), expr2.val()),
965  atan2(expr1.val(j), expr2.val(j)),
966  (expr2.val(j)*expr1.dx(i,j) - expr1.val(j)*expr2.dx(i,j))/
967  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
968  -expr1.val(j)*expr2.dx(i,j)/
969  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
970  expr2.val(j)*expr1.dx(i,j)/
971  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
972  (expr2.val(j)*expr1.fastAccessDx(i,j) - expr1.val(j)*expr2.fastAccessDx(i,j))/
973  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
974  atan2(c, expr2.val()),
975  atan2(expr1.val(), c),
976  atan2(c.fastAccessCoeff(j), expr2.val(j)),
977  atan2(expr1.val(j), c.fastAccessCoeff(j)),
978  (-c.fastAccessCoeff(j)*expr2.dx(i,j)) / (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
979  (c.fastAccessCoeff(j)*expr1.dx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)),
980  (-c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j))/ (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
981  (c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)))
982 // FAD_BINARYOP_MACRO(pow,
983 // PowerOp,
984 // using std::pow; using std::log;,
985 // pow(expr1.val(), expr2.val()),
986 // pow(expr1.val(j), expr2.val(j)),
987 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
988 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
989 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ),
990 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
991 // pow(c, expr2.val()),
992 // pow(expr1.val(), c),
993 // pow(c.fastAccessCoeff(j), expr2.val(j)),
994 // pow(expr1.val(j), c.fastAccessCoeff(j)),
995 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
996 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ),
997 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
998 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)))) )
1001  ;,
1002  if_then_else( expr1.val() >= expr2.val(), expr1.val(), expr2.val() ),
1003  if_then_else( expr1.val(j) >= expr2.val(j), expr1.val(j), expr2.val(j) ),
1004  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1005  if_then_else( expr1.val(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1006  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1007  if_then_else( expr1.val(j) >= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1008  if_then_else( c >= expr2.val(), c, expr2.val() ),
1009  if_then_else( expr1.val() >= c, expr1.val(), c ),
1010  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1011  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1012  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1013  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0.0) ),
1014  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.fastAccessDx(i,j) ),
1015  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0.0) ) )
1018  ;,
1019  if_then_else( expr1.val() <= expr2.val(), expr1.val(), expr2.val() ),
1020  if_then_else( expr1.val(j) <= expr2.val(j), expr1.val(j), expr2.val(j) ),
1021  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1022  if_then_else( expr1.val(j) <= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1023  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1024  if_then_else( expr1.val(j) <= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1025  if_then_else( c <= expr2.val(), c, expr2.val() ),
1026  if_then_else( expr1.val() <= c, expr1.val(), c ),
1027  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1028  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1029  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.dx(i,j) ),
1030  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0) ),
1031  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.fastAccessDx(i,j) ),
1032  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0) ) )
1033 
1034 // Special handling for std::pow() to provide specializations of PowerOp for
1035 // "simd" value types that use if_then_else(). The only reason for not using
1036 // if_then_else() always is to avoid evaluating the derivative if the value is
1037 // zero to avoid throwing FPEs.
1038 namespace Sacado {
1039  namespace Fad {
1040  namespace Exp {
1041 
1042  //
1043  // Implementation for simd type using if_then_else()
1044  //
1045  template <typename T1, typename T2>
1046  class PowerOp< T1, T2, false, false, ExprSpecMPVector, true > :
1047  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector, true > > {
1048  public:
1049 
1050  typedef typename std::remove_cv<T1>::type ExprT1;
1051  typedef typename std::remove_cv<T2>::type ExprT2;
1052  typedef typename ExprT1::value_type value_type_1;
1053  typedef typename ExprT2::value_type value_type_2;
1054  typedef typename Sacado::Promote<value_type_1,
1055  value_type_2>::type value_type;
1056 
1057  typedef typename ExprT1::scalar_type scalar_type_1;
1058  typedef typename ExprT2::scalar_type scalar_type_2;
1059  typedef typename Sacado::Promote<scalar_type_1,
1060  scalar_type_2>::type scalar_type;
1061 
1062  typedef typename value_type::value_type val_type;
1063 
1064  typedef ExprSpecMPVector expr_spec_type;
1065 
1066  KOKKOS_INLINE_FUNCTION
1067  PowerOp(const T1& expr1_, const T2& expr2_) :
1068  expr1(expr1_), expr2(expr2_) {}
1069 
1070  KOKKOS_INLINE_FUNCTION
1071  int size() const {
1072  const int sz1 = expr1.size(), sz2 = expr2.size();
1073  return sz1 > sz2 ? sz1 : sz2;
1074  }
1075 
1076  KOKKOS_INLINE_FUNCTION
1077  bool hasFastAccess() const {
1078  return expr1.hasFastAccess() && expr2.hasFastAccess();
1079  }
1080 
1081  KOKKOS_INLINE_FUNCTION
1082  value_type val() const {
1083  using std::pow;
1084  return pow(expr1.val(), expr2.val());
1085  }
1086 
1087  KOKKOS_INLINE_FUNCTION
1088  val_type val(int j) const {
1089  using std::pow;
1090  return pow(expr1.val(j), expr2.val(j));
1091  }
1092 
1093  KOKKOS_INLINE_FUNCTION
1094  val_type dx(int i, int j) const {
1095  using std::pow; using std::log;
1096  const int sz1 = expr1.size(), sz2 = expr2.size();
1097  if (sz1 > 0 && sz2 > 0)
1098  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1099  else if (sz1 > 0)
1100  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1101  // It seems less accurate and caused convergence problems in some codes
1102  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) );
1103  else
1104  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1105  }
1106 
1107  KOKKOS_INLINE_FUNCTION
1108  val_type fastAccessDx(int i, int j) const {
1109  using std::pow; using std::log;
1110  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1111  }
1112 
1113  protected:
1114 
1115  const T1& expr1;
1116  const T2& expr2;
1117 
1118  };
1119 
1120  template <typename T1, typename T2>
1121  class PowerOp< T1, T2, false, true, ExprSpecMPVector, true >
1122  : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector, true > > {
1123  public:
1124 
1125  typedef typename std::remove_cv<T1>::type ExprT1;
1126  typedef T2 ConstT;
1127  typedef typename ExprT1::value_type value_type;
1128  typedef typename ExprT1::scalar_type scalar_type;
1129 
1130  typedef typename value_type::value_type val_type;
1131 
1132  typedef ExprSpecMPVector expr_spec_type;
1133 
1134  KOKKOS_INLINE_FUNCTION
1135  PowerOp(const T1& expr1_, const ConstT& c_) :
1136  expr1(expr1_), c(c_) {}
1137 
1138  KOKKOS_INLINE_FUNCTION
1139  int size() const {
1140  return expr1.size();
1141  }
1142 
1143  KOKKOS_INLINE_FUNCTION
1144  bool hasFastAccess() const {
1145  return expr1.hasFastAccess();
1146  }
1147 
1148  KOKKOS_INLINE_FUNCTION
1149  value_type val() const {
1150  using std::pow;
1151  return pow(expr1.val(), c);
1152  }
1153 
1154  KOKKOS_INLINE_FUNCTION
1155  val_type val(int j) const {
1156  using std::pow;
1157  return pow(expr1.val(j), c.fastAccessCoeff(j));
1158  }
1159 
1160  KOKKOS_INLINE_FUNCTION
1161  val_type dx(int i, int j) const {
1162  using std::pow;
1163  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1164  // It seems less accurate and caused convergence problems in some codes
1165  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) );
1166  }
1167 
1168  KOKKOS_INLINE_FUNCTION
1169  val_type fastAccessDx(int i, int j) const {
1170  using std::pow;
1171  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1172  // It seems less accurate and caused convergence problems in some codes
1173  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) );
1174  }
1175 
1176  protected:
1177 
1178  const T1& expr1;
1179  const ConstT& c;
1180  };
1181 
1182  template <typename T1, typename T2>
1183  class PowerOp< T1, T2, true, false, ExprSpecMPVector, true >
1184  : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector, true > > {
1185  public:
1186 
1187  typedef typename std::remove_cv<T2>::type ExprT2;
1188  typedef T1 ConstT;
1189  typedef typename ExprT2::value_type value_type;
1190  typedef typename ExprT2::scalar_type scalar_type;
1191 
1192  typedef typename value_type::value_type val_type;
1193 
1194  typedef ExprSpecMPVector expr_spec_type;
1195 
1196  KOKKOS_INLINE_FUNCTION
1197  PowerOp(const ConstT& c_, const T2& expr2_) :
1198  c(c_), expr2(expr2_) {}
1199 
1200  KOKKOS_INLINE_FUNCTION
1201  int size() const {
1202  return expr2.size();
1203  }
1204 
1205  KOKKOS_INLINE_FUNCTION
1206  bool hasFastAccess() const {
1207  return expr2.hasFastAccess();
1208  }
1209 
1210  KOKKOS_INLINE_FUNCTION
1211  value_type val() const {
1212  using std::pow;
1213  return pow(c, expr2.val());
1214  }
1215 
1216  KOKKOS_INLINE_FUNCTION
1217  val_type val(int j) const {
1218  using std::pow;
1219  return pow(c.fastAccessCoeff(j), expr2.val(j));
1220  }
1221 
1222  KOKKOS_INLINE_FUNCTION
1223  val_type dx(int i, int j) const {
1224  using std::pow; using std::log;
1225  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1226  }
1227 
1228  KOKKOS_INLINE_FUNCTION
1229  val_type fastAccessDx(int i, int j) const {
1230  using std::pow; using std::log;
1231  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1232  }
1233 
1234  protected:
1235 
1236  const ConstT& c;
1237  const T2& expr2;
1238  };
1239 
1240  //
1241  // Specialization for scalar types using ternary operator
1242  //
1243 
1244  template <typename T1, typename T2>
1245  class PowerOp< T1, T2, false, false, ExprSpecMPVector, false > :
1246  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector, false > > {
1247  public:
1248 
1249  typedef typename std::remove_cv<T1>::type ExprT1;
1250  typedef typename std::remove_cv<T2>::type ExprT2;
1251  typedef typename ExprT1::value_type value_type_1;
1252  typedef typename ExprT2::value_type value_type_2;
1253  typedef typename Sacado::Promote<value_type_1,
1254  value_type_2>::type value_type;
1255 
1256  typedef typename ExprT1::scalar_type scalar_type_1;
1257  typedef typename ExprT2::scalar_type scalar_type_2;
1258  typedef typename Sacado::Promote<scalar_type_1,
1259  scalar_type_2>::type scalar_type;
1260 
1261  typedef typename value_type::value_type val_type;
1262 
1263  typedef ExprSpecMPVector expr_spec_type;
1264 
1265  KOKKOS_INLINE_FUNCTION
1266  PowerOp(const T1& expr1_, const T2& expr2_) :
1267  expr1(expr1_), expr2(expr2_) {}
1268 
1269  KOKKOS_INLINE_FUNCTION
1270  int size() const {
1271  const int sz1 = expr1.size(), sz2 = expr2.size();
1272  return sz1 > sz2 ? sz1 : sz2;
1273  }
1274 
1275  KOKKOS_INLINE_FUNCTION
1276  bool hasFastAccess() const {
1277  return expr1.hasFastAccess() && expr2.hasFastAccess();
1278  }
1279 
1280  KOKKOS_INLINE_FUNCTION
1281  value_type val() const {
1282  using std::pow;
1283  return pow(expr1.val(), expr2.val());
1284  }
1285 
1286  KOKKOS_INLINE_FUNCTION
1287  val_type val(int j) const {
1288  using std::pow;
1289  return pow(expr1.val(j), expr2.val(j));
1290  }
1291 
1292  KOKKOS_INLINE_FUNCTION
1293  val_type dx(int i, int j) const {
1294  using std::pow; using std::log;
1295  const int sz1 = expr1.size(), sz2 = expr2.size();
1296  if (sz1 > 0 && sz2 > 0)
1297  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1298  else if (sz1 > 0)
1299  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1300  // It seems less accurate and caused convergence problems in some codes
1301  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j)));
1302  else
1303  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1304  }
1305 
1306  KOKKOS_INLINE_FUNCTION
1307  val_type fastAccessDx(int i, int j) const {
1308  using std::pow; using std::log;
1309  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1310  }
1311 
1312  protected:
1313 
1314  const T1& expr1;
1315  const T2& expr2;
1316 
1317  };
1318 
1319  template <typename T1, typename T2>
1320  class PowerOp< T1, T2, false, true, ExprSpecMPVector, false >
1321  : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector, false > > {
1322  public:
1323 
1324  typedef typename std::remove_cv<T1>::type ExprT1;
1325  typedef T2 ConstT;
1326  typedef typename ExprT1::value_type value_type;
1327  typedef typename ExprT1::scalar_type scalar_type;
1328 
1329  typedef typename value_type::value_type val_type;
1330 
1331  typedef ExprSpecMPVector expr_spec_type;
1332 
1333  KOKKOS_INLINE_FUNCTION
1334  PowerOp(const T1& expr1_, const ConstT& c_) :
1335  expr1(expr1_), c(c_) {}
1336 
1337  KOKKOS_INLINE_FUNCTION
1338  int size() const {
1339  return expr1.size();
1340  }
1341 
1342  KOKKOS_INLINE_FUNCTION
1343  bool hasFastAccess() const {
1344  return expr1.hasFastAccess();
1345  }
1346 
1347  KOKKOS_INLINE_FUNCTION
1348  value_type val() const {
1349  using std::pow;
1350  return pow(expr1.val(), c);
1351  }
1352 
1353  KOKKOS_INLINE_FUNCTION
1354  val_type val(int j) const {
1355  using std::pow;
1356  return pow(expr1.val(j), c.fastAccessCoeff(j));
1357  }
1358 
1359  KOKKOS_INLINE_FUNCTION
1360  val_type dx(int i, int j) const {
1361  using std::pow;
1362  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1363  // It seems less accurate and caused convergence problems in some codes
1364  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)));
1365  }
1366 
1367  KOKKOS_INLINE_FUNCTION
1368  val_type fastAccessDx(int i, int j) const {
1369  using std::pow;
1370  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1371  // It seems less accurate and caused convergence problems in some codes
1372  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)));
1373  }
1374 
1375  protected:
1376 
1377  const T1& expr1;
1378  const ConstT& c;
1379  };
1380 
1381  template <typename T1, typename T2>
1382  class PowerOp< T1, T2, true, false, ExprSpecMPVector, false >
1383  : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector, false > > {
1384  public:
1385 
1386  typedef typename std::remove_cv<T2>::type ExprT2;
1387  typedef T1 ConstT;
1388  typedef typename ExprT2::value_type value_type;
1389  typedef typename ExprT2::scalar_type scalar_type;
1390 
1391  typedef typename value_type::value_type val_type;
1392 
1393  typedef ExprSpecMPVector expr_spec_type;
1394 
1395  KOKKOS_INLINE_FUNCTION
1396  PowerOp(const ConstT& c_, const T2& expr2_) :
1397  c(c_), expr2(expr2_) {}
1398 
1399  KOKKOS_INLINE_FUNCTION
1400  int size() const {
1401  return expr2.size();
1402  }
1403 
1404  KOKKOS_INLINE_FUNCTION
1405  bool hasFastAccess() const {
1406  return expr2.hasFastAccess();
1407  }
1408 
1409  KOKKOS_INLINE_FUNCTION
1410  value_type val() const {
1411  using std::pow;
1412  return pow(c, expr2.val());
1413  }
1414 
1415  KOKKOS_INLINE_FUNCTION
1416  val_type val(int j) const {
1417  using std::pow;
1418  return pow(c.fastAccessCoeff(j), expr2.val(j));
1419  }
1420 
1421  KOKKOS_INLINE_FUNCTION
1422  val_type dx(int i, int j) const {
1423  using std::pow; using std::log;
1424  return c.fastAccessCoeff(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j)));
1425  }
1426 
1427  KOKKOS_INLINE_FUNCTION
1428  val_type fastAccessDx(int i, int j) const {
1429  using std::pow; using std::log;
1430  return c.fastAccessCoeff(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j)));
1431  }
1432 
1433  protected:
1434 
1435  const ConstT& c;
1436  const T2& expr2;
1437  };
1438 
1439  }
1440  }
1441 }
1442 
1443 //--------------------------if_then_else operator -----------------------
1444 // Can't use the above macros because it is a ternary operator (sort of).
1445 // Also, relies on C++11
1446 
1447 namespace Sacado {
1448  namespace Fad {
1449  namespace Exp {
1450 
1451  template <typename CondT, typename T1, typename T2>
1452  class IfThenElseOp< CondT,T1,T2,false,false,ExprSpecMPVector > :
1453  public Expr< IfThenElseOp< CondT, T1, T2, false, false, ExprSpecMPVector > > {
1454 
1455  public:
1456 
1457  typedef typename std::remove_cv<T1>::type ExprT1;
1458  typedef typename std::remove_cv<T2>::type ExprT2;
1461  typedef typename Sacado::Promote<value_type_1,
1463 
1466  typedef typename Sacado::Promote<scalar_type_1,
1468 
1470 
1472 
1473  KOKKOS_INLINE_FUNCTION
1474  IfThenElseOp(const CondT& cond_, const T1& expr1_, const T2& expr2_) :
1475  cond(cond_), expr1(expr1_), expr2(expr2_) {}
1476 
1477  KOKKOS_INLINE_FUNCTION
1478  int size() const {
1479  int sz1 = expr1.size(), sz2 = expr2.size();
1480  return sz1 > sz2 ? sz1 : sz2;
1481  }
1482 
1483  KOKKOS_INLINE_FUNCTION
1484  bool hasFastAccess() const {
1485  return expr1.hasFastAccess() && expr2.hasFastAccess();
1486  }
1487 
1488  KOKKOS_INLINE_FUNCTION
1489  value_type val() const {
1490  return if_then_else( cond, expr1.val(), expr2.val() );
1491  }
1492 
1493  KOKKOS_INLINE_FUNCTION
1494  val_type val(int j) const {
1495  return if_then_else( cond, expr1.val(j), expr2.val(j) );
1496  }
1497 
1498  KOKKOS_INLINE_FUNCTION
1499  val_type dx(int i, int j) const {
1500  return if_then_else( cond, expr1.dx(i,j), expr2.dx(i,j) );
1501  }
1502 
1503  KOKKOS_INLINE_FUNCTION
1504  val_type fastAccessDx(int i, int j) const {
1505  return if_then_else( cond, expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) );
1506  }
1507 
1508  protected:
1509 
1510  const CondT& cond;
1511  const T1& expr1;
1512  const T2& expr2;
1513 
1514  };
1515 
1516  template <typename CondT, typename T1, typename T2>
1517  class IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector> :
1518  public Expr< IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector > > {
1519 
1520  public:
1521 
1522  typedef typename std::remove_cv<T1>::type ExprT1;
1523  typedef T2 ConstT;
1524  typedef typename ExprT1::value_type value_type;
1526 
1528 
1529  KOKKOS_INLINE_FUNCTION
1530  IfThenElseOp(const CondT& cond_, const T1& expr1_, const ConstT& c_) :
1531  cond(cond_), expr1(expr1_), c(c_) {}
1532 
1533  KOKKOS_INLINE_FUNCTION
1534  int size() const {
1535  return expr1.size();
1536  }
1537 
1538  KOKKOS_INLINE_FUNCTION
1539  bool hasFastAccess() const {
1540  return expr1.hasFastAccess();
1541  }
1542 
1543  KOKKOS_INLINE_FUNCTION
1544  value_type val() const {
1545  return if_then_else( cond, expr1.val(), c );
1546  }
1547 
1548  KOKKOS_INLINE_FUNCTION
1549  val_type val(int j) const {
1550  return if_then_else( cond, expr1.val(j), c.fastAccessCoeff(j) );
1551  }
1552 
1553  KOKKOS_INLINE_FUNCTION
1554  val_type dx(int i, int j) const {
1555  return if_then_else( cond, expr1.dx(i,j), val_type(0.0) );
1556  }
1557 
1558  KOKKOS_INLINE_FUNCTION
1559  val_type fastAccessDx(int i, int j) const {
1560  return if_then_else( cond, expr1.fastAccessDx(i,j), val_type(0.0) );
1561  }
1562 
1563  protected:
1564 
1565  const CondT& cond;
1566  const T1& expr1;
1567  const typename ConstTypeRef<ConstT,value_type>::type c;
1568  };
1569 
1570  template <typename CondT, typename T1, typename T2>
1571  class IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector> :
1572  public Expr< IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector > > {
1573 
1574  public:
1575 
1576  typedef typename std::remove_cv<T2>::type ExprT2;
1577  typedef T1 ConstT;
1578  typedef typename ExprT2::value_type value_type;
1580 
1582 
1584 
1585  KOKKOS_INLINE_FUNCTION
1586  IfThenElseOp(const CondT& cond_, const ConstT& c_, const T2& expr2_) :
1587  cond(cond_), c(c_), expr2(expr2_) {}
1588 
1589  KOKKOS_INLINE_FUNCTION
1590  int size() const {
1591  return expr2.size();
1592  }
1593 
1594  KOKKOS_INLINE_FUNCTION
1595  bool hasFastAccess() const {
1596  return expr2.hasFastAccess();
1597  }
1598 
1599  KOKKOS_INLINE_FUNCTION
1600  value_type val() const {
1601  return if_then_else( cond, c, expr2.val() );
1602  }
1603 
1604  KOKKOS_INLINE_FUNCTION
1605  val_type val(int j) const {
1606  return if_then_else( cond, c.fastAccessCoeff(j), expr2.val(j) );
1607  }
1608 
1609  KOKKOS_INLINE_FUNCTION
1610  val_type dx(int i, int j) const {
1611  return if_then_else( cond, val_type(0.0), expr2.dx(i,j) );
1612  }
1613 
1614  KOKKOS_INLINE_FUNCTION
1615  val_type fastAccessDx(int i, int j) const {
1616  return if_then_else( cond, val_type(0.0), expr2.fastAccessDx(i,j) );
1617  }
1618 
1619  protected:
1620 
1621  const CondT& cond;
1622  const typename ConstTypeRef<ConstT,value_type>::type c;
1623  const T2& expr2;
1624  };
1625 
1626  }
1627  }
1628 }
1629 
1630 #undef FAD_BINARYOP_MACRO
1631 
1632 #endif // SACADO_FAD_EXP_MP_VECTOR_HPP
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 expr1 expr1 expr1 j *expr1 expr2 expr1 expr1 j *expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 Atan2Op
fabs(expr.val())
asinh(expr.val())
tan(expr.val())
abs(expr.val())
expr expr SinOp
expr expr SinhOp
cos(expr.val())
expr expr SqrtOp
expr expr ACosOp
expr expr ATanOp
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MinOp
cosh(expr.val())
expr expr ACoshOp
acos(expr.val())
sin(expr.val())
sinh(expr.val())
expr expr ASinOp
#define FAD_BINARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, CDX1, CDX2, FASTACCESSDX, MPVAL_CONST_DX_1, MPVAL_CONST_DX_2, VAL_CONST_DX_1, VAL_CONST_DX_2, CONST_DX_1, CONST_DX_2, CONST_FASTACCESSDX_1, CONST_FASTACCESSDX_2)
cbrt(expr.val())
log10(expr.val())
expr expr TanhOp
expr expr TanOp
expr expr CoshOp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 MultiplicationOp
expr expr ASinhOp
exp(expr.val())
atan(expr.val())
acosh(expr.val())
expr expr AbsOp
atan2(expr1.val(), expr2.val())
sqrt(expr.val())
expr expr ATanhOp
expr expr expr expr ExpOp
expr expr expr expr fastAccessDx(i, j)) FAD_UNARYOP_MACRO(exp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 DivisionOp
expr val()
tanh(expr.val())
atanh(expr.val())
if_then_else(expr.val() >=0, expr.dx(i, j), value_type(-expr.dx(i, j)))
expr expr expr dx(i, j)
expr expr CosOp
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MaxOp
#define FAD_UNARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, FASTACCESSDX)
asin(expr.val())
Expression template specialization tag for Fad< MP::Vector >
KOKKOS_INLINE_FUNCTION val_type & fastAccessDx(int i, int j)
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION const val_type & fastAccessDx(int i, int j) const
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION val_type dx(int i, int j) const
Returns derivative component i with bounds checking.
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const T2 &expr2_)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const ConstT &c_)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const ConstT &c_, const T2 &expr2_)
KOKKOS_INLINE_FUNCTION PCE< Storage > pow(const PCE< Storage > &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION PCE< Storage > log(const PCE< Storage > &a)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:265