TBCI Numerical high perf. C++ Library  2.8.0
basics.h
Go to the documentation of this file.
1 
32 #ifndef TBCI_BASICS_H
33 #define TBCI_BASICS_H
34 
35 #include "tbci/tbci_version.h"
36 
37 #ifdef HAVE_TBCICONFIG_H
38 # include "tbci/tbciconfig.h"
39 #else
40 # include "tbci/config_manual.h"
41 #endif
42 
// __GNUC_SUBVER__ is passed on the command line just in case; use it as the
// patch level whenever the compiler does not predefine __GNUC_PATCHLEVEL__.
// (The guard previously tested the misspelled name __GNUC__SUBVER__, so the
// fallback could never trigger.)
#if !defined(__GNUC_PATCHLEVEL__) && defined(__GNUC_SUBVER__)
# define __GNUC_PATCHLEVEL__ __GNUC_SUBVER__
#endif
47 
48 /* Win vs. Unix
49  * - HAVE_WIN_32: In case somebody wants to know
50  * - HAVE_UNIX: For the symmetry
51  */
52 #undef HAVE_WIN_32
53 #undef HAVE_UNIX
54 #ifdef unix
55 # define HAVE_UNIX 1
56 #endif
57 #ifdef _MSC_VER
58 # define HAVE_WIN_32 1
59 // Switch off annoying stuff
60 # pragma warning(disable: 4003)
61 #endif
62 
63 // SMP?
64 #if defined(USE_SMP) && !defined(SMP)
65 # define SMP
66 #endif
67 
68 // Use namespaces?
69 #if !defined(NO_NS) && !defined(HAVE_BUGGY_NAMESPACE)
70 # define USE_NS
71 # undef NO_NS
72 #else
73 # undef USE_NS
74 #endif
75 
76 // Signatures
77 #ifdef HAVE_SIGNATURE
78 # define SIGNATURE signature
79 #else
80 # define SIGNATURE class
81 #endif
82 
83 // restrict
84 #if defined(HAVE_RESTRICT) && !defined(NO_RESTRICT)
85 # define RESTRICT restrict
86 #elif defined (HAVE___RESTRICT__) && !defined(NO_RESTRICT)
87 # define RESTRICT __restrict__
88 #else
89 # define RESTRICT
90 #endif
91 
96 #if defined(HAVE_BUILTIN_EXPECT) && !defined(NO_EXPECT)
97 # define LIKELY(expr) __builtin_expect((expr) != 0, 1)
98 # define UNLIKELY(expr) __builtin_expect((expr) != 0, 0)
99 #else
100 # define LIKELY(expr) (expr)
101 # define UNLIKELY(expr) (expr)
102 #endif
103 
104 // http://gcc.gnu.org/wiki/Visibility
105 #ifdef _MSC_VER
106 # ifdef BUILDING_DLL
107 # define TBCI_DLLEXPORT __declspec(dllexport)
108 # else
109 # define TBCI_DLLEXPORT __declspec(dllimport)
110 # endif
111 # define TBCI_DLLLOCAL
112 #else
113 # ifdef HAVE_VISIBILITY_ATTR
114 # define TBCI_DLLEXPORT __attribute__ ((visibility("default")))
115 # define TBCI_DLLLOCAL __attribute__ ((visibility("hidden")))
116 # else
117 # define TBCI_DLLEXPORT
118 # define TBCI_DLLLOCAL
119 # endif
120 #endif
121 
122 // pragma interface/implementation stuff
123 #if defined(HAVE_PRAGMA_IFACE_IMPL) && !defined(NO_PRAGMA_I)
124 # define PRAGMA_I
125 #endif
126 
127 // guiding decl stuff; explicit template params
128 #ifdef HAVE_NEED_FOR_EXPL_TMPLPARM
129 # define FGD <>
130 # define FGDT <T>
131 # define FGDU <U>
132 # define FGDTD <T,dims>
133 # define FGDDT <dims,T>
134 # define FGDCT <cplx<T> >
135 # define FGDR <rank>
136 #else
137 # define FGD
138 # define FGDT
139 # define FGDU
140 # define FGDTD
141 # define FGDDT
142 # define FGDCT
143 # define FGDR
144 #endif
145 
146 // BorlandC++ 0x570 compat with newer glibc headers
147 // This is a problem with Borland's cdefs.h; it replaces cdefs.h
148 // from glibc; glibc-2.3 has and needs the defs from below
149 #if defined(__BORLANDC__) && defined(__linux__)
150 # define __BEGIN_NAMESPACE_STD
151 # define __END_NAMESPACE_STD
152 # define __USING_NAMESPACE_STD(name)
153 # define __BEGIN_NAMESPACE_C99
154 # define __END_NAMESPACE_C99
155 # define __USING_NAMESPACE_C99(name)
156 #endif
157 
158 // Include headers
159 # include <iostream>
160 # include <fstream>
161 # include <iomanip>
162 # include <string>
163 #ifndef HAVE_NO_NEW_HEADERS_BUG
164 # include <cstring>
165 # include <cstdarg>
166 # include <cstdlib>
167 # include <cmath>
168 //# include <sstream>
169 #else
170 //# include <iostream.h>
171 //# include <fstream.h>
172 //# include <ios.h>
173 //# include <iomanip.h>
174 # include <string.h>
175 # include <stdarg.h>
176 # include <stdlib.h>
177 # include <math.h>
178 // no way to include C++ string within this scheme ...
179 #endif
180 
181 #ifdef HAVE_UNISTD_H
182 # include <unistd.h>
183 #endif
184 
185 #if defined(C_MEMALLOC) && defined(HAVE_MEMALIGN)
186 # include <malloc.h>
187 #endif
188 
189 // IRIX defines MIN and MAX there
190 #ifdef HAVE_MINMAX_IN_SYS_PARAM_H
191 # include <sys/param.h>
192 #endif
193 
195 #if !defined(HAVE_SSTREAM) || defined(HAVE_BUGGY_SSTREAM)
196 //# include <strstream>
197 # define ISTRINGSTREAM istrstream
198 # define OSTRINGSTREAM ostrstream
199 # define STRINGSTREAM strstream
200 #else
201 //# include <sstream>
202 # define ISTRINGSTREAM istringstream
203 # define OSTRINGSTREAM ostringstream
204 # define STRINGSTREAM stringstream
205 #endif
206 
207 
208 // long long and long double types
209 #ifdef HAVE_LONG_DOUBLE
210 # define LONG_DOUBLE long double
211 #else
212 # define LONG_DOUBLE double
213 #endif
214 #ifdef HAVE_LONG_LONG
215 # define LONG_LONG long long
216 #else
217 # define LONG_LONG long
218 #endif
219 
220 // Auto instantiate using the auto_decl program
221 #ifdef AUTO_DECL
222 # define INST(x) _instantiate x
223 # define INST2(x,y) _instantiate x,y
224 # define INST3(x,y,z) _instantiate x,y,z
225 # define INST4(x,y,z,a) _instantiate x,y,z,a
226 # define INST5(x,y,z,a,b) _instantiate x,y,z,a,b
227 # define INST6(x,y,z,a,b,c) _instantiate x,y,z,a,b,c
228 # define NOINST _noinstantiate
229 # define INSTCTL(x) _instfile x
230 #else
231 # define INST(x)
232 # define INST2(x,y)
233 # define INST3(x,y,z)
234 # define INST4(x,y,z,a)
235 # define INST5(x,y,z,a,b)
236 # define INST6(x,y,z,a,b,c)
237 # define NOINST
238 # define INSTCTL(x)
239 #endif
240 
241 // NULL
242 #ifndef NULL
243 # define NULL (0)
244 #endif
245 
246 // Remark: The math functions should be put into std:: namespace
247 // but neither egcs-1.1.x nor MSVC do this ...
248 // egcs-1.1.x at least can access them via std:: though.
249 
254 #if defined(NO_NS)
255 # define NAMESPACE_TBCI /* namespace TBCI { */
256 # define NAMESPACE_STD /* namespace std { */
257 # define NAMESPACE_CSTD /* namespace std { */
# define NAMESPACE_GRID /* namespace Grid { */
/* NAMESPACE_FD is defined in the other two namespace configurations; define
 * it (empty) here as well so it is available when namespaces are disabled. */
# define NAMESPACE_FD /* namespace Finite_Difference { */
259 # define NAMESPACE_CPLX /* namespace std { */
260 # define NAMESPACE_END /* } namespace */
261 # define NAMESPACE_STD_END /* } namespace std */
262 # define NAMESPACE_CSTD_END /* } namespace std */
263 # define NAMESPACE_CPLX_END /* } namespace std */
264 # define USING_TBCI /* using namespace TBCI */
265 # define USING_GRID /* using namespace Grid */
266 # define USING_FD /* using namespace FD */
267 # define USING_END /* using namespace std */
/* Without namespaces USING_STD expands to nothing. The macro used to be
 * misspelled USIND_STD, which left USING_STD undefined in this configuration;
 * the old spelling is kept as an alias for any code that picked it up. */
# define USING_STD /* using namespace std */
# define USIND_STD /* using namespace std */
269 # define FRIEND_TBCI__ /* */
270 # define FRIEND_TBCI2__ /* */
271 # define TBCI__ /*TBCI::*/
272 # define __TBCI__ /* ::TBCI:: */
273 # define GRID__ /*Grid::*/
274 # define FD__ /*FD::*/
275 # define STD__ /*std::*/
276 # define CSTD__ /*std::*/
277 # define MATH__ /*std::*/
278 # define CPLX__ /*std::*/
279 # define GLBL__ :: /* Look outside scope of class; */
280 # define GLBL2__ :: /* Look outside scope of class; */
281 #elif defined(NO_NS_TBCI)
282 # define NAMESPACE_TBCI /* namespace TBCI { */
283 # define NAMESPACE_STD namespace std {
284 # define NAMESPACE_CSTD namespace std {
285 # define NAMESPACE_CPLX namespace std {
286 # define NAMESPACE_GRID /* namespace Grid { */
287 # define NAMESPACE_FD /* namespace Finite_Difference { */
288 # define NAMESPACE_END /* } */; /* namespace */
289 # define NAMESPACE_STD_END } /* namespace std */
290 # define NAMESPACE_CSTD_END } /* namespace std */
291 # define NAMESPACE_CPLX_END } /* namespace std */
292 # define USING_TBCI /* using namespace TBCI; */
293 # define USING_GRID /* using namespace Grid; */
294 # define USING_FD /* using namespace Finite_Difference; */
295 # define USING_END using namespace std; /* Don't use! */
296 # define USING_STD using namespace std;
297 # define TBCI__ /* TBCI::*/
298 # define __TBCI__ /*::TBCI::*/
299 # define FRIEND_TBCI__ /* */
/* Was misspelled FRIEND_TCBI2__, leaving FRIEND_TBCI2__ undefined in this
 * configuration; the old spelling is kept as an alias for compatibility. */
# define FRIEND_TBCI2__ /* */
# define FRIEND_TCBI2__ /* */
301 # define GRID__ /*::Grid::*/
302 # define FD__ /*::FD::*/
303 # define STD__ std::
304 # define MATH__ std::
305 # define CSTD__ std::
306 # define CPLX__ std::
307 # define GLBL__ /* not needed if ::std:: is used */
308 # define GLBL2__ :: /* needed as TBCI__ is disabled */
309 #else
310 # define NAMESPACE_TBCI namespace TBCI {
311 # define NAMESPACE_STD namespace std {
312 # define NAMESPACE_CSTD namespace std {
313 # define NAMESPACE_GRID namespace Grid {
314 # define NAMESPACE_FD namespace Finite_Difference {
315 # define NAMESPACE_CPLX namespace std {
316 # define NAMESPACE_END } /* namespace */
317 # define NAMESPACE_STD_END } /* namespace std */
318 # define NAMESPACE_CSTD_END } /* namespace std */
319 # define NAMESPACE_CPLX_END } /* namespace std */
320 # define USING_TBCI using namespace TBCI;
321 # define USING_GRID using namespace Grid;
322 # define USING_FD using namespace Finite_Difference;
323 # define USING_END using namespace std; /* Don't use! */
324 # define USING_STD using namespace std;
325 # define TBCI__ TBCI::
326 # define __TBCI__ ::TBCI::
327 # define FRIEND_TBCI__ /* */
328 # define FRIEND_TBCI2__ /* */
329 # define GRID__ Grid::
330 # define FD__ FD::
331 # define STD__ std::
332 # define MATH__ std::
333 # define CSTD__ std::
334 # define CPLX__ std::
335 # define GLBL__ /* not needed if ::std:: is used */
336 # define GLBL2__ /* not needed if ::TBCI:: is used */
337 #endif
338 
344 #ifndef NO_NS
345 
346 # ifdef HAVE_LIBC_GLOBAL_NS_BUG
347 # undef MATH__
348 # define MATH__ ::
349 # undef CSTD__
350 # define CSTD__ ::
351 # undef NAMESPACE_CSTD
352 # define NAMESPACE_CSTD /* namespace std { */
353 # undef NAMESPACE_CSTD_END
354 # define NAMESPACE_CSTD_END /* } namespace std */
355 # endif
356 
357 # ifdef HAVE_CPP_GLOBAL_NS_BUG
358 # undef STD__
359 # define STD__ ::
360 # undef USING_STD
/* STD__ is mapped to the global namespace (::) in this configuration, so
 * there is no namespace to import. The former expansion "using namespace;"
 * is not a valid using-directive. */
# define USING_STD /* using namespace std; */
362 # undef NAMESPACE_STD
363 # define NAMESPACE_STD /* namespace std { */
364 # undef NAMESPACE_STD_END
365 # define NAMESPACE_STD_END /* } namespace std */
366 # endif
367 
368 # ifdef HAVE_CPLX_GLOBAL_NS_BUG
369 # undef CPLX__
370 # define CPLX__ ::
371 # undef NAMESPACE_CPLX
372 # define NAMESPACE_CPLX
373 # undef NAMESPACE_CPLX_END
374 # define NAMESPACE_CPLX_END
375 # endif
376 
377 /* We might want to know that LIBC/MATH functions are in a namespace
378  * different from std C++ stuff. Most notably affects sqrt(std::complex)
379  * and friends. */
380 # if defined(HAVE_LIBC_GLOBAL_NS_BUG) && !defined(HAVE_CPP_GLOBAL_NS_BUG)
381 # define HAVE_LIBC_NEQ_CPP_BUG
382 # endif
383 # if defined(HAVE_LIBC_GLOBAL_NS_BUG) && !defined(HAVE_CPLX_GLOBAL_NS_BUG)
384 # define HAVE_LIBC_NEQ_CPLX_BUG
385 # endif
386 
387 /* SGI MIPSpro thinks friend decls refer to global namespace,
388  * others shadow friends with member fns of same name if not scoped */
389 # if (HAVE_FRIEND_GLOBAL_NS_BUG || HAVE_NEED_FOR_FRIEND_SCOPE) && !defined(AUTO_DECL) && !defined(NO_NS_TBCI)
390 # undef FRIEND_TBCI__
391 # define FRIEND_TBCI__ TBCI::
392 # ifdef HAVE_FRIEND_GLOBAL_NS_BUG
393 # undef FRIEND_TBCI2__
394 # define FRIEND_TBCI2__ TBCI::
395 # endif
396 # endif
397 
398 #endif /* NO_NS */
399 
400 /* Note that abortion is illegal in a lot of countries ;-) */
401 #ifdef ABORT_ON_ERR
402 # define ABORT_RET(x) abort ()
403 # define ABORT_RET_NR abort ()
404 #else
405 # define ABORT_RET(x) return x
406 # define ABORT_RET_NR do {} while (0)
407 #endif
408 
413 #ifndef MIN_ALIGN
414 # define MIN_ALIGN 8
415 #endif
416 #ifndef MIN_ALIGN2
417 # define MIN_ALIGN2 16
418 #endif
419 
420 #undef ALIGN
421 #if defined(HAVE_ALIGN_ATTR) || defined(HAVE_NEW_ALIGN_ATTR)
422 # ifdef HAVE_NEW_ALIGN_ATTR
423 # define ALIGN3(v,i,x) v __attribute__ ((aligned(x))) (i)
424 # else
425 # define ALIGN3(v,i,x) v(i) __attribute__ ((aligned(x)))
426 # endif
427 # define ALIGN2(v,x) v __attribute__ ((aligned(x)))
428 # define ALIGN(x) __attribute__ ((aligned(x)))
429 #else
430 # ifdef HAVE_DECLSPEC_ALIGN
431 # define ALIGN3(v,i,x) __declspec(align(x)) v(i)
432 # define ALIGN2(v,x) __declspec(align(x)) v
433 # define ALIGN(x) __attribute__ ((aligned(x)))
434 # else
435 # define ALIGN3(v,i,x) v(i)
436 # define ALIGN2(v,x) v
437 # define ALIGN(x)
438 # endif
439 #endif
440 #ifdef HAVE_TEMPL_ALIGN_ATTR
441 # define TALIGN(x) ALIGN(x)
442 #else
443 # define TALIGN(x)
444 #endif
445 
446 #ifdef HAVE_CONST_ATTR
447 # define CONSTA __attribute__ ((const))
448 # define TBCI_CONST(x) x __attribute__ ((const)); x
449 #else
450 # define CONSTA
451 # define TBCI_CONST(x) x
452 #endif
453 #ifdef HAVE_REGPARM_ATTR
454 # define REGPARMA(n) __attribute__ ((regparm(n)))
455 # define REGPARM(n,x) x __attribute__ ((regparm(n))); x
456 #else
457 # define REGPARMA(n)
458 # define REGPARM(n,x) x
459 #endif
460 
461 #ifdef HAVE_UNUSED_ATTR
462 # define UNUSED __attribute__((unused))
463 #else
464 # define UNUSED
465 #endif
466 
467 
468 #ifdef HAVE_WEAK_ATTR
469 # define WEAKA __attribute__ ((weak))
470 # define WEAK(x) x __attribute__ ((weak)); x
471 #ifdef __INTEL_COMPILER // Does not like that attr in templ specializations
472 # define TWEAK(x) x
473 #else
474 # define TWEAK(x) x __attribute__ ((weak)); x
475 #endif
476 #else
477 # define WEAKA
478 # define WEAK(x) x
479 # define TWEAK(x) x
480 #endif
481 
482 #if defined(HAVE_HOT_ATTR) && defined(USE_HOT)
483 # define HOT __attribute__ ((hot))
484 # define COLD __attribute__ ((cold))
485 # define HOTDECL(x) x __attribute__ ((hot)); x
486 # define COLDDECL(x) x __attribute__ ((cold)); x
487 #else
488 # define HOT
489 # define COLD
490 # define HOTDECL(x) x
491 # define COLDDECL(x) x
492 #endif
493 
494 
495 /* Exception stuff */
496 #if !defined(NO_EXCEPT) && !defined(HAVE_BUGGY_EXCEPTIONS)
497 # include "tbci/except.h"
498 #else
499 # define TBCI_DISABLE_EXCEPT
500 #endif
501 
502 // Error checking
503 
504 #ifndef HAVE_PRETTY_FUNCTION
505 # if defined(HAVE_FUNC) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
506 # define __PRETTY_FUNCTION__ __func__
507 # else
508 # define __PRETTY_FUNCTION__ "<unknown>"
509 # endif
510 #endif
511 
512 
513 #define TBCIERRH1 __PRETTY_FUNCTION__
514 #define TBCIERRM(cond,exc,txt,ind) \
515  STD__ string(#exc) + " in " + TBCIERRH1 + ":\n " + #txt + " at "\
516  + __FILE__ + ":" + TBCI__ ltoa(__LINE__) + ":\n (" + #cond \
517  + ") == TRUE! (" + #ind + " = " + TBCI__ ltoa(ind) + ")"
518 #define TBCIERRS(cond,exc,txt,ind) \
519  #exc << " in " << TBCIERRH1 << ":\n " << #txt << " at " \
520  << __FILE__ << ":" << __LINE__ << ":\n (" << #cond \
521  << ") == TRUE! (" << #ind << " = " << ind << ")"
522 
523 
524 #ifndef TBCI_NO_ERRCHECK
525 
568 #define BCHK(cond,exc,txt,ind,rtval) \
569  /*if (UNLIKELY((cond) && __TBCI__ do_errcheck())) { */ \
570  if (UNLIKELY(__TBCI__ do_errcheck() && (cond))) { \
571  if (__TBCI__ do_except()) \
572  throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind); \
573  else \
574  STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl; \
575  if (__TBCI__ do_abort()) \
576  abort(); \
577  return rtval; \
578  }
579 #define BCHKNR(cond,exc,txt,ind) \
580  if (UNLIKELY(__TBCI__ do_errcheck() && (cond))) { \
581  if (__TBCI__ do_except()) \
582  throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind); \
583  else \
584  STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl; \
585  if (__TBCI__ do_abort()) \
586  abort(); \
587  }
588 #else
589 #ifdef __GNUC__
590 # warning Error checking disabled
591 #endif
592 # define BCHK(cond,exc,txt,ind,rtval) do {} while(0)
593 # define BCHKNR(cond,exc,txt,ind) do {} while(0)
594 #endif
595 
596 /* Expensive checks */
597 #ifdef TBCI_EXPCHECK
598 #define EXPCHK(cond,exc,txt,ind,rtval) \
599  /* if (UNLIKELY((cond) && do_expcheck())) { */ \
600  if (UNLIKELY(__TBCI__ do_expcheck() && (cond) )) { \
601  if (__TBCI__ do_except()) \
602  throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind); \
603  else \
604  STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl; \
605  if (__TBCI__ do_abort()) \
606  abort(); \
607  return rtval; \
608  }
609 #define EXPCHKNR(cond,exc,txt,ind) \
610  if (UNLIKELY(__TBCI__ do_expcheck() && (cond) )) { \
611  if (__TBCI__ do_except()) \
612  throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind); \
613  else \
614  STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl; \
615  if (__TBCI__ do_abort()) \
616  abort(); \
617  }
618 #ifdef __GNUC__
619 # warning Expensive error checking enabled
620 #endif
621 #else
622 /* OK, no expensive error checking */
623 # define EXPCHK(cond,exc,txt,ind,rtval) do {} while(0)
624 # define EXPCHKNR(cond,exc,txt,ind) do {} while(0)
625 #endif
626 
627 
628 #if defined(HAVE_ALGORITHM) && defined(HAVE_STD_MINMAX) &&!defined(HAVE_GCC_MINMAX)
629 # include <algorithm>
630 #endif
631 
636 #ifndef MIN
637 # ifdef HAVE_GCC_MINMAX
638 # define MIN(a,b) ((a) <? (b))
639 # define MAX(a,b) ((a) >? (b))
640 # elif defined(HAVE_STD_MINMAX)
641 # define MIN(a,b) std::min(a,b)
642 # define MAX(a,b) std::max(a,b)
643 # else
644 //TBCI_CONST(template <typename T> inline T MIN (const T a, const T b))
645 //{ return (a < b ? a : b); }
646 //TBCI_CONST(template <typename T> inline T MAX (const T a, const T b))
647 //{ return (a > b ? a : b); }
648 # define MIN(a,b) ((a) < (b)? (a) : (b))
649 # define MAX(a,b) ((a) > (b)? (a) : (b))
650 # endif
651 #endif
652 
/* DEBUGINFO: INFO(x) streams x to cout when DEBUGINFO is defined and expands
 * to nothing otherwise. The stream is reached through STD__, like the cerr
 * output of the error-checking macros, so it does not rely on a prior
 * using-declaration for cout. x is deliberately left unparenthesized so that
 * INFO(a << b) chains onto the stream. */
#ifdef DEBUGINFO
# define INFO(x) STD__ cout << x
#else
# define INFO(x)
#endif
659 
660 
661 /* Macros for return value optimization for GNU g++ */
662 //return value optimization
663 #if defined(RETVAL_OPT) && defined(HAVE_NAMED_RETVAL)
664 # define RET(x) return(x);
665 # define CONSTR(x)
666 #else
667 # define RET(x)
668 # define CONSTR(x) x
669 #endif /* RETVAL_OPT */
670 
671 // Work around MSVC failure to delete[] something else than void*
672 #if defined(_MSC_VER)
673 # define _VOID (void*)
674 #else
675 # define _VOID
676 #endif
677 
678 // Work around Borland C++ 0x570 bug not handling inline in
679 // templated member functions
680 #ifndef HAVE_BCXX_TEMPL_INLINE_MFUNC_BUG
681 # define INLINE inline
682 #else
683 # define INLINE
684 #endif
685 
687 
688 #if defined(_OPENMP) && !defined(TBCI_NO_OMP)
689 # define TBCI_OMP
690 # include <omp.h>
691 # define OMP_FOR _Pragma("omp parallel for")
692 # define OMP_FOR_REDUCE_F2 _Pragma("omp parallel for reduction(+:f2)")
693 # define OMP_FOR_REDUCE_F1F2 _Pragma("omp parallel for reduction(+:f1,f2)")
694 
695 # define NOSMP_VECVEC
696 
697 # define NOSMP_MATVEC
698 //# define NOSMP_BDMATVEC
699 //# define TBCI_NO_SIMD
700 #else
701 # define OMP_FOR
702 # define OMP_FOR_REDUCE_F2
703 # define OMP_FOR_REDUCE_F1F2
704 #endif
705 
706 #include "perf_opt.h"
707 #include "tbci_traits.h"
708 #include "tbci_param.h"
709 
710 // prefetch
711 // loc argument gives hint on whether the accessed variable
712 // should be left in cache. 3 = Yes (all caches), 0 = No (cache),
713 // 1, 2 are intermediate (only some of the caches)
714 #if defined(HAVE_BUILTIN_PREFETCH) && !defined(NO_PREFETCH)
715 # define PREFETCH_R(addr,loc) __builtin_prefetch(addr, 0, loc )
716 # define PREFETCH_W(addr,loc) __builtin_prefetch(addr, 1, loc )
717 # define USE_PREFETCH 1
718 #else
719 
726 # if defined(__alpha__) && defined(AXP_PREFETCH) && !defined(NO_PREFETCH) && defined(__GNUC__)
727 # define PREFETCH_R(addr,loc) asm (" lda $31,%0 \n" : : "m" (*(addr)) )
728 # define PREFETCH_W(addr,loc) asm (" lda $31,%0 \n" : : "m" (*(addr)) )
729 # define USE_PREFETCH 1
730 # else
731 # if defined(__i386__) && defined(SSE_PREFETCH) && !defined(NO_PREFETCH) && (defined(__GNUC__) || defined(__INTEL_COMPILER))
732 # define PREFETCH_R(addr,loc) asm (" prefetcht0 %0 \n" : : "m" (*(addr)) )
733 # define PREFETCH_W(addr,loc) asm (" prefetcht0 %0 \n" : : "m" (*(addr)) )
734 # define USE_PREFETCH 1
735 # else
736 # if defined(__i386__) && defined(AMD_PREFETCH) && !defined(NO_PREFETCH) && (defined(__GNUC__) || defined(__INTEL_COMPILER))
737 # define PREFETCH_R(addr,loc) asm (" prefetch %0 \n" : : "m" (*(addr)) )
738 # define PREFETCH_W(addr,loc) asm (" prefetchw %0 \n" : : "m" (*(addr)) )
739 # define USE_PREFETCH 1
740 # else
741 # define PREFETCH_R(addr,loc) do {} while (0)
742 # define PREFETCH_W(addr,loc) do {} while (0)
743 # undef USE_PREFETCH
744 # endif
745 # endif
746 # endif
747 #endif
748 
749 
750 #ifdef HAVE_TLS
751 # define THREAD__ __thread
752 #elif defined(HAVE_DTLS)
753 # define THREAD__ __declspec(thread)
754 #else
755 # define THREAD__
756 #endif
757 
758 //#if !defined(_WIN_32) && !defined(CYGWIN)
759 #ifdef HAVE_UNISTD_H
760 # include "smp.h"
761 #else
762 # define MAIN_PID (getpid())
763 # define num_threads (0)
764 # define thrno (0)
765 # define ismainthread (1)
766 #endif
767 
768 
769 #ifdef __SSE2__
770 # define TBCI_SIMD_ALIGN 16
771 #endif
772 
773 #ifdef USE_PLAIN_VEC_KERNELS
774 # include "plain_def.h"
775 #elif defined(USE_UNR_VEC_KERNELS2)
776 # include "unroll_prefetch_def2.h"
777 #else
778 # include "unroll_prefetch_def.h"
779 #endif
780 
781 
783 
793 template <typename T>
794 inline void SWAP (T& a, T& b)
795 {
796  register T ALIGN(MIN_ALIGN) tmp = a; a = b; b = tmp;
797 }
798 
800 #ifdef C_MEMALLOC
801 
809 # if defined(__GNUC__) && !defined(C_MEMALLOC_WE_KNOW_WHAT_WE_DO)
810 # warning "Don't define C_MEMALLOC if you use containers with elements which need intitalization"
811 # warning " or take care yourself ! (Read basics.h:690)"
812 # endif
/* NEW(t,s): allocate raw storage for s objects of type t through the C
 * allocator and cast the result to t*.
 * The count s is parenthesized everywhere it is used, so that an expression
 * argument such as NEW(t, n+1) requests sizeof(t)*(n+1) bytes rather than
 * sizeof(t)*n+1.
 * NOTE(review): the alignment handed to memalign, sizeof(t)*MIN(4,(s)>>1),
 * evaluates to 0 for s < 2 and is not necessarily a power of two -- confirm
 * that this is acceptable on the targeted platforms. */
# ifdef HAVE_MEMALIGN
#  define NEW(t,s) (t*) /*CSTD__*/ memalign (sizeof(t)*MIN(4,(s)>>1), sizeof(t)*(s))
# else
#  define NEW(t,s) (t*) CSTD__ malloc (sizeof(t)*(s))
# endif
818 # define TBCIDELETE(t,v,sz) do { BCHKNR(!v,NumErr,free null ptr,0); CSTD__ free (v); v = 0; } while (0)
819 # define TBCIDELETE_RO(t,v,sz) do { BCHKNR(!v,NumErr,free null ptr,0); CSTD__ free (v); } while (0)
820 # define REALLOC(v,os,t,s) v = (t*) CSTD__ realloc ((v), sizeof(t)*(s))
821 #elif !defined(NO_MALLOC_CACHE) //&& !defined(SMP)
823 # if defined(PRAGMA_I) && defined(PRAGMA_IMPL_MALLOC_CACHE)
824 # pragma implementation "malloc_cache.h"
825 # endif
826 # include "malloc_cache.h"
828 #else /* ! C_MEMALLOC && NO_MALLOC_CACHE */
829 # define NEW(t,s) new t[s]
830 # define TBCIDELETE(t,v,sz) do { BCHKNR(!v,NumErr,delete[] null ptr,0); delete[] _VOID v; v = 0; } while (0)
831 # define TBCIDELETE_RO(t,v,sz) do { BCHKNR(!v,NumErr,delete[] null ptr,0); delete[] _VOID v; } while (0)
832 # ifdef ALLOW_MEMCPY
833 # define REALLOC(v,os,t,s) do { \
834  t* _tmp = (v); \
835  if (LIKELY(s)) (v) = new t[(s)]; else (v) = 0; \
836  if (LIKELY(_tmp != (v) && _tmp && (v) && (os) && (s))) \
837  CSTD__ memcpy ((v), _tmp, sizeof(t)*MIN((os),(s))); \
838  if (LIKELY(_tmp)) delete[] (_tmp); \
839 } while (0)
840 # else
841 # define REALLOC(v,os,t,s) do { \
842  t* _tmp = (v); \
843  if (LIKELY(s)) (v) = new t[(s)]; else (v) = 0; \
844  if (LIKELY(_tmp != (v) && _tmp && (v) && (os) && (s))) { \
845  for (register unsigned long _i = 0; _i < MIN((s),(os)); _i++) \
846  (v)[_i] = _tmp[_i]; } \
847  if (LIKELY(_tmp)) delete[] (_tmp); \
848  } while (0)
849 # endif
850 #endif
851 
862 #if defined(C_MEMALLOC) || defined(ALLOW_MEMCPY)
863 # if defined(__GNUC__) && !defined(C_MEMALLOC_WE_KNOW_WHAT_WE_DO)
864 # warning "Don't define C_MEMALLOC or ALLOW_MEMCPY if you use containers with elements which"
865 # warning " use dynamic memory allocation or handle with pointers somehow. Copying otherwise"
866 # warning " will break things. (Read basics.h:740)"
867 # endif
868 # define TBCICOPY(n,o,t,s) CSTD__ memcpy ((n),(o),(s)*sizeof(t))
869 #else
870 # define VEC_INLINE inline
871 # define COPY2(res,v1,f1,f2) res = v1
873 # ifdef SMP
874 template <typename T>
875 void _par_copy (const unsigned long sz, T* v1, const T* v2);
876 # define TBCICOPY(n,o,t,s) _par_copy < t > (s,n,o)
877 # else
878 # define TBCICOPY(n,o,t,s) _tbci_copy < t > (s,n,o)
879 #endif
880 # undef VEC_INLINE
881 #endif
882 
883 
884 #if defined(C_MEMALLOC) || defined(ALLOW_MEMSET)
885 # define TBCICLEAR(n,t,s) CSTD__ memset ((n), 0, (s)*sizeof(t))
886 #else
887 # define VEC_INLINE /*inline*/
888 # define FILL1(res,f1,f2) res = f2
890 # ifdef SMP
891 template <typename T>
892 void _par_fill (const unsigned long, T * const, typename tbci_traits<T>::loop_const_refval_type);
893 # define TBCIFILL(n,v,t,s) _par_fill < t > (s,n,v)
894 # define TBCICLEAR(n,t,s) _par_fill < t > (s,n,t(0))
895 # else
896 # define TBCIFILL(n,v,t,s) _tbci_fill < t > (s,n,v)
897 # define TBCICLEAR(n,t,s) _tbci_fill < t > (s,n,t(0))
898 # endif
899 # undef VEC_INLINE
900 #endif
/**
 * Wrapper around a generator callback used to fill arrays.
 *
 * The callback receives the element index and an opaque user pointer and
 * returns the value for that element. The constructor is intentionally
 * non-explicit so a plain function pointer converts to a vec_fill_fn.
 */
template <typename T>
class vec_fill_fn {
 public:
  /** Generator: maps (index, user parameter) to an element value. */
  T (*fn)(const unsigned long idx, void* par);
  vec_fill_fn(T (*f)(const unsigned long, void*))
    : fn(f) {}
};

/**
 * Fill vec[0 .. sz-1] with generated values: vec[i] = fn.fn(i, par).
 *
 * @param sz   number of elements to write; nothing is written for sz == 0
 * @param vec  destination array, must provide room for sz elements
 * @param fn   generator wrapper whose callback produces each element
 * @param par  opaque pointer passed unchanged to every callback invocation
 *
 * Elements are generated in ascending index order. The `register` hint on
 * the loop counter was removed; the keyword is no longer valid as of C++17.
 */
template <typename T>
void _tbci_fill_fn(const unsigned long sz, T* vec, vec_fill_fn<T> fn, void* par)
{
  for (unsigned long i = 0; i < sz; ++i)
    vec[i] = fn.fn(i, par);
}
916 
917 
925 #if defined(C_MEMALLOC) || defined(ALLOW_MEMCMP)
926 # define TBCICOMP(n,o,t,s) CSTD__ memcmp ((n),(o),(s)*sizeof(t))
927 // Filling
928 # define TBCIFILL(n,v,t,s) \
929  do { for (register unsigned long _i = 0; _i < (s); _i++) \
930  (n)[_i] = (t)(v); } while (0)
931 #else
932 # if defined(__GNUG__) && !defined(PEDANTIC)
933 # define _TBCICOMP(n,o,t,s) ({ \
934  register int _r; register unsigned long _i; \
935  for (_r=0, _i=0; _i<(s) && !_r; _i++) { \
936  _r = (int)((n)[_i] != (o)[_i]); \
937  if (_r) break; \
938  } \
939  _r; \
940 })
941 # else
942 
943 HOTDECL(template <typename T>
944 inline int _tbci_comp (const unsigned long s, T const *n, T const *o))
945 {
946  // if (n == o) return 0;
947  for (register unsigned long _i = 0; _i < (s); _i++)
948  if (UNLIKELY((n)[_i] != (o)[_i])) return 1;
949  return 0;
950 }
951 # define TBCICOMP(n,o,t,s) _tbci_comp ((s),(n),(o))
952 # endif
953 #endif
955 #define VEC_INLINE /*inline*/
956 #define COMP2(r,v1,f1,f2) if (r != v1) { ++f2; i = sz; goto _fin; /* break; */ }
957 VKERN_TEMPL_2V_T(do_vv_comp, COMP2, volatile long);
959 #ifdef SMP
960 template <typename T>
961 int _par_comp (const unsigned long sz, const T* v1, const T* v2);
962 # define TBCICOMP(n,o,t,s) _par_comp< t > (s, n, o)
963 #else
964 # define TBCICOMP(n,o,t,s) ({ long res; do_vv_comp<t>(s,n,o,res); res; })
965 #endif
966 #undef VEC_INLINE
967 
968 // Helper functions
969 
/**
 * Binary-search helper for bin_search().
 *
 * Searches the sorted range vec[start+1 .. end-1] for el. The element at
 * index start is never examined: every caller in this file has already
 * compared it against el (bin_search() tests both ends up front, and each
 * narrowing step reuses an index that was just tested).
 *
 * @param vec    array sorted in ascending order
 * @param el     value to look for
 * @param start  lower bound (already tested by the caller)
 * @param end    one past the last index to consider
 * @return       index of an element equal to el, or (unsigned long)-1 if
 *               none is found or the range is empty/inverted
 *
 * Implemented as a loop; the midpoint is computed as
 * start + (end - start) / 2 so that it cannot wrap around for large
 * indices (the former (end + start) / 2 could).
 */
template <typename T>
unsigned long _bin_search(const T* vec, T el, unsigned long start, unsigned long end)
{
  // Stop once no untested index is left between start and end.
  while (end > start && end - start > 1) {
    const unsigned long half = start + (end - start) / 2;
    const T& probe = vec[half];
    if (probe == el)
      return half;
    if (probe < el)
      start = half;   // el can only be to the right of half
    else
      end = half;     // el can only be to the left of half
  }
  return (unsigned long)-1;
}
988 
993 template <typename T>
994 unsigned long bin_search(const T* vec, T el, unsigned long start, unsigned long end)
995 {
996  if (end == start)
997  return (unsigned long)-1;
998  if (vec[start] == el)
999  return start;
1000  else if (vec[end-1] == el)
1001  return end-1;
1002  if (vec[start] > el)
1003  return (unsigned long)-1;
1004  if (vec[end-1] < el)
1005  return (unsigned long)-1;
1006  BCHK(vec[end-1] < vec[start], NumErr, unsorted vector in bin_search, end-1, (unsigned long)-1);
1007  return _bin_search(vec, el, start, end);
1008 }
1009 
1011 
1036 #define _REF_
1037 TBCI_CONST(inline int conj (const int _REF_ arg)) {return arg;}
1039 TBCI_CONST(inline unsigned conj (const unsigned _REF_ arg)) {return arg;}
1040 TBCI_CONST(inline long conj (const long _REF_ arg)) {return arg;}
1041 TBCI_CONST(inline short conj (const short _REF_ arg)) {return arg;}
1042 TBCI_CONST(inline char conj (const char _REF_ arg)) {return arg;}
1043 TBCI_CONST(inline float conj (const float _REF_ arg)) {return arg;}
1044 TBCI_CONST(inline double conj (const double _REF_ arg)) {return arg;}
1046 TBCI_CONST(inline int real (const int _REF_ d)) { return d; }
1047 TBCI_CONST(inline unsigned real (const unsigned _REF_ d)) { return d; }
1048 TBCI_CONST(inline float real (const float _REF_ d)) { return d; }
1049 TBCI_CONST(inline double real (const double _REF_ d)) { return d; }
1051 TBCI_CONST(inline int imag (const int _REF_ d)) { return 0; }
1052 TBCI_CONST(inline unsigned imag (const unsigned _REF_ d)) { return 0; }
1053 TBCI_CONST(inline float imag (const float _REF_ d)) { return 0; }
1054 TBCI_CONST(inline double imag (const double _REF_ d)) { return 0; }
1055 
1056 #ifdef HAVE_LONG_DOUBLE
1057 TBCI_CONST(inline long double conj (const long double _REF_ arg)) {return arg;}
1058 TBCI_CONST(inline long double real (const long double _REF_ d)) { return d; }
1059 TBCI_CONST(inline long double imag (const long double _REF_ d)) { return 0; }
1060 #endif
1061 #ifdef HAVE_LONG_LONG
1062 TBCI_CONST(inline long long conj (const long long _REF_ arg)) {return arg;}
1063 TBCI_CONST(inline long long real (const long long _REF_ d)) { return d; }
1064 TBCI_CONST(inline long long imag (const long long _REF_ d)) { return 0; }
1065 #endif
1066 #undef _REF_
1068 
1069 #ifdef _INCLUDE_CPLX_H
1070 # include "cplx.h"
1071 #endif
1072 
1073 #ifdef _INCLUDE_STDCPLX_H
1074 # include "std_cplx.h"
1075 #endif
1076 
1077 #ifdef _INCLUDE_BUILTINCPLX_H
1078 # include "builtin_cplx.h"
1079 #endif
1080 
1081 
1084 // intel C++ 6.00 has a preprocessor (!) bug
1085 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1086 template <typename T> inline int sign (const T& x)
1087 #else
1088 TBCI_CONST(template <typename T> inline int sign (const T& x))
1089 #endif
1090 {
1091  if (LIKELY(CPLX__ real(x) > 0)) return 1;
1092  else if(LIKELY(CPLX__ real(x) < 0)) return -1;
1093  else return 0;
1094 }
1095 
1132 #ifdef TEMPLATED_FABSSQR
1133 TBCI_CONST(template <typename T> inline double fabssqr (const T& a))
1134 { return CPLX__ real(a * CPLX__ conj(a)); }
1135 #else
1136 # define FABSSQR(T) \
1137 TBCI_CONST(inline double fabssqr (const T a)) \
1138 { return CPLX__ real(a * CPLX__ conj(a)); }
1140 FABSSQR(double)
1141 FABSSQR(float)
1142 FABSSQR(int)
1143 FABSSQR(unsigned)
1144 # ifdef HAVE_LONG_DOUBLE
1145 FABSSQR(long double)
1146 # endif
1147 # ifdef HAVE_LONG_LONG
1148 FABSSQR(long long)
1149 # endif
1150 # undef FABSSQR
1151 #endif /* TEMPLATED_FABSSQR */
1152 
1153 // Square
1154 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1155 template <typename T> inline T sqr (const T& a)
1156 #else
1157 TBCI_CONST(template <typename T> inline T sqr (const T& a))
1158 #endif
1159 { return a*a; }
1160 
1161 // Dot product
1162 #if !defined(HAVE_WIN_32) && !defined (NO_NS) // Problems with other dot
1163 # if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1164 template <typename T> inline T dot (const T& a1, const T& a2)
1165 # else
1166 TBCI_CONST(template <typename T> inline T dot (const T& a1, const T& a2))
1167 # endif
1168 { return a1 * CPLX__ conj (a2); }
1169 #endif
1170 
1172 
1174 
1175 #if 0
1176 /* For the std namespace, we don't want to use templated functions */
1177 TBCI_CONST(template <typename T> inline double fabs (const T& a))
1178 { return a<(T)0?(double)-a:(double)a; }
1179 
1180 #else
1182 /* hopefully the std::fabs(double) is not extern "C" ... */
1183 # define FABS(T) \
1184 TBCI_CONST(inline double fabs (const T a)) \
1185 { return a<(T)0?(double)-a:(double)a; }
1186 /* Also provide sqrt(int) functions ... */
1187 # define SQRT(T) \
1188 TBCI_CONST(inline double sqrt (const T a)) \
1189 { return MATH__ sqrt ((double)a); }
1190 
1191 # ifdef HAVE_LONG_DOUBLE
1192 # if defined(__GNUC__) && __GNUC__ == 2 && __GNUC_MINOR__ < 97
1193 FABS(long double)
1194 SQRT(long double)
1195 # endif
1196 # endif
1197 //FABS(float);
1198 FABS(int)
1199 SQRT(int)
1200 //SQRT(unsigned int); // MSVC does not like it ...
1201 # ifdef HAVE_LONG_LONG
1202 FABS(long long)
1203 SQRT(long long)
1204 # endif
1205 //FABS(unsigned int); // MSVC should not like it either ...
1206 TBCI_CONST(inline double fabs (const unsigned int a))
1207 { return (double)a; }
1208 
/* FABS and SQRT are only helpers for generating the overloads above; remove
 * both so they do not leak into code including this header (SQRT used to be
 * left defined). */
# undef FABS
# undef SQRT
1210 #endif /* 0 */
1211 
1213 
1215 #if 0
1216 /* For the std namespace, we don't want to use templated functions */
1217 TBCI_CONST(template <typename T> inline T abs (const T& a))
1218 { return a<(T)0?-a:a; }
1219 
1220 #else
1221 
1222 # define ABS(T) \
1223 TBCI_CONST(inline T abs (const T a)) \
1224 { return a<(T)0?-a:a; }
1225 
1226 # ifdef HAVE_MISS_CSTD_ABS_BUG
1227 # ifdef HAVE_LONG_DOUBLE
1228 ABS(long double)
1229 # endif
1230 ABS(double)
1231 ABS(float)
1232 # ifdef HAVE_LONG_LONG
1233 //ABS(long long)
1234 # endif
1235 //ABS(unsigned)
1236 # endif
1238 # if 1 //defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1239 ABS(unsigned)
1240 # endif
1241 # undef ABS
1242 #endif /* 0 */
1243 
1245 
1246 #ifdef INLINE_VEC_KERNELS
1247 # define VEC_INLINE inline
1248 #else
1249 # define VEC_INLINE
1250 #endif
1251 
1257 typedef enum _vararg vararg;
1258 
1260 
1261 #ifndef _NO_INCLUDE_COST_H
1262 # include "cost.h"
1263 #endif
1264 
1265 #if !defined(NO_NS) && defined (USING_STD_NS)
1266 using std::cin;
1267 using std::cout;
1268 using std::cerr;
1269 using std::endl;
1270 using std::flush;
1271 using std::dec;
1272 using std::hex;
1273 using std::setw;
1274 using std::setprecision;
1275 using std::istream;
1276 using std::ostream;
1277 using std::ifstream;
1278 using std::ofstream;
1279 using std::string;
1280 # if defined(HAVE_SSTREAM) && !defined(HAVE_BUGGY_SSTREAM)
1281 using std::istringstream;
1282 using std::ostringstream;
1283 # else
1284 using std::istrstream;
1285 using std::ostrstream;
1286 # endif
1287 #endif
1288 
1289 #endif /* TBCI_BASICS_H */
macros for composing unrolled prefetching loops over arrays.
Matrix< T > a(10, 10)
#define _REF_
Definition: basics.h:1034
#define MIN_ALIGN
Definition: basics.h:414
#define ALIGN(x)
Definition: basics.h:437
int _par_comp(const unsigned long sz, const T *v1, const T *v2)
void _tbci_fill(const unsigned long sz, T *const res, register typename tbci_traits< T >::loop_const_refval_type f2)
Definition: basics.h:888
unsigned abs(const unsigned a)
Definition: basics.h:1237
Wrapper for GCC's builtin complex type plus compatibility features (helper class TBCI::hcplx) ...
unsigned long bin_search(const T *vec, T el, unsigned long start, unsigned long end)
Search for an element el in a sorted vector between start and end-1, returns (unsigned long)-1 if ele...
Definition: basics.h:992
#define NAMESPACE_TBCI
Definition: basics.h:310
#define COMP2(r, v1, f1, f2)
Definition: basics.h:954
Caches memory blocks to avoid calls to __builtin_free()/new(). It performs very well for strictly recu...
#define ABS(T)
Definition: basics.h:1220
exception base class for the TBCI NumLib
Definition: except.h:58
macros for composing plain loops over arrays.
#define COPY2(res, v1, f1, f2)
Definition: basics.h:871
#define FILL1(res, f1, f2)
Definition: basics.h:887
#define BCHK(cond, exc, txt, ind, rtval)
Definition: basics.h:568
#define NAMESPACE_CSTD_END
Definition: basics.h:318
#define UNLIKELY(expr)
Definition: basics.h:101
const Vector< T > const Vector< T > & x
Definition: LM_fit.h:97
Preprocessor macros for estimating the "cost" of operations.
void _par_copy(const unsigned long sz, T *v1, const T *v2)
#define VKERN_TEMPL_2V_T(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
Definition: plain_def.h:119
T arg(const TBCI__ cplx< T > &c)
Definition: cplx.h:690
#define NAMESPACE_CPLX
Definition: basics.h:315
#define FABSSQR(T)
Definition: basics.h:1134
void _par_fill(const unsigned long, T *const, typename tbci_traits< T >::loop_const_refval_type)
double conj(const double arg)
Definition: basics.h:1042
unsigned long _bin_search(const T *vec, T el, unsigned long start, unsigned long end)
Search for an element el in a sorted vector between start and end-1, returns (unsigned long)-1 if ele...
Definition: basics.h:973
#define VKERN_TEMPL_2V(FNAME, OP2)
Operations of type vec OP= vec.
Definition: plain_def.h:72
cplx< T > sqr(const cplx< T > &c)
Definition: cplx.h:449
F_TMatrix< T > b
Definition: f_matrix.h:736
#define NAMESPACE_CSTD
Definition: basics.h:312
int sign(const T &x)
Signum.
Definition: basics.h:1086
#define FABS(T)
Definition: basics.h:1181
#define TBCI_CONST(x)
Definition: basics.h:451
void SWAP(T &a, T &b)
SWAP function. Note: We could implement a swap function without temporaries: a -= b; b += a; a -= b; a = ...
Definition: basics.h:794
#define CPLX__
Definition: basics.h:334
T dot(const T &a1, const T &a2)
Definition: basics.h:1164
int i
Definition: LM_fit.h:71
#define VKERN_TEMPL_1V_C(FNAME, OP1)
Operations of type VEC OP= VAL.
Definition: plain_def.h:172
#define NAMESPACE_CPLX_END
Definition: basics.h:319
Wrapper for C++ std library complex type plus compatibility features.
double real(const double d)
Definition: basics.h:1047
double fabs(const unsigned int a)
Definition: basics.h:1204
Rich implementation of complex numbers TBCI::cplx.
T(* fn)(const unsigned long idx, void *par)
Definition: basics.h:903
_vararg
This is a helper type to identify and count varargs.
Definition: basics.h:1253
This provides some parameters that control the behavior of various functions in the TBCI library...
macros for composing unrolled prefetching loops over arrays.
#define NAMESPACE_END
Definition: basics.h:316
float real
Definition: f2c.h:31
double fabssqr(const unsigned a)
Definition: basics.h:1141
#define HOTDECL(x)
Definition: basics.h:490
double imag(const double d)
Definition: basics.h:1052
#define T
Definition: bdmatlib.cc:20
#define SQRT(T)
Definition: basics.h:1185
void do_vv_comp(const unsigned long sz, const T *const v1, const T *const v2, volatile long &_f2)
f2 = number of differences vec, vec
Definition: basics.h:956
enum _vararg vararg
Definition: basics.h:1255
#define LIKELY(expr)
Branch prediction. Note that we sometimes on purpose mark the unlikely possibility likely and vice versa ...
Definition: basics.h:100
void _tbci_fill_fn(const unsigned long sz, T *vec, vec_fill_fn< T > fn, void *par)
Definition: basics.h:909
void _tbci_copy(const unsigned long sz, T *const res, const T *const v1)
Definition: basics.h:872