tesseract  4.1.0
simddetect.cpp
Go to the documentation of this file.
1 // File: simddetect.cpp
3 // Description: Architecture detector.
4 // Author: Stefan Weil (based on code from Ray Smith)
5 //
6 // (C) Copyright 2014, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 
18 #include <numeric> // for std::inner_product
19 #include "simddetect.h"
20 #include "dotproduct.h"
21 #include "dotproductavx.h"
22 #include "dotproductsse.h"
23 #include "intsimdmatrix.h" // for IntSimdMatrix
24 #include "params.h" // for STRING_VAR
25 #include "tprintf.h" // for tprintf
26 
27 #if defined(AVX) || defined(AVX2) || defined(SSE4_1)
28 # define HAS_CPUID
29 #endif
30 
31 #if defined(HAS_CPUID)
32 #if defined(__GNUC__)
33 # include <cpuid.h>
34 #elif defined(_WIN32)
35 # include <intrin.h>
36 #endif
37 #endif
38 
39 namespace tesseract {
40 
41 // Computes and returns the dot product of the two n-vectors u and v.
42 // Note: because the order of addition is different among the different dot
43 // product functions, the results can (and do) vary slightly (although they
44 // agree to within about 4e-15). This produces different results when running
45 // training, despite all random inputs being precisely equal.
46 // To get consistent results, use just one of these dot product functions.
47 // On a test multi-layer network, serial is 57% slower than SSE, and AVX
48 // is about 8% faster than SSE. This suggests that the time is memory
49 // bandwidth constrained and could benefit from holding the reused vector
50 // in AVX registers.
52 
// Config variable naming the dot product implementation to use. Its default
// value is "auto"; the values it is compared against are listed in the
// config-handling code near the end of this file.
53 static STRING_VAR(dotproduct, "auto",
54  "Function used for calculation of dot product");
55 
// Definition of the static SIMDDetect::detector object. Constructing it runs
// SIMDDetect::SIMDDetect(), which is where the flags below are assigned.
56 SIMDDetect SIMDDetect::detector;
57 
// Definitions of the static detection flags. None is given an explicit
// initializer here; the constructor assigns them from CPUID feature bits,
// and only inside the corresponding build-macro guards.
58 // If true, then AVX has been detected.
59 bool SIMDDetect::avx_available_;
// If true, then AVX2 has been detected.
60 bool SIMDDetect::avx2_available_;
// If true, then AVX512F has been detected.
61 bool SIMDDetect::avx512F_available_;
// If true, then AVX512BW has been detected.
62 bool SIMDDetect::avx512BW_available_;
63 // If true, then SSE4.1 has been detected.
64 bool SIMDDetect::sse_available_;
65 
// Plain scalar dot product of the n-vectors u and v.
// Accumulates u[i] * v[i] in increasing index order starting from 0.0, so
// the result is 0.0 whenever n <= 0.
static double DotProductGeneric(const double* u, const double* v, int n) {
  double sum = 0.0;
  int i = 0;
  while (i < n) {
    sum += u[i] * v[i];
    ++i;
  }
  return sum;
}
72 
// Dot product of the n-vectors u and v, delegated to std::inner_product
// with a double accumulator seeded at 0.0.
static double DotProductStdInnerProduct(const double* u, const double* v, int n) {
  const double* const u_last = u + n;
  const double seed = 0.0;
  return std::inner_product(u, u_last, v, seed);
}
77 
// Installs f as the active dot product implementation by assigning it to
// DotProduct.
// NOTE(review): the parameter m is not used by the body visible here, and
// this listing jumps from line 79 to line 81, so a statement that consumes m
// may be missing from this view -- confirm against the original file.
78 static void SetDotProduct(DotProductFunction f, const IntSimdMatrix* m = nullptr) {
79  DotProduct = f;
81 }
82 
83 // Constructor.
84 // Tests the architecture in a system-dependent way to detect AVX, SSE and
85 // any other available SIMD equipment.
86 // __GNUC__ is also defined by compilers that include GNU extensions such as
87 // clang.
// Steps performed:
//  1. Install DotProductGeneric as the fallback.
//  2. If HAS_CPUID is defined, read CPUID (via <cpuid.h> helpers under
//     __GNUC__, via the __cpuid intrinsic under _WIN32) and fill in the
//     static *_available_ flags; any other compiler hits the #error.
//  3. Walk an if / else-if chain over the flags in the order AVX2, AVX, SSE.
88 SIMDDetect::SIMDDetect() {
89  // The fallback is a generic dot product calculation.
90  SetDotProduct(DotProductGeneric);
91 
92 #if defined(HAS_CPUID)
93 #if defined(__GNUC__)
94  unsigned int eax, ebx, ecx, edx;
// Query CPUID leaf 1; the flags are only touched if the call reports success.
95  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
96  // Note that these tests all use hex because the older compilers don't have
97  // the newer flags.
98 #if defined(SSE4_1)
// Leaf 1, ECX bit 19.
99  sse_available_ = (ecx & 0x00080000) != 0;
100 #endif
101 #if defined(AVX)
// Leaf 1, ECX bit 28.
102  avx_available_ = (ecx & 0x10000000) != 0;
// In this branch the AVX2 / AVX512 flags are only probed when AVX was
// detected, and the probe sits under the AVX guard (not an AVX2 guard).
103  if (avx_available_) {
104  // There is supposed to be a __get_cpuid_count function, but this is all
105  // there is in my cpuid.h. It is a macro for an asm statement and cannot
106  // be used inside an if.
// Leaf 7, subleaf 0: EBX bits 5, 16 and 30 feed the three flags below.
107  __cpuid_count(7, 0, eax, ebx, ecx, edx);
108  avx2_available_ = (ebx & 0x00000020) != 0;
109  avx512F_available_ = (ebx & 0x00010000) != 0;
110  avx512BW_available_ = (ebx & 0x40000000) != 0;
111  }
112 #endif
113  }
114 # elif defined(_WIN32)
// cpuInfo receives the four registers of each query; the code below reads
// index 0 after leaf 0, index 2 after leaf 1 and index 1 after leaf 7.
115  int cpuInfo[4];
116  int max_function_id;
// Leaf 0: element 0 is taken as the highest supported leaf number.
117  __cpuid(cpuInfo, 0);
118  max_function_id = cpuInfo[0];
119  if (max_function_id >= 1) {
120  __cpuid(cpuInfo, 1);
121 #if defined(SSE4_1)
122  sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
123 #endif
124 #if defined(AVX)
125  avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
126 #endif
// Unlike the __GNUC__ branch, the leaf 7 probe here is guarded by AVX2 and
// by the maximum leaf number, and does not depend on avx_available_.
127 #if defined(AVX2)
128  if (max_function_id >= 7) {
129  __cpuid(cpuInfo, 7);
130  avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
131  avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
132  avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
133  }
134 #endif
135  }
136 #else
137 #error "I don't know how to test for SIMD with this compiler"
138 #endif
139 #endif
140 
141  // Select code for calculation of dot product based on autodetection.
// NOTE(review): every branch body below is empty in this listing, and the
// listing numbers skip 147, 152 and 157. Presumably each skipped line held
// the call that installs the matching implementation -- confirm against the
// original file before relying on this view.
142  if (false) {
143  // This is a dummy to support conditional compilation.
144 #if defined(AVX2)
145  } else if (avx2_available_) {
146  // AVX2 detected.
148 #endif
149 #if defined(AVX)
150  } else if (avx_available_) {
151  // AVX detected.
153 #endif
154 #if defined(SSE4_1)
155  } else if (sse_available_) {
156  // SSE detected.
158 #endif
159  }
160 }
161 
// NOTE(review): the line that opens this function is not present in this
// listing (numbering jumps from 161 to 163). The symbol index at the end of
// the page places `static TESS_API void Update()` at simddetect.cpp:162, so
// this is presumably the body of SIMDDetect::Update() -- confirm.
//
// Compares the `dotproduct` config variable against a fixed set of names,
// installs the matching implementation through SetDotProduct where the call
// is visible, and finally writes dotproduct_method back into the variable.
163  // Select code for calculation of dot product based on the
164  // value of the config variable if that value is not empty.
// Starts as "generic", so the "auto" branch and the unsupported-value branch
// (neither of which reassigns it) both end up storing "generic" in the
// variable at the bottom of this block.
165  const char* dotproduct_method = "generic";
166  if (!strcmp(dotproduct.string(), "auto")) {
167  // Automatic detection. Nothing to be done.
168  } else if (!strcmp(dotproduct.string(), "generic")) {
169  // Generic code selected by config variable.
170  SetDotProduct(DotProductGeneric);
171  dotproduct_method = "generic";
172  } else if (!strcmp(dotproduct.string(), "native")) {
173  // Native optimized code selected by config variable.
174  SetDotProduct(DotProductNative);
175  dotproduct_method = "native";
// NOTE(review): in the three SIMD branches below only the method name is
// assigned in this listing; numbers 179, 185 and 191 are skipped, presumably
// the SetDotProduct calls for each implementation -- confirm.
176 #if defined(AVX2)
177  } else if (!strcmp(dotproduct.string(), "avx2")) {
178  // AVX2 selected by config variable.
180  dotproduct_method = "avx2";
181 #endif
182 #if defined(AVX)
183  } else if (!strcmp(dotproduct.string(), "avx")) {
184  // AVX selected by config variable.
186  dotproduct_method = "avx";
187 #endif
188 #if defined(SSE4_1)
189  } else if (!strcmp(dotproduct.string(), "sse")) {
190  // SSE selected by config variable.
192  dotproduct_method = "sse";
193 #endif
194  } else if (!strcmp(dotproduct.string(), "std::inner_product")) {
195  // std::inner_product selected by config variable.
196  SetDotProduct(DotProductStdInnerProduct);
197  dotproduct_method = "std::inner_product";
198  } else {
199  // Unsupported value of config variable.
200  tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
201  dotproduct.string());
// The list printed here is assembled at compile time from adjacent string
// literals. NOTE(review): "avx2" is accepted above when AVX2 is defined but
// is never added to this list.
202  tprintf("Support values for dotproduct: auto generic native"
203 #if defined(AVX)
204  " avx"
205 #endif
206 #if defined(SSE4_1)
207  " sse"
208 #endif
209  " std::inner_product.\n");
210  }
211 
// Store the name of the method chosen above back into the config variable.
212  dotproduct.set_value(dotproduct_method);
213 }
214 
215 } // namespace tesseract
static const IntSimdMatrix intSimdMatrixAVX2
double DotProductAVX(const double *u, const double *v, int n)
DotProductFunction DotProduct
Definition: simddetect.cpp:51
double DotProductNative(const double *u, const double *v, int n)
Definition: dotproduct.cpp:22
#define STRING_VAR(name, val, comment)
Definition: params.h:309
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
double(*)(const double *, const double *, int) DotProductFunction
Definition: simddetect.h:25
static const IntSimdMatrix intSimdMatrixSSE
double DotProductSSE(const double *u, const double *v, int n)
static TESS_API void Update()
Definition: simddetect.cpp:162
static const IntSimdMatrix * intSimdMatrix