tesseract  3.04.01
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
openclwrapper.h
Go to the documentation of this file.
#include <stdio.h>
#include <string.h>
#ifndef _MSC_VER
#include <strings.h>
#endif
#include "allheaders.h"
#include "pix.h"
#ifdef USE_OPENCL
#include "tiff.h"
#include "tiffio.h"
#endif
#include "tprintf.h"
9 
10 // including CL/cl.h doesn't occur until USE_OPENCL defined below
11 
12 // platform preprocessor commands
// Host platform detection.
//
// Exactly one of ON_WINDOWS / ON_LINUX / ON_APPLE / ON_OTHER is 1, the rest
// are 0. Precedence is Windows, then Linux, then Apple, then "other".
#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN32__ ) || defined( __MINGW32__ )
#  define ON_WINDOWS 1
#else
#  define ON_WINDOWS 0
#endif

#if !ON_WINDOWS && defined( __linux__ )
#  define ON_LINUX 1
#else
#  define ON_LINUX 0
#endif

#if !ON_WINDOWS && !ON_LINUX && defined( __APPLE__ )
#  define ON_APPLE 1
#else
#  define ON_APPLE 0
#endif

#if ON_WINDOWS || ON_LINUX || ON_APPLE
#  define ON_OTHER 0
#else
#  define ON_OTHER 1
#endif

// Conditional-code helpers: IF_<PLATFORM>(X) expands to X on that platform
// and to nothing elsewhere; NOT_WINDOWS(X) is the complement of IF_WINDOWS(X).
#if ON_WINDOWS
#  define IF_WINDOWS(X) X
#  define NOT_WINDOWS(X)
#else
#  define IF_WINDOWS(X)
#  define NOT_WINDOWS(X) X
#endif

#if ON_LINUX
#  define IF_LINUX(X) X
#else
#  define IF_LINUX(X)
#endif

#if ON_APPLE
#  define IF_APPLE(X) X
#else
#  define IF_APPLE(X)
#endif

#if ON_OTHER
#  define IF_OTHER(X) X
#else
#  define IF_OTHER(X)
#endif
54 
// The non-Windows PERF_COUNT_* macros use timespec and clock_gettime(),
// so <time.h> is needed on every non-Windows platform, not only on Linux.
#if !ON_WINDOWS
#include <time.h>
#endif
58 
/************************************************************************************
 * enable/disable reporting of performance
 * PERF_COUNT_VERBOSE
 * 0 - no reporting
 * 1 - no reporting
 * 2 - report total function call time for functions we're tracking
 * 3 - additionally report a breakdown of function calls at every PERF_COUNT_SUB
 *     (kernel launch, kernel time, data copies)
 *
 * Usage inside one function scope:
 *   PERF_COUNT_START("name")   declares the timing locals and starts the clock
 *   PERF_COUNT_SUB("step")     level >= 3: prints time since the previous sub-step
 *   PERF_COUNT_END             level >= 2: prints total time since PERF_COUNT_START
 *
 * PERF_COUNT_START declares local variables (funct_name, elapsed_time_sec,
 * time_*), so it can appear only once per scope and the macros cannot be
 * wrapped in do { } while (0). Below level 2 all three macros expand to nothing.
 ************************************************************************************/
#define PERF_COUNT_VERBOSE 1
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"


#if ON_WINDOWS

// Windows timing uses QueryPerformanceCounter / LARGE_INTEGER.
// NOTE(review): this file does not include <windows.h> itself; it must be
// in scope wherever these macros are expanded at level >= 2.
#if PERF_COUNT_VERBOSE >= 2
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    QueryPerformanceFrequency(&freq); \
    QueryPerformanceCounter(&time_funct_start); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    QueryPerformanceCounter(&time_funct_end); \
    elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    QueryPerformanceCounter(&time_sub_end); \
    elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif


// not on windows
#else

// Non-Windows timing uses clock_gettime(CLOCK_MONOTONIC) from <time.h>.
#if PERF_COUNT_VERBOSE >= 2
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    struct timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \
    elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \
    elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif

#endif
135 /**************************************************************************
136  * enable/disable use of OpenCL
137  **************************************************************************/
138 
139 #ifdef USE_OPENCL
140 #include "opencl_device_selection.h"
141 
// Case-insensitive string compare.
// strcasecmp is a function, not a macro, on POSIX systems, so testing it with
// #ifndef always succeeded there and silently replaced it with the
// case-sensitive strcmp. Only MSVC lacks strcasecmp; map it to the equivalent
// _stricmp there and use the real POSIX function everywhere else.
#if defined( _MSC_VER ) && !defined( strcasecmp )
#define strcasecmp _stricmp
#endif

// Array capacities used by the GPUEnv bookkeeping structure.
#define MAX_KERNEL_STRING_LEN 64   // kernel-name buffer length (GPUEnv adds 1 for the terminator)
#define MAX_CLFILE_NUM 50          // capacity of the per-source-file program/kernel arrays
#define MAX_CLKERNEL_NUM 200       // capacity of the kernel name/function tables
#define MAX_KERNEL_NAME_LEN 64
// NOTE(review): presumably an AMD-specific clGetCommandQueueInfo query id - confirm.
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
// NOTE(review): presumably work-group dimensions for the 2D kernels and for
// the horizontal-morphology kernels respectively - confirm against the .cpp.
#define GROUPSIZE_X 16
#define GROUPSIZE_Y 16
#define GROUPSIZE_HMORX 256
#define GROUPSIZE_HMORY 1
155 
156 typedef struct _KernelEnv
157 {
158  cl_context mpkContext;
159  cl_command_queue mpkCmdQueue;
160  cl_program mpkProgram;
161  cl_kernel mpkKernel;
162  char mckKernelName[150];
163 } KernelEnv;
164 
165 typedef struct _OpenCLEnv
166 {
167  cl_platform_id mpOclPlatformID;
168  cl_context mpOclContext;
169  cl_device_id mpOclDevsID;
170  cl_command_queue mpOclCmdQueue;
171 } OpenCLEnv;
172 typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv );
173 
174 
175 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
176 
177 static const l_uint32 lmask32[] = {0x0,
178  0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
179  0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
180  0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
181  0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
182  0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
183  0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
184  0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
185  0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff};
186 
187 static const l_uint32 rmask32[] = {0x0,
188  0x00000001, 0x00000003, 0x00000007, 0x0000000f,
189  0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
190  0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
191  0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
192  0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
193  0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
194  0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
195  0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff};
196 
// Report (via printf) when an OpenCL call did not return CL_SUCCESS.
//   status - the cl status code to test; evaluated twice on failure
//   name   - C string describing the call, printed with %s
// The arguments are parenthesised so that expression arguments such as
// `a & b` are compared as a whole instead of being split by operator
// precedence. This only reports; it does not return or abort.
// The expansion is a bare `if` block (kept so existing call sites with or
// without a trailing semicolon still compile): do not use it as the
// unbraced body of an if/else.
#define CHECK_OPENCL(status,name) \
if( (status) != CL_SUCCESS ) \
{ \
    printf ("OpenCL error code is %d at when %s .\n", (status), (name)); \
}
202 
203 
204 typedef struct _GPUEnv
205 {
206  //share vb in all modules in hb library
207  cl_platform_id mpPlatformID;
208  cl_device_type mDevType;
209  cl_context mpContext;
210  cl_device_id *mpArryDevsID;
211  cl_device_id mpDevID;
212  cl_command_queue mpCmdQueue;
213  cl_kernel mpArryKernels[MAX_CLFILE_NUM];
214  cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file
215  char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
216  mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
217  cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
218  int mnKernelCount, mnFileCount, // only one kernel file
219  mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
220  int mnKhrFp64Flag;
221  int mnAmdFp64Flag;
222 
223 } GPUEnv;
224 
225 
226 class OpenclDevice
227 {
228 
229 public:
230  static GPUEnv gpuEnv;
231  static int isInited;
232  OpenclDevice();
233  ~OpenclDevice();
234  static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
235  static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
236  static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
237  static int RegistOpenclKernel();
238  static int ReleaseOpenclRunEnv();
239  static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
240  static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
241  static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
242  static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
243  static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
244  static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
245  //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
246  static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
247  static Pix* pixReadTiffCl( const char *filename, l_int32 n );
248  static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
249  static PIX * pixReadMemTiffCl(const l_uint8 *data, size_t size, l_int32 n);
250  static PIX* pixReadFromTiffStreamCl(TIFF *tif);
251  static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w);
252  static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres);
253  static TIFF* fopenTiffCl(FILE *fp,const char *modestring);
254 
255 /* OpenCL implementations of Morphological operations*/
256 
257  //Initialiation of OCL buffers used in Morph operations
258  static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs);
259  static void releaseMorphCLBuffers();
260 
261  // OpenCL implementation of Morphology Dilate
262  static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
263 
264  // OpenCL implementation of Morphology Erode
265  static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
266 
267  // OpenCL implementation of Morphology Close
268  static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
269 
270  // OpenCL implementation of Morphology Open
271  static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
272 
273  // OpenCL implementation of Morphology Open
274  static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCopy);
275 
276  // OpenCL implementation of Morphology (Hollow = Closed - Open)
277  static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, bool reqDataCopy);
278 
279  static void pixGetLinesCL(PIX *pixd, PIX *pixs,
280  PIX** pix_vline, PIX** pix_hline,
281  PIX** pixClosed, bool getpixClosed,
282  l_int32 close_hsize, l_int32 close_vsize,
283  l_int32 open_hsize, l_int32 open_vsize,
284  l_int32 line_hsize, l_int32 line_vsize);
285 
286  //int InitOpenclAttr( OpenCLEnv * env );
287  //int ReleaseKernel( KernelEnv * env );
288  static int SetKernelEnv( KernelEnv *envInfo );
289  //int CreateKernel( char * kernelname, KernelEnv * env );
290  //int RunKernel( const char *kernelName, void **userdata );
291  //int ConvertToString( const char *filename, char **source );
292  //int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
293  //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
294  //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
295  //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
296  // static cl_device_id performDeviceSelection( );
297  //static bool thresholdRectToPixMicroBench( TessScoreEvaluationInputData input, ds_device_type type);
298 
299  static int LoadOpencl();
300 #ifdef WIN32
301  //static int OpenclInite();
302  static void FreeOpenclDll();
303 #endif
304 
305 
306  inline static int AddKernelConfig( int kCount, const char *kName );
307 
308  /* for binarization */
309  static int HistogramRectOCL(
310  const unsigned char *imagedata,
311  int bytes_per_pixel,
312  int bytes_per_line,
313  int left,
314  int top,
315  int width,
316  int height,
317  int kHistogramSize,
318  int *histogramAllChannels);
319 
320  static int ThresholdRectToPixOCL(
321  const unsigned char* imagedata,
322  int bytes_per_pixel,
323  int bytes_per_line,
324  const int* thresholds,
325  const int* hi_values,
326  Pix** pix,
327  int rect_height,
328  int rect_width,
329  int rect_top,
330  int rect_left);
331 
332  static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 );
333 
334  static ds_device getDeviceSelection();
335  static ds_device selectedDevice;
336  static bool deviceIsSelected;
337  static bool selectedDeviceIsOpenCL();
338  static bool selectedDeviceIsNativeCPU();
339 
340 };
341 
342 
343 #endif
const int kHistogramSize
Definition: otsuthr.h:27