tesseract 3.04.01

opencl/openclwrapper.h

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include "allheaders.h"
00003 #include "pix.h"
00004 #ifdef USE_OPENCL
00005 #include "tiff.h"
00006 #include "tiffio.h"
00007 #endif
00008 #include "tprintf.h"
00009 
00010 // including CL/cl.h doesn't occur until USE_OPENCL defined below
00011 
00012 // platform preprocessor commands
// Platform detection.
// Exactly one of ON_WINDOWS / ON_LINUX / ON_APPLE / ON_OTHER is 1, the others
// are 0. Precedence is Windows, then Linux, then Apple, then "other".
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(__CYGWIN32__) || defined(__MINGW32__)
#define ON_WINDOWS 1
#else
#define ON_WINDOWS 0
#endif

#if !ON_WINDOWS && defined(__linux__)
#define ON_LINUX 1
#else
#define ON_LINUX 0
#endif

#if !ON_WINDOWS && !ON_LINUX && defined(__APPLE__)
#define ON_APPLE 1
#else
#define ON_APPLE 0
#endif

#if ON_WINDOWS || ON_LINUX || ON_APPLE
#define ON_OTHER 0
#else
#define ON_OTHER 1
#endif

// IF_<PLATFORM>(X) expands to X on that platform and to nothing elsewhere.
// NOT_WINDOWS(X) is the complement of IF_WINDOWS(X).
#if ON_WINDOWS
#define IF_WINDOWS(X) X
#define NOT_WINDOWS(X)
#else
#define IF_WINDOWS(X)
#define NOT_WINDOWS(X) X
#endif

#if ON_LINUX
#define IF_LINUX(X) X
#else
#define IF_LINUX(X)
#endif

#if ON_APPLE
#define IF_APPLE(X) X
#else
#define IF_APPLE(X)
#endif

#if ON_OTHER
#define IF_OTHER(X) X
#else
#define IF_OTHER(X)
#endif
00054 
#if !ON_WINDOWS
// The timing macros below use clock_gettime() and struct timespec on every
// non-Windows platform, not just on Linux.
#include <time.h>
#endif

/************************************************************************************
 * enable/disable reporting of performance
 * PERF_COUNT_VERBOSE
 * 0 - no reporting
 * 1 - no reporting
 * 2 - report total function call time for functions we're tracking
 * 3 - also report breakdown of function calls (kernel launch, kernel time, data copies)
 *
 * Usage inside one function scope:
 *   PERF_COUNT_START("name")   declares the timing locals and starts the clock
 *   PERF_COUNT_SUB("phase")    prints the time since the previous SUB/START (level >= 3)
 *   PERF_COUNT_END             prints the total time since START (level >= 2)
 * PERF_COUNT_START declares local variables, so it may appear only once per
 * scope and must precede PERF_COUNT_SUB / PERF_COUNT_END in that scope.
 *
 * The level may be overridden from the build line (-DPERF_COUNT_VERBOSE=2);
 * the default is 1, i.e. all three macros expand to nothing.
 ************************************************************************************/
#ifndef PERF_COUNT_VERBOSE
#define PERF_COUNT_VERBOSE 1
#endif
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"


#if ON_WINDOWS

// NOTE(review): LARGE_INTEGER / QueryPerformanceCounter need <windows.h>; this
// header does not include it itself — presumably it arrives through another
// include. Confirm before enabling level >= 2 on Windows.
#if PERF_COUNT_VERBOSE >= 2
// funct_name is const: FUNCT_NAME is normally a string literal.
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    QueryPerformanceFrequency(&freq); \
    QueryPerformanceCounter(&time_funct_start); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    QueryPerformanceCounter(&time_funct_end); \
    elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    QueryPerformanceCounter(&time_sub_end); \
    elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif


// not on windows
#else

#if PERF_COUNT_VERBOSE >= 2
// "struct timespec" (rather than bare "timespec") keeps the macro valid in
// both C and C++ translation units.
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    struct timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \
    elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \
    elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif

#endif
00135 /**************************************************************************
00136  * enable/disable use of OpenCL
00137  **************************************************************************/
00138 
00139 #ifdef USE_OPENCL
00140 #include "opencl_device_selection.h"
00141 
// Fallback used when no strcasecmp macro is visible.
// NOTE(review): strcasecmp is normally a function, not a macro, so this test
// is presumably always true and every later strcasecmp() call becomes a
// case-sensitive strcmp() — confirm whether that is intended.
#if !defined(strcasecmp)
#define strcasecmp strcmp
#endif

// Capacity limits for the fixed-size tables in GPUEnv.
#define MAX_CLFILE_NUM 50
#define MAX_CLKERNEL_NUM 200
#define MAX_KERNEL_STRING_LEN 64
#define MAX_KERNEL_NAME_LEN 64

// Vendor-specific constant; presumably an AMD command-queue info selector —
// verify against the AMD OpenCL extension headers.
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E

// Group sizes; presumably the work-group dimensions for the generic 2-D
// kernels (X/Y) and for the morphology kernels (HMORX/HMORY) — confirm at the
// call sites.
#define GROUPSIZE_X 16
#define GROUPSIZE_Y 16
#define GROUPSIZE_HMORX 256
#define GROUPSIZE_HMORY 1
00155 
00156 typedef struct _KernelEnv
00157 {
00158     cl_context mpkContext;
00159     cl_command_queue mpkCmdQueue;
00160     cl_program mpkProgram;
00161     cl_kernel mpkKernel;
00162     char mckKernelName[150];
00163 } KernelEnv;
00164 
00165 typedef struct _OpenCLEnv
00166 {
00167     cl_platform_id mpOclPlatformID;
00168     cl_context mpOclContext;
00169     cl_device_id mpOclDevsID;
00170     cl_command_queue mpOclCmdQueue;
00171 } OpenCLEnv;
00172 typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv );
00173 
00174 
00175 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
00176 
00177 static const l_uint32 lmask32[] = {0x0,
00178     0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
00179     0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
00180     0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
00181     0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
00182     0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
00183     0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
00184     0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
00185     0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff};
00186 
00187 static const l_uint32 rmask32[] = {0x0,
00188     0x00000001, 0x00000003, 0x00000007, 0x0000000f,
00189     0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
00190     0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
00191     0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
00192     0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
00193     0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
00194     0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
00195     0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff};
00196 
/**
 * Report a failed OpenCL call.
 *
 * Prints a diagnostic to stdout when `status` differs from CL_SUCCESS; it does
 * not abort, return, or otherwise alter control flow.
 *
 * status - OpenCL status code to test; evaluated exactly once.
 * name   - C string naming the operation, used only in the message.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else); terminate each use with a semicolon.
 */
#define CHECK_OPENCL(status,name)    \
do    \
{    \
    const int check_opencl_status_ = (int)(status);    \
    if( check_opencl_status_ != CL_SUCCESS )    \
    {    \
        printf ("OpenCL error code is %d at   when %s .\n", check_opencl_status_, (name));    \
    }    \
} while( 0 )
00202 
00203 
00204 typedef struct _GPUEnv
00205 {
00206     //share vb in all modules in hb library
00207     cl_platform_id mpPlatformID;
00208     cl_device_type mDevType;
00209     cl_context mpContext;
00210     cl_device_id *mpArryDevsID;
00211     cl_device_id mpDevID;
00212     cl_command_queue mpCmdQueue;
00213     cl_kernel mpArryKernels[MAX_CLFILE_NUM];
00214     cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file
00215     char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
00216          mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
00217          cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
00218     int mnKernelCount, mnFileCount, // only one kernel file
00219         mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
00220     int mnKhrFp64Flag;
00221     int mnAmdFp64Flag;
00222 
00223 } GPUEnv;
00224 
00225 
00226 class OpenclDevice
00227 {
00228 
00229 public:
00230     static GPUEnv gpuEnv;
00231     static int isInited;
00232     OpenclDevice();
00233     ~OpenclDevice();
00234     static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
00235     static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
00236     static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
00237     static int RegistOpenclKernel();
00238     static int ReleaseOpenclRunEnv();
00239     static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
00240     static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
00241     static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
00242     static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
00243     static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
00244     static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
00245     //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
00246     static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
00247     static Pix* pixReadTiffCl( const char *filename, l_int32 n );
00248     static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
00249         static PIX * pixReadMemTiffCl(const l_uint8 *data, size_t size, l_int32  n);
00250     static PIX* pixReadFromTiffStreamCl(TIFF  *tif);
00251     static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w);
00252     static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32  *pxres,l_int32  *pyres);
00253     static TIFF* fopenTiffCl(FILE *fp,const char  *modestring);
00254 
00255 /* OpenCL implementations of Morphological operations*/
00256 
00257     //Initialiation of OCL buffers used in Morph operations
00258     static int initMorphCLAllocations(l_int32  wpl, l_int32  h, PIX* pixs);
00259     static void releaseMorphCLBuffers();
00260 
00261     // OpenCL implementation of Morphology Dilate
00262     static PIX* pixDilateBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy);
00263 
00264     // OpenCL implementation of Morphology Erode
00265     static PIX* pixErodeBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy);
00266 
00267     // OpenCL implementation of Morphology Close
00268     static PIX* pixCloseBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy);
00269 
00270     // OpenCL implementation of Morphology Open
00271     static PIX* pixOpenBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy);
00272 
00273     // OpenCL implementation of Morphology Open
00274     static PIX* pixSubtractCL(PIX  *pixd, PIX  *pixs1, PIX  *pixs2, bool reqDataCopy);
00275 
00276     // OpenCL implementation of Morphology (Hollow = Closed - Open)
00277     static PIX* pixHollowCL(PIX  *pixd, PIX  *pixs, l_int32  close_hsize, l_int32  close_vsize, l_int32  open_hsize, l_int32  open_vsize, bool reqDataCopy);
00278 
00279     static void pixGetLinesCL(PIX  *pixd, PIX  *pixs,
00280                                             PIX** pix_vline, PIX** pix_hline,
00281                                             PIX** pixClosed, bool  getpixClosed,
00282                                             l_int32  close_hsize, l_int32  close_vsize,
00283                                             l_int32  open_hsize, l_int32  open_vsize,
00284                                             l_int32  line_hsize, l_int32  line_vsize);
00285 
00286     //int InitOpenclAttr( OpenCLEnv * env );
00287     //int ReleaseKernel( KernelEnv * env );
00288     static int SetKernelEnv( KernelEnv *envInfo );
00289     //int CreateKernel( char * kernelname, KernelEnv * env );
00290     //int RunKernel( const char *kernelName, void **userdata );
00291     //int ConvertToString( const char *filename, char **source );
00292     //int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
00293     //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
00294     //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
00295     //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
00296     // static cl_device_id performDeviceSelection( );
00297     //static bool thresholdRectToPixMicroBench( TessScoreEvaluationInputData input, ds_device_type type);
00298 
00299     static int LoadOpencl();
00300 #ifdef WIN32
00301     //static int OpenclInite();
00302     static void FreeOpenclDll();
00303 #endif
00304 
00305 
00306     inline static int AddKernelConfig( int kCount, const char *kName );
00307 
00308     /* for binarization */
00309     static int HistogramRectOCL(
00310         const unsigned char *imagedata,
00311         int bytes_per_pixel,
00312         int bytes_per_line,
00313         int left,
00314         int top,
00315         int width,
00316         int height,
00317         int kHistogramSize,
00318         int *histogramAllChannels);
00319 
00320     static int ThresholdRectToPixOCL(
00321         const unsigned char* imagedata,
00322         int bytes_per_pixel,
00323         int bytes_per_line,
00324         const int* thresholds,
00325         const int* hi_values,
00326         Pix** pix,
00327         int rect_height,
00328         int rect_width,
00329         int rect_top,
00330         int rect_left);
00331 
00332     static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 );
00333 
00334     static ds_device getDeviceSelection();
00335     static ds_device selectedDevice;
00336     static bool deviceIsSelected;
00337     static bool selectedDeviceIsOpenCL();
00338     static bool selectedDeviceIsNativeCPU();
00339 
00340 };
00341 
00342 
00343 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines