2 #include "allheaders.h"
13 #if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN32__ ) || defined( __MINGW32__ )
18 #define IF_WINDOWS(X) X
22 #define NOT_WINDOWS(X)
23 #elif defined( __linux__ )
32 #define NOT_WINDOWS(X) X
33 #elif defined( __APPLE__ )
42 #define NOT_WINDOWS(X) X
52 #define NOT_WINDOWS(X) X
/*
 * Profiling verbosity level. The PERF_COUNT_* macro definitions are selected
 * with "#if PERF_COUNT_VERBOSE >= 2" (start/end timing) and
 * "#if PERF_COUNT_VERBOSE >= 3" (sub-interval timing).
 */
#define PERF_COUNT_VERBOSE    1

/*
 * printf format for one timing row: function name (%36s), interval label
 * (%24s) and elapsed seconds (%11.6f).
 */
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
73 #if PERF_COUNT_VERBOSE >= 2
/*
 * PERF_COUNT_START(FUNCT_NAME) -- QueryPerformanceCounter variant.
 *
 * Declares, in the enclosing scope, the locals that PERF_COUNT_END and
 * PERF_COUNT_SUB read and write (funct_name, elapsed_time_sec, freq and the
 * four counter values), queries the counter frequency and records the
 * function start time. Both sub-interval markers start equal to the function
 * start time.
 *
 * The expansion consists of declarations, so it cannot be wrapped in
 * do { } while (0) and can be used only once per scope.
 *
 * funct_name is const-qualified: FUNCT_NAME is normally a string literal and
 * binding a literal to a plain "char *" is ill-formed in C++11 and later.
 */
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    QueryPerformanceFrequency(&freq); \
    QueryPerformanceCounter(&time_funct_start); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;
/*
 * PERF_COUNT_END -- QueryPerformanceCounter variant.
 *
 * Reads the counter into time_funct_end, converts the tick difference since
 * time_funct_start to seconds using freq, and prints one report row labelled
 * "total". Relies on the locals declared by PERF_COUNT_START in the same
 * scope. Expands to bare statements (each already terminated by ';').
 */
#define PERF_COUNT_END \
    QueryPerformanceCounter(&time_funct_end); \
    elapsed_time_sec = \
        (double)(time_funct_end.QuadPart - time_funct_start.QuadPart) / \
        (double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
88 #define PERF_COUNT_START(FUNCT_NAME)
89 #define PERF_COUNT_END
92 #if PERF_COUNT_VERBOSE >= 3
/*
 * PERF_COUNT_SUB(SUB) -- QueryPerformanceCounter variant.
 *
 * Reads the counter into time_sub_end, prints the seconds elapsed since
 * time_sub_start under the label SUB, then moves time_sub_start forward to
 * the new reading so the next call measures from this point. Relies on the
 * locals declared by PERF_COUNT_START in the same scope.
 */
#define PERF_COUNT_SUB(SUB) \
    QueryPerformanceCounter(&time_sub_end); \
    elapsed_time_sec = \
        (double)(time_sub_end.QuadPart - time_sub_start.QuadPart) / \
        (double)(freq.QuadPart); \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
99 #define PERF_COUNT_SUB(SUB)
106 #if PERF_COUNT_VERBOSE >= 2
/*
 * PERF_COUNT_START(FUNCT_NAME) -- clock_gettime(CLOCK_MONOTONIC) variant.
 *
 * Declares, in the enclosing scope, the locals that PERF_COUNT_END and
 * PERF_COUNT_SUB read and write (funct_name, elapsed_time_sec and the four
 * timestamps) and records the function start time. Both sub-interval markers
 * start equal to the function start time.
 *
 * The expansion consists of declarations, so it cannot be wrapped in
 * do { } while (0) and can be used only once per scope.
 *
 * funct_name is const-qualified because FUNCT_NAME is normally a string
 * literal (binding one to "char *" is ill-formed in C++11 and later), and the
 * timestamps are spelled "struct timespec" so the expansion is valid in both
 * C and C++.
 */
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    struct timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;
/*
 * PERF_COUNT_END -- clock_gettime(CLOCK_MONOTONIC) variant.
 *
 * Takes the end timestamp, computes the seconds elapsed since
 * time_funct_start (whole seconds plus the nanosecond difference scaled by
 * 1e-9) and prints one report row labelled "total". Relies on the locals
 * declared by PERF_COUNT_START in the same scope. Expands to bare statements
 * (each already terminated by ';').
 */
#define PERF_COUNT_END \
    clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \
    elapsed_time_sec = \
        (double)(time_funct_end.tv_sec - time_funct_start.tv_sec) + \
        (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
120 #define PERF_COUNT_START(FUNCT_NAME)
121 #define PERF_COUNT_END
124 #if PERF_COUNT_VERBOSE >= 3
/*
 * PERF_COUNT_SUB(SUB) -- clock_gettime(CLOCK_MONOTONIC) variant.
 *
 * Takes a timestamp into time_sub_end, prints the seconds elapsed since
 * time_sub_start under the label SUB, then moves time_sub_start forward to
 * the new timestamp so the next call measures from this point. Relies on the
 * locals declared by PERF_COUNT_START in the same scope.
 */
#define PERF_COUNT_SUB(SUB) \
    clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \
    elapsed_time_sec = \
        (double)(time_sub_end.tv_sec - time_sub_start.tv_sec) + \
        (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \
    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
131 #define PERF_COUNT_SUB(SUB)
/*
 * Alias strcasecmp to strcmp.
 * NOTE(review): with this alias every strcasecmp() call becomes a
 * case-SENSITIVE comparison. The preprocessor guard around this line is not
 * visible here; if it is a Windows-only fallback, a case-insensitive
 * equivalent (e.g. _stricmp) may have been intended -- confirm with callers.
 */
#define strcasecmp    strcmp
/* Capacity limits for the kernel/program tables held in GPUEnv. */
#define MAX_KERNEL_STRING_LEN      64      /* kernel-name buffer length (stored with +1) */
#define MAX_CLFILE_NUM             50      /* slots for kernels, programs and source-file names */
#define MAX_CLKERNEL_NUM           200     /* slots for kernel names and kernel functions */
#define MAX_KERNEL_NAME_LEN        64

/*
 * Raw numeric token 0x403E.
 * NOTE(review): the name suggests an AMD-specific command-queue info query;
 * confirm against the AMD OpenCL extension headers.
 */
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E

/*
 * Group-size constants.
 * NOTE(review): presumably OpenCL work-group dimensions, with the HMOR pair
 * used by the horizontal morphology kernels -- verify at the call sites.
 */
#define GROUPSIZE_X                16
#define GROUPSIZE_Y                16
#define GROUPSIZE_HMORX            256
#define GROUPSIZE_HMORY            1
156 typedef struct _KernelEnv
158 cl_context mpkContext;
159 cl_command_queue mpkCmdQueue;
160 cl_program mpkProgram;
162 char mckKernelName[150];
165 typedef struct _OpenCLEnv
167 cl_platform_id mpOclPlatformID;
168 cl_context mpOclContext;
169 cl_device_id mpOclDevsID;
170 cl_command_queue mpOclCmdQueue;
172 typedef int ( *cl_kernel_function )(
void **userdata, KernelEnv *kenv );
175 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
177 static const l_uint32 lmask32[] = {0x0,
178 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
179 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
180 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
181 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
182 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
183 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
184 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
185 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff};
187 static const l_uint32 rmask32[] = {0x0,
188 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
189 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
190 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
191 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
192 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
193 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
194 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
195 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff};
197 #define CHECK_OPENCL(status,name) \
198 if( status != CL_SUCCESS ) \
200 printf ("OpenCL error code is %d at when %s .\n", status, name); \
204 typedef struct _GPUEnv
207 cl_platform_id mpPlatformID;
208 cl_device_type mDevType;
209 cl_context mpContext;
210 cl_device_id *mpArryDevsID;
211 cl_device_id mpDevID;
212 cl_command_queue mpCmdQueue;
213 cl_kernel mpArryKernels[MAX_CLFILE_NUM];
214 cl_program mpArryPrograms[MAX_CLFILE_NUM];
215 char mArryKnelSrcFile[MAX_CLFILE_NUM][256],
216 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
217 cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
218 int mnKernelCount, mnFileCount,
230 static GPUEnv gpuEnv;
234 static int InitEnv();
235 static int InitOpenclRunEnv(
int argc );
236 static int InitOpenclRunEnv_DeviceSelection(
int argc );
237 static int RegistOpenclKernel();
238 static int ReleaseOpenclRunEnv();
239 static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
240 static int CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption );
241 static int CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName );
242 static int GeneratBinFromKernelSource( cl_program program,
const char * clFileName );
243 static int WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes );
244 static int BinaryGenerated(
const char * clFileName, FILE ** fhandle );
246 static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
247 static Pix* pixReadTiffCl(
const char *
filename, l_int32 n );
248 static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
249 static PIX * pixReadMemTiffCl(
const l_uint8 *data,
size_t size, l_int32 n);
250 static PIX* pixReadFromTiffStreamCl(TIFF *tif);
251 static int composeRGBPixelCl(
int *tiffdata,
int *line,
int h,
int w);
252 static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres);
253 static TIFF* fopenTiffCl(FILE *fp,
const char *modestring);
258 static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs);
259 static void releaseMorphCLBuffers();
262 static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
265 static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
268 static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
271 static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
274 static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
bool reqDataCopy);
277 static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
bool reqDataCopy);
279 static void pixGetLinesCL(PIX *pixd, PIX *pixs,
280 PIX** pix_vline, PIX** pix_hline,
281 PIX** pixClosed,
bool getpixClosed,
282 l_int32 close_hsize, l_int32 close_vsize,
283 l_int32 open_hsize, l_int32 open_vsize,
284 l_int32 line_hsize, l_int32 line_vsize);
288 static int SetKernelEnv( KernelEnv *envInfo );
299 static int LoadOpencl();
302 static void FreeOpenclDll();
306 inline static int AddKernelConfig(
int kCount,
const char *kName );
309 static int HistogramRectOCL(
310 const unsigned char *imagedata,
318 int *histogramAllChannels);
320 static int ThresholdRectToPixOCL(
321 const unsigned char* imagedata,
324 const int* thresholds,
325 const int* hi_values,
332 static Pix * pixConvertRGBToGrayOCL( Pix *pix,
float weightRed = 0.3,
float weightGreen = 0.5,
float weightBlue = 0.2 );
334 static ds_device getDeviceSelection();
335 static ds_device selectedDevice;
336 static bool deviceIsSelected;
337 static bool selectedDeviceIsOpenCL();
338 static bool selectedDeviceIsNativeCPU();