20 #include <mach/mach_time.h>
26 #if defined(LIBLEPT_MAJOR_VERSION) && defined(LIBLEPT_MINOR_VERSION)
27 # define TESSERACT_LIBLEPT_PREREQ(maj, min) \
28 ((LIBLEPT_MAJOR_VERSION) > (maj) || ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min)))
30 # define TESSERACT_LIBLEPT_PREREQ(maj, min) 0
33 #if TESSERACT_LIBLEPT_PREREQ(1,73)
34 # define CALLOC LEPT_CALLOC
35 # define FREE LEPT_FREE
41 GPUEnv OpenclDevice::gpuEnv;
44 bool OpenclDevice::deviceIsSelected =
false;
45 ds_device OpenclDevice::selectedDevice;
48 int OpenclDevice::isInited = 0;
50 struct tiff_transform {
58 static struct tiff_transform tiff_orientation_transforms[] = {
69 static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000;
71 cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate;
77 void legalizeFileName(
char *fileName) {
79 const char* invalidChars =
"/\?:*\"><| ";
81 for (
int i = 0; i < strlen(invalidChars); i++) {
83 invalidStr[0] = invalidChars[i];
90 for (
char *pos = strstr(fileName, invalidStr); pos != NULL; pos = strstr(pos+1, invalidStr)) {
98 void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
101 gpuInfo->mnIsUserCreated = 1;
103 gpuInfo->mpDevID = device;
104 gpuInfo->mpArryDevsID =
new cl_device_id[1];
105 gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
106 clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE ,
sizeof(cl_device_type), (
void *) &gpuInfo->mDevType , &size);
107 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(TYPE)");
109 clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM ,
sizeof(cl_platform_id), (
void *) &gpuInfo->mpPlatformID , &size);
110 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(PLATFORM)");
112 cl_context_properties props[3];
113 props[0] = CL_CONTEXT_PLATFORM;
114 props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
116 gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL, NULL, &clStatus);
117 CHECK_OPENCL( clStatus,
"populateGPUEnv::createContext");
119 cl_command_queue_properties queueProperties = 0;
120 gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
121 CHECK_OPENCL( clStatus,
"populateGPUEnv::createCommandQueue");
125 int OpenclDevice::LoadOpencl()
128 HINSTANCE HOpenclDll = NULL;
129 void * OpenclDll = NULL;
131 OpenclDll =
static_cast<HINSTANCE
>( HOpenclDll );
132 OpenclDll = LoadLibrary(
"openCL.dll" );
133 if ( !static_cast<HINSTANCE>( OpenclDll ) )
135 fprintf(stderr,
"[OD] Load opencl.dll failed!\n");
136 FreeLibrary( static_cast<HINSTANCE>( OpenclDll ) );
140 fprintf(stderr,
"[OD] Load opencl.dll successful!\n");
144 int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
146 envInfo->mpkContext = gpuEnv.mpContext;
147 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
148 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
153 cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer,
size_t nElements, cl_mem_flags flags, cl_int *pStatus)
155 cl_mem membuffer = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (flags),
156 nElements *
sizeof(l_uint32), hostbuffer, pStatus);
161 PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs,
int elements, cl_mem_flags flags,
bool memcopy =
false,
bool sync =
true)
163 PROCNAME(
"mapOutputCLBuffer");
168 if ((pixd = pixCreateTemplate(pixs)) == NULL)
169 (PIX *)ERROR_PTR(
"pixd not made", procName, NULL);
173 if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) == NULL)
174 (PIX *)ERROR_PTR(
"pixd not made", procName, NULL);
177 l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
178 elements *
sizeof(l_uint32), 0, NULL, NULL, NULL );
182 memcpy(pixGetData(pixd), pValues, elements *
sizeof(l_uint32));
186 pixSetData(pixd, pValues);
189 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,clbuffer,pValues,0,NULL,NULL);
193 clFinish( rEnv.mpkCmdQueue );
199 cl_mem allocateIntBuffer( KernelEnv rEnv,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus ,
bool sync =
false)
201 cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
202 nElements *
sizeof(l_int32), NULL, pStatus);
204 if (_pValues != NULL)
206 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
207 nElements *
sizeof(l_int32), 0, NULL, NULL, NULL );
209 memcpy(pValues, _pValues, nElements *
sizeof(l_int32));
211 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
214 clFinish( rEnv.mpkCmdQueue );
221 void OpenclDevice::releaseMorphCLBuffers()
223 if (pixdCLIntermediate != NULL)
224 clReleaseMemObject(pixdCLIntermediate);
225 if (pixsCLBuffer != NULL)
226 clReleaseMemObject(pixsCLBuffer);
227 if (pixdCLBuffer != NULL)
228 clReleaseMemObject(pixdCLBuffer);
229 if (pixThBuffer != NULL)
230 clReleaseMemObject(pixThBuffer);
233 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs)
235 SetKernelEnv( &rEnv );
237 if (pixThBuffer != NULL)
239 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
242 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
sizeof(l_uint32) * wpl*h, 0, NULL, NULL);
247 l_uint32* srcdata = (l_uint32*) malloc(wpl*h*
sizeof(l_uint32));
248 memcpy(srcdata, pixGetData(pixs), wpl*h*
sizeof(l_uint32));
250 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
253 pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
255 pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
257 return (
int)clStatus;
260 int OpenclDevice::InitEnv()
267 if( 1 == LoadOpencl() )
275 InitOpenclRunEnv_DeviceSelection( 0 );
281 int OpenclDevice::ReleaseOpenclRunEnv()
283 ReleaseOpenclEnv( &gpuEnv );
289 inline int OpenclDevice::AddKernelConfig(
int kCount,
const char *kName )
292 fprintf(stderr,
"Error: ( KCount < 1 ) AddKernelConfig\n" );
293 strcpy( gpuEnv.mArrykernelNames[kCount-1], kName );
294 gpuEnv.mnKernelCount++;
297 int OpenclDevice::RegistOpenclKernel()
299 if ( !gpuEnv.mnIsUserCreated )
300 memset( &gpuEnv, 0,
sizeof(gpuEnv) );
302 gpuEnv.mnFileCount = 0;
303 gpuEnv.mnKernelCount = 0UL;
305 AddKernelConfig( 1, (
const char*)
"oclAverageSub1" );
309 int OpenclDevice::InitOpenclRunEnv_DeviceSelection(
int argc ) {
314 ds_device bestDevice_DS = getDeviceSelection( );
316 cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
318 if (selectedDeviceIsOpenCL() ) {
320 populateGPUEnvFromDevice( &gpuEnv, bestDevice );
321 gpuEnv.mnFileCount = 0;
322 gpuEnv.mnKernelCount = 0UL;
324 CompileKernelFile(&gpuEnv,
"");
336 OpenclDevice::OpenclDevice()
341 OpenclDevice::~OpenclDevice()
346 int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
356 for ( i = 0; i < gpuEnv.mnFileCount; i++ )
358 if ( gpuEnv.mpArryPrograms[i] )
360 clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
361 CHECK_OPENCL( clStatus,
"clReleaseProgram" );
362 gpuEnv.mpArryPrograms[i] = NULL;
365 if ( gpuEnv.mpCmdQueue )
367 clReleaseCommandQueue( gpuEnv.mpCmdQueue );
368 gpuEnv.mpCmdQueue = NULL;
370 if ( gpuEnv.mpContext )
372 clReleaseContext( gpuEnv.mpContext );
373 gpuEnv.mpContext = NULL;
376 gpuInfo->mnIsUserCreated = 0;
377 free( gpuInfo->mpArryDevsID );
380 int OpenclDevice::BinaryGenerated(
const char * clFileName, FILE ** fhandle )
387 char fileName[256] = { 0 }, cl_name[128] = { 0 };
388 char deviceName[1024];
389 clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
sizeof(deviceName), deviceName, NULL );
390 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
391 str = (
char*) strstr( clFileName, (
char*)
".cl" );
392 memcpy( cl_name, clFileName, str - clFileName );
393 cl_name[str - clFileName] =
'\0';
394 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
395 legalizeFileName(fileName);
396 fd = fopen( fileName,
"rb" );
397 status = ( fd != NULL ) ? 1 : 0;
405 int OpenclDevice::CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName )
408 for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
410 if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
412 if ( gpuEnvCached->mpArryPrograms[i] != NULL )
421 int OpenclDevice::WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes )
424 output = fopen( fileName,
"wb" );
425 if ( output == NULL )
430 fwrite( birary,
sizeof(
char), numBytes, output );
436 int OpenclDevice::GeneratBinFromKernelSource( cl_program program,
const char * clFileName )
440 size_t *binarySizes, numDevices=0;
441 cl_device_id *mpArryDevsID;
442 char **binaries, *str = NULL;
444 clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
445 sizeof(numDevices), &numDevices, NULL );
446 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
448 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
449 if ( mpArryDevsID == NULL )
454 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
455 sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL );
456 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
459 binarySizes = (
size_t*) malloc(
sizeof(
size_t) * numDevices );
461 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
462 sizeof(
size_t) * numDevices, binarySizes, NULL );
463 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
466 binaries = (
char**) malloc(
sizeof(
char *) * numDevices );
467 if ( binaries == NULL )
472 for ( i = 0; i < numDevices; i++ )
474 if ( binarySizes[i] != 0 )
476 binaries[i] = (
char*) malloc(
sizeof(
char) * binarySizes[i] );
477 if ( binaries[i] == NULL )
488 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
489 sizeof(
char *) * numDevices, binaries, NULL );
490 CHECK_OPENCL(clStatus,
"clGetProgramInfo");
493 for ( i = 0; i < numDevices; i++ )
495 char fileName[256] = { 0 }, cl_name[128] = { 0 };
497 if ( binarySizes[i] != 0 )
499 char deviceName[1024];
500 clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
501 sizeof(deviceName), deviceName, NULL);
502 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
504 str = (
char*) strstr( clFileName, (
char*)
".cl" );
505 memcpy( cl_name, clFileName, str - clFileName );
506 cl_name[str - clFileName] =
'\0';
507 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
508 legalizeFileName(fileName);
509 if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
511 printf(
"[OD] write binary[%s] failed\n", fileName);
514 printf(
"[OD] write binary[%s] successfully\n", fileName);
519 for ( i = 0; i < numDevices; i++ )
521 if ( binaries[i] != NULL )
528 if ( binaries != NULL )
534 if ( binarySizes != NULL )
540 if ( mpArryDevsID != NULL )
542 free( mpArryDevsID );
548 void copyIntBuffer( KernelEnv rEnv, cl_mem xValues,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus )
550 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
551 nElements *
sizeof(l_int32), 0, NULL, NULL, NULL );
552 clFinish( rEnv.mpkCmdQueue );
553 if (_pValues != NULL)
555 for (
int i = 0; i < (int)nElements; i++ )
556 pValues[i] = (l_int32)_pValues[i];
559 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
564 int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption )
569 char *buildLog = NULL, *binary;
571 size_t source_size[1];
572 int b_error, binary_status, binaryExisted, idx;
574 cl_device_id *mpArryDevsID;
578 if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
583 idx = gpuInfo->mnFileCount;
587 source_size[0] = strlen( source );
589 binaryExisted = BinaryGenerated( filename, &fd );
591 if ( binaryExisted == 1 )
593 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
594 sizeof(numDevices), &numDevices, NULL );
595 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
597 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
598 if ( mpArryDevsID == NULL )
605 b_error |= fseek( fd, 0, SEEK_END ) < 0;
606 b_error |= ( length = ftell(fd) ) <= 0;
607 b_error |= fseek( fd, 0, SEEK_SET ) < 0;
613 binary = (
char*) malloc( length + 2 );
619 memset( binary, 0, length + 2 );
620 b_error |= fread( binary, 1, length, fd ) != length;
627 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
628 sizeof( cl_device_id ) * numDevices, mpArryDevsID, NULL );
629 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
632 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
633 mpArryDevsID, &length, (
const unsigned char**) &binary,
634 &binary_status, &clStatus );
635 CHECK_OPENCL( clStatus,
"clCreateProgramWithBinary" );
638 free( mpArryDevsID );
646 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source,
647 source_size, &clStatus);
648 CHECK_OPENCL( clStatus,
"clCreateProgramWithSource" );
652 if ( gpuInfo->mpArryPrograms[idx] == (cl_program) NULL )
661 if (!gpuInfo->mnIsUserCreated)
663 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
664 buildOption, NULL, NULL);
669 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
670 buildOption, NULL, NULL);
674 if ( clStatus != CL_SUCCESS )
676 printf (
"BuildProgram error!\n");
677 if ( !gpuInfo->mnIsUserCreated )
679 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
680 CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
684 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
685 CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
687 if ( clStatus != CL_SUCCESS )
689 printf(
"opencl create build log fail\n");
692 buildLog = (
char*) malloc( length );
693 if ( buildLog == (
char*) NULL )
697 if ( !gpuInfo->mnIsUserCreated )
699 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
700 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
704 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
705 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
707 if ( clStatus != CL_SUCCESS )
709 printf(
"opencl program build info fail\n");
713 fd1 = fopen(
"kernel-build.log",
"w+" );
716 fwrite( buildLog,
sizeof(
char), length, fd1 );
725 strcpy( gpuInfo->mArryKnelSrcFile[idx], filename );
727 if ( binaryExisted == 0 ) {
728 GeneratBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
732 gpuInfo->mnFileCount += 1;
737 l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line)
742 size_t globalThreads[2];
743 size_t localThreads[2];
749 gsize = (w + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
750 globalThreads[0] = gsize;
751 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
752 globalThreads[1] = gsize;
753 localThreads[0] = GROUPSIZE_X;
754 localThreads[1] = GROUPSIZE_Y;
756 SetKernelEnv( &rEnv );
758 l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32));
759 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus );
760 CHECK_OPENCL( clStatus, "clCreateKernel");
763 valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
764 outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
767 clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (
void *)&valuesCl );
768 CHECK_OPENCL( clStatus, "clSetKernelArg");
769 clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(w), (
void *)&w );
770 CHECK_OPENCL( clStatus, "clSetKernelArg" );
771 clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), (
void *)&h );
772 CHECK_OPENCL( clStatus, "clSetKernelArg" );
773 clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), (
void *)&wpl );
774 CHECK_OPENCL( clStatus, "clSetKernelArg" );
775 clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (
void *)&outputCl );
776 CHECK_OPENCL( clStatus, "clSetKernelArg");
780 clStatus = clEnqueueNDRangeKernel( rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL );
781 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
784 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, 0, w*h * sizeof(l_uint32), 0, NULL, NULL, &clStatus);
785 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer outputCl");
786 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL);
789 clFinish( rEnv.mpkCmdQueue );
795 PIX * OpenclDevice::pixReadTiffCl ( const
char *filename, l_int32 n )
802 PROCNAME("pixReadTiff");
805 return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
807 if ((fp = fopenReadStream(filename)) == NULL)
808 return (PIX *)ERROR_PTR("image file not found", procName, NULL);
809 if ((pix = pixReadStreamTiffCl(fp, n)) == NULL) {
811 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
819 OpenclDevice::fopenTiffCl(FILE *fp,
820 const char *modestring)
824 PROCNAME(
"fopenTiff");
827 return (TIFF *)ERROR_PTR(
"stream not opened", procName, NULL);
829 return (TIFF *)ERROR_PTR(
"modestring not defined", procName, NULL);
831 if ((fd = fileno(fp)) < 0)
832 return (TIFF *)ERROR_PTR(
"invalid file descriptor", procName, NULL);
833 lseek(fd, 0, SEEK_SET);
835 return TIFFFdOpen(fd,
"TIFFstream", modestring);
837 l_int32 OpenclDevice::getTiffStreamResolutionCl(TIFF *tif,
842 l_int32 foundxres, foundyres;
843 l_float32 fxres, fyres;
845 PROCNAME(
"getTiffStreamResolution");
848 return ERROR_INT(
"tif not opened", procName, 1);
849 if (!pxres || !pyres)
850 return ERROR_INT(
"&xres and &yres not both defined", procName, 1);
853 TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit);
854 foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres);
855 foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres);
856 if (!foundxres && !foundyres)
return 1;
857 if (!foundxres && foundyres)
859 else if (foundxres && !foundyres)
862 if (resunit == RESUNIT_CENTIMETER) {
863 *pxres = (l_int32)(2.54 * fxres + 0.5);
864 *pyres = (l_int32)(2.54 * fyres + 0.5);
867 *pxres = (l_int32)fxres;
868 *pyres = (l_int32)fyres;
885 typedef struct L_Memstream L_MEMSTREAM;
888 static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata,
size_t pinsize);
889 static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata,
891 static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length);
892 static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data,
894 static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence);
895 static l_int32 tiffCloseCallback(thandle_t handle);
896 static toff_t tiffSizeCallback(thandle_t handle);
897 static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length);
898 static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length);
902 memstreamCreateForRead(l_uint8 *indata,
905 L_MEMSTREAM *mstream;
907 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
908 mstream->buffer = indata;
909 mstream->bufsize = insize;
910 mstream->hw = insize;
917 memstreamCreateForWrite(l_uint8 **poutdata,
920 L_MEMSTREAM *mstream;
922 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
923 mstream->buffer = (l_uint8 *)CALLOC(8 * 1024, 1);
924 mstream->bufsize = 8 * 1024;
925 mstream->poutdata = poutdata;
926 mstream->poutsize = poutsize;
927 mstream->hw = mstream->offset = 0;
933 tiffReadCallback(thandle_t handle,
937 L_MEMSTREAM *mstream;
940 mstream = (L_MEMSTREAM *)handle;
941 amount = L_MIN((
size_t)length, mstream->hw - mstream->offset);
942 memcpy(data, mstream->buffer + mstream->offset, amount);
943 mstream->offset += amount;
949 tiffWriteCallback(thandle_t handle,
953 L_MEMSTREAM *mstream;
960 mstream = (L_MEMSTREAM *)handle;
961 if (mstream->offset + length > mstream->bufsize) {
962 newsize = 2 * (mstream->offset + length);
963 mstream->buffer = (l_uint8 *)reallocNew((
void **)&mstream->buffer,
964 mstream->offset, newsize);
965 mstream->bufsize = newsize;
968 memcpy(mstream->buffer + mstream->offset, data, length);
969 mstream->offset += length;
970 mstream->hw = L_MAX(mstream->offset, mstream->hw);
976 tiffSeekCallback(thandle_t handle,
980 L_MEMSTREAM *mstream;
982 PROCNAME(
"tiffSeekCallback");
983 mstream = (L_MEMSTREAM *)handle;
987 mstream->offset = offset;
991 mstream->offset += offset;
996 mstream->offset = mstream->hw - offset;
999 return (toff_t)ERROR_INT(
"bad whence value", procName,
1003 return mstream->offset;
1008 tiffCloseCallback(thandle_t handle)
1010 L_MEMSTREAM *mstream;
1012 mstream = (L_MEMSTREAM *)handle;
1013 if (mstream->poutdata) {
1014 *mstream->poutdata = mstream->buffer;
1015 *mstream->poutsize = mstream->hw;
1023 tiffSizeCallback(thandle_t handle)
1025 L_MEMSTREAM *mstream;
1027 mstream = (L_MEMSTREAM *)handle;
1033 tiffMapCallback(thandle_t handle,
1037 L_MEMSTREAM *mstream;
1039 mstream = (L_MEMSTREAM *)handle;
1040 *data = mstream->buffer;
1041 *length = mstream->hw;
1047 tiffUnmapCallback(thandle_t handle,
1072 fopenTiffMemstream(
const char *filename,
1073 const char *operation,
1077 L_MEMSTREAM *mstream;
1079 PROCNAME(
"fopenTiffMemstream");
1082 return (TIFF *)ERROR_PTR(
"filename not defined", procName, NULL);
1084 return (TIFF *)ERROR_PTR(
"operation not defined", procName, NULL);
1086 return (TIFF *)ERROR_PTR(
"&data not defined", procName, NULL);
1088 return (TIFF *)ERROR_PTR(
"&datasize not defined", procName, NULL);
1089 if (!strcmp(operation,
"r") && !strcmp(operation,
"w"))
1090 return (TIFF *)ERROR_PTR(
"operation not 'r' or 'w'}", procName, NULL);
1092 if (!strcmp(operation,
"r"))
1093 mstream = memstreamCreateForRead(*pdata, *pdatasize);
1095 mstream = memstreamCreateForWrite(pdata, pdatasize);
1097 return TIFFClientOpen(filename, operation, mstream,
1098 tiffReadCallback, tiffWriteCallback,
1099 tiffSeekCallback, tiffCloseCallback,
1100 tiffSizeCallback, tiffMapCallback,
1107 OpenclDevice::pixReadMemTiffCl(
const l_uint8 *data,
size_t size,l_int32 n)
1109 l_int32 i, pagefound;
1113 PROCNAME(
"pixReadMemTiffCl");
1116 return (PIX *)ERROR_PTR(
"data pointer is NULL", procName, NULL);
1118 if ((tif = fopenTiffMemstream(
"",
"r", (l_uint8 **)&data, &size)) == NULL)
1119 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1123 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1126 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1128 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1132 if (TIFFReadDirectory(tif) == 0)
1136 if (pagefound ==
FALSE) {
1137 L_WARNING(
"tiff page %d not found", procName);
1147 OpenclDevice::pixReadStreamTiffCl(FILE *fp,
1150 l_int32 i, pagefound;
1154 PROCNAME(
"pixReadStreamTiff");
1157 return (PIX *)ERROR_PTR(
"stream not defined", procName, NULL);
1159 if ((tif = fopenTiffCl(fp,
"rb")) == NULL)
1160 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1164 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1167 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1169 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1173 if (TIFFReadDirectory(tif) == 0)
1177 if (pagefound ==
FALSE) {
1178 L_WARNING(
"tiff page %d not found", procName, n);
1188 getTiffCompressedFormat(l_uint16 tiffcomp)
1194 case COMPRESSION_CCITTFAX4:
1195 comptype = IFF_TIFF_G4;
1197 case COMPRESSION_CCITTFAX3:
1198 comptype = IFF_TIFF_G3;
1200 case COMPRESSION_CCITTRLE:
1201 comptype = IFF_TIFF_RLE;
1203 case COMPRESSION_PACKBITS:
1204 comptype = IFF_TIFF_PACKBITS;
1206 case COMPRESSION_LZW:
1207 comptype = IFF_TIFF_LZW;
1209 case COMPRESSION_ADOBE_DEFLATE:
1210 comptype = IFF_TIFF_ZIP;
1213 comptype = IFF_TIFF;
1219 void compare(l_uint32 *cpu, l_uint32 *gpu,
int size)
1221 for(
int i=0;i<size;i++)
1225 printf(
"\ndoesnot match\n");
1229 printf(
"\nit matches\n");
1236 OpenclDevice::pixReadFromTiffStreamCl(TIFF *tif)
1238 l_uint8 *linebuf, *data;
1239 l_uint16 spp, bps, bpp, tiffbpl, photometry, tiffcomp, orientation;
1240 l_uint16 *redmap, *greenmap, *bluemap;
1241 l_int32 d, wpl, bpl, comptype, i, ncolors;
1244 l_uint32 *line, *tiffdata;
1248 PROCNAME(
"pixReadFromTiffStream");
1251 return (PIX *)ERROR_PTR(
"tif not defined", procName, NULL);
1254 TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
1255 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
1258 return (PIX *)ERROR_PTR(
"can't handle bpp > 32", procName, NULL);
1261 else if (spp == 3 || spp == 4)
1264 return (PIX *)ERROR_PTR(
"spp not in set {1,3,4}", procName, NULL);
1266 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
1267 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
1268 tiffbpl = TIFFScanlineSize(tif);
1270 if ((pix = pixCreate(w, h, d)) == NULL)
1271 return (PIX *)ERROR_PTR(
"pix not made", procName, NULL);
1272 data = (l_uint8 *)pixGetData(pix);
1273 wpl = pixGetWpl(pix);
1278 if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1,
sizeof(l_uint8))) == NULL)
1279 return (PIX *)ERROR_PTR(
"calloc fail for linebuf", procName, NULL);
1281 for (i = 0 ; i < h ; i++) {
1282 if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
1285 return (PIX *)ERROR_PTR(
"line read fail", procName, NULL);
1287 memcpy((
char *)data, (
char *)linebuf, tiffbpl);
1291 pixEndianByteSwap(pix);
1293 pixEndianTwoByteSwap(pix);
1297 if ((tiffdata = (l_uint32 *)CALLOC(w * h,
sizeof(l_uint32))) == NULL) {
1299 return (PIX *)ERROR_PTR(
"calloc fail for tiffdata", procName, NULL);
1301 if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata,
1302 ORIENTATION_TOPLEFT, 0)) {
1305 return (PIX *)ERROR_PTR(
"failed to read tiffdata", procName, NULL);
1307 line = pixGetData(pix);
1310 l_uint32* output_gpu=pixReadFromTiffKernel(tiffdata,w,h,wpl,line);
1312 pixSetData(pix, output_gpu);
1319 if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) {
1320 pixSetXRes(pix, xres);
1321 pixSetYRes(pix, yres);
1325 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
1326 comptype = getTiffCompressedFormat(tiffcomp);
1327 pixSetInputFormat(pix, comptype);
1329 if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
1331 if ((cmap = pixcmapCreate(bps)) == NULL) {
1333 return (PIX *)ERROR_PTR(
"cmap not made", procName, NULL);
1336 for (i = 0; i < ncolors; i++)
1337 pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
1339 pixSetColormap(pix, cmap);
1342 if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
1344 if (tiffcomp == COMPRESSION_CCITTFAX3 ||
1345 tiffcomp == COMPRESSION_CCITTFAX4 ||
1346 tiffcomp == COMPRESSION_CCITTRLE ||
1347 tiffcomp == COMPRESSION_CCITTRLEW) {
1348 photometry = PHOTOMETRIC_MINISWHITE;
1351 photometry = PHOTOMETRIC_MINISBLACK;
1353 if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
1354 (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
1355 pixInvert(pix, pix);
1358 if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
1359 if (orientation >= 1 && orientation <= 8) {
1360 struct tiff_transform *transform =
1361 &tiff_orientation_transforms[orientation - 1];
1362 if (transform->vflip) pixFlipTB(pix, pix);
1363 if (transform->hflip) pixFlipLR(pix, pix);
1364 if (transform->rotate) {
1366 pix = pixRotate90(oldpix, transform->rotate);
1367 pixDestroy(&oldpix);
1377 pixDilateCL_55(l_int32 wpl, l_int32 h)
1379 size_t globalThreads[2];
1383 size_t localThreads[2];
1386 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1387 globalThreads[0] = gsize;
1388 globalThreads[1] = GROUPSIZE_HMORY;
1389 localThreads[0] = GROUPSIZE_HMORX;
1390 localThreads[1] = GROUPSIZE_HMORY;
1392 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_5x5", &status );
1394 status = clSetKernelArg(rEnv.mpkKernel,
1398 status = clSetKernelArg(rEnv.mpkKernel,
1402 status = clSetKernelArg(rEnv.mpkKernel,
1405 (
const void *)&wpl);
1406 status = clSetKernelArg(rEnv.mpkKernel,
1411 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1422 pixtemp = pixsCLBuffer;
1423 pixsCLBuffer = pixdCLBuffer;
1424 pixdCLBuffer = pixtemp;
1427 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1428 globalThreads[0] = gsize;
1429 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1430 globalThreads[1] = gsize;
1431 localThreads[0] = GROUPSIZE_X;
1432 localThreads[1] = GROUPSIZE_Y;
1434 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer_5x5", &status );
1436 status = clSetKernelArg(rEnv.mpkKernel,
1440 status = clSetKernelArg(rEnv.mpkKernel,
1444 status = clSetKernelArg(rEnv.mpkKernel,
1447 (
const void *)&wpl);
1448 status = clSetKernelArg(rEnv.mpkKernel,
1452 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1467 pixErodeCL_55(l_int32 wpl, l_int32 h)
1469 size_t globalThreads[2];
1473 l_uint32 fwmask, lwmask;
1474 size_t localThreads[2];
1476 lwmask = lmask32[32 - 2];
1477 fwmask = rmask32[32 - 2];
1480 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1481 globalThreads[0] = gsize;
1482 globalThreads[1] = GROUPSIZE_HMORY;
1483 localThreads[0] = GROUPSIZE_HMORX;
1484 localThreads[1] = GROUPSIZE_HMORY;
1486 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_5x5", &status );
1488 status = clSetKernelArg(rEnv.mpkKernel,
1492 status = clSetKernelArg(rEnv.mpkKernel,
1496 status = clSetKernelArg(rEnv.mpkKernel,
1499 (
const void *)&wpl);
1500 status = clSetKernelArg(rEnv.mpkKernel,
1505 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1516 pixtemp = pixsCLBuffer;
1517 pixsCLBuffer = pixdCLBuffer;
1518 pixdCLBuffer = pixtemp;
1521 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1522 globalThreads[0] = gsize;
1523 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1524 globalThreads[1] = gsize;
1525 localThreads[0] = GROUPSIZE_X;
1526 localThreads[1] = GROUPSIZE_Y;
1528 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer_5x5", &status );
1530 status = clSetKernelArg(rEnv.mpkKernel,
1534 status = clSetKernelArg(rEnv.mpkKernel,
1538 status = clSetKernelArg(rEnv.mpkKernel,
1541 (
const void *)&wpl);
1542 status = clSetKernelArg(rEnv.mpkKernel,
1546 status = clSetKernelArg(rEnv.mpkKernel,
1549 (
const void *)&fwmask);
1550 status = clSetKernelArg(rEnv.mpkKernel,
1553 (
const void *)&lwmask);
1554 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1569 pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1571 l_int32 xp, yp, xn, yn;
1573 size_t globalThreads[2];
1577 size_t localThreads[2];
1580 OpenclDevice::SetKernelEnv( &rEnv );
1582 if (hsize == 5 && vsize == 5)
1585 status = pixDilateCL_55(wpl, h);
1589 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1591 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1594 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1595 globalThreads[0] = gsize;
1596 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1597 globalThreads[1] = gsize;
1598 localThreads[0] = GROUPSIZE_X;
1599 localThreads[1] = GROUPSIZE_Y;
1601 if (xp > 31 || xn > 31)
1604 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor", &status );
1606 status = clSetKernelArg(rEnv.mpkKernel,
1610 status = clSetKernelArg(rEnv.mpkKernel,
1614 status = clSetKernelArg(rEnv.mpkKernel,
1618 status = clSetKernelArg(rEnv.mpkKernel,
1622 status = clSetKernelArg(rEnv.mpkKernel,
1625 (
const void *)&wpl);
1626 status = clSetKernelArg(rEnv.mpkKernel,
1630 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1640 if (yp > 0 || yn > 0)
1642 pixtemp = pixsCLBuffer;
1643 pixsCLBuffer = pixdCLBuffer;
1644 pixdCLBuffer = pixtemp;
1647 else if (xp > 0 || xn > 0 )
1650 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_32word", &status );
1651 isEven = (xp != xn);
1653 status = clSetKernelArg(rEnv.mpkKernel,
1657 status = clSetKernelArg(rEnv.mpkKernel,
1661 status = clSetKernelArg(rEnv.mpkKernel,
1665 status = clSetKernelArg(rEnv.mpkKernel,
1668 (
const void *)&wpl);
1669 status = clSetKernelArg(rEnv.mpkKernel,
1673 status = clSetKernelArg(rEnv.mpkKernel,
1676 (
const void *)&isEven);
1677 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1687 if (yp > 0 || yn > 0)
1689 pixtemp = pixsCLBuffer;
1690 pixsCLBuffer = pixdCLBuffer;
1691 pixdCLBuffer = pixtemp;
1695 if (yp > 0 || yn > 0)
1697 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer", &status );
1699 status = clSetKernelArg(rEnv.mpkKernel,
1703 status = clSetKernelArg(rEnv.mpkKernel,
1707 status = clSetKernelArg(rEnv.mpkKernel,
1711 status = clSetKernelArg(rEnv.mpkKernel,
1714 (
const void *)&wpl);
1715 status = clSetKernelArg(rEnv.mpkKernel,
1719 status = clSetKernelArg(rEnv.mpkKernel,
1723 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1740 pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h)
1743 l_int32 xp, yp, xn, yn;
1745 size_t globalThreads[2];
1746 size_t localThreads[2];
1750 char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
1751 l_uint32 rwmask, lwmask;
1754 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1756 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1758 OpenclDevice::SetKernelEnv( &rEnv );
1760 if (hsize == 5 && vsize == 5 && isAsymmetric)
1763 status = pixErodeCL_55(wpl, h);
1767 rwmask = rmask32[32 - (xp & 31)];
1768 lwmask = lmask32[32 - (xn & 31)];
1771 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1772 globalThreads[0] = gsize;
1773 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1774 globalThreads[1] = gsize;
1775 localThreads[0] = GROUPSIZE_X;
1776 localThreads[1] = GROUPSIZE_Y;
1779 if (xp > 31 || xn > 31 )
1782 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor", &status );
1784 status = clSetKernelArg(rEnv.mpkKernel,
1788 status = clSetKernelArg(rEnv.mpkKernel,
1792 status = clSetKernelArg(rEnv.mpkKernel,
1796 status = clSetKernelArg(rEnv.mpkKernel,
1800 status = clSetKernelArg(rEnv.mpkKernel,
1803 (
const void *)&wpl);
1804 status = clSetKernelArg(rEnv.mpkKernel,
1808 status = clSetKernelArg(rEnv.mpkKernel,
1810 sizeof(isAsymmetric),
1811 (
const void *)&isAsymmetric);
1812 status = clSetKernelArg(rEnv.mpkKernel,
1815 (
const void *)&rwmask);
1816 status = clSetKernelArg(rEnv.mpkKernel,
1819 (
const void *)&lwmask);
1820 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1830 if (yp > 0 || yn > 0)
1832 pixtemp = pixsCLBuffer;
1833 pixsCLBuffer = pixdCLBuffer;
1834 pixdCLBuffer = pixtemp;
1837 else if (xp > 0 || xn > 0)
1839 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_32word", &status );
1840 isEven = (xp != xn);
1842 status = clSetKernelArg(rEnv.mpkKernel,
1846 status = clSetKernelArg(rEnv.mpkKernel,
1850 status = clSetKernelArg(rEnv.mpkKernel,
1854 status = clSetKernelArg(rEnv.mpkKernel,
1857 (
const void *)&wpl);
1858 status = clSetKernelArg(rEnv.mpkKernel,
1862 status = clSetKernelArg(rEnv.mpkKernel,
1864 sizeof(isAsymmetric),
1865 (
const void *)&isAsymmetric);
1866 status = clSetKernelArg(rEnv.mpkKernel,
1869 (
const void *)&rwmask);
1870 status = clSetKernelArg(rEnv.mpkKernel,
1873 (
const void *)&lwmask);
1874 status = clSetKernelArg(rEnv.mpkKernel,
1877 (
const void *)&isEven);
1878 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1888 if (yp > 0 || yn > 0)
1890 pixtemp = pixsCLBuffer;
1891 pixsCLBuffer = pixdCLBuffer;
1892 pixdCLBuffer = pixtemp;
1897 if (yp > 0 || yn > 0)
1899 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer", &status );
1901 status = clSetKernelArg(rEnv.mpkKernel,
1905 status = clSetKernelArg(rEnv.mpkKernel,
1909 status = clSetKernelArg(rEnv.mpkKernel,
1913 status = clSetKernelArg(rEnv.mpkKernel,
1916 (
const void *)&wpl);
1917 status = clSetKernelArg(rEnv.mpkKernel,
1921 status = clSetKernelArg(rEnv.mpkKernel,
1923 sizeof(isAsymmetric),
1924 (
const void *)&isAsymmetric);
1925 status = clSetKernelArg(rEnv.mpkKernel,
1929 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1946 OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy =
false)
1950 wpl = pixGetWpl(pixs);
1951 h = pixGetHeight(pixs);
1953 clStatus = pixDilateCL(hsize, vsize, wpl, h);
1957 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ,
false);
1966 OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy =
false)
1970 wpl = pixGetWpl(pixs);
1971 h = pixGetHeight(pixs);
1973 clStatus = pixErodeCL(hsize, vsize, wpl, h);
1977 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
1985 pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1991 status = pixErodeCL(hsize, vsize, wpl, h);
1993 pixtemp = pixsCLBuffer;
1994 pixsCLBuffer = pixdCLBuffer;
1995 pixdCLBuffer = pixtemp;
1997 status = pixDilateCL(hsize, vsize, wpl, h);
2004 pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
2010 status = pixDilateCL(hsize, vsize, wpl, h);
2012 pixtemp = pixsCLBuffer;
2013 pixsCLBuffer = pixdCLBuffer;
2014 pixdCLBuffer = pixtemp;
2016 status = pixErodeCL(hsize, vsize, wpl, h);
2024 OpenclDevice::pixCloseBrickCL(PIX *pixd,
2028 bool reqDataCopy =
false)
2032 wpl = pixGetWpl(pixs);
2033 h = pixGetHeight(pixs);
2035 clStatus = pixCloseCL(hsize, vsize, wpl, h);
2039 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2048 OpenclDevice::pixOpenBrickCL(PIX *pixd,
2052 bool reqDataCopy =
false)
2056 wpl = pixGetWpl(pixs);
2057 h = pixGetHeight(pixs);
2059 clStatus = pixOpenCL(hsize, vsize, wpl, h);
2063 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2071 pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
2074 size_t globalThreads[2];
2076 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2078 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2079 globalThreads[0] = gsize;
2080 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2081 globalThreads[1] = gsize;
2083 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixOR", &status );
2085 status = clSetKernelArg(rEnv.mpkKernel,
2089 status = clSetKernelArg(rEnv.mpkKernel,
2093 status = clSetKernelArg(rEnv.mpkKernel,
2097 status = clSetKernelArg(rEnv.mpkKernel,
2100 (
const void *)&wpl);
2101 status = clSetKernelArg(rEnv.mpkKernel,
2105 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2120 pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
2123 size_t globalThreads[2];
2125 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2127 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2128 globalThreads[0] = gsize;
2129 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2130 globalThreads[1] = gsize;
2132 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixAND", &status );
2135 status = clSetKernelArg(rEnv.mpkKernel,
2139 status = clSetKernelArg(rEnv.mpkKernel,
2143 status = clSetKernelArg(rEnv.mpkKernel,
2147 status = clSetKernelArg(rEnv.mpkKernel,
2150 (
const void *)&wpl);
2151 status = clSetKernelArg(rEnv.mpkKernel,
2155 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2170 pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outBuffer = NULL)
2173 size_t globalThreads[2];
2175 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2177 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2178 globalThreads[0] = gsize;
2179 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2180 globalThreads[1] = gsize;
2182 if (outBuffer != NULL)
2184 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixSubtract", &status );
2188 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixSubtract_inplace", &status );
2192 status = clSetKernelArg(rEnv.mpkKernel,
2196 status = clSetKernelArg(rEnv.mpkKernel,
2200 status = clSetKernelArg(rEnv.mpkKernel,
2203 (
const void *)&wpl);
2204 status = clSetKernelArg(rEnv.mpkKernel,
2208 if (outBuffer != NULL)
2210 status = clSetKernelArg(rEnv.mpkKernel,
2213 (
const void *)&outBuffer);
2215 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2231 OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
bool reqDataCopy =
false)
2235 PROCNAME(
"pixSubtractCL");
2238 return (PIX *)ERROR_PTR(
"pixs1 not defined", procName, pixd);
2240 return (PIX *)ERROR_PTR(
"pixs2 not defined", procName, pixd);
2241 if (pixGetDepth(pixs1) != pixGetDepth(pixs2))
2242 return (PIX *)ERROR_PTR(
"depths of pixs* unequal", procName, pixd);
2244 #if EQUAL_SIZE_WARNING
2245 if (!pixSizesEqual(pixs1, pixs2))
2246 L_WARNING(
"pixs1 and pixs2 not equal sizes", procName);
2249 wpl = pixGetWpl(pixs1);
2250 h = pixGetHeight(pixs1);
2252 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2257 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs1, wpl*h, CL_MAP_READ);
2266 OpenclDevice::pixHollowCL(PIX *pixd,
2268 l_int32 close_hsize,
2269 l_int32 close_vsize,
2272 bool reqDataCopy =
false)
2277 wpl = pixGetWpl(pixs);
2278 h = pixGetHeight(pixs);
2281 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2285 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2288 pixtemp = pixsCLBuffer;
2289 pixsCLBuffer = pixdCLBuffer;
2290 pixdCLBuffer = pixtemp;
2292 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2295 pixtemp = pixsCLBuffer;
2296 pixsCLBuffer = pixdCLBuffer;
2297 pixdCLBuffer = pixdCLIntermediate;
2298 pixdCLIntermediate = pixtemp;
2300 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2305 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2313 OpenclDevice::pixGetLinesCL(PIX *pixd,
2319 l_int32 close_hsize, l_int32 close_vsize,
2320 l_int32 open_hsize, l_int32 open_vsize,
2321 l_int32 line_hsize, l_int32 line_vsize)
2326 wpl = pixGetWpl(pixs);
2327 h = pixGetHeight(pixs);
2330 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2335 *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl*h, CL_MAP_READ,
true,
false);
2340 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2343 pixtemp = pixsCLBuffer;
2344 pixsCLBuffer = pixdCLBuffer;
2345 pixdCLBuffer = pixtemp;
2347 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2350 pixtemp = pixsCLBuffer;
2351 pixsCLBuffer = pixdCLBuffer;
2352 pixdCLBuffer = pixdCLIntermediate;
2353 pixdCLIntermediate = pixtemp;
2355 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2359 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2361 pixtemp = pixsCLBuffer;
2362 pixsCLBuffer = pixdCLBuffer;
2363 pixdCLBuffer = pixtemp;
2367 clStatus = pixOpenCL(1, line_vsize, wpl, h);
2370 *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl*h, CL_MAP_READ,
true,
false);
2372 pixtemp = pixsCLBuffer;
2373 pixsCLBuffer = pixdCLIntermediate;
2374 pixdCLIntermediate = pixtemp;
2378 clStatus = pixOpenCL(line_hsize, 1, wpl, h);
2381 *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl*h, CL_MAP_READ,
true,
true);
2393 int OpenclDevice::HistogramRectOCL(
2394 const unsigned char* imageData,
2395 int bytes_per_pixel,
2402 int* histogramAllChannels)
2408 SetKernelEnv( &histKern );
2409 KernelEnv histRedKern;
2410 SetKernelEnv( &histRedKern );
2415 cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(
char), (
void *)imageData, &clStatus );
2416 CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
2419 int block_size = 256;
2421 clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL);
2422 CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
2424 int requestedOccupancy = 10;
2425 int numWorkGroups = numCUs * requestedOccupancy;
2426 int numThreads = block_size*numWorkGroups;
2427 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
2428 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
2429 size_t red_global_work_size[] = {
static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
2432 int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
2434 cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize*bytes_per_pixel*
sizeof(
int), (
void *)histogramAllChannels, &clStatus );
2435 CHECK_OPENCL( clStatus,
"clCreateBuffer histogramBuffer");
2439 int tmpHistogramBins = kHistogramSize*bytes_per_pixel*histRed;
2441 cl_mem tmpHistogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins*
sizeof(cl_uint), NULL, &clStatus );
2442 CHECK_OPENCL( clStatus,
"clCreateBuffer tmpHistogramBuffer");
2445 int *zeroBuffer =
new int[1];
2447 cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_int), (
void *)zeroBuffer, &clStatus );
2448 CHECK_OPENCL( clStatus,
"clCreateBuffer atomicSyncBuffer");
2449 delete[] zeroBuffer;
2451 if (bytes_per_pixel == 1)
2453 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectOneChannel", &clStatus );
2454 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectOneChannel");
2456 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectOneChannelReduction", &clStatus );
2457 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectOneChannelReduction");
2459 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectAllChannels", &clStatus );
2460 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannels");
2462 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectAllChannelsReduction", &clStatus );
2463 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannelsReduction");
2469 ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, tmpHistogramBins*
sizeof(cl_uint), 0, NULL, NULL, &clStatus);
2470 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer tmpHistogramBuffer");
2472 memset(ptr, 0, tmpHistogramBins*
sizeof(cl_uint));
2473 clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, NULL, NULL);
2476 clStatus = clSetKernelArg( histKern.mpkKernel, 0,
sizeof(cl_mem), (
void *)&imageBuffer );
2477 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2478 cl_uint numPixels = width*height;
2479 clStatus = clSetKernelArg( histKern.mpkKernel, 1,
sizeof(cl_uint), (
void *)&numPixels );
2480 CHECK_OPENCL( clStatus,
"clSetKernelArg numPixels" );
2481 clStatus = clSetKernelArg( histKern.mpkKernel, 2,
sizeof(cl_mem), (
void *)&tmpHistogramBuffer );
2482 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2485 int n = numThreads/bytes_per_pixel;
2486 clStatus = clSetKernelArg( histRedKern.mpkKernel, 0,
sizeof(cl_int), (
void *)&n );
2487 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2488 clStatus = clSetKernelArg( histRedKern.mpkKernel, 1,
sizeof(cl_mem), (
void *)&tmpHistogramBuffer );
2489 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2490 clStatus = clSetKernelArg( histRedKern.mpkKernel, 2,
sizeof(cl_mem), (
void *)&histogramBuffer );
2491 CHECK_OPENCL( clStatus,
"clSetKernelArg histogramBuffer");
2495 clStatus = clEnqueueNDRangeKernel(
2496 histKern.mpkCmdQueue,
2498 1, NULL, global_work_size, local_work_size,
2500 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" );
2501 clFinish( histKern.mpkCmdQueue );
2507 clStatus = clEnqueueNDRangeKernel(
2508 histRedKern.mpkCmdQueue,
2509 histRedKern.mpkKernel,
2510 1, NULL, red_global_work_size, local_work_size,
2512 CHECK_OPENCL( clStatus,
"clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
2513 clFinish( histRedKern.mpkCmdQueue );
2521 ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(
int), 0, NULL, NULL, &clStatus);
2522 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
2527 clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL);
2529 clReleaseMemObject(histogramBuffer);
2530 clReleaseMemObject(imageBuffer);
2542 int OpenclDevice::ThresholdRectToPixOCL(
2543 const
unsigned char* imageData,
2544 int bytes_per_pixel,
2546 const
int* thresholds,
2547 const
int* hi_values,
2556 *pix = pixCreate(width, height, 1);
2557 uinT32* pixData = pixGetData(*pix);
2558 int wpl = pixGetWpl(*pix);
2559 int pixSize = wpl*height*sizeof(
uinT32);
2563 SetKernelEnv( &rEnv );
2566 int block_size = 256;
2568 clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL);
2569 CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
2571 int requestedOccupancy = 10;
2572 int numWorkGroups = numCUs * requestedOccupancy;
2573 int numThreads = block_size*numWorkGroups;
2574 size_t local_work_size[] = {(size_t) block_size};
2575 size_t global_work_size[] = {(size_t) numThreads};
2581 cl_mem imageBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*
sizeof(
char), (
void *)imageData, &clStatus );
2582 CHECK_OPENCL( clStatus,
"clCreateBuffer imageBuffer");
2585 pixThBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, (
void *)pixData, &clStatus );
2586 CHECK_OPENCL( clStatus,
"clCreateBuffer pix");
2589 cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*
sizeof(
int), (
void *)thresholds, &clStatus );
2590 CHECK_OPENCL( clStatus,
"clCreateBuffer thresholdBuffer");
2591 cl_mem hiValuesBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*
sizeof(
int), (
void *)hi_values, &clStatus );
2592 CHECK_OPENCL( clStatus,
"clCreateBuffer hiValuesBuffer");
2595 if (bytes_per_pixel == 4) {
2596 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"kernel_ThresholdRectToPix", &clStatus );
2597 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_ThresholdRectToPix");
2599 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"kernel_ThresholdRectToPix_OneChan", &clStatus );
2600 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_ThresholdRectToPix_OneChan");
2604 clStatus = clSetKernelArg( rEnv.mpkKernel, 0,
sizeof(cl_mem), (
void *)&imageBuffer );
2605 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2606 cl_uint numPixels = width*height;
2607 clStatus = clSetKernelArg( rEnv.mpkKernel, 1,
sizeof(
int), (
void *)&height );
2608 CHECK_OPENCL( clStatus,
"clSetKernelArg height" );
2609 clStatus = clSetKernelArg( rEnv.mpkKernel, 2,
sizeof(
int), (
void *)&width );
2610 CHECK_OPENCL( clStatus,
"clSetKernelArg width" );
2611 clStatus = clSetKernelArg( rEnv.mpkKernel, 3,
sizeof(
int), (
void *)&wpl );
2612 CHECK_OPENCL( clStatus,
"clSetKernelArg wpl" );
2613 clStatus = clSetKernelArg( rEnv.mpkKernel, 4,
sizeof(cl_mem), (
void *)&thresholdsBuffer );
2614 CHECK_OPENCL( clStatus,
"clSetKernelArg thresholdsBuffer" );
2615 clStatus = clSetKernelArg( rEnv.mpkKernel, 5,
sizeof(cl_mem), (
void *)&hiValuesBuffer );
2616 CHECK_OPENCL( clStatus,
"clSetKernelArg hiValuesBuffer" );
2617 clStatus = clSetKernelArg( rEnv.mpkKernel, 6,
sizeof(cl_mem), (
void *)&pixThBuffer );
2618 CHECK_OPENCL( clStatus,
"clSetKernelArg pixThBuffer");
2622 clStatus = clEnqueueNDRangeKernel(
2625 1, NULL, global_work_size, local_work_size,
2627 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix" );
2628 clFinish( rEnv.mpkCmdQueue );
2632 printf(
"Setting return value to -1\n");
2636 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus);
2637 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer histogramBuffer");
2638 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL);
2640 clReleaseMemObject(imageBuffer);
2641 clReleaseMemObject(thresholdsBuffer);
2642 clReleaseMemObject(hiValuesBuffer);
2655 typedef struct _TessScoreEvaluationInputData {
2659 unsigned char *imageData;
2661 } TessScoreEvaluationInputData;
2663 void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input ) {
2668 int numChannels = 4;
2669 input->height = height;
2670 input->width = width;
2671 input->numChannels = numChannels;
2672 unsigned char (*imageData4)[4] = (
unsigned char (*)[4]) malloc(height*width*numChannels*
sizeof(
unsigned char));
2673 input->imageData = (
unsigned char *) &imageData4[0];
2676 unsigned char pixelWhite[4] = { 0, 0, 0, 255};
2677 unsigned char pixelBlack[4] = {255, 255, 255, 255};
2678 for (
int p = 0; p < height*width; p++) {
2680 imageData4[p][0] = pixelWhite[0];
2681 imageData4[p][1] = pixelWhite[1];
2682 imageData4[p][2] = pixelWhite[2];
2683 imageData4[p][3] = pixelWhite[3];
2686 int maxLineWidth = 64;
2689 for (
int i = 0; i < numLines; i++) {
2690 int lineWidth = rand()%maxLineWidth;
2691 int vertLinePos = lineWidth + rand()%(width-2*lineWidth);
2693 for (
int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
2694 for (
int col = 0; col < height; col++) {
2696 imageData4[row*width+col][0] = pixelBlack[0];
2697 imageData4[row*width+col][1] = pixelBlack[1];
2698 imageData4[row*width+col][2] = pixelBlack[2];
2699 imageData4[row*width+col][3] = pixelBlack[3];
2704 for (
int i = 0; i < numLines; i++) {
2705 int lineWidth = rand()%maxLineWidth;
2706 int horLinePos = lineWidth + rand()%(height-2*lineWidth);
2708 for (
int row = 0; row < width; row++) {
2709 for (
int col = horLinePos-lineWidth/2; col < horLinePos+lineWidth/2; col++) {
2712 imageData4[row*width+col][0] = pixelBlack[0];
2713 imageData4[row*width+col][1] = pixelBlack[1];
2714 imageData4[row*width+col][2] = pixelBlack[2];
2715 imageData4[row*width+col][3] = pixelBlack[3];
2720 float fractionBlack = 0.1;
2721 int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
2722 for (
int i = 0; i < numSpots; i++) {
2724 int lineWidth = rand()%maxLineWidth;
2725 int col = lineWidth + rand()%(width-2*lineWidth);
2726 int row = lineWidth + rand()%(height-2*lineWidth);
2728 for (
int r = row-lineWidth/2; r < row+lineWidth/2; r++) {
2729 for (
int c = col-lineWidth/2; c < col+lineWidth/2; c++) {
2732 imageData4[r*width+c][0] = pixelBlack[0];
2733 imageData4[r*width+c][1] = pixelBlack[1];
2734 imageData4[r*width+c][2] = pixelBlack[2];
2735 imageData4[r*width+c][3] = pixelBlack[3];
2740 input->pix = pixCreate(input->width, input->height, 1);
2743 typedef struct _TessDeviceScore {
2753 double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2757 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2758 QueryPerformanceFrequency(&freq);
2760 mach_timebase_info_data_t info = { 0, 0 };
2761 mach_timebase_info(&info);
2762 long long start,stop;
2764 timespec time_funct_start, time_funct_end;
2767 l_uint32 *tiffdata = (l_uint32 *)input.imageData;
2770 if (type == DS_DEVICE_OPENCL_DEVICE) {
2772 QueryPerformanceCounter(&time_funct_start);
2774 start = mach_absolute_time();
2776 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2779 OpenclDevice::gpuEnv = *env;
2780 int wpl = pixGetWpl(input.pix);
2781 OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, wpl, NULL);
2783 QueryPerformanceCounter(&time_funct_end);
2784 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2786 stop = mach_absolute_time();
2787 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
2789 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2790 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2795 QueryPerformanceCounter(&time_funct_start);
2797 start = mach_absolute_time();
2799 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2801 Pix *pix = pixCreate(input.width, input.height, 32);
2802 l_uint32 *pixData = pixGetData(pix);
2803 int wpl = pixGetWpl(pix);
2808 for (i = 0; i < input.height ; i++) {
2809 for (j = 0; j < input.width; j++) {
2811 l_uint32 tiffword = tiffdata[i * input.width + j];
2812 l_int32 rval = ((tiffword) & 0xff);
2813 l_int32 gval = (((tiffword) >> 8) & 0xff);
2814 l_int32 bval = (((tiffword) >> 16) & 0xff);
2815 l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8);
2816 pixData[idx] = value;
2821 QueryPerformanceCounter(&time_funct_end);
2822 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2824 stop = mach_absolute_time();
2825 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
2827 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2828 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2839 double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2843 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2844 QueryPerformanceFrequency(&freq);
2846 mach_timebase_info_data_t info = { 0, 0 };
2847 mach_timebase_info(&info);
2848 long long start,stop;
2850 timespec time_funct_start, time_funct_end;
2853 unsigned char pixelHi = (
unsigned char)255;
2857 int kHistogramSize = 256;
2858 int bytes_per_line = input.width*input.numChannels;
2859 int *histogramAllChannels =
new int[kHistogramSize*input.numChannels];
2862 if (type == DS_DEVICE_OPENCL_DEVICE) {
2864 QueryPerformanceCounter(&time_funct_start);
2866 start = mach_absolute_time();
2868 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2871 OpenclDevice::gpuEnv = *env;
2872 int wpl = pixGetWpl(input.pix);
2873 retVal= OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, top, left, input.width, input.height, kHistogramSize, histogramAllChannels);
2876 QueryPerformanceCounter(&time_funct_end);
2877 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2879 stop = mach_absolute_time();
2882 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
2889 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2890 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2896 QueryPerformanceCounter(&time_funct_start);
2898 start = mach_absolute_time();
2900 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2902 for (
int ch = 0; ch < input.numChannels; ++ch) {
2904 left, top, input.width, input.height, histogram);
2907 QueryPerformanceCounter(&time_funct_end);
2908 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2910 stop = mach_absolute_time();
2911 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
2913 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2914 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2920 delete[] histogramAllChannels;
2925 void ThresholdRectToPix_Native(
const unsigned char* imagedata,
2926 int bytes_per_pixel,
2928 const int* thresholds,
2929 const int* hi_values,
2933 int width = pixGetWidth(*pix);
2934 int height = pixGetHeight(*pix);
2936 *pix = pixCreate(width, height, 1);
2937 uinT32* pixdata = pixGetData(*pix);
2938 int wpl = pixGetWpl(*pix);
2939 const unsigned char* srcdata = imagedata + top * bytes_per_line +
2940 left * bytes_per_pixel;
2941 for (
int y = 0; y < height; ++y) {
2942 const uinT8* linedata = srcdata;
2943 uinT32* pixline = pixdata + y * wpl;
2944 for (
int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
2945 bool white_result =
true;
2946 for (
int ch = 0; ch < bytes_per_pixel; ++ch) {
2947 if (hi_values[ch] >= 0 &&
2948 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
2949 white_result =
false;
2954 CLEAR_DATA_BIT(pixline, x);
2956 SET_DATA_BIT(pixline, x);
2958 srcdata += bytes_per_line;
2962 double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2967 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2968 QueryPerformanceFrequency(&freq);
2970 mach_timebase_info_data_t info = { 0, 0 };
2971 mach_timebase_info(&info);
2972 long long start,stop;
2974 timespec time_funct_start, time_funct_end;
2978 unsigned char pixelHi = (
unsigned char)255;
2979 int* thresholds =
new int[4];
2980 thresholds[0] = pixelHi/2;
2981 thresholds[1] = pixelHi/2;
2982 thresholds[2] = pixelHi/2;
2983 thresholds[3] = pixelHi/2;
2984 int *hi_values =
new int[4];
2985 thresholds[0] = pixelHi;
2986 thresholds[1] = pixelHi;
2987 thresholds[2] = pixelHi;
2988 thresholds[3] = pixelHi;
2992 int bytes_per_line = input.width*input.numChannels;
2995 if (type == DS_DEVICE_OPENCL_DEVICE) {
2997 QueryPerformanceCounter(&time_funct_start);
2999 start = mach_absolute_time();
3001 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3004 OpenclDevice::gpuEnv = *env;
3005 int wpl = pixGetWpl(input.pix);
3006 retVal= OpenclDevice::ThresholdRectToPixOCL(input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, input.height, input.width, top, left);
3009 QueryPerformanceCounter(&time_funct_end);
3010 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3012 stop = mach_absolute_time();
3015 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;;
3023 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3024 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3032 QueryPerformanceCounter(&time_funct_start);
3034 start = mach_absolute_time();
3036 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3038 ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line,
3039 thresholds, hi_values, &input.pix );
3042 QueryPerformanceCounter(&time_funct_end);
3043 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3045 stop = mach_absolute_time();
3046 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
3048 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3049 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3054 delete[] thresholds;
3059 double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
3063 LARGE_INTEGER freq, time_funct_start, time_funct_end;
3064 QueryPerformanceFrequency(&freq);
3066 mach_timebase_info_data_t info = { 0, 0 };
3067 mach_timebase_info(&info);
3068 long long start,stop;
3070 timespec time_funct_start, time_funct_end;
3074 int resolution = 300;
3075 int wpl = pixGetWpl(input.pix);
3080 int closing_brick = max_line_width / 3;
3083 if (type == DS_DEVICE_OPENCL_DEVICE) {
3085 QueryPerformanceCounter(&time_funct_start);
3087 start = mach_absolute_time();
3089 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3091 Pix *src_pix = input.pix;
3092 OpenclDevice::gpuEnv = *env;
3093 OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
3094 Pix *pix_vline = NULL, *pix_hline = NULL, *pix_closed = NULL;
3095 OpenclDevice::pixGetLinesCL(NULL, input.pix, &pix_vline, &pix_hline, &pix_closed,
true, closing_brick, closing_brick, max_line_width, max_line_width, min_line_length, min_line_length);
3097 OpenclDevice::releaseMorphCLBuffers();
3100 QueryPerformanceCounter(&time_funct_end);
3101 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3103 stop = mach_absolute_time();
3104 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
3106 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3107 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3111 QueryPerformanceCounter(&time_funct_start);
3113 start = mach_absolute_time();
3115 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3119 Pix *src_pix = input.pix;
3120 Pix *pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
3121 Pix *pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width);
3122 Pix *pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
3123 pixDestroy(&pix_solid);
3124 Pix *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
3125 Pix *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
3126 pixDestroy(&pix_hollow);
3129 QueryPerformanceCounter(&time_funct_end);
3130 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3132 stop = mach_absolute_time();
3133 time = ((stop - start) * (
double) info.numer / info.denom) / 1.0E9;
3135 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3136 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3153 ds_status serializeScore( ds_device* device,
void **serializedScore,
unsigned int* serializedScoreSize ) {
3154 *serializedScoreSize =
sizeof(TessDeviceScore);
3155 *serializedScore = (
void *)
new unsigned char[*serializedScoreSize];
3156 memcpy(*serializedScore, device->score, *serializedScoreSize);
3161 ds_status deserializeScore( ds_device* device,
const unsigned char* serializedScore,
unsigned int serializedScoreSize ) {
3163 device->score =
new TessDeviceScore;
3164 memcpy(device->score, serializedScore, serializedScoreSize);
3168 ds_status releaseScore(
void* score ) {
3174 ds_status evaluateScoreForDevice( ds_device *device,
void *inputData) {
3178 printf(
"\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
3180 if (device->type == DS_DEVICE_OPENCL_DEVICE) {
3183 populateGPUEnvFromDevice( env, device->oclDeviceID);
3184 env->mnFileCount = 0;
3185 env->mnKernelCount = 0UL;
3187 OpenclDevice::gpuEnv = *env;
3188 OpenclDevice::CompileKernelFile(env,
"");
3191 TessScoreEvaluationInputData *input = (TessScoreEvaluationInputData *)inputData;
3194 double composeRGBPixelTime = composeRGBPixelMicroBench( env, *input, device->type );
3197 double histogramRectTime = histogramRectMicroBench( env, *input, device->type );
3200 double thresholdRectToPixTime = thresholdRectToPixMicroBench( env, *input, device->type );
3203 double getLineMasksMorphTime = getLineMasksMorphMicroBench( env, *input, device->type );
3208 float composeRGBPixelWeight = 1.2f;
3209 float histogramRectWeight = 2.4f;
3210 float thresholdRectToPixWeight = 4.5f;
3211 float getLineMasksMorphWeight = 5.0f;
3213 float weightedTime =
3214 composeRGBPixelWeight * composeRGBPixelTime +
3215 histogramRectWeight * histogramRectTime +
3216 thresholdRectToPixWeight * thresholdRectToPixTime +
3217 getLineMasksMorphWeight * getLineMasksMorphTime
3219 device->score = (
void *)
new TessDeviceScore;
3220 ((TessDeviceScore *)device->score)->time = weightedTime;
3222 printf(
"[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
3223 printf(
"[DS]%25s: %f (w=%.1f)\n",
"composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight );
3224 printf(
"[DS]%25s: %f (w=%.1f)\n",
"HistogramRect", histogramRectTime, histogramRectWeight );
3225 printf(
"[DS]%25s: %f (w=%.1f)\n",
"ThresholdRectToPix", thresholdRectToPixTime, thresholdRectToPixWeight );
3226 printf(
"[DS]%25s: %f (w=%.1f)\n",
"getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight );
3227 printf(
"[DS]%25s: %f\n",
"Score", ((TessDeviceScore *)device->score)->time );
3232 ds_device OpenclDevice::getDeviceSelection( ) {
3233 if (!deviceIsSelected) {
3236 if( 1 == LoadOpencl() ) {
3241 ds_profile *profile;
3242 status = initDSProfile( &profile,
"v0.1" );
3245 char *fileName = "tesseract_opencl_profile_devices.dat";
3246 status = readProfileFromFile( profile, deserializeScore, fileName);
3247 if (status != DS_SUCCESS) {
3249 printf(
"[DS] Profile file not available (%s); performing profiling.\n", fileName);
3252 TessScoreEvaluationInputData input;
3253 populateTessScoreEvaluationInputData( &input );
3256 unsigned int numUpdates;
3257 status = profileDevices( profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (
void *)&input, &numUpdates );
3260 if ( status == DS_SUCCESS ) {
3261 status = writeProfileToFile( profile, serializeScore, fileName);
3263 if ( status == DS_SUCCESS ) {
3264 printf(
"[DS] Scores written to file (%s).\n", fileName);
3266 printf(
"[DS] Error saving scores to file (%s); scores not written to file.\n", fileName);
3269 printf(
"[DS] Unable to evaluate performance; scores not written to file.\n");
3274 printf("[DS] Profile read from file (%s).\n", fileName);
3279 float bestTime = FLT_MAX;
3280 int bestDeviceIdx = -1;
3281 for (
int d = 0; d < profile->numDevices; d++) {
3282 ds_device device = profile->devices[d];
3283 TessDeviceScore score = *(TessDeviceScore *)device.score;
3285 float time = score.time;
3286 printf(
"[DS] Device[%i] %i:%s score is %f\n", d+1, device.type, device.oclDeviceName, time);
3287 if (time < bestTime) {
3292 printf(
"[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native");
3296 bool overrided =
false;
3297 char *overrideDeviceStr = getenv(
"TESSERACT_OPENCL_DEVICE");
3298 if (overrideDeviceStr != NULL) {
3299 int overrideDeviceIdx = atoi(overrideDeviceStr);
3300 if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) {
3301 printf(
"[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx);
3302 bestDeviceIdx = overrideDeviceIdx - 1;
3305 printf(
"[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices);
3310 printf(
"[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native");
3312 selectedDevice = profile->devices[bestDeviceIdx];
3314 releaseDSProfile(profile, releaseScore);
3317 printf(
"[DS] OpenCL runtime not available.\n");
3318 selectedDevice.type = DS_DEVICE_NATIVE_CPU;
3319 selectedDevice.oclDeviceName =
"(null)";
3320 selectedDevice.score = NULL;
3321 selectedDevice.oclDeviceID = NULL;
3322 selectedDevice.oclDriverVersion = NULL;
3324 deviceIsSelected =
true;
3329 return selectedDevice;
3333 bool OpenclDevice::selectedDeviceIsOpenCL() {
3334 ds_device device = getDeviceSelection();
3335 return (device.type == DS_DEVICE_OPENCL_DEVICE);
3338 bool OpenclDevice::selectedDeviceIsNativeCPU() {
3339 ds_device device = getDeviceSelection();
3340 return (device.type == DS_DEVICE_NATIVE_CPU);
3356 #define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
3358 Pix * OpenclDevice::pixConvertRGBToGrayOCL(
3367 if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL;
3369 if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) {
3376 float sum = rwt + gwt + bwt;
3383 pixGetDimensions(srcPix, &w, &h, NULL);
3385 unsigned int *srcData = pixGetData(srcPix);
3386 int srcWPL = pixGetWpl(srcPix);
3387 int srcSize = srcWPL * h *
sizeof(
unsigned int);
3390 if ((dstPix = pixCreate(w, h, 8)) == NULL)
3392 pixCopyResolution(dstPix, srcPix);
3393 unsigned int *dstData = pixGetData(dstPix);
3394 int dstWPL = pixGetWpl(dstPix);
3395 int dstWords = dstWPL * h;
3396 int dstSize = dstWords *
sizeof(
unsigned int);
3403 SetKernelEnv( &kEnv );
3406 cl_mem srcBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, srcSize, (
void *)srcData, &clStatus );
3407 CHECK_OPENCL( clStatus, "clCreateBuffer srcBuffer");
3410 cl_mem dstBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, dstSize, (
void *)dstData, &clStatus );
3411 CHECK_OPENCL( clStatus, "clCreateBuffer dstBuffer");
3414 int block_size = 256;
3415 int numWorkGroups = ((h*w+block_size-1) / block_size );
3416 int numThreads = block_size*numWorkGroups;
3417 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
3418 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
3422 kEnv.mpkKernel = clCreateKernel( kEnv.mpkProgram,
"kernel_RGBToGray", &clStatus );
3423 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_RGBToGray");
3427 clStatus = clSetKernelArg( kEnv.mpkKernel, 0,
sizeof(cl_mem), (
void *)&srcBuffer );
3428 CHECK_OPENCL( clStatus,
"clSetKernelArg srcBuffer");
3429 clStatus = clSetKernelArg( kEnv.mpkKernel, 1,
sizeof(cl_mem), (
void *)&dstBuffer );
3430 CHECK_OPENCL( clStatus,
"clSetKernelArg dstBuffer");
3431 clStatus = clSetKernelArg( kEnv.mpkKernel, 2,
sizeof(
int), (
void *)&srcWPL );
3432 CHECK_OPENCL( clStatus,
"clSetKernelArg srcWPL" );
3433 clStatus = clSetKernelArg( kEnv.mpkKernel, 3,
sizeof(
int), (
void *)&dstWPL );
3434 CHECK_OPENCL( clStatus,
"clSetKernelArg dstWPL" );
3435 clStatus = clSetKernelArg( kEnv.mpkKernel, 4,
sizeof(
int), (
void *)&h );
3436 CHECK_OPENCL( clStatus,
"clSetKernelArg height" );
3437 clStatus = clSetKernelArg( kEnv.mpkKernel, 5,
sizeof(
int), (
void *)&w );
3438 CHECK_OPENCL( clStatus,
"clSetKernelArg width" );
3439 clStatus = clSetKernelArg( kEnv.mpkKernel, 6,
sizeof(
float), (
void *)&rwt );
3440 CHECK_OPENCL( clStatus,
"clSetKernelArg rwt" );
3441 clStatus = clSetKernelArg( kEnv.mpkKernel, 7,
sizeof(
float), (
void *)&gwt );
3442 CHECK_OPENCL( clStatus,
"clSetKernelArg gwt");
3443 clStatus = clSetKernelArg( kEnv.mpkKernel, 8,
sizeof(
float), (
void *)&bwt );
3444 CHECK_OPENCL( clStatus,
"clSetKernelArg bwt");
3448 clStatus = clEnqueueNDRangeKernel(
3451 1, NULL, global_work_size, local_work_size,
3453 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray" );
3454 clFinish( kEnv.mpkCmdQueue );
3458 void *ptr = clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, dstSize, 0, NULL, NULL, &clStatus);
3459 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer dstBuffer");
3460 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL);
3464 Pix *cpuPix = pixCreate(w, h, 8);
3465 pixCopyResolution(cpuPix, srcPix);
3466 unsigned int *cpuData = pixGetData(cpuPix);
3467 int cpuWPL = pixGetWpl(cpuPix);
3468 unsigned int *cpuLine, *srcLine;
3470 for (i = 0, srcLine = srcData, cpuLine = cpuData; i < h; i++) {
3471 for (j = 0; j < w; j++) {
3472 unsigned int word = *(srcLine + j);
3473 int val = (l_int32)(rwt * ((word >> L_RED_SHIFT) & 0xff) +
3474 gwt * ((word >> L_GREEN_SHIFT) & 0xff) +
3475 bwt * ((word >> L_BLUE_SHIFT) & 0xff) + 0.5);
3476 SET_DATA_BYTE(cpuLine, j, val);
3483 printf(
"converted 32-bit -> 8-bit image\n");
3484 for (
int row = 0; row < h; row++) {
3485 for (
int col = 0; col < w; col++) {
3486 int idx = row*w + col;
3487 unsigned int srcVal = srcData[idx];
3488 unsigned char cpuVal = ((
unsigned char *)cpuData)[idx];
3489 unsigned char oclVal = ((
unsigned char *)dstData)[idx];
3491 printf(
"%4i,%4i: %u, %u, %u\n", row, col, srcVal, cpuVal, oclVal);
3498 clReleaseMemObject(srcBuffer);
3499 clReleaseMemObject(dstBuffer);
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
#define PERF_COUNT_SUB(SUB)
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
#define PERF_COUNT_START(FUNCT_NAME)