/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Jia Haipeng, jiahaipeng95@gmail.com // Xiaopeng Fu, xiaopeng@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; namespace cv { namespace ocl { namespace stereoBM { ///////////////////////////////////////////////////////////////////////// //////////////////////////prefilter_xsbel//////////////////////////////// //////////////////////////////////////////////////////////////////////// static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap) { string kernelName = "prefilter_xsobel"; size_t blockSize = 1; size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 }; size_t localThreads[3] = { blockSize, blockSize, 1 }; std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&prefilterCap)); openCLExecuteKernel(Context::getContext(), &stereobm, kernelName, globalThreads, localThreads, args, -1, -1); } ////////////////////////////////////////////////////////////////////////// //////////////////////////////common//////////////////////////////////// //////////////////////////////////////////////////////////////////////// #define N_DISPARITIES 8 #define ROWSperTHREAD 21 #define BLOCK_W 128 //////////////////////////////////////////////////////////////////////////// ///////////////////////////////stereoBM_GPU//////////////////////////////// //////////////////////////////////////////////////////////////////////////// static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, int maxdisp, int winSize, oclMat &minSSD_buf) { int winsz2 = winSize >> 1; string kernelName = "stereoKernel"; disp.setTo(Scalar_<unsigned char>::all(0)); minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF)); size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize(); size_t local_mem_size = (N_DISPARITIES * (BLOCK_W + 2 * winsz2)) * sizeof(cl_uint); //size_t blockSize = 1; size_t localThreads[] = { BLOCK_W, 1, 1 }; size_t globalThreads[] = { (size_t)left.cols - maxdisp - 2 * winsz2, divUp(left.rows - 2 * winsz2, ROWSperTHREAD), 1 }; std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&right.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&minSSD_buf.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&minssd_step)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disp.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disp.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&maxdisp)); args.push_back(std::make_pair(local_mem_size, (void *)NULL)); char opt [128]; sprintf(opt, "-D radius=%d", winsz2); openCLExecuteKernel(Context::getContext(), &stereobm, kernelName, globalThreads, localThreads, args, -1, -1, opt); } //////////////////////////////////////////////////////////////////////////// ///////////////////////////////postfilter_textureness/////////////////////// //////////////////////////////////////////////////////////////////////////// static void postfilter_textureness(oclMat &left, int winSize, float avergeTexThreshold, oclMat &disparity) { string kernelName = "textureness_kernel"; size_t blockSize = 1; size_t localThreads[] = { BLOCK_W, blockSize ,1}; size_t globalThreads[] = { (size_t)left.cols, divUp(left.rows, 2 * ROWSperTHREAD), 1 }; size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disparity.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.step)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&winSize)); args.push_back(std::make_pair(sizeof(cl_float), (void *)&avergeTexThreshold)); args.push_back(std::make_pair(local_mem_size, (void*)NULL)); openCLExecuteKernel(Context::getContext(), &stereobm, kernelName, globalThreads, localThreads, args, -1, -1); } ////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////operator///////////////////////////////// ///////////////////////////////////////////////////////////////////////////// static void operator_(oclMat &minSSD, oclMat &leBuf, oclMat &riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const oclMat &left, const oclMat &right, oclMat &disparity) { CV_DbgAssert(left.rows == right.rows && left.cols == right.cols); CV_DbgAssert(left.type() == CV_8UC1); CV_DbgAssert(right.type() == CV_8UC1); disparity.create(left.size(), CV_8UC1); minSSD.create(left.size(), CV_32SC1); oclMat le_for_bm = left; oclMat ri_for_bm = right; if (preset == cv::ocl::StereoBM_OCL::PREFILTER_XSOBEL) { leBuf.create( left.size(), left.type()); riBuf.create(right.size(), right.type()); prefilter_xsobel( left, leBuf, 31); prefilter_xsobel(right, riBuf, 31); le_for_bm = leBuf; ri_for_bm = riBuf; } stereo_bm(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD); if (avergeTexThreshold) { postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity); } } } } } const float defaultAvgTexThreshold = 3; cv::ocl::StereoBM_OCL::StereoBM_OCL() : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) {} cv::ocl::StereoBM_OCL::StereoBM_OCL(int preset_, int ndisparities_, int winSize_) : preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold) { const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8); CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp); CV_Assert(ndisp % 8 == 0); CV_Assert(winSize % 2 == 1); } bool cv::ocl::StereoBM_OCL::checkIfGpuCallReasonable() { return true; } void cv::ocl::StereoBM_OCL::operator() ( const oclMat &left, const oclMat &right, oclMat &disparity) { cv::ocl::stereoBM::operator_(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity); }