/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencl_kernels.hpp" #include "opencv2/video/tracking.hpp" using namespace cv; using namespace cv::ocl; #define MIN_SIZE 32 namespace cv { namespace ocl { namespace optflow_farneback { static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) { string kernelName("updateMatrices"); #ifdef ANDROID size_t localThreads[3] = { 32, 4, 1 }; #else size_t localThreads[3] = { 32, 8, 1 }; #endif size_t globalThreads[3] = { (size_t)flowx.cols, (size_t)flowx.rows, 1 }; std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step)); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) { string kernelName("boxFilter5"); int height = src.rows / 5; #ifdef ANDROID size_t localThreads[3] = { 128, 1, 1 }; #else size_t localThreads[3] = { 256, 1, 1 }; #endif size_t globalThreads[3] = { (size_t)src.cols, (size_t)height, 1 }; int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy) { string kernelName("updateFlow"); int cols = divUp(flowx.cols, 4); #ifdef ANDROID size_t localThreads[3] = { 32, 4, 1 }; #else size_t localThreads[3] = { 32, 8, 1 }; #endif size_t globalThreads[3] = { (size_t)cols, (size_t)flowx.rows, 1 }; std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } } } } // namespace cv { namespace ocl { namespace optflow_farneback static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) { if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) return mat(Rect(0, 0, cols, rows)); return mat = oclMat(rows, cols, type); } void cv::ocl::FarnebackOpticalFlow::setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) { cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer)); gKerMat.upload(t_gKer); } void cv::ocl::FarnebackOpticalFlow::gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) { string kernelName("gaussianBlur"); #ifdef ANDROID size_t localThreads[3] = { 128, 1, 1 }; #else size_t localThreads[3] = { 256, 1, 1 }; #endif size_t globalThreads[3] = { (size_t)src.cols, (size_t)src.rows, 1 }; int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); CV_Assert(dst.size() == src.size()); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data)); args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } void cv::ocl::FarnebackOpticalFlow::polynomialExpansionOcl(const oclMat &src, int lpolyN, oclMat &dst) { string kernelName("polynomialExpansion"); #ifdef ANDROID size_t localThreads[3] = { 128, 1, 1 }; #else size_t localThreads[3] = { 256, 1, 1 }; #endif size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*lpolyN) * localThreads[0], (size_t)src.rows, 1 }; int smem_size = 3 * localThreads[0] * sizeof(float); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gMat.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xgMat.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxgMat.data)); args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); char opt [128]; sprintf(opt, "-D polyN=%d", lpolyN); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1, opt); } void cv::ocl::FarnebackOpticalFlow::gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) { string kernelName("gaussianBlur5"); int height = src.rows / 5; #ifdef ANDROID size_t localThreads[3] = { 128, 1, 1 }; #else size_t localThreads[3] = { 256, 1, 1 }; #endif size_t globalThreads[3] = { (size_t)src.cols, (size_t)height, 1 }; int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data)); args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow() { numLevels = 5; pyrScale = 0.5; fastPyramids = false; winSize = 13; numIters = 10; polyN = 5; polySigma = 1.1; flags = 0; } void cv::ocl::FarnebackOpticalFlow::releaseMemory() { frames_[0].release(); frames_[1].release(); pyrLevel_[0].release(); pyrLevel_[1].release(); M_.release(); bufM_.release(); R_[0].release(); R_[1].release(); blurredFrame_[0].release(); blurredFrame_[1].release(); pyramid0_.clear(); pyramid1_.clear(); } void cv::ocl::FarnebackOpticalFlow::prepareGaussian( int n, double sigma, float *g, float *xg, float *xxg, double &ig11, double &ig03, double &ig33, double &ig55) { double s = 0.; for (int x = -n; x <= n; x++) { g[x] = (float)std::exp(-x*x/(2*sigma*sigma)); s += g[x]; } s = 1./s; for (int x = -n; x <= n; x++) { g[x] = (float)(g[x]*s); xg[x] = (float)(x*g[x]); xxg[x] = (float)(x*x*g[x]); } Mat_<double> G(6, 6); G.setTo(0); for (int y = -n; y <= n; y++) { for (int x = -n; x <= n; x++) { G(0,0) += g[y]*g[x]; G(1,1) += g[y]*g[x]*x*x; G(3,3) += g[y]*g[x]*x*x*x*x; G(5,5) += g[y]*g[x]*x*x*y*y; } } //G[0][0] = 1.; G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1); G(4,4) = G(3,3); G(3,4) = G(4,3) = G(5,5); // invG: // [ x e e ] // [ y ] // [ y ] // [ e z ] // [ e z ] // [ u ] Mat_<double> invG = G.inv(DECOMP_CHOLESKY); ig11 = invG(1,1); ig03 = invG(0,3); ig33 = invG(3,3); ig55 = invG(5,5); } void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma) { vector<float> buf(n*6 + 3); float* g = &buf[0] + n; float* xg = g + n*2 + 1; float* xxg = xg + n*2 + 1; if (sigma < FLT_EPSILON) sigma = n*0.3; double ig11, ig03, ig33, ig55; prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55); cv::Mat t_g(1, n + 1, CV_32FC1, g); cv::Mat t_xg(1, n + 1, CV_32FC1, xg); cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg); gMat.upload(t_g); xgMat.upload(t_xg); xxgMat.upload(t_xxg); ig[0] = static_cast<float>(ig11); ig[1] = static_cast<float>(ig03); ig[2] = static_cast<float>(ig33); ig[3] = static_cast<float>(ig55); } void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy, oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) { optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM); swap(M, bufM); optflow_farneback::updateFlowOcl(M, flowx, flowy); if (updateMatrices) optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M); } void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) { gaussianBlur5Ocl(M, blockSize/2, bufM); swap(M, bufM); optflow_farneback::updateFlowOcl(M, flowx, flowy); if (updateMatrices) optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M); } void cv::ocl::FarnebackOpticalFlow::operator ()( const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy) { CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); CV_Assert(frame0.size() == frame1.size()); CV_Assert(polyN == 5 || polyN == 7); CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6); Size size = frame0.size(); oclMat prevFlowX, prevFlowY, curFlowX, curFlowY; flowx.create(size, CV_32F); flowy.create(size, CV_32F); oclMat flowx0 = flowx; oclMat flowy0 = flowy; // Crop unnecessary levels double scale = 1; int numLevelsCropped = 0; for (; numLevelsCropped < numLevels; numLevelsCropped++) { scale *= pyrScale; if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE) break; } frame0.convertTo(frames_[0], CV_32F); frame1.convertTo(frames_[1], CV_32F); if (fastPyramids) { // Build Gaussian pyramids using pyrDown() pyramid0_.resize(numLevelsCropped + 1); pyramid1_.resize(numLevelsCropped + 1); pyramid0_[0] = frames_[0]; pyramid1_[0] = frames_[1]; for (int i = 1; i <= numLevelsCropped; ++i) { pyrDown(pyramid0_[i - 1], pyramid0_[i]); pyrDown(pyramid1_[i - 1], pyramid1_[i]); } } setPolynomialExpansionConsts(polyN, polySigma); for (int k = numLevelsCropped; k >= 0; k--) { scale = 1; for (int i = 0; i < k; i++) scale *= pyrScale; double sigma = (1./scale - 1) * 0.5; int smoothSize = cvRound(sigma*5) | 1; smoothSize = std::max(smoothSize, 3); int width = cvRound(size.width*scale); int height = cvRound(size.height*scale); if (fastPyramids) { width = pyramid0_[k].cols; height = pyramid0_[k].rows; } if (k > 0) { curFlowX.create(height, width, CV_32F); curFlowY.create(height, width, CV_32F); } else { curFlowX = flowx0; curFlowY = flowy0; } if (!prevFlowX.data) { if (flags & cv::OPTFLOW_USE_INITIAL_FLOW) { resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR); resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR); multiply(scale, curFlowX, curFlowX); multiply(scale, curFlowY, curFlowY); } else { curFlowX.setTo(0); curFlowY.setTo(0); } } else { resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR); resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR); multiply(1./pyrScale, curFlowX, curFlowX); multiply(1./pyrScale, curFlowY, curFlowY); } oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_); oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_); oclMat R[2] = { allocMatFromBuf(5*height, width, CV_32F, R_[0]), allocMatFromBuf(5*height, width, CV_32F, R_[1]) }; if (fastPyramids) { polynomialExpansionOcl(pyramid0_[k], polyN, R[0]); polynomialExpansionOcl(pyramid1_[k], polyN, R[1]); } else { oclMat blurredFrame[2] = { allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]), allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1]) }; oclMat pyrLevel[2] = { allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]), allocMatFromBuf(height, width, CV_32F, pyrLevel_[1]) }; Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2); for (int i = 0; i < 2; i++) { gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]); resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR); polynomialExpansionOcl(pyrLevel[i], polyN, R[i]); } } optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M); if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) { Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F); setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2); } for (int i = 0; i < numIters; i++) { if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1); else updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1); } prevFlowX = curFlowX; prevFlowY = curFlowY; } flowx = curFlowX; flowy = curFlowY; }