/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" using namespace cv; using namespace cv::ocl; #if !defined HAVE_CLAMDFFT void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented"); } namespace cv { namespace ocl { void fft_teardown(); }} void cv::ocl::fft_teardown(){} #else #include "opencv2/ocl/cl_runtime/clamdfft_runtime.hpp" namespace cv { namespace ocl { void fft_setup(); void fft_teardown(); enum FftType { C2R = 1, // complex to complex R2C = 2, // real to opencl HERMITIAN_INTERLEAVED C2C = 3 // opencl HERMITIAN_INTERLEAVED to real }; struct FftPlan { protected: clAmdFftPlanHandle plHandle; FftPlan& operator=(const FftPlan&); public: FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); ~FftPlan(); inline clAmdFftPlanHandle getPlanHandle() { return plHandle; } const Size dft_size; const int src_step, dst_step; const int flags; const FftType type; }; class PlanCache { protected: PlanCache(); ~PlanCache(); static PlanCache* planCache; bool started; vector<FftPlan *> planStore; clAmdFftSetupData *setupData; public: friend void fft_setup(); friend void fft_teardown(); static PlanCache* getPlanCache() { if (NULL == planCache) planCache = new PlanCache(); return planCache; } // return a baked plan-> // if there is one matched plan, return it // if not, bake a new one, put it into the planStore and return it. static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); // remove a single plan from the store // return true if the plan is successfully removed // else static bool removePlan(clAmdFftPlanHandle ); }; } } PlanCache* PlanCache::planCache = NULL; void cv::ocl::fft_setup() { PlanCache& pCache = *PlanCache::getPlanCache(); if(pCache.started) { return; } if (pCache.setupData == NULL) pCache.setupData = new clAmdFftSetupData; openCLSafeCall(clAmdFftInitSetupData( pCache.setupData )); pCache.started = true; } void cv::ocl::fft_teardown() { PlanCache& pCache = *PlanCache::getPlanCache(); if(!pCache.started) return; for(size_t i = 0; i < pCache.planStore.size(); i ++) delete pCache.planStore[i]; pCache.planStore.clear(); try { openCLSafeCall( clAmdFftTeardown( ) ); } catch (const std::bad_alloc &) { } delete pCache.setupData; pCache.setupData = NULL; pCache.started = false; } // bake a new plan cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) : plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type) { fft_setup(); bool is_1d_input = (_dft_size.height == 1); int is_row_dft = flags & DFT_ROWS; int is_scaled_dft = flags & DFT_SCALE; int is_inverse = flags & DFT_INVERSE; //clAmdFftResultLocation place; clAmdFftLayout inLayout; clAmdFftLayout outLayout; clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; size_t batchSize = is_row_dft ? dft_size.height : 1; size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1}; //size_t clLengthsOut[ 3 ] = {1, 1, 1}; size_t clStridesOut[ 3 ] = {1, 1, 1}; clLengthsIn[0] = dft_size.width; clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; clStridesIn[0] = 1; clStridesOut[0] = 1; switch(_type) { case C2C: inLayout = CLFFT_COMPLEX_INTERLEAVED; outLayout = CLFFT_COMPLEX_INTERLEAVED; clStridesIn[1] = src_step / sizeof(std::complex<float>); clStridesOut[1] = clStridesIn[1]; break; case R2C: inLayout = CLFFT_REAL; outLayout = CLFFT_HERMITIAN_INTERLEAVED; clStridesIn[1] = src_step / sizeof(float); clStridesOut[1] = dst_step / sizeof(std::complex<float>); break; case C2R: inLayout = CLFFT_HERMITIAN_INTERLEAVED; outLayout = CLFFT_REAL; clStridesIn[1] = src_step / sizeof(std::complex<float>); clStridesOut[1] = dst_step / sizeof(float); break; default: //std::runtime_error("does not support this conversion!"); cout << "Does not support this conversion!" << endl; throw exception(); break; } clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) ); openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) ); openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) ); openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) ); float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f; openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); //ready to bake openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); } cv::ocl::PlanCache::PlanCache() : started(false), planStore(vector<cv::ocl::FftPlan *>()), setupData(NULL) { } cv::ocl::PlanCache::~PlanCache() { fft_teardown(); } FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) { PlanCache& pCache = *PlanCache::getPlanCache(); vector<FftPlan *>& pStore = pCache.planStore; // go through search for(size_t i = 0; i < pStore.size(); i ++) { FftPlan *plan = pStore[i]; if( plan->dft_size.width == _dft_size.width && plan->dft_size.height == _dft_size.height && plan->flags == _flags && plan->src_step == _src_step && plan->dst_step == _dst_step && plan->type == _type ) { return plan; } } // no baked plan is found FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); pStore.push_back(newPlan); return newPlan; } bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle) { PlanCache& pCache = *PlanCache::getPlanCache(); vector<FftPlan *>& pStore = pCache.planStore; for(size_t i = 0; i < pStore.size(); i ++) { if(pStore[i]->getPlanHandle() == plHandle) { pStore.erase(pStore.begin() + i); delete pStore[i]; return true; } } return false; } void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) { if(dft_size == Size(0, 0)) { dft_size = src.size(); } // check if the given dft size is of optimal dft size CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area())); // the two flags are not compatible CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) ); // similar assertions with cuda module CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); //bool is_1d_input = (src.rows == 1); //int is_row_dft = flags & DFT_ROWS; //int is_scaled_dft = flags & DFT_SCALE; int is_inverse = flags & DFT_INVERSE; bool is_complex_input = src.channels() == 2; bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform CV_Assert(is_complex_input || is_complex_output); FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1); switch(type) { case C2C: dst.create(src.rows, src.cols, CV_32FC2); break; case R2C: dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); break; case C2R: CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); dst.create(src.rows, dft_size.width, CV_32FC1); break; default: //std::runtime_error("does not support this conversion!"); cout << "Does not support this conversion!" << endl; throw exception(); break; } clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle(); //get the buffersize size_t buffersize = 0; openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); //allocate the intermediate buffer // TODO, bind this with the current FftPlan cl_mem clMedBuffer = NULL; if (buffersize) { cl_int medstatus; clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr()); openCLSafeCall( clAmdFftEnqueueTransform( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, 1, &clq, 0, NULL, NULL, (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); openCLSafeCall( clFinish(clq) ); if(clMedBuffer) { openCLFree(clMedBuffer); } //fft_teardown(); } #endif