/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // Intel License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of Intel Corporation may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" namespace cv { ////////////////// Helper functions ////////////////////// static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t)*8 - 2); static void calcHistLookupTables_8u( const Mat& hist, const SparseMat& shist, int dims, const float** ranges, const double* uniranges, bool uniform, bool issparse, std::vector<size_t>& _tab ) { const int low = 0, high = 256; int i, j; _tab.resize((high-low)*dims); size_t* tab = &_tab[0]; if( uniform ) { for( i = 0; i < dims; i++ ) { double a = uniranges[i*2]; double b = uniranges[i*2+1]; int sz = !issparse ? hist.size[i] : shist.size(i); size_t step = !issparse ? hist.step[i] : 1; for( j = low; j < high; j++ ) { int idx = cvFloor(j*a + b); size_t written_idx; if( (unsigned)idx < (unsigned)sz ) written_idx = idx*step; else written_idx = OUT_OF_RANGE; tab[i*(high - low) + j - low] = written_idx; } } } else { for( i = 0; i < dims; i++ ) { int limit = std::min(cvCeil(ranges[i][0]), high); int idx = -1, sz = !issparse ? hist.size[i] : shist.size(i); size_t written_idx = OUT_OF_RANGE; size_t step = !issparse ? hist.step[i] : 1; for(j = low;;) { for( ; j < limit; j++ ) tab[i*(high - low) + j - low] = written_idx; if( (unsigned)(++idx) < (unsigned)sz ) { limit = std::min(cvCeil(ranges[i][idx+1]), high); written_idx = idx*step; } else { for( ; j < high; j++ ) tab[i*(high - low) + j - low] = OUT_OF_RANGE; break; } } } } } static void histPrepareImages( const Mat* images, int nimages, const int* channels, const Mat& mask, int dims, const int* histSize, const float** ranges, bool uniform, std::vector<uchar*>& ptrs, std::vector<int>& deltas, Size& imsize, std::vector<double>& uniranges ) { int i, j, c; CV_Assert( channels != 0 || nimages == dims ); imsize = images[0].size(); int depth = images[0].depth(), esz1 = (int)images[0].elemSize1(); bool isContinuous = true; ptrs.resize(dims + 1); deltas.resize((dims + 1)*2); for( i = 0; i < dims; i++ ) { if(!channels) { j = i; c = 0; CV_Assert( images[j].channels() == 1 ); } else { c = channels[i]; CV_Assert( c >= 0 ); for( j = 0; j < nimages; c -= images[j].channels(), j++ ) if( c < images[j].channels() ) break; CV_Assert( j < nimages ); } CV_Assert( images[j].size() == imsize && images[j].depth() == depth ); if( !images[j].isContinuous() ) isContinuous = false; ptrs[i] = images[j].data + c*esz1; deltas[i*2] = images[j].channels(); deltas[i*2+1] = (int)(images[j].step/esz1 - imsize.width*deltas[i*2]); } if( !mask.empty() ) { CV_Assert( mask.size() == imsize && mask.channels() == 1 ); isContinuous = isContinuous && mask.isContinuous(); ptrs[dims] = mask.data; deltas[dims*2] = 1; deltas[dims*2 + 1] = (int)(mask.step/mask.elemSize1()); } #ifndef HAVE_TBB if( isContinuous ) { imsize.width *= imsize.height; imsize.height = 1; } #endif if( !ranges ) { CV_Assert( depth == CV_8U ); uniranges.resize( dims*2 ); for( i = 0; i < dims; i++ ) { uniranges[i*2] = histSize[i]/256.; uniranges[i*2+1] = 0; } } else if( uniform ) { uniranges.resize( dims*2 ); for( i = 0; i < dims; i++ ) { CV_Assert( ranges[i] && ranges[i][0] < ranges[i][1] ); double low = ranges[i][0], high = ranges[i][1]; double t = histSize[i]/(high - low); uniranges[i*2] = t; uniranges[i*2+1] = -t*low; } } else { for( i = 0; i < dims; i++ ) { size_t n = histSize[i]; for(size_t k = 0; k < n; k++ ) CV_Assert( ranges[i][k] < ranges[i][k+1] ); } } } ////////////////////////////////// C A L C U L A T E H I S T O G R A M //////////////////////////////////// #ifdef HAVE_TBB enum {one = 1, two, three}; // array elements number template<typename T> class calcHist1D_Invoker { public: calcHist1D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Mat& hist, const double* _uniranges, int sz, int dims, Size& imageSize ) : mask_(_ptrs[dims]), mstep_(_deltas[dims*2 + 1]), imageWidth_(imageSize.width), histogramSize_(hist.size()), histogramType_(hist.type()), globalHistogram_((tbb::atomic<int>*)hist.data) { p_[0] = ((T**)&_ptrs[0])[0]; step_[0] = (&_deltas[0])[1]; d_[0] = (&_deltas[0])[0]; a_[0] = (&_uniranges[0])[0]; b_[0] = (&_uniranges[0])[1]; size_[0] = sz; } void operator()( const BlockedRange& range ) const { T* p0 = p_[0] + range.begin() * (step_[0] + imageWidth_*d_[0]); uchar* mask = mask_ + range.begin()*mstep_; for( int row = range.begin(); row < range.end(); row++, p0 += step_[0] ) { if( !mask_ ) { for( int x = 0; x < imageWidth_; x++, p0 += d_[0] ) { int idx = cvFloor(*p0*a_[0] + b_[0]); if( (unsigned)idx < (unsigned)size_[0] ) { globalHistogram_[idx].fetch_and_add(1); } } } else { for( int x = 0; x < imageWidth_; x++, p0 += d_[0] ) { if( mask[x] ) { int idx = cvFloor(*p0*a_[0] + b_[0]); if( (unsigned)idx < (unsigned)size_[0] ) { globalHistogram_[idx].fetch_and_add(1); } } } mask += mstep_; } } } private: calcHist1D_Invoker operator=(const calcHist1D_Invoker&); T* p_[one]; uchar* mask_; int step_[one]; int d_[one]; int mstep_; double a_[one]; double b_[one]; int size_[one]; int imageWidth_; Size histogramSize_; int histogramType_; tbb::atomic<int>* globalHistogram_; }; template<typename T> class calcHist2D_Invoker { public: calcHist2D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Mat& hist, const double* _uniranges, const int* size, int dims, Size& imageSize, size_t* hstep ) : mask_(_ptrs[dims]), mstep_(_deltas[dims*2 + 1]), imageWidth_(imageSize.width), histogramSize_(hist.size()), histogramType_(hist.type()), globalHistogram_(hist.data) { p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1]; step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; a_[0] = (&_uniranges[0])[0]; a_[1] = (&_uniranges[0])[2]; b_[0] = (&_uniranges[0])[1]; b_[1] = (&_uniranges[0])[3]; size_[0] = size[0]; size_[1] = size[1]; hstep_[0] = hstep[0]; } void operator()(const BlockedRange& range) const { T* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]); T* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]); uchar* mask = mask_ + range.begin()*mstep_; for( int row = range.begin(); row < range.end(); row++, p0 += step_[0], p1 += step_[1] ) { if( !mask_ ) { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] ) { int idx0 = cvFloor(*p0*a_[0] + b_[0]); int idx1 = cvFloor(*p1*a_[1] + b_[1]); if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] ) ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0) )[idx1].fetch_and_add(1); } } else { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] ) { if( mask[x] ) { int idx0 = cvFloor(*p0*a_[0] + b_[0]); int idx1 = cvFloor(*p1*a_[1] + b_[1]); if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] ) ((tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0))[idx1].fetch_and_add(1); } } mask += mstep_; } } } private: calcHist2D_Invoker operator=(const calcHist2D_Invoker&); T* p_[two]; uchar* mask_; int step_[two]; int d_[two]; int mstep_; double a_[two]; double b_[two]; int size_[two]; const int imageWidth_; size_t hstep_[one]; Size histogramSize_; int histogramType_; uchar* globalHistogram_; }; template<typename T> class calcHist3D_Invoker { public: calcHist3D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, const double* uniranges, int _dims, size_t* hstep, int* size ) : mask_(_ptrs[_dims]), mstep_(_deltas[_dims*2 + 1]), imageWidth_(imsize.width), globalHistogram_(hist.data) { p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1]; p_[2] = ((T**)&_ptrs[0])[2]; step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; step_[2] = (&_deltas[0])[5]; d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; d_[2] = (&_deltas[0])[4]; a_[0] = uniranges[0]; a_[1] = uniranges[2]; a_[2] = uniranges[4]; b_[0] = uniranges[1]; b_[1] = uniranges[3]; b_[2] = uniranges[5]; size_[0] = size[0]; size_[1] = size[1]; size_[2] = size[2]; hstep_[0] = hstep[0]; hstep_[1] = hstep[1]; } void operator()( const BlockedRange& range ) const { T* p0 = p_[0] + range.begin()*(imageWidth_*d_[0] + step_[0]); T* p1 = p_[1] + range.begin()*(imageWidth_*d_[1] + step_[1]); T* p2 = p_[2] + range.begin()*(imageWidth_*d_[2] + step_[2]); uchar* mask = mask_ + range.begin()*mstep_; for( int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] ) { if( !mask_ ) { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] ) { int idx0 = cvFloor(*p0*a_[0] + b_[0]); int idx1 = cvFloor(*p1*a_[1] + b_[1]); int idx2 = cvFloor(*p2*a_[2] + b_[2]); if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] && (unsigned)idx2 < (unsigned)size_[2] ) { ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1); } } } else { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] ) { if( mask[x] ) { int idx0 = cvFloor(*p0*a_[0] + b_[0]); int idx1 = cvFloor(*p1*a_[1] + b_[1]); int idx2 = cvFloor(*p2*a_[2] + b_[2]); if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] && (unsigned)idx2 < (unsigned)size_[2] ) { ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1); } } } mask += mstep_; } } } static bool isFit( const Mat& histogram, const Size imageSize ) { return ( imageSize.width * imageSize.height >= 320*240 && histogram.total() >= 8*8*8 ); } private: calcHist3D_Invoker operator=(const calcHist3D_Invoker&); T* p_[three]; uchar* mask_; int step_[three]; int d_[three]; const int mstep_; double a_[three]; double b_[three]; int size_[three]; int imageWidth_; size_t hstep_[two]; uchar* globalHistogram_; }; class CalcHist1D_8uInvoker { public: CalcHist1D_8uInvoker( const std::vector<uchar*>& ptrs, const std::vector<int>& deltas, Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab, tbb::mutex* lock ) : mask_(ptrs[dims]), mstep_(deltas[dims*2 + 1]), imageWidth_(imsize.width), imageSize_(imsize), histSize_(hist.size()), histType_(hist.type()), tab_((size_t*)&tab[0]), histogramWriteLock_(lock), globalHistogram_(hist.data) { p_[0] = (&ptrs[0])[0]; step_[0] = (&deltas[0])[1]; d_[0] = (&deltas[0])[0]; } void operator()( const BlockedRange& range ) const { int localHistogram[256] = { 0, }; uchar* mask = mask_; uchar* p0 = p_[0]; int x; tbb::mutex::scoped_lock lock; if( !mask_ ) { int n = (imageWidth_ - 4) / 4 + 1; int tail = imageWidth_ - n*4; int xN = 4*n; p0 += (xN*d_[0] + tail*d_[0] + step_[0]) * range.begin(); } else { p0 += (imageWidth_*d_[0] + step_[0]) * range.begin(); mask += mstep_*range.begin(); } for( int i = range.begin(); i < range.end(); i++, p0 += step_[0] ) { if( !mask_ ) { if( d_[0] == 1 ) { for( x = 0; x <= imageWidth_ - 4; x += 4 ) { int t0 = p0[x], t1 = p0[x+1]; localHistogram[t0]++; localHistogram[t1]++; t0 = p0[x+2]; t1 = p0[x+3]; localHistogram[t0]++; localHistogram[t1]++; } p0 += x; } else { for( x = 0; x <= imageWidth_ - 4; x += 4 ) { int t0 = p0[0], t1 = p0[d_[0]]; localHistogram[t0]++; localHistogram[t1]++; p0 += d_[0]*2; t0 = p0[0]; t1 = p0[d_[0]]; localHistogram[t0]++; localHistogram[t1]++; p0 += d_[0]*2; } } for( ; x < imageWidth_; x++, p0 += d_[0] ) { localHistogram[*p0]++; } } else { for( x = 0; x < imageWidth_; x++, p0 += d_[0] ) { if( mask[x] ) { localHistogram[*p0]++; } } mask += mstep_; } } lock.acquire(*histogramWriteLock_); for(int i = 0; i < 256; i++ ) { size_t hidx = tab_[i]; if( hidx < OUT_OF_RANGE ) { *(int*)((globalHistogram_ + hidx)) += localHistogram[i]; } } lock.release(); } static bool isFit( const Mat& histogram, const Size imageSize ) { return ( histogram.total() >= 8 && imageSize.width * imageSize.height >= 160*120 ); } private: uchar* p_[one]; uchar* mask_; int mstep_; int step_[one]; int d_[one]; int imageWidth_; Size imageSize_; Size histSize_; int histType_; size_t* tab_; tbb::mutex* histogramWriteLock_; uchar* globalHistogram_; }; class CalcHist2D_8uInvoker { public: CalcHist2D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, const std::vector<size_t>& _tab, tbb::mutex* lock ) : mask_(_ptrs[dims]), mstep_(_deltas[dims*2 + 1]), imageWidth_(imsize.width), histSize_(hist.size()), histType_(hist.type()), tab_((size_t*)&_tab[0]), histogramWriteLock_(lock), globalHistogram_(hist.data) { p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1]; step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; } void operator()( const BlockedRange& range ) const { uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]); uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]); uchar* mask = mask_ + range.begin()*mstep_; Mat localHist = Mat::zeros(histSize_, histType_); uchar* localHistData = localHist.data; tbb::mutex::scoped_lock lock; for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1]) { if( !mask_ ) { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] ) { size_t idx = tab_[*p0] + tab_[*p1 + 256]; if( idx < OUT_OF_RANGE ) { ++*(int*)(localHistData + idx); } } } else { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] ) { size_t idx; if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256]) < OUT_OF_RANGE ) { ++*(int*)(localHistData + idx); } } mask += mstep_; } } lock.acquire(*histogramWriteLock_); for(int i = 0; i < histSize_.width*histSize_.height; i++) { ((int*)globalHistogram_)[i] += ((int*)localHistData)[i]; } lock.release(); } static bool isFit( const Mat& histogram, const Size imageSize ) { return ( (histogram.total() > 4*4 && histogram.total() <= 116*116 && imageSize.width * imageSize.height >= 320*240) || (histogram.total() > 116*116 && imageSize.width * imageSize.height >= 1280*720) ); } private: uchar* p_[two]; uchar* mask_; int step_[two]; int d_[two]; int mstep_; int imageWidth_; Size histSize_; int histType_; size_t* tab_; tbb::mutex* histogramWriteLock_; uchar* globalHistogram_; }; class CalcHist3D_8uInvoker { public: CalcHist3D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab ) : mask_(_ptrs[dims]), mstep_(_deltas[dims*2 + 1]), histogramSize_(hist.size.p), histogramType_(hist.type()), imageWidth_(imsize.width), tab_((size_t*)&tab[0]), globalHistogram_(hist.data) { p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1]; p_[2] = (uchar*)(&_ptrs[0])[2]; step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; step_[2] = (&_deltas[0])[5]; d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; d_[2] = (&_deltas[0])[4]; } void operator()( const BlockedRange& range ) const { uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]); uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]); uchar* p2 = p_[2] + range.begin()*(step_[2] + imageWidth_*d_[2]); uchar* mask = mask_ + range.begin()*mstep_; for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] ) { if( !mask_ ) { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] ) { size_t idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512]; if( idx < OUT_OF_RANGE ) { ( *(tbb::atomic<int>*)(globalHistogram_ + idx) ).fetch_and_add(1); } } } else { for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] ) { size_t idx; if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512]) < OUT_OF_RANGE ) { (*(tbb::atomic<int>*)(globalHistogram_ + idx)).fetch_and_add(1); } } mask += mstep_; } } } static bool isFit( const Mat& histogram, const Size imageSize ) { return ( histogram.total() >= 128*128*128 && imageSize.width * imageSize.width >= 320*240 ); } private: uchar* p_[three]; uchar* mask_; int mstep_; int step_[three]; int d_[three]; int* histogramSize_; int histogramType_; int imageWidth_; size_t* tab_; uchar* globalHistogram_; }; static void callCalcHist2D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, std::vector<size_t>& _tab ) { int grainSize = imsize.height / tbb::task_scheduler_init::default_num_threads(); tbb::mutex histogramWriteLock; CalcHist2D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock); parallel_for(BlockedRange(0, imsize.height, grainSize), body); } static void callCalcHist3D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, std::vector<size_t>& _tab ) { CalcHist3D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab); parallel_for(BlockedRange(0, imsize.height), body); } #endif template<typename T> static void calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, const float** _ranges, const double* _uniranges, bool uniform ) { T** ptrs = (T**)&_ptrs[0]; const int* deltas = &_deltas[0]; uchar* H = hist.ptr(); int i, x; const uchar* mask = _ptrs[dims]; int mstep = _deltas[dims*2 + 1]; int size[CV_MAX_DIM]; size_t hstep[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) { size[i] = hist.size[i]; hstep[i] = hist.step[i]; } if( uniform ) { const double* uniranges = &_uniranges[0]; if( dims == 1 ) { #ifdef HAVE_TBB calcHist1D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size[0], dims, imsize); parallel_for(BlockedRange(0, imsize.height), body); #else double a = uniranges[0], b = uniranges[1]; int sz = size[0], d0 = deltas[0], step0 = deltas[1]; const T* p0 = (const T*)ptrs[0]; for( ; imsize.height--; p0 += step0, mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++, p0 += d0 ) { int idx = cvFloor(*p0*a + b); if( (unsigned)idx < (unsigned)sz ) ((int*)H)[idx]++; } else for( x = 0; x < imsize.width; x++, p0 += d0 ) if( mask[x] ) { int idx = cvFloor(*p0*a + b); if( (unsigned)idx < (unsigned)sz ) ((int*)H)[idx]++; } } #endif //HAVE_TBB return; } else if( dims == 2 ) { #ifdef HAVE_TBB calcHist2D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size, dims, imsize, hstep); parallel_for(BlockedRange(0, imsize.height), body); #else double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3]; int sz0 = size[0], sz1 = size[1]; int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3]; size_t hstep0 = hstep[0]; const T* p0 = (const T*)ptrs[0]; const T* p1 = (const T*)ptrs[1]; for( ; imsize.height--; p0 += step0, p1 += step1, mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 ) ((int*)(H + hstep0*idx0))[idx1]++; } else for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) if( mask[x] ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 ) ((int*)(H + hstep0*idx0))[idx1]++; } } #endif //HAVE_TBB return; } else if( dims == 3 ) { #ifdef HAVE_TBB if( calcHist3D_Invoker<T>::isFit(hist, imsize) ) { calcHist3D_Invoker<T> body(_ptrs, _deltas, imsize, hist, uniranges, dims, hstep, size); parallel_for(BlockedRange(0, imsize.height), body); return; } #endif double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3], a2 = uniranges[4], b2 = uniranges[5]; int sz0 = size[0], sz1 = size[1], sz2 = size[2]; int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3], d2 = deltas[4], step2 = deltas[5]; size_t hstep0 = hstep[0], hstep1 = hstep[1]; const T* p0 = (const T*)ptrs[0]; const T* p1 = (const T*)ptrs[1]; const T* p2 = (const T*)ptrs[2]; for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); int idx2 = cvFloor(*p2*a2 + b2); if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 && (unsigned)idx2 < (unsigned)sz2 ) ((int*)(H + hstep0*idx0 + hstep1*idx1))[idx2]++; } else for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) if( mask[x] ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); int idx2 = cvFloor(*p2*a2 + b2); if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 && (unsigned)idx2 < (unsigned)sz2 ) ((int*)(H + hstep0*idx0 + hstep1*idx1))[idx2]++; } } } else { for( ; imsize.height--; mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++ ) { uchar* Hptr = H; for( i = 0; i < dims; i++ ) { int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]); if( (unsigned)idx >= (unsigned)size[i] ) break; ptrs[i] += deltas[i*2]; Hptr += idx*hstep[i]; } if( i == dims ) ++*((int*)Hptr); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } else for( x = 0; x < imsize.width; x++ ) { uchar* Hptr = H; i = 0; if( mask[x] ) for( ; i < dims; i++ ) { int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]); if( (unsigned)idx >= (unsigned)size[i] ) break; ptrs[i] += deltas[i*2]; Hptr += idx*hstep[i]; } if( i == dims ) ++*((int*)Hptr); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } else { // non-uniform histogram const float* ranges[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) ranges[i] = &_ranges[i][0]; for( ; imsize.height--; mask += mstep ) { for( x = 0; x < imsize.width; x++ ) { uchar* Hptr = H; i = 0; if( !mask || mask[x] ) for( ; i < dims; i++ ) { float v = (float)*ptrs[i]; const float* R = ranges[i]; int idx = -1, sz = size[i]; while( v >= R[idx+1] && ++idx < sz ) ; // nop if( (unsigned)idx >= (unsigned)sz ) break; ptrs[i] += deltas[i*2]; Hptr += idx*hstep[i]; } if( i == dims ) ++*((int*)Hptr); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } static void calcHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, Mat& hist, int dims, const float** _ranges, const double* _uniranges, bool uniform ) { uchar** ptrs = &_ptrs[0]; const int* deltas = &_deltas[0]; uchar* H = hist.ptr(); int x; const uchar* mask = _ptrs[dims]; int mstep = _deltas[dims*2 + 1]; std::vector<size_t> _tab; calcHistLookupTables_8u( hist, SparseMat(), dims, _ranges, _uniranges, uniform, false, _tab ); const size_t* tab = &_tab[0]; if( dims == 1 ) { #ifdef HAVE_TBB if( CalcHist1D_8uInvoker::isFit(hist, imsize) ) { int treadsNumber = tbb::task_scheduler_init::default_num_threads(); int grainSize = imsize.height/treadsNumber; tbb::mutex histogramWriteLock; CalcHist1D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock); parallel_for(BlockedRange(0, imsize.height, grainSize), body); return; } #endif int d0 = deltas[0], step0 = deltas[1]; int matH[256] = { 0, }; const uchar* p0 = (const uchar*)ptrs[0]; for( ; imsize.height--; p0 += step0, mask += mstep ) { if( !mask ) { if( d0 == 1 ) { for( x = 0; x <= imsize.width - 4; x += 4 ) { int t0 = p0[x], t1 = p0[x+1]; matH[t0]++; matH[t1]++; t0 = p0[x+2]; t1 = p0[x+3]; matH[t0]++; matH[t1]++; } p0 += x; } else for( x = 0; x <= imsize.width - 4; x += 4 ) { int t0 = p0[0], t1 = p0[d0]; matH[t0]++; matH[t1]++; p0 += d0*2; t0 = p0[0]; t1 = p0[d0]; matH[t0]++; matH[t1]++; p0 += d0*2; } for( ; x < imsize.width; x++, p0 += d0 ) matH[*p0]++; } else for( x = 0; x < imsize.width; x++, p0 += d0 ) if( mask[x] ) matH[*p0]++; } for(int i = 0; i < 256; i++ ) { size_t hidx = tab[i]; if( hidx < OUT_OF_RANGE ) *(int*)(H + hidx) += matH[i]; } } else if( dims == 2 ) { #ifdef HAVE_TBB if( CalcHist2D_8uInvoker::isFit(hist, imsize) ) { callCalcHist2D_8u(_ptrs, _deltas, imsize, hist, dims, _tab); return; } #endif int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3]; const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p1 = (const uchar*)ptrs[1]; for( ; imsize.height--; p0 += step0, p1 += step1, mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) { size_t idx = tab[*p0] + tab[*p1 + 256]; if( idx < OUT_OF_RANGE ) ++*(int*)(H + idx); } else for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) { size_t idx; if( mask[x] && (idx = tab[*p0] + tab[*p1 + 256]) < OUT_OF_RANGE ) ++*(int*)(H + idx); } } } else if( dims == 3 ) { #ifdef HAVE_TBB if( CalcHist3D_8uInvoker::isFit(hist, imsize) ) { callCalcHist3D_8u(_ptrs, _deltas, imsize, hist, dims, _tab); return; } #endif int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3], d2 = deltas[4], step2 = deltas[5]; const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p1 = (const uchar*)ptrs[1]; const uchar* p2 = (const uchar*)ptrs[2]; for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) { size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]; if( idx < OUT_OF_RANGE ) ++*(int*)(H + idx); } else for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) { size_t idx; if( mask[x] && (idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]) < OUT_OF_RANGE ) ++*(int*)(H + idx); } } } else { for( ; imsize.height--; mask += mstep ) { if( !mask ) for( x = 0; x < imsize.width; x++ ) { uchar* Hptr = H; int i = 0; for( ; i < dims; i++ ) { size_t idx = tab[*ptrs[i] + i*256]; if( idx >= OUT_OF_RANGE ) break; Hptr += idx; ptrs[i] += deltas[i*2]; } if( i == dims ) ++*((int*)Hptr); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } else for( x = 0; x < imsize.width; x++ ) { uchar* Hptr = H; int i = 0; if( mask[x] ) for( ; i < dims; i++ ) { size_t idx = tab[*ptrs[i] + i*256]; if( idx >= OUT_OF_RANGE ) break; Hptr += idx; ptrs[i] += deltas[i*2]; } if( i == dims ) ++*((int*)Hptr); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for(int i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } #ifdef HAVE_IPP class IPPCalcHistInvoker : public ParallelLoopBody { public: IPPCalcHistInvoker(const Mat & _src, Mat & _hist, AutoBuffer<Ipp32f> & _levels, Ipp32s _histSize, Ipp32f _low, Ipp32f _high, bool * _ok) : ParallelLoopBody(), src(&_src), hist(&_hist), levels(&_levels), histSize(_histSize), low(_low), high(_high), ok(_ok) { *ok = true; } virtual void operator() (const Range & range) const { Mat phist(hist->size(), hist->type(), Scalar::all(0)); #if IPP_VERSION_X100 >= 900 IppiSize roi = {src->cols, range.end - range.start}; int bufferSize = 0; int specSize = 0; IppiHistogramSpec *pSpec = NULL; Ipp8u *pBuffer = NULL; if(ippiHistogramGetBufferSize(ipp8u, roi, &histSize, 1, 1, &specSize, &bufferSize) < 0) { *ok = false; return; } pBuffer = (Ipp8u*)ippMalloc(bufferSize); if(!pBuffer && bufferSize) { *ok = false; return; } pSpec = (IppiHistogramSpec*)ippMalloc(specSize); if(!pSpec && specSize) { if(pBuffer) ippFree(pBuffer); *ok = false; return; } if(ippiHistogramUniformInit(ipp8u, (Ipp32f*)&low, (Ipp32f*)&high, (Ipp32s*)&histSize, 1, pSpec) < 0) { if(pSpec) ippFree(pSpec); if(pBuffer) ippFree(pBuffer); *ok = false; return; } IppStatus status = ippiHistogram_8u_C1R(src->ptr(range.start), (int)src->step, ippiSize(src->cols, range.end - range.start), phist.ptr<Ipp32u>(), pSpec, pBuffer); if(pSpec) ippFree(pSpec); if(pBuffer) ippFree(pBuffer); #else CV_SUPPRESS_DEPRECATED_START IppStatus status = ippiHistogramEven_8u_C1R(src->ptr(range.start), (int)src->step, ippiSize(src->cols, range.end - range.start), phist.ptr<Ipp32s>(), (Ipp32s*)(Ipp32f*)*levels, histSize, (Ipp32s)low, (Ipp32s)high); CV_SUPPRESS_DEPRECATED_END #endif if(status < 0) { *ok = false; return; } for (int i = 0; i < histSize; ++i) CV_XADD((int *)(hist->data + i * hist->step), *(int *)(phist.data + i * phist.step)); } private: const Mat * src; Mat * hist; AutoBuffer<Ipp32f> * levels; Ipp32s histSize; Ipp32f low, high; bool * ok; const IPPCalcHistInvoker & operator = (const IPPCalcHistInvoker & ); }; #endif } #if defined(HAVE_IPP) namespace cv { static bool ipp_calchist(const Mat* images, int nimages, const int* channels, InputArray _mask, OutputArray _hist, int dims, const int* histSize, const float** ranges, bool uniform, bool accumulate ) { Mat mask = _mask.getMat(); CV_Assert(dims > 0 && histSize); _hist.create(dims, histSize, CV_32F); Mat hist = _hist.getMat(), ihist = hist; ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK)|CV_32S; { if (nimages == 1 && images[0].type() == CV_8UC1 && dims == 1 && channels && channels[0] == 0 && mask.empty() && images[0].dims <= 2 && !accumulate && uniform) { ihist.setTo(Scalar::all(0)); AutoBuffer<Ipp32f> levels(histSize[0] + 1); bool ok = true; const Mat & src = images[0]; int nstripes = std::min<int>(8, static_cast<int>(src.total() / (1 << 16))); #ifdef HAVE_CONCURRENCY nstripes = 1; #endif IPPCalcHistInvoker invoker(src, ihist, levels, histSize[0] + 1, ranges[0][0], ranges[0][1], &ok); Range range(0, src.rows); parallel_for_(range, invoker, nstripes); if (ok) { ihist.convertTo(hist, CV_32F); return true; } } } return false; } } #endif void cv::calcHist( const Mat* images, int nimages, const int* channels, InputArray _mask, OutputArray _hist, int dims, const int* histSize, const float** ranges, bool uniform, bool accumulate ) { CV_IPP_RUN(nimages == 1 && images[0].type() == CV_8UC1 && dims == 1 && channels && channels[0] == 0 && _mask.getMat().empty() && images[0].dims <= 2 && !accumulate && uniform, ipp_calchist(images, nimages, channels, _mask, _hist, dims, histSize, ranges, uniform, accumulate)); Mat mask = _mask.getMat(); CV_Assert(dims > 0 && histSize); const uchar* const histdata = _hist.getMat().ptr(); _hist.create(dims, histSize, CV_32F); Mat hist = _hist.getMat(), ihist = hist; ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK)|CV_32S; if( !accumulate || histdata != hist.data ) hist = Scalar(0.); else hist.convertTo(ihist, CV_32S); std::vector<uchar*> ptrs; std::vector<int> deltas; std::vector<double> uniranges; Size imsize; CV_Assert( mask.empty() || mask.type() == CV_8UC1 ); histPrepareImages( images, nimages, channels, mask, dims, hist.size, ranges, uniform, ptrs, deltas, imsize, uniranges ); const double* _uniranges = uniform ? &uniranges[0] : 0; int depth = images[0].depth(); if( depth == CV_8U ) calcHist_8u(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform ); else if( depth == CV_16U ) calcHist_<ushort>(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform ); else if( depth == CV_32F ) calcHist_<float>(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform ); else CV_Error(CV_StsUnsupportedFormat, ""); ihist.convertTo(hist, CV_32F); } namespace cv { template<typename T> static void calcSparseHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, SparseMat& hist, int dims, const float** _ranges, const double* _uniranges, bool uniform ) { T** ptrs = (T**)&_ptrs[0]; const int* deltas = &_deltas[0]; int i, x; const uchar* mask = _ptrs[dims]; int mstep = _deltas[dims*2 + 1]; const int* size = hist.hdr->size; int idx[CV_MAX_DIM]; if( uniform ) { const double* uniranges = &_uniranges[0]; for( ; imsize.height--; mask += mstep ) { for( x = 0; x < imsize.width; x++ ) { i = 0; if( !mask || mask[x] ) for( ; i < dims; i++ ) { idx[i] = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]); if( (unsigned)idx[i] >= (unsigned)size[i] ) break; ptrs[i] += deltas[i*2]; } if( i == dims ) ++*(int*)hist.ptr(idx, true); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } else { // non-uniform histogram const float* ranges[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) ranges[i] = &_ranges[i][0]; for( ; imsize.height--; mask += mstep ) { for( x = 0; x < imsize.width; x++ ) { i = 0; if( !mask || mask[x] ) for( ; i < dims; i++ ) { float v = (float)*ptrs[i]; const float* R = ranges[i]; int j = -1, sz = size[i]; while( v >= R[j+1] && ++j < sz ) ; // nop if( (unsigned)j >= (unsigned)sz ) break; ptrs[i] += deltas[i*2]; idx[i] = j; } if( i == dims ) ++*(int*)hist.ptr(idx, true); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } static void calcSparseHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, SparseMat& hist, int dims, const float** _ranges, const double* _uniranges, bool uniform ) { uchar** ptrs = (uchar**)&_ptrs[0]; const int* deltas = &_deltas[0]; int x; const uchar* mask = _ptrs[dims]; int mstep = _deltas[dims*2 + 1]; int idx[CV_MAX_DIM]; std::vector<size_t> _tab; calcHistLookupTables_8u( Mat(), hist, dims, _ranges, _uniranges, uniform, true, _tab ); const size_t* tab = &_tab[0]; for( ; imsize.height--; mask += mstep ) { for( x = 0; x < imsize.width; x++ ) { int i = 0; if( !mask || mask[x] ) for( ; i < dims; i++ ) { size_t hidx = tab[*ptrs[i] + i*256]; if( hidx >= OUT_OF_RANGE ) break; ptrs[i] += deltas[i*2]; idx[i] = (int)hidx; } if( i == dims ) ++*(int*)hist.ptr(idx,true); else for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } for(int i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } static void calcHist( const Mat* images, int nimages, const int* channels, const Mat& mask, SparseMat& hist, int dims, const int* histSize, const float** ranges, bool uniform, bool accumulate, bool keepInt ) { size_t i, N; if( !accumulate ) hist.create(dims, histSize, CV_32F); else { SparseMatIterator it = hist.begin(); for( i = 0, N = hist.nzcount(); i < N; i++, ++it ) { Cv32suf* val = (Cv32suf*)it.ptr; val->i = cvRound(val->f); } } std::vector<uchar*> ptrs; std::vector<int> deltas; std::vector<double> uniranges; Size imsize; CV_Assert( mask.empty() || mask.type() == CV_8UC1 ); histPrepareImages( images, nimages, channels, mask, dims, hist.hdr->size, ranges, uniform, ptrs, deltas, imsize, uniranges ); const double* _uniranges = uniform ? &uniranges[0] : 0; int depth = images[0].depth(); if( depth == CV_8U ) calcSparseHist_8u(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform ); else if( depth == CV_16U ) calcSparseHist_<ushort>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform ); else if( depth == CV_32F ) calcSparseHist_<float>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform ); else CV_Error(CV_StsUnsupportedFormat, ""); if( !keepInt ) { SparseMatIterator it = hist.begin(); for( i = 0, N = hist.nzcount(); i < N; i++, ++it ) { Cv32suf* val = (Cv32suf*)it.ptr; val->f = (float)val->i; } } } #ifdef HAVE_OPENCL enum { BINS = 256 }; static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32S) { const ocl::Device & dev = ocl::Device::getDefault(); int compunits = dev.maxComputeUnits(); size_t wgs = dev.maxWorkGroupSize(); Size size = _src.size(); bool use16 = size.width % 16 == 0 && _src.offset() % 16 == 0 && _src.step() % 16 == 0; int kercn = dev.isAMD() && use16 ? 16 : std::min(4, ocl::predictOptimalVectorWidth(_src)); ocl::Kernel k1("calculate_histogram", ocl::imgproc::histogram_oclsrc, format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D kercn=%d -D T=%s%s", BINS, compunits, wgs, kercn, kercn == 4 ? "int" : ocl::typeToStr(CV_8UC(kercn)), _src.isContinuous() ? " -D HAVE_SRC_CONT" : "")); if (k1.empty()) return false; _hist.create(BINS, 1, ddepth); UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1), hist = _hist.getUMat(); k1.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total()); size_t globalsize = compunits * wgs; if (!k1.run(1, &globalsize, &wgs, false)) return false; char cvt[40]; ocl::Kernel k2("merge_histogram", ocl::imgproc::histogram_oclsrc, format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D convertToHT=%s -D HT=%s", BINS, compunits, (int)wgs, ocl::convertTypeStr(CV_32S, ddepth, 1, cvt), ocl::typeToStr(ddepth))); if (k2.empty()) return false; k2.args(ocl::KernelArg::PtrReadOnly(ghist), ocl::KernelArg::WriteOnlyNoSize(hist)); return k2.run(1, &wgs, &wgs, false); } static bool ocl_calcHist(InputArrayOfArrays images, OutputArray hist) { std::vector<UMat> v; images.getUMatVector(v); return ocl_calcHist1(v[0], hist, CV_32F); } #endif } void cv::calcHist( const Mat* images, int nimages, const int* channels, InputArray _mask, SparseMat& hist, int dims, const int* histSize, const float** ranges, bool uniform, bool accumulate ) { Mat mask = _mask.getMat(); calcHist( images, nimages, channels, mask, hist, dims, histSize, ranges, uniform, accumulate, false ); } void cv::calcHist( InputArrayOfArrays images, const std::vector<int>& channels, InputArray mask, OutputArray hist, const std::vector<int>& histSize, const std::vector<float>& ranges, bool accumulate ) { CV_OCL_RUN(images.total() == 1 && channels.size() == 1 && images.channels(0) == 1 && channels[0] == 0 && images.isUMatVector() && mask.empty() && !accumulate && histSize.size() == 1 && histSize[0] == BINS && ranges.size() == 2 && ranges[0] == 0 && ranges[1] == BINS, ocl_calcHist(images, hist)) int i, dims = (int)histSize.size(), rsz = (int)ranges.size(), csz = (int)channels.size(); int nimages = (int)images.total(); CV_Assert(nimages > 0 && dims > 0); CV_Assert(rsz == dims*2 || (rsz == 0 && images.depth(0) == CV_8U)); CV_Assert(csz == 0 || csz == dims); float* _ranges[CV_MAX_DIM]; if( rsz > 0 ) { for( i = 0; i < rsz/2; i++ ) _ranges[i] = (float*)&ranges[i*2]; } AutoBuffer<Mat> buf(nimages); for( i = 0; i < nimages; i++ ) buf[i] = images.getMat(i); calcHist(&buf[0], nimages, csz ? &channels[0] : 0, mask, hist, dims, &histSize[0], rsz ? (const float**)_ranges : 0, true, accumulate); } /////////////////////////////////////// B A C K P R O J E C T //////////////////////////////////// namespace cv { template<typename T, typename BT> static void calcBackProj_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, const Mat& hist, int dims, const float** _ranges, const double* _uniranges, float scale, bool uniform ) { T** ptrs = (T**)&_ptrs[0]; const int* deltas = &_deltas[0]; const uchar* H = hist.ptr(); int i, x; BT* bproj = (BT*)_ptrs[dims]; int bpstep = _deltas[dims*2 + 1]; int size[CV_MAX_DIM]; size_t hstep[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) { size[i] = hist.size[i]; hstep[i] = hist.step[i]; } if( uniform ) { const double* uniranges = &_uniranges[0]; if( dims == 1 ) { double a = uniranges[0], b = uniranges[1]; int sz = size[0], d0 = deltas[0], step0 = deltas[1]; const T* p0 = (const T*)ptrs[0]; for( ; imsize.height--; p0 += step0, bproj += bpstep ) { for( x = 0; x < imsize.width; x++, p0 += d0 ) { int idx = cvFloor(*p0*a + b); bproj[x] = (unsigned)idx < (unsigned)sz ? saturate_cast<BT>(((const float*)H)[idx]*scale) : 0; } } } else if( dims == 2 ) { double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3]; int sz0 = size[0], sz1 = size[1]; int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3]; size_t hstep0 = hstep[0]; const T* p0 = (const T*)ptrs[0]; const T* p1 = (const T*)ptrs[1]; for( ; imsize.height--; p0 += step0, p1 += step1, bproj += bpstep ) { for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); bproj[x] = (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 ? saturate_cast<BT>(((const float*)(H + hstep0*idx0))[idx1]*scale) : 0; } } } else if( dims == 3 ) { double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3], a2 = uniranges[4], b2 = uniranges[5]; int sz0 = size[0], sz1 = size[1], sz2 = size[2]; int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3], d2 = deltas[4], step2 = deltas[5]; size_t hstep0 = hstep[0], hstep1 = hstep[1]; const T* p0 = (const T*)ptrs[0]; const T* p1 = (const T*)ptrs[1]; const T* p2 = (const T*)ptrs[2]; for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, bproj += bpstep ) { for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) { int idx0 = cvFloor(*p0*a0 + b0); int idx1 = cvFloor(*p1*a1 + b1); int idx2 = cvFloor(*p2*a2 + b2); bproj[x] = (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 && (unsigned)idx2 < (unsigned)sz2 ? saturate_cast<BT>(((const float*)(H + hstep0*idx0 + hstep1*idx1))[idx2]*scale) : 0; } } } else { for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { const uchar* Hptr = H; for( i = 0; i < dims; i++ ) { int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]); if( (unsigned)idx >= (unsigned)size[i] || (_ranges && *ptrs[i] >= _ranges[i][1])) break; ptrs[i] += deltas[i*2]; Hptr += idx*hstep[i]; } if( i == dims ) bproj[x] = saturate_cast<BT>(*(const float*)Hptr*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } else { // non-uniform histogram const float* ranges[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) ranges[i] = &_ranges[i][0]; for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { const uchar* Hptr = H; for( i = 0; i < dims; i++ ) { float v = (float)*ptrs[i]; const float* R = ranges[i]; int idx = -1, sz = size[i]; while( v >= R[idx+1] && ++idx < sz ) ; // nop if( (unsigned)idx >= (unsigned)sz ) break; ptrs[i] += deltas[i*2]; Hptr += idx*hstep[i]; } if( i == dims ) bproj[x] = saturate_cast<BT>(*(const float*)Hptr*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } static void calcBackProj_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, const Mat& hist, int dims, const float** _ranges, const double* _uniranges, float scale, bool uniform ) { uchar** ptrs = &_ptrs[0]; const int* deltas = &_deltas[0]; const uchar* H = hist.ptr(); int i, x; uchar* bproj = _ptrs[dims]; int bpstep = _deltas[dims*2 + 1]; std::vector<size_t> _tab; calcHistLookupTables_8u( hist, SparseMat(), dims, _ranges, _uniranges, uniform, false, _tab ); const size_t* tab = &_tab[0]; if( dims == 1 ) { int d0 = deltas[0], step0 = deltas[1]; uchar matH[256] = {0}; const uchar* p0 = (const uchar*)ptrs[0]; for( i = 0; i < 256; i++ ) { size_t hidx = tab[i]; if( hidx < OUT_OF_RANGE ) matH[i] = saturate_cast<uchar>(*(float*)(H + hidx)*scale); } for( ; imsize.height--; p0 += step0, bproj += bpstep ) { if( d0 == 1 ) { for( x = 0; x <= imsize.width - 4; x += 4 ) { uchar t0 = matH[p0[x]], t1 = matH[p0[x+1]]; bproj[x] = t0; bproj[x+1] = t1; t0 = matH[p0[x+2]]; t1 = matH[p0[x+3]]; bproj[x+2] = t0; bproj[x+3] = t1; } p0 += x; } else for( x = 0; x <= imsize.width - 4; x += 4 ) { uchar t0 = matH[p0[0]], t1 = matH[p0[d0]]; bproj[x] = t0; bproj[x+1] = t1; p0 += d0*2; t0 = matH[p0[0]]; t1 = matH[p0[d0]]; bproj[x+2] = t0; bproj[x+3] = t1; p0 += d0*2; } for( ; x < imsize.width; x++, p0 += d0 ) bproj[x] = matH[*p0]; } } else if( dims == 2 ) { int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3]; const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p1 = (const uchar*)ptrs[1]; for( ; imsize.height--; p0 += step0, p1 += step1, bproj += bpstep ) { for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 ) { size_t idx = tab[*p0] + tab[*p1 + 256]; bproj[x] = idx < OUT_OF_RANGE ? saturate_cast<uchar>(*(const float*)(H + idx)*scale) : 0; } } } else if( dims == 3 ) { int d0 = deltas[0], step0 = deltas[1], d1 = deltas[2], step1 = deltas[3], d2 = deltas[4], step2 = deltas[5]; const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p1 = (const uchar*)ptrs[1]; const uchar* p2 = (const uchar*)ptrs[2]; for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, bproj += bpstep ) { for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 ) { size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]; bproj[x] = idx < OUT_OF_RANGE ? saturate_cast<uchar>(*(const float*)(H + idx)*scale) : 0; } } } else { for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { const uchar* Hptr = H; for( i = 0; i < dims; i++ ) { size_t idx = tab[*ptrs[i] + i*256]; if( idx >= OUT_OF_RANGE ) break; ptrs[i] += deltas[i*2]; Hptr += idx; } if( i == dims ) bproj[x] = saturate_cast<uchar>(*(const float*)Hptr*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } } void cv::calcBackProject( const Mat* images, int nimages, const int* channels, InputArray _hist, OutputArray _backProject, const float** ranges, double scale, bool uniform ) { Mat hist = _hist.getMat(); std::vector<uchar*> ptrs; std::vector<int> deltas; std::vector<double> uniranges; Size imsize; int dims = hist.dims == 2 && hist.size[1] == 1 ? 1 : hist.dims; CV_Assert( dims > 0 && !hist.empty() ); _backProject.create( images[0].size(), images[0].depth() ); Mat backProject = _backProject.getMat(); histPrepareImages( images, nimages, channels, backProject, dims, hist.size, ranges, uniform, ptrs, deltas, imsize, uniranges ); const double* _uniranges = uniform ? &uniranges[0] : 0; int depth = images[0].depth(); if( depth == CV_8U ) calcBackProj_8u(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform); else if( depth == CV_16U ) calcBackProj_<ushort, ushort>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform ); else if( depth == CV_32F ) calcBackProj_<float, float>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform ); else CV_Error(CV_StsUnsupportedFormat, ""); } namespace cv { template<typename T, typename BT> static void calcSparseBackProj_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, const SparseMat& hist, int dims, const float** _ranges, const double* _uniranges, float scale, bool uniform ) { T** ptrs = (T**)&_ptrs[0]; const int* deltas = &_deltas[0]; int i, x; BT* bproj = (BT*)_ptrs[dims]; int bpstep = _deltas[dims*2 + 1]; const int* size = hist.hdr->size; int idx[CV_MAX_DIM]; const SparseMat_<float>& hist_ = (const SparseMat_<float>&)hist; if( uniform ) { const double* uniranges = &_uniranges[0]; for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { for( i = 0; i < dims; i++ ) { idx[i] = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]); if( (unsigned)idx[i] >= (unsigned)size[i] ) break; ptrs[i] += deltas[i*2]; } if( i == dims ) bproj[x] = saturate_cast<BT>(hist_(idx)*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } else { // non-uniform histogram const float* ranges[CV_MAX_DIM]; for( i = 0; i < dims; i++ ) ranges[i] = &_ranges[i][0]; for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { for( i = 0; i < dims; i++ ) { float v = (float)*ptrs[i]; const float* R = ranges[i]; int j = -1, sz = size[i]; while( v >= R[j+1] && ++j < sz ) ; // nop if( (unsigned)j >= (unsigned)sz ) break; idx[i] = j; ptrs[i] += deltas[i*2]; } if( i == dims ) bproj[x] = saturate_cast<BT>(hist_(idx)*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } static void calcSparseBackProj_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, Size imsize, const SparseMat& hist, int dims, const float** _ranges, const double* _uniranges, float scale, bool uniform ) { uchar** ptrs = &_ptrs[0]; const int* deltas = &_deltas[0]; int i, x; uchar* bproj = _ptrs[dims]; int bpstep = _deltas[dims*2 + 1]; std::vector<size_t> _tab; int idx[CV_MAX_DIM]; calcHistLookupTables_8u( Mat(), hist, dims, _ranges, _uniranges, uniform, true, _tab ); const size_t* tab = &_tab[0]; for( ; imsize.height--; bproj += bpstep ) { for( x = 0; x < imsize.width; x++ ) { for( i = 0; i < dims; i++ ) { size_t hidx = tab[*ptrs[i] + i*256]; if( hidx >= OUT_OF_RANGE ) break; idx[i] = (int)hidx; ptrs[i] += deltas[i*2]; } if( i == dims ) bproj[x] = saturate_cast<uchar>(hist.value<float>(idx)*scale); else { bproj[x] = 0; for( ; i < dims; i++ ) ptrs[i] += deltas[i*2]; } } for( i = 0; i < dims; i++ ) ptrs[i] += deltas[i*2 + 1]; } } } void cv::calcBackProject( const Mat* images, int nimages, const int* channels, const SparseMat& hist, OutputArray _backProject, const float** ranges, double scale, bool uniform ) { std::vector<uchar*> ptrs; std::vector<int> deltas; std::vector<double> uniranges; Size imsize; int dims = hist.dims(); CV_Assert( dims > 0 ); _backProject.create( images[0].size(), images[0].depth() ); Mat backProject = _backProject.getMat(); histPrepareImages( images, nimages, channels, backProject, dims, hist.hdr->size, ranges, uniform, ptrs, deltas, imsize, uniranges ); const double* _uniranges = uniform ? &uniranges[0] : 0; int depth = images[0].depth(); if( depth == CV_8U ) calcSparseBackProj_8u(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform); else if( depth == CV_16U ) calcSparseBackProj_<ushort, ushort>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform ); else if( depth == CV_32F ) calcSparseBackProj_<float, float>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform ); else CV_Error(CV_StsUnsupportedFormat, ""); } #ifdef HAVE_OPENCL namespace cv { static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx) { int totalChannels = 0; for (size_t i = 0, size = um.size(); i < size; ++i) { int ccn = um[i].channels(); totalChannels += ccn; if (totalChannels == cn) { idx = (int)(i + 1); cnidx = 0; return; } else if (totalChannels > cn) { idx = (int)i; cnidx = i == 0 ? cn : (cn - totalChannels + ccn); return; } } idx = cnidx = -1; } static bool ocl_calcBackProject( InputArrayOfArrays _images, std::vector<int> channels, InputArray _hist, OutputArray _dst, const std::vector<float>& ranges, float scale, size_t histdims ) { std::vector<UMat> images; _images.getUMatVector(images); size_t nimages = images.size(), totalcn = images[0].channels(); CV_Assert(nimages > 0); Size size = images[0].size(); int depth = images[0].depth(); //kernels are valid for this type only if (depth != CV_8U) return false; for (size_t i = 1; i < nimages; ++i) { const UMat & m = images[i]; totalcn += m.channels(); CV_Assert(size == m.size() && depth == m.depth()); } std::sort(channels.begin(), channels.end()); for (size_t i = 0; i < histdims; ++i) CV_Assert(channels[i] < (int)totalcn); if (histdims == 1) { int idx, cnidx; getUMatIndex(images, channels[0], idx, cnidx); CV_Assert(idx >= 0); UMat im = images[idx]; String opts = format("-D histdims=1 -D scn=%d", im.channels()); ocl::Kernel lutk("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts); if (lutk.empty()) return false; size_t lsize = 256; UMat lut(1, (int)lsize, CV_32SC1), hist = _hist.getUMat(), uranges(ranges, true); lutk.args(ocl::KernelArg::ReadOnlyNoSize(hist), hist.rows, ocl::KernelArg::PtrWriteOnly(lut), scale, ocl::KernelArg::PtrReadOnly(uranges)); if (!lutk.run(1, &lsize, NULL, false)) return false; ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts); if (mapk.empty()) return false; _dst.create(size, depth); UMat dst = _dst.getUMat(); im.offset += cnidx; mapk.args(ocl::KernelArg::ReadOnlyNoSize(im), ocl::KernelArg::PtrReadOnly(lut), ocl::KernelArg::WriteOnly(dst)); size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }; return mapk.run(2, globalsize, NULL, false); } else if (histdims == 2) { int idx0, idx1, cnidx0, cnidx1; getUMatIndex(images, channels[0], idx0, cnidx0); getUMatIndex(images, channels[1], idx1, cnidx1); CV_Assert(idx0 >= 0 && idx1 >= 0); UMat im0 = images[idx0], im1 = images[idx1]; // Lut for the first dimension String opts = format("-D histdims=2 -D scn1=%d -D scn2=%d", im0.channels(), im1.channels()); ocl::Kernel lutk1("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts); if (lutk1.empty()) return false; size_t lsize = 256; UMat lut(1, (int)lsize<<1, CV_32SC1), uranges(ranges, true), hist = _hist.getUMat(); lutk1.args(hist.rows, ocl::KernelArg::PtrWriteOnly(lut), (int)0, ocl::KernelArg::PtrReadOnly(uranges), (int)0); if (!lutk1.run(1, &lsize, NULL, false)) return false; // lut for the second dimension ocl::Kernel lutk2("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts); if (lutk2.empty()) return false; lut.offset += lsize * sizeof(int); lutk2.args(hist.cols, ocl::KernelArg::PtrWriteOnly(lut), (int)256, ocl::KernelArg::PtrReadOnly(uranges), (int)2); if (!lutk2.run(1, &lsize, NULL, false)) return false; // perform lut ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts); if (mapk.empty()) return false; _dst.create(size, depth); UMat dst = _dst.getUMat(); im0.offset += cnidx0; im1.offset += cnidx1; mapk.args(ocl::KernelArg::ReadOnlyNoSize(im0), ocl::KernelArg::ReadOnlyNoSize(im1), ocl::KernelArg::ReadOnlyNoSize(hist), ocl::KernelArg::PtrReadOnly(lut), scale, ocl::KernelArg::WriteOnly(dst)); size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }; return mapk.run(2, globalsize, NULL, false); } return false; } } #endif void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels, InputArray hist, OutputArray dst, const std::vector<float>& ranges, double scale ) { #ifdef HAVE_OPENCL Size histSize = hist.size(); bool _1D = histSize.height == 1 || histSize.width == 1; size_t histdims = _1D ? 1 : hist.dims(); #endif CV_OCL_RUN(dst.isUMat() && hist.type() == CV_32FC1 && histdims <= 2 && ranges.size() == histdims * 2 && histdims == channels.size(), ocl_calcBackProject(images, channels, hist, dst, ranges, (float)scale, histdims)) Mat H0 = hist.getMat(), H; int hcn = H0.channels(); if( hcn > 1 ) { CV_Assert( H0.isContinuous() ); int hsz[CV_CN_MAX+1]; memcpy(hsz, &H0.size[0], H0.dims*sizeof(hsz[0])); hsz[H0.dims] = hcn; H = Mat(H0.dims+1, hsz, H0.depth(), H0.ptr()); } else H = H0; bool _1d = H.rows == 1 || H.cols == 1; int i, dims = H.dims, rsz = (int)ranges.size(), csz = (int)channels.size(); int nimages = (int)images.total(); CV_Assert(nimages > 0); CV_Assert(rsz == dims*2 || (rsz == 2 && _1d) || (rsz == 0 && images.depth(0) == CV_8U)); CV_Assert(csz == 0 || csz == dims || (csz == 1 && _1d)); float* _ranges[CV_MAX_DIM]; if( rsz > 0 ) { for( i = 0; i < rsz/2; i++ ) _ranges[i] = (float*)&ranges[i*2]; } AutoBuffer<Mat> buf(nimages); for( i = 0; i < nimages; i++ ) buf[i] = images.getMat(i); calcBackProject(&buf[0], nimages, csz ? &channels[0] : 0, hist, dst, rsz ? (const float**)_ranges : 0, scale, true); } ////////////////// C O M P A R E H I S T O G R A M S //////////////////////// double cv::compareHist( InputArray _H1, InputArray _H2, int method ) { Mat H1 = _H1.getMat(), H2 = _H2.getMat(); const Mat* arrays[] = {&H1, &H2, 0}; Mat planes[2]; NAryMatIterator it(arrays, planes); double result = 0; int j, len = (int)it.size; CV_Assert( H1.type() == H2.type() && H1.depth() == CV_32F ); double s1 = 0, s2 = 0, s11 = 0, s12 = 0, s22 = 0; CV_Assert( it.planes[0].isContinuous() && it.planes[1].isContinuous() ); #if CV_SSE2 bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2); #endif for( size_t i = 0; i < it.nplanes; i++, ++it ) { const float* h1 = it.planes[0].ptr<float>(); const float* h2 = it.planes[1].ptr<float>(); len = it.planes[0].rows*it.planes[0].cols*H1.channels(); j = 0; if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT)) { for( ; j < len; j++ ) { double a = h1[j] - h2[j]; double b = (method == CV_COMP_CHISQR) ? h1[j] : h1[j] + h2[j]; if( fabs(b) > DBL_EPSILON ) result += a*a/b; } } else if( method == CV_COMP_CORREL ) { #if CV_SSE2 if (haveSIMD) { __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1; __m128d v_s11 = v_s1, v_s22 = v_s1, v_s12 = v_s1; for ( ; j <= len - 4; j += 4) { __m128 v_a = _mm_loadu_ps(h1 + j); __m128 v_b = _mm_loadu_ps(h2 + j); // 0-1 __m128d v_ad = _mm_cvtps_pd(v_a); __m128d v_bd = _mm_cvtps_pd(v_b); v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd)); v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad)); v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd)); v_s1 = _mm_add_pd(v_s1, v_ad); v_s2 = _mm_add_pd(v_s2, v_bd); // 2-3 v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8))); v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8))); v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd)); v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad)); v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd)); v_s1 = _mm_add_pd(v_s1, v_ad); v_s2 = _mm_add_pd(v_s2, v_bd); } double CV_DECL_ALIGNED(16) ar[10]; _mm_store_pd(ar, v_s12); _mm_store_pd(ar + 2, v_s11); _mm_store_pd(ar + 4, v_s22); _mm_store_pd(ar + 6, v_s1); _mm_store_pd(ar + 8, v_s2); s12 += ar[0] + ar[1]; s11 += ar[2] + ar[3]; s22 += ar[4] + ar[5]; s1 += ar[6] + ar[7]; s2 += ar[8] + ar[9]; } #endif for( ; j < len; j++ ) { double a = h1[j]; double b = h2[j]; s12 += a*b; s1 += a; s11 += a*a; s2 += b; s22 += b*b; } } else if( method == CV_COMP_INTERSECT ) { #if CV_NEON float32x4_t v_result = vdupq_n_f32(0.0f); for( ; j <= len - 4; j += 4 ) v_result = vaddq_f32(v_result, vminq_f32(vld1q_f32(h1 + j), vld1q_f32(h2 + j))); float CV_DECL_ALIGNED(16) ar[4]; vst1q_f32(ar, v_result); result += ar[0] + ar[1] + ar[2] + ar[3]; #elif CV_SSE2 if (haveSIMD) { __m128d v_result = _mm_setzero_pd(); for ( ; j <= len - 4; j += 4) { __m128 v_src = _mm_min_ps(_mm_loadu_ps(h1 + j), _mm_loadu_ps(h2 + j)); v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src)); v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8)); v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src)); } double CV_DECL_ALIGNED(16) ar[2]; _mm_store_pd(ar, v_result); result += ar[0] + ar[1]; } #endif for( ; j < len; j++ ) result += std::min(h1[j], h2[j]); } else if( method == CV_COMP_BHATTACHARYYA ) { #if CV_SSE2 if (haveSIMD) { __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1, v_result = v_s1; for ( ; j <= len - 4; j += 4) { __m128 v_a = _mm_loadu_ps(h1 + j); __m128 v_b = _mm_loadu_ps(h2 + j); __m128d v_ad = _mm_cvtps_pd(v_a); __m128d v_bd = _mm_cvtps_pd(v_b); v_s1 = _mm_add_pd(v_s1, v_ad); v_s2 = _mm_add_pd(v_s2, v_bd); v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd))); v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8))); v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8))); v_s1 = _mm_add_pd(v_s1, v_ad); v_s2 = _mm_add_pd(v_s2, v_bd); v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd))); } double CV_DECL_ALIGNED(16) ar[6]; _mm_store_pd(ar, v_s1); _mm_store_pd(ar + 2, v_s2); _mm_store_pd(ar + 4, v_result); s1 += ar[0] + ar[1]; s2 += ar[2] + ar[3]; result += ar[4] + ar[5]; } #endif for( ; j < len; j++ ) { double a = h1[j]; double b = h2[j]; result += std::sqrt(a*b); s1 += a; s2 += b; } } else if( method == CV_COMP_KL_DIV ) { for( ; j < len; j++ ) { double p = h1[j]; double q = h2[j]; if( fabs(p) <= DBL_EPSILON ) { continue; } if( fabs(q) <= DBL_EPSILON ) { q = 1e-10; } result += p * std::log( p / q ); } } else CV_Error( CV_StsBadArg, "Unknown comparison method" ); } if( method == CV_COMP_CHISQR_ALT ) result *= 2; else if( method == CV_COMP_CORREL ) { size_t total = H1.total(); double scale = 1./total; double num = s12 - s1*s2*scale; double denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale); result = std::abs(denom2) > DBL_EPSILON ? num/std::sqrt(denom2) : 1.; } else if( method == CV_COMP_BHATTACHARYYA ) { s1 *= s2; s1 = fabs(s1) > FLT_EPSILON ? 1./std::sqrt(s1) : 1.; result = std::sqrt(std::max(1. - result*s1, 0.)); } return result; } double cv::compareHist( const SparseMat& H1, const SparseMat& H2, int method ) { double result = 0; int i, dims = H1.dims(); CV_Assert( dims > 0 && dims == H2.dims() && H1.type() == H2.type() && H1.type() == CV_32F ); for( i = 0; i < dims; i++ ) CV_Assert( H1.size(i) == H2.size(i) ); const SparseMat *PH1 = &H1, *PH2 = &H2; if( PH1->nzcount() > PH2->nzcount() && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT && method != CV_COMP_KL_DIV ) std::swap(PH1, PH2); SparseMatConstIterator it = PH1->begin(); int N1 = (int)PH1->nzcount(), N2 = (int)PH2->nzcount(); if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) ) { for( i = 0; i < N1; i++, ++it ) { float v1 = it.value<float>(); const SparseMat::Node* node = it.node(); float v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval); double a = v1 - v2; double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2; if( fabs(b) > DBL_EPSILON ) result += a*a/b; } } else if( method == CV_COMP_CORREL ) { double s1 = 0, s2 = 0, s11 = 0, s12 = 0, s22 = 0; for( i = 0; i < N1; i++, ++it ) { double v1 = it.value<float>(); const SparseMat::Node* node = it.node(); s12 += v1*PH2->value<float>(node->idx, (size_t*)&node->hashval); s1 += v1; s11 += v1*v1; } it = PH2->begin(); for( i = 0; i < N2; i++, ++it ) { double v2 = it.value<float>(); s2 += v2; s22 += v2*v2; } size_t total = 1; for( i = 0; i < H1.dims(); i++ ) total *= H1.size(i); double scale = 1./total; double num = s12 - s1*s2*scale; double denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale); result = std::abs(denom2) > DBL_EPSILON ? num/std::sqrt(denom2) : 1.; } else if( method == CV_COMP_INTERSECT ) { for( i = 0; i < N1; i++, ++it ) { float v1 = it.value<float>(); const SparseMat::Node* node = it.node(); float v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval); if( v2 ) result += std::min(v1, v2); } } else if( method == CV_COMP_BHATTACHARYYA ) { double s1 = 0, s2 = 0; for( i = 0; i < N1; i++, ++it ) { double v1 = it.value<float>(); const SparseMat::Node* node = it.node(); double v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval); result += std::sqrt(v1*v2); s1 += v1; } it = PH2->begin(); for( i = 0; i < N2; i++, ++it ) s2 += it.value<float>(); s1 *= s2; s1 = fabs(s1) > FLT_EPSILON ? 1./std::sqrt(s1) : 1.; result = std::sqrt(std::max(1. - result*s1, 0.)); } else if( method == CV_COMP_KL_DIV ) { for( i = 0; i < N1; i++, ++it ) { double v1 = it.value<float>(); const SparseMat::Node* node = it.node(); double v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval); if( !v2 ) v2 = 1e-10; result += v1 * std::log( v1 / v2 ); } } else CV_Error( CV_StsBadArg, "Unknown comparison method" ); if( method == CV_COMP_CHISQR_ALT ) result *= 2; return result; } const int CV_HIST_DEFAULT_TYPE = CV_32F; /* Creates new histogram */ CvHistogram * cvCreateHist( int dims, int *sizes, CvHistType type, float** ranges, int uniform ) { CvHistogram *hist = 0; if( (unsigned)dims > CV_MAX_DIM ) CV_Error( CV_BadOrder, "Number of dimensions is out of range" ); if( !sizes ) CV_Error( CV_HeaderIsNull, "Null <sizes> pointer" ); hist = (CvHistogram *)cvAlloc( sizeof( CvHistogram )); hist->type = CV_HIST_MAGIC_VAL + ((int)type & 1); if (uniform) hist->type|= CV_HIST_UNIFORM_FLAG; hist->thresh2 = 0; hist->bins = 0; if( type == CV_HIST_ARRAY ) { hist->bins = cvInitMatNDHeader( &hist->mat, dims, sizes, CV_HIST_DEFAULT_TYPE ); cvCreateData( hist->bins ); } else if( type == CV_HIST_SPARSE ) hist->bins = cvCreateSparseMat( dims, sizes, CV_HIST_DEFAULT_TYPE ); else CV_Error( CV_StsBadArg, "Invalid histogram type" ); if( ranges ) cvSetHistBinRanges( hist, ranges, uniform ); return hist; } /* Creates histogram wrapping header for given array */ CV_IMPL CvHistogram* cvMakeHistHeaderForArray( int dims, int *sizes, CvHistogram *hist, float *data, float **ranges, int uniform ) { if( !hist ) CV_Error( CV_StsNullPtr, "Null histogram header pointer" ); if( !data ) CV_Error( CV_StsNullPtr, "Null data pointer" ); hist->thresh2 = 0; hist->type = CV_HIST_MAGIC_VAL; hist->bins = cvInitMatNDHeader( &hist->mat, dims, sizes, CV_HIST_DEFAULT_TYPE, data ); if( ranges ) { if( !uniform ) CV_Error( CV_StsBadArg, "Only uniform bin ranges can be used here " "(to avoid memory allocation)" ); cvSetHistBinRanges( hist, ranges, uniform ); } return hist; } CV_IMPL void cvReleaseHist( CvHistogram **hist ) { if( !hist ) CV_Error( CV_StsNullPtr, "" ); if( *hist ) { CvHistogram* temp = *hist; if( !CV_IS_HIST(temp)) CV_Error( CV_StsBadArg, "Invalid histogram header" ); *hist = 0; if( CV_IS_SPARSE_HIST( temp )) cvReleaseSparseMat( (CvSparseMat**)&temp->bins ); else { cvReleaseData( temp->bins ); temp->bins = 0; } if( temp->thresh2 ) cvFree( &temp->thresh2 ); cvFree( &temp ); } } CV_IMPL void cvClearHist( CvHistogram *hist ) { if( !CV_IS_HIST(hist) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); cvZero( hist->bins ); } // Clears histogram bins that are below than threshold CV_IMPL void cvThreshHist( CvHistogram* hist, double thresh ) { if( !CV_IS_HIST(hist) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); if( !CV_IS_SPARSE_MAT(hist->bins) ) { CvMat mat; cvGetMat( hist->bins, &mat, 0, 1 ); cvThreshold( &mat, &mat, thresh, 0, CV_THRESH_TOZERO ); } else { CvSparseMat* mat = (CvSparseMat*)hist->bins; CvSparseMatIterator iterator; CvSparseNode *node; for( node = cvInitSparseMatIterator( mat, &iterator ); node != 0; node = cvGetNextSparseNode( &iterator )) { float* val = (float*)CV_NODE_VAL( mat, node ); if( *val <= thresh ) *val = 0; } } } // Normalizes histogram (make sum of the histogram bins == factor) CV_IMPL void cvNormalizeHist( CvHistogram* hist, double factor ) { double sum = 0; if( !CV_IS_HIST(hist) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); if( !CV_IS_SPARSE_HIST(hist) ) { CvMat mat; cvGetMat( hist->bins, &mat, 0, 1 ); sum = cvSum( &mat ).val[0]; if( fabs(sum) < DBL_EPSILON ) sum = 1; cvScale( &mat, &mat, factor/sum, 0 ); } else { CvSparseMat* mat = (CvSparseMat*)hist->bins; CvSparseMatIterator iterator; CvSparseNode *node; float scale; for( node = cvInitSparseMatIterator( mat, &iterator ); node != 0; node = cvGetNextSparseNode( &iterator )) { sum += *(float*)CV_NODE_VAL(mat,node); } if( fabs(sum) < DBL_EPSILON ) sum = 1; scale = (float)(factor/sum); for( node = cvInitSparseMatIterator( mat, &iterator ); node != 0; node = cvGetNextSparseNode( &iterator )) { *(float*)CV_NODE_VAL(mat,node) *= scale; } } } // Retrieves histogram global min, max and their positions CV_IMPL void cvGetMinMaxHistValue( const CvHistogram* hist, float *value_min, float* value_max, int* idx_min, int* idx_max ) { double minVal, maxVal; int dims, size[CV_MAX_DIM]; if( !CV_IS_HIST(hist) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); dims = cvGetDims( hist->bins, size ); if( !CV_IS_SPARSE_HIST(hist) ) { CvMat mat; CvPoint minPt, maxPt; cvGetMat( hist->bins, &mat, 0, 1 ); cvMinMaxLoc( &mat, &minVal, &maxVal, &minPt, &maxPt ); if( dims == 1 ) { if( idx_min ) *idx_min = minPt.y + minPt.x; if( idx_max ) *idx_max = maxPt.y + maxPt.x; } else if( dims == 2 ) { if( idx_min ) idx_min[0] = minPt.y, idx_min[1] = minPt.x; if( idx_max ) idx_max[0] = maxPt.y, idx_max[1] = maxPt.x; } else if( idx_min || idx_max ) { int imin = minPt.y*mat.cols + minPt.x; int imax = maxPt.y*mat.cols + maxPt.x; for(int i = dims - 1; i >= 0; i-- ) { if( idx_min ) { int t = imin / size[i]; idx_min[i] = imin - t*size[i]; imin = t; } if( idx_max ) { int t = imax / size[i]; idx_max[i] = imax - t*size[i]; imax = t; } } } } else { CvSparseMat* mat = (CvSparseMat*)hist->bins; CvSparseMatIterator iterator; CvSparseNode *node; int minv = INT_MAX; int maxv = INT_MIN; CvSparseNode* minNode = 0; CvSparseNode* maxNode = 0; const int *_idx_min = 0, *_idx_max = 0; Cv32suf m; for( node = cvInitSparseMatIterator( mat, &iterator ); node != 0; node = cvGetNextSparseNode( &iterator )) { int value = *(int*)CV_NODE_VAL(mat,node); value = CV_TOGGLE_FLT(value); if( value < minv ) { minv = value; minNode = node; } if( value > maxv ) { maxv = value; maxNode = node; } } if( minNode ) { _idx_min = CV_NODE_IDX(mat,minNode); _idx_max = CV_NODE_IDX(mat,maxNode); m.i = CV_TOGGLE_FLT(minv); minVal = m.f; m.i = CV_TOGGLE_FLT(maxv); maxVal = m.f; } else { minVal = maxVal = 0; } for(int i = 0; i < dims; i++ ) { if( idx_min ) idx_min[i] = _idx_min ? _idx_min[i] : -1; if( idx_max ) idx_max[i] = _idx_max ? _idx_max[i] : -1; } } if( value_min ) *value_min = (float)minVal; if( value_max ) *value_max = (float)maxVal; } // Compares two histograms using one of a few methods CV_IMPL double cvCompareHist( const CvHistogram* hist1, const CvHistogram* hist2, int method ) { int i; int size1[CV_MAX_DIM], size2[CV_MAX_DIM], total = 1; if( !CV_IS_HIST(hist1) || !CV_IS_HIST(hist2) ) CV_Error( CV_StsBadArg, "Invalid histogram header[s]" ); if( CV_IS_SPARSE_MAT(hist1->bins) != CV_IS_SPARSE_MAT(hist2->bins)) CV_Error(CV_StsUnmatchedFormats, "One of histograms is sparse and other is not"); if( !CV_IS_SPARSE_MAT(hist1->bins) ) { cv::Mat H1 = cv::cvarrToMat(hist1->bins); cv::Mat H2 = cv::cvarrToMat(hist2->bins); return cv::compareHist(H1, H2, method); } int dims1 = cvGetDims( hist1->bins, size1 ); int dims2 = cvGetDims( hist2->bins, size2 ); if( dims1 != dims2 ) CV_Error( CV_StsUnmatchedSizes, "The histograms have different numbers of dimensions" ); for( i = 0; i < dims1; i++ ) { if( size1[i] != size2[i] ) CV_Error( CV_StsUnmatchedSizes, "The histograms have different sizes" ); total *= size1[i]; } double result = 0; CvSparseMat* mat1 = (CvSparseMat*)(hist1->bins); CvSparseMat* mat2 = (CvSparseMat*)(hist2->bins); CvSparseMatIterator iterator; CvSparseNode *node1, *node2; if( mat1->heap->active_count > mat2->heap->active_count && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT && method != CV_COMP_KL_DIV ) { CvSparseMat* t; CV_SWAP( mat1, mat2, t ); } if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) ) { for( node1 = cvInitSparseMatIterator( mat1, &iterator ); node1 != 0; node1 = cvGetNextSparseNode( &iterator )) { double v1 = *(float*)CV_NODE_VAL(mat1,node1); uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval ); double v2 = node2_data ? *(float*)node2_data : 0.f; double a = v1 - v2; double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2; if( fabs(b) > DBL_EPSILON ) result += a*a/b; } } else if( method == CV_COMP_CORREL ) { double s1 = 0, s11 = 0; double s2 = 0, s22 = 0; double s12 = 0; double num, denom2, scale = 1./total; for( node1 = cvInitSparseMatIterator( mat1, &iterator ); node1 != 0; node1 = cvGetNextSparseNode( &iterator )) { double v1 = *(float*)CV_NODE_VAL(mat1,node1); uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval ); if( node2_data ) { double v2 = *(float*)node2_data; s12 += v1*v2; } s1 += v1; s11 += v1*v1; } for( node2 = cvInitSparseMatIterator( mat2, &iterator ); node2 != 0; node2 = cvGetNextSparseNode( &iterator )) { double v2 = *(float*)CV_NODE_VAL(mat2,node2); s2 += v2; s22 += v2*v2; } num = s12 - s1*s2*scale; denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale); result = fabs(denom2) > DBL_EPSILON ? num/sqrt(denom2) : 1; } else if( method == CV_COMP_INTERSECT ) { for( node1 = cvInitSparseMatIterator( mat1, &iterator ); node1 != 0; node1 = cvGetNextSparseNode( &iterator )) { float v1 = *(float*)CV_NODE_VAL(mat1,node1); uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval ); if( node2_data ) { float v2 = *(float*)node2_data; if( v1 <= v2 ) result += v1; else result += v2; } } } else if( method == CV_COMP_BHATTACHARYYA ) { double s1 = 0, s2 = 0; for( node1 = cvInitSparseMatIterator( mat1, &iterator ); node1 != 0; node1 = cvGetNextSparseNode( &iterator )) { double v1 = *(float*)CV_NODE_VAL(mat1,node1); uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval ); s1 += v1; if( node2_data ) { double v2 = *(float*)node2_data; result += sqrt(v1 * v2); } } for( node1 = cvInitSparseMatIterator( mat2, &iterator ); node1 != 0; node1 = cvGetNextSparseNode( &iterator )) { double v2 = *(float*)CV_NODE_VAL(mat2,node1); s2 += v2; } s1 *= s2; s1 = fabs(s1) > FLT_EPSILON ? 1./sqrt(s1) : 1.; result = 1. - result*s1; result = sqrt(MAX(result,0.)); } else if( method == CV_COMP_KL_DIV ) { cv::SparseMat sH1, sH2; ((const CvSparseMat*)hist1->bins)->copyToSparseMat(sH1); ((const CvSparseMat*)hist2->bins)->copyToSparseMat(sH2); result = cv::compareHist( sH1, sH2, CV_COMP_KL_DIV ); } else CV_Error( CV_StsBadArg, "Unknown comparison method" ); if( method == CV_COMP_CHISQR_ALT ) result *= 2; return result; } // copies one histogram to another CV_IMPL void cvCopyHist( const CvHistogram* src, CvHistogram** _dst ) { if( !_dst ) CV_Error( CV_StsNullPtr, "Destination double pointer is NULL" ); CvHistogram* dst = *_dst; if( !CV_IS_HIST(src) || (dst && !CV_IS_HIST(dst)) ) CV_Error( CV_StsBadArg, "Invalid histogram header[s]" ); bool eq = false; int size1[CV_MAX_DIM]; bool is_sparse = CV_IS_SPARSE_MAT(src->bins); int dims1 = cvGetDims( src->bins, size1 ); if( dst && (is_sparse == CV_IS_SPARSE_MAT(dst->bins))) { int size2[CV_MAX_DIM]; int dims2 = cvGetDims( dst->bins, size2 ); if( dims1 == dims2 ) { int i; for( i = 0; i < dims1; i++ ) { if( size1[i] != size2[i] ) break; } eq = (i == dims1); } } if( !eq ) { cvReleaseHist( _dst ); dst = cvCreateHist( dims1, size1, !is_sparse ? CV_HIST_ARRAY : CV_HIST_SPARSE, 0, 0 ); *_dst = dst; } if( CV_HIST_HAS_RANGES( src )) { float* ranges[CV_MAX_DIM]; float** thresh = 0; if( CV_IS_UNIFORM_HIST( src )) { for( int i = 0; i < dims1; i++ ) ranges[i] = (float*)src->thresh[i]; thresh = ranges; } else { thresh = src->thresh2; } cvSetHistBinRanges( dst, thresh, CV_IS_UNIFORM_HIST(src)); } cvCopy( src->bins, dst->bins ); } // Sets a value range for every histogram bin CV_IMPL void cvSetHistBinRanges( CvHistogram* hist, float** ranges, int uniform ) { int dims, size[CV_MAX_DIM], total = 0; int i, j; if( !ranges ) CV_Error( CV_StsNullPtr, "NULL ranges pointer" ); if( !CV_IS_HIST(hist) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); dims = cvGetDims( hist->bins, size ); for( i = 0; i < dims; i++ ) total += size[i]+1; if( uniform ) { for( i = 0; i < dims; i++ ) { if( !ranges[i] ) CV_Error( CV_StsNullPtr, "One of <ranges> elements is NULL" ); hist->thresh[i][0] = ranges[i][0]; hist->thresh[i][1] = ranges[i][1]; } hist->type |= CV_HIST_UNIFORM_FLAG + CV_HIST_RANGES_FLAG; } else { float* dim_ranges; if( !hist->thresh2 ) { hist->thresh2 = (float**)cvAlloc( dims*sizeof(hist->thresh2[0])+ total*sizeof(hist->thresh2[0][0])); } dim_ranges = (float*)(hist->thresh2 + dims); for( i = 0; i < dims; i++ ) { float val0 = -FLT_MAX; if( !ranges[i] ) CV_Error( CV_StsNullPtr, "One of <ranges> elements is NULL" ); for( j = 0; j <= size[i]; j++ ) { float val = ranges[i][j]; if( val <= val0 ) CV_Error(CV_StsOutOfRange, "Bin ranges should go in ascenting order"); val0 = dim_ranges[j] = val; } hist->thresh2[i] = dim_ranges; dim_ranges += size[i] + 1; } hist->type |= CV_HIST_RANGES_FLAG; hist->type &= ~CV_HIST_UNIFORM_FLAG; } } CV_IMPL void cvCalcArrHist( CvArr** img, CvHistogram* hist, int accumulate, const CvArr* mask ) { if( !CV_IS_HIST(hist)) CV_Error( CV_StsBadArg, "Bad histogram pointer" ); if( !img ) CV_Error( CV_StsNullPtr, "Null double array pointer" ); int size[CV_MAX_DIM]; int i, dims = cvGetDims( hist->bins, size); bool uniform = CV_IS_UNIFORM_HIST(hist); std::vector<cv::Mat> images(dims); for( i = 0; i < dims; i++ ) images[i] = cv::cvarrToMat(img[i]); cv::Mat _mask; if( mask ) _mask = cv::cvarrToMat(mask); const float* uranges[CV_MAX_DIM] = {0}; const float** ranges = 0; if( hist->type & CV_HIST_RANGES_FLAG ) { ranges = (const float**)hist->thresh2; if( uniform ) { for( i = 0; i < dims; i++ ) uranges[i] = &hist->thresh[i][0]; ranges = uranges; } } if( !CV_IS_SPARSE_HIST(hist) ) { cv::Mat H = cv::cvarrToMat(hist->bins); cv::calcHist( &images[0], (int)images.size(), 0, _mask, H, cvGetDims(hist->bins), H.size, ranges, uniform, accumulate != 0 ); } else { CvSparseMat* sparsemat = (CvSparseMat*)hist->bins; if( !accumulate ) cvZero( hist->bins ); cv::SparseMat sH; sparsemat->copyToSparseMat(sH); cv::calcHist( &images[0], (int)images.size(), 0, _mask, sH, sH.dims(), sH.dims() > 0 ? sH.hdr->size : 0, ranges, uniform, accumulate != 0, true ); if( accumulate ) cvZero( sparsemat ); cv::SparseMatConstIterator it = sH.begin(); int nz = (int)sH.nzcount(); for( i = 0; i < nz; i++, ++it ) *(float*)cvPtrND(sparsemat, it.node()->idx, 0, -2) = (float)*(const int*)it.ptr; } } CV_IMPL void cvCalcArrBackProject( CvArr** img, CvArr* dst, const CvHistogram* hist ) { if( !CV_IS_HIST(hist)) CV_Error( CV_StsBadArg, "Bad histogram pointer" ); if( !img ) CV_Error( CV_StsNullPtr, "Null double array pointer" ); int size[CV_MAX_DIM]; int i, dims = cvGetDims( hist->bins, size ); bool uniform = CV_IS_UNIFORM_HIST(hist); const float* uranges[CV_MAX_DIM] = {0}; const float** ranges = 0; if( hist->type & CV_HIST_RANGES_FLAG ) { ranges = (const float**)hist->thresh2; if( uniform ) { for( i = 0; i < dims; i++ ) uranges[i] = &hist->thresh[i][0]; ranges = uranges; } } std::vector<cv::Mat> images(dims); for( i = 0; i < dims; i++ ) images[i] = cv::cvarrToMat(img[i]); cv::Mat _dst = cv::cvarrToMat(dst); CV_Assert( _dst.size() == images[0].size() && _dst.depth() == images[0].depth() ); if( !CV_IS_SPARSE_HIST(hist) ) { cv::Mat H = cv::cvarrToMat(hist->bins); cv::calcBackProject( &images[0], (int)images.size(), 0, H, _dst, ranges, 1, uniform ); } else { cv::SparseMat sH; ((const CvSparseMat*)hist->bins)->copyToSparseMat(sH); cv::calcBackProject( &images[0], (int)images.size(), 0, sH, _dst, ranges, 1, uniform ); } } ////////////////////// B A C K P R O J E C T P A T C H ///////////////////////// CV_IMPL void cvCalcArrBackProjectPatch( CvArr** arr, CvArr* dst, CvSize patch_size, CvHistogram* hist, int method, double norm_factor ) { CvHistogram* model = 0; IplImage imgstub[CV_MAX_DIM], *img[CV_MAX_DIM]; IplROI roi; CvMat dststub, *dstmat; int i, dims; int x, y; CvSize size; if( !CV_IS_HIST(hist)) CV_Error( CV_StsBadArg, "Bad histogram pointer" ); if( !arr ) CV_Error( CV_StsNullPtr, "Null double array pointer" ); if( norm_factor <= 0 ) CV_Error( CV_StsOutOfRange, "Bad normalization factor (set it to 1.0 if unsure)" ); if( patch_size.width <= 0 || patch_size.height <= 0 ) CV_Error( CV_StsBadSize, "The patch width and height must be positive" ); dims = cvGetDims( hist->bins ); cvNormalizeHist( hist, norm_factor ); for( i = 0; i < dims; i++ ) { CvMat stub, *mat; mat = cvGetMat( arr[i], &stub, 0, 0 ); img[i] = cvGetImage( mat, &imgstub[i] ); img[i]->roi = &roi; } dstmat = cvGetMat( dst, &dststub, 0, 0 ); if( CV_MAT_TYPE( dstmat->type ) != CV_32FC1 ) CV_Error( CV_StsUnsupportedFormat, "Resultant image must have 32fC1 type" ); if( dstmat->cols != img[0]->width - patch_size.width + 1 || dstmat->rows != img[0]->height - patch_size.height + 1 ) CV_Error( CV_StsUnmatchedSizes, "The output map must be (W-w+1 x H-h+1), " "where the input images are (W x H) each and the patch is (w x h)" ); cvCopyHist( hist, &model ); size = cvGetMatSize(dstmat); roi.coi = 0; roi.width = patch_size.width; roi.height = patch_size.height; for( y = 0; y < size.height; y++ ) { for( x = 0; x < size.width; x++ ) { double result; roi.xOffset = x; roi.yOffset = y; cvCalcHist( img, model ); cvNormalizeHist( model, norm_factor ); result = cvCompareHist( model, hist, method ); CV_MAT_ELEM( *dstmat, float, y, x ) = (float)result; } } cvReleaseHist( &model ); } // Calculates Bayes probabilistic histograms CV_IMPL void cvCalcBayesianProb( CvHistogram** src, int count, CvHistogram** dst ) { int i; if( !src || !dst ) CV_Error( CV_StsNullPtr, "NULL histogram array pointer" ); if( count < 2 ) CV_Error( CV_StsOutOfRange, "Too small number of histograms" ); for( i = 0; i < count; i++ ) { if( !CV_IS_HIST(src[i]) || !CV_IS_HIST(dst[i]) ) CV_Error( CV_StsBadArg, "Invalid histogram header" ); if( !CV_IS_MATND(src[i]->bins) || !CV_IS_MATND(dst[i]->bins) ) CV_Error( CV_StsBadArg, "The function supports dense histograms only" ); } cvZero( dst[0]->bins ); // dst[0] = src[0] + ... + src[count-1] for( i = 0; i < count; i++ ) cvAdd( src[i]->bins, dst[0]->bins, dst[0]->bins ); cvDiv( 0, dst[0]->bins, dst[0]->bins ); // dst[i] = src[i]*(1/dst[0]) for( i = count - 1; i >= 0; i-- ) cvMul( src[i]->bins, dst[0]->bins, dst[i]->bins ); } CV_IMPL void cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask, CvHistogram* hist_dens, double scale ) { if( scale <= 0 ) CV_Error( CV_StsOutOfRange, "scale must be positive" ); if( !CV_IS_HIST(hist) || !CV_IS_HIST(hist_mask) || !CV_IS_HIST(hist_dens) ) CV_Error( CV_StsBadArg, "Invalid histogram pointer[s]" ); { CvArr* arrs[] = { hist->bins, hist_mask->bins, hist_dens->bins }; CvMatND stubs[3]; CvNArrayIterator iterator; cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ); if( CV_MAT_TYPE(iterator.hdr[0]->type) != CV_32FC1 ) CV_Error( CV_StsUnsupportedFormat, "All histograms must have 32fC1 type" ); do { const float* srcdata = (const float*)(iterator.ptr[0]); const float* maskdata = (const float*)(iterator.ptr[1]); float* dstdata = (float*)(iterator.ptr[2]); int i; for( i = 0; i < iterator.size.width; i++ ) { float s = srcdata[i]; float m = maskdata[i]; if( s > FLT_EPSILON ) if( m <= s ) dstdata[i] = (float)(m*scale/s); else dstdata[i] = (float)scale; else dstdata[i] = (float)0; } } while( cvNextNArraySlice( &iterator )); } } class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody { public: enum {HIST_SZ = 256}; EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock) : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock) { } void operator()( const cv::Range& rowRange ) const { int localHistogram[HIST_SZ] = {0, }; const size_t sstep = src_.step; int width = src_.cols; int height = rowRange.end - rowRange.start; if (src_.isContinuous()) { width *= height; height = 1; } for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep) { int x = 0; for (; x <= width - 4; x += 4) { int t0 = ptr[x], t1 = ptr[x+1]; localHistogram[t0]++; localHistogram[t1]++; t0 = ptr[x+2]; t1 = ptr[x+3]; localHistogram[t0]++; localHistogram[t1]++; } for (; x < width; ++x) localHistogram[ptr[x]]++; } cv::AutoLock lock(*histogramLock_); for( int i = 0; i < HIST_SZ; i++ ) globalHistogram_[i] += localHistogram[i]; } static bool isWorthParallel( const cv::Mat& src ) { return ( src.total() >= 640*480 ); } private: EqualizeHistCalcHist_Invoker& operator=(const EqualizeHistCalcHist_Invoker&); cv::Mat& src_; int* globalHistogram_; cv::Mutex* histogramLock_; }; class EqualizeHistLut_Invoker : public cv::ParallelLoopBody { public: EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut ) : src_(src), dst_(dst), lut_(lut) { } void operator()( const cv::Range& rowRange ) const { const size_t sstep = src_.step; const size_t dstep = dst_.step; int width = src_.cols; int height = rowRange.end - rowRange.start; int* lut = lut_; if (src_.isContinuous() && dst_.isContinuous()) { width *= height; height = 1; } const uchar* sptr = src_.ptr<uchar>(rowRange.start); uchar* dptr = dst_.ptr<uchar>(rowRange.start); for (; height--; sptr += sstep, dptr += dstep) { int x = 0; for (; x <= width - 4; x += 4) { int v0 = sptr[x]; int v1 = sptr[x+1]; int x0 = lut[v0]; int x1 = lut[v1]; dptr[x] = (uchar)x0; dptr[x+1] = (uchar)x1; v0 = sptr[x+2]; v1 = sptr[x+3]; x0 = lut[v0]; x1 = lut[v1]; dptr[x+2] = (uchar)x0; dptr[x+3] = (uchar)x1; } for (; x < width; ++x) dptr[x] = (uchar)lut[sptr[x]]; } } static bool isWorthParallel( const cv::Mat& src ) { return ( src.total() >= 640*480 ); } private: EqualizeHistLut_Invoker& operator=(const EqualizeHistLut_Invoker&); cv::Mat& src_; cv::Mat& dst_; int* lut_; }; CV_IMPL void cvEqualizeHist( const CvArr* srcarr, CvArr* dstarr ) { cv::equalizeHist(cv::cvarrToMat(srcarr), cv::cvarrToMat(dstarr)); } #ifdef HAVE_OPENCL namespace cv { static bool ocl_equalizeHist(InputArray _src, OutputArray _dst) { const ocl::Device & dev = ocl::Device::getDefault(); int compunits = dev.maxComputeUnits(); size_t wgs = dev.maxWorkGroupSize(); Size size = _src.size(); bool use16 = size.width % 16 == 0 && _src.offset() % 16 == 0 && _src.step() % 16 == 0; int kercn = dev.isAMD() && use16 ? 16 : std::min(4, ocl::predictOptimalVectorWidth(_src)); ocl::Kernel k1("calculate_histogram", ocl::imgproc::histogram_oclsrc, format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D kercn=%d -D T=%s%s", BINS, compunits, wgs, kercn, kercn == 4 ? "int" : ocl::typeToStr(CV_8UC(kercn)), _src.isContinuous() ? " -D HAVE_SRC_CONT" : "")); if (k1.empty()) return false; UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1); k1.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total()); size_t globalsize = compunits * wgs; if (!k1.run(1, &globalsize, &wgs, false)) return false; wgs = std::min<size_t>(ocl::Device::getDefault().maxWorkGroupSize(), BINS); UMat lut(1, 256, CV_8UC1); ocl::Kernel k2("calcLUT", ocl::imgproc::histogram_oclsrc, format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d", BINS, compunits, (int)wgs)); k2.args(ocl::KernelArg::PtrWriteOnly(lut), ocl::KernelArg::PtrReadOnly(ghist), (int)_src.total()); // calculation of LUT if (!k2.run(1, &wgs, &wgs, false)) return false; // execute LUT transparently LUT(_src, lut, _dst); return true; } } #endif void cv::equalizeHist( InputArray _src, OutputArray _dst ) { CV_Assert( _src.type() == CV_8UC1 ); if (_src.empty()) return; CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), ocl_equalizeHist(_src, _dst)) Mat src = _src.getMat(); _dst.create( src.size(), src.type() ); Mat dst = _dst.getMat(); Mutex histogramLockInstance; const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ; int hist[hist_sz] = {0,}; int lut[hist_sz]; EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance); EqualizeHistLut_Invoker lutBody(src, dst, lut); cv::Range heightRange(0, src.rows); if(EqualizeHistCalcHist_Invoker::isWorthParallel(src)) parallel_for_(heightRange, calcBody); else calcBody(heightRange); int i = 0; while (!hist[i]) ++i; int total = (int)src.total(); if (hist[i] == total) { dst.setTo(i); return; } float scale = (hist_sz - 1.f)/(total - hist[i]); int sum = 0; for (lut[i++] = 0; i < hist_sz; ++i) { sum += hist[i]; lut[i] = saturate_cast<uchar>(sum * scale); } if(EqualizeHistLut_Invoker::isWorthParallel(src)) parallel_for_(heightRange, lutBody); else lutBody(heightRange); } // ---------------------------------------------------------------------- /* Implementation of RTTI and Generic Functions for CvHistogram */ #define CV_TYPE_NAME_HIST "opencv-hist" static int icvIsHist( const void * ptr ) { return CV_IS_HIST( ((CvHistogram*)ptr) ); } static CvHistogram * icvCloneHist( const CvHistogram * src ) { CvHistogram * dst=NULL; cvCopyHist(src, &dst); return dst; } static void *icvReadHist( CvFileStorage * fs, CvFileNode * node ) { CvHistogram * h = 0; int type = 0; int is_uniform = 0; int have_ranges = 0; h = (CvHistogram *)cvAlloc( sizeof(CvHistogram) ); type = cvReadIntByName( fs, node, "type", 0 ); is_uniform = cvReadIntByName( fs, node, "is_uniform", 0 ); have_ranges = cvReadIntByName( fs, node, "have_ranges", 0 ); h->type = CV_HIST_MAGIC_VAL | type | (is_uniform ? CV_HIST_UNIFORM_FLAG : 0) | (have_ranges ? CV_HIST_RANGES_FLAG : 0); if(type == CV_HIST_ARRAY) { // read histogram bins CvMatND* mat = (CvMatND*)cvReadByName( fs, node, "mat" ); int i, sizes[CV_MAX_DIM]; if(!CV_IS_MATND(mat)) CV_Error( CV_StsError, "Expected CvMatND"); for(i=0; i<mat->dims; i++) sizes[i] = mat->dim[i].size; cvInitMatNDHeader( &(h->mat), mat->dims, sizes, mat->type, mat->data.ptr ); h->bins = &(h->mat); // take ownership of refcount pointer as well h->mat.refcount = mat->refcount; // increase refcount so freeing temp header doesn't free data cvIncRefData( mat ); // free temporary header cvReleaseMatND( &mat ); } else { h->bins = cvReadByName( fs, node, "bins" ); if(!CV_IS_SPARSE_MAT(h->bins)){ CV_Error( CV_StsError, "Unknown Histogram type"); } } // read thresholds if(have_ranges) { int i, dims, size[CV_MAX_DIM], total = 0; CvSeqReader reader; CvFileNode * thresh_node; dims = cvGetDims( h->bins, size ); for( i = 0; i < dims; i++ ) total += size[i]+1; thresh_node = cvGetFileNodeByName( fs, node, "thresh" ); if(!thresh_node) CV_Error( CV_StsError, "'thresh' node is missing"); cvStartReadRawData( fs, thresh_node, &reader ); if(is_uniform) { for(i=0; i<dims; i++) cvReadRawDataSlice( fs, &reader, 2, h->thresh[i], "f" ); h->thresh2 = NULL; } else { float* dim_ranges; h->thresh2 = (float**)cvAlloc( dims*sizeof(h->thresh2[0])+ total*sizeof(h->thresh2[0][0])); dim_ranges = (float*)(h->thresh2 + dims); for(i=0; i < dims; i++) { h->thresh2[i] = dim_ranges; cvReadRawDataSlice( fs, &reader, size[i]+1, dim_ranges, "f" ); dim_ranges += size[i] + 1; } } } return h; } static void icvWriteHist( CvFileStorage* fs, const char* name, const void* struct_ptr, CvAttrList /*attributes*/ ) { const CvHistogram * hist = (const CvHistogram *) struct_ptr; int sizes[CV_MAX_DIM]; int dims; int i; int is_uniform, have_ranges; cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HIST ); is_uniform = (CV_IS_UNIFORM_HIST(hist) ? 1 : 0); have_ranges = (hist->type & CV_HIST_RANGES_FLAG ? 1 : 0); cvWriteInt( fs, "type", (hist->type & 1) ); cvWriteInt( fs, "is_uniform", is_uniform ); cvWriteInt( fs, "have_ranges", have_ranges ); if(!CV_IS_SPARSE_HIST(hist)) cvWrite( fs, "mat", &(hist->mat) ); else cvWrite( fs, "bins", hist->bins ); // write thresholds if(have_ranges){ dims = cvGetDims( hist->bins, sizes ); cvStartWriteStruct( fs, "thresh", CV_NODE_SEQ + CV_NODE_FLOW ); if(is_uniform){ for(i=0; i<dims; i++){ cvWriteRawData( fs, hist->thresh[i], 2, "f" ); } } else{ for(i=0; i<dims; i++){ cvWriteRawData( fs, hist->thresh2[i], sizes[i]+1, "f" ); } } cvEndWriteStruct( fs ); } cvEndWriteStruct( fs ); } CvType hist_type( CV_TYPE_NAME_HIST, icvIsHist, (CvReleaseFunc)cvReleaseHist, icvReadHist, icvWriteHist, (CvCloneFunc)icvCloneHist ); /* End of file. */