knearest.cpp 15.7 KB
Newer Older
wester committed
1 2 3 4 5 6 7 8 9
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
wester committed
10
//                        Intel License Agreement
wester committed
11 12 13 14 15 16 17 18 19 20 21 22 23 24
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
wester committed
25
//   * The name of Intel Corporation may not be used to endorse or promote products
wester committed
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

/****************************************************************************************\
wester committed
44
*                          K-Nearest Neighbors Classifier                                *
wester committed
45 46
\****************************************************************************************/

wester committed
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
// k Nearest Neighbors
CvKNearest::CvKNearest()
{
    samples = 0;
    clear();
}


CvKNearest::~CvKNearest()
{
    clear();
}


CvKNearest::CvKNearest( const CvMat* _train_data, const CvMat* _responses,
                        const CvMat* _sample_idx, bool _is_regression, int _max_k )
{
    samples = 0;
    train( _train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}
wester committed
67 68


wester committed
69
void CvKNearest::clear()
wester committed
70
{
wester committed
71
    while( samples )
wester committed
72
    {
wester committed
73 74 75 76
        CvVectors* next_samples = samples->next;
        cvFree( &samples->data.fl );
        cvFree( &samples );
        samples = next_samples;
wester committed
77
    }
wester committed
78 79 80 81
    var_count = 0;
    total = 0;
    max_k = 0;
}
wester committed
82 83


wester committed
84
int CvKNearest::get_max_k() const { return max_k; }
wester committed
85

wester committed
86
int CvKNearest::get_var_count() const { return var_count; }
wester committed
87

wester committed
88
bool CvKNearest::is_regression() const { return regression; }
wester committed
89

wester committed
90
int CvKNearest::get_sample_count() const { return total; }
wester committed
91

wester committed
92 93 94 95 96 97
bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses,
                        const CvMat* _sample_idx, bool _is_regression,
                        int _max_k, bool _update_base )
{
    bool ok = false;
    CvMat* responses = 0;
wester committed
98

wester committed
99
    CV_FUNCNAME( "CvKNearest::train" );
wester committed
100

wester committed
101
    __BEGIN__;
wester committed
102

wester committed
103 104 105
    CvVectors* _samples = 0;
    float** _data = 0;
    int _count = 0, _dims = 0, _dims_all = 0, _rsize = 0;
wester committed
106

wester committed
107
    if( !_update_base )
wester committed
108 109
        clear();

wester committed
110 111 112 113 114 115 116 117 118 119 120 121
    // Prepare training data and related parameters.
    // Treat categorical responses as ordered - to prevent class label compression and
    // to enable entering new classes in the updates
    CV_CALL( cvPrepareTrainData( "CvKNearest::train", _train_data, CV_ROW_SAMPLE,
        _responses, CV_VAR_ORDERED, 0, _sample_idx, true, (const float***)&_data,
        &_count, &_dims, &_dims_all, &responses, 0, 0 ));

    if( !responses )
        CV_ERROR( CV_StsNoMem, "Could not allocate memory for responses" );

    if( _update_base && _dims != var_count )
        CV_ERROR( CV_StsBadArg, "The newly added data have different dimensionality" );
wester committed
122

wester committed
123
    if( !_update_base )
wester committed
124
    {
wester committed
125 126
        if( _max_k < 1 )
            CV_ERROR( CV_StsOutOfRange, "max_k must be a positive number" );
wester committed
127

wester committed
128 129 130
        regression = _is_regression;
        var_count = _dims;
        max_k = _max_k;
wester committed
131 132
    }

wester committed
133 134 135 136 137 138 139
    _rsize = _count*sizeof(float);
    CV_CALL( _samples = (CvVectors*)cvAlloc( sizeof(*_samples) + _rsize ));
    _samples->next = samples;
    _samples->type = CV_32F;
    _samples->data.fl = _data;
    _samples->count = _count;
    total += _count;
wester committed
140

wester committed
141 142
    samples = _samples;
    memcpy( _samples + 1, responses->data.fl, _rsize );
wester committed
143

wester committed
144
    ok = true;
wester committed
145

wester committed
146
    __END__;
wester committed
147

wester committed
148 149
    if( responses && responses->data.ptr != _responses->data.ptr )
        cvReleaseMat(&responses);
wester committed
150

wester committed
151 152
    return ok;
}
wester committed
153 154


wester committed
155 156 157 158 159 160 161 162 163 164 165

void CvKNearest::find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
                    float* neighbor_responses, const float** neighbors, float* dist ) const
{
    int i, j, count = end - start, k1 = 0, k2 = 0, d = var_count;
    CvVectors* s = samples;

    for( ; s != 0; s = s->next )
    {
        int n = s->count;
        for( j = 0; j < n; j++ )
wester committed
166
        {
wester committed
167
            for( i = 0; i < count; i++ )
wester committed
168
            {
wester committed
169 170 171 172 173 174 175 176 177 178
                double sum = 0;
                Cv32suf si;
                const float* v = s->data.fl[j];
                const float* u = (float*)(_samples->data.ptr + _samples->step*(start + i));
                Cv32suf* dd = (Cv32suf*)(dist + i*k);
                float* nr;
                const float** nn;
                int t, ii, ii1;

                for( t = 0; t <= d - 4; t += 4 )
wester committed
179
                {
wester committed
180 181 182
                    double t0 = u[t] - v[t], t1 = u[t+1] - v[t+1];
                    double t2 = u[t+2] - v[t+2], t3 = u[t+3] - v[t+3];
                    sum += t0*t0 + t1*t1 + t2*t2 + t3*t3;
wester committed
183 184
                }

wester committed
185
                for( ; t < d; t++ )
wester committed
186
                {
wester committed
187 188
                    double t0 = u[t] - v[t];
                    sum += t0*t0;
wester committed
189 190
                }

wester committed
191 192 193
                si.f = (float)sum;
                for( ii = k1-1; ii >= 0; ii-- )
                    if( si.i > dd[ii].i )
wester committed
194
                        break;
wester committed
195
                if( ii >= k-1 )
wester committed
196 197
                    continue;

wester committed
198 199 200
                nr = neighbor_responses + i*k;
                nn = neighbors ? neighbors + (start + i)*k : 0;
                for( ii1 = k2 - 1; ii1 > ii; ii1-- )
wester committed
201
                {
wester committed
202 203 204
                    dd[ii1+1].i = dd[ii1].i;
                    nr[ii1+1] = nr[ii1];
                    if( nn ) nn[ii1+1] = nn[ii1];
wester committed
205
                }
wester committed
206 207 208 209
                dd[ii+1].i = si.i;
                nr[ii+1] = ((float*)(s + 1))[j];
                if( nn )
                    nn[ii+1] = v;
wester committed
210
            }
wester committed
211 212
            k1 = MIN( k1+1, k );
            k2 = MIN( k1, k-1 );
wester committed
213
        }
wester committed
214 215
    }
}
wester committed
216 217


wester committed
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
float CvKNearest::write_results( int k, int k1, int start, int end,
    const float* neighbor_responses, const float* dist,
    CvMat* _results, CvMat* _neighbor_responses,
    CvMat* _dist, Cv32suf* sort_buf ) const
{
    float result = 0.f;
    int i, j, j1, count = end - start;
    double inv_scale = 1./k1;
    int rstep = _results && !CV_IS_MAT_CONT(_results->type) ? _results->step/sizeof(result) : 1;

    for( i = 0; i < count; i++ )
    {
        const Cv32suf* nr = (const Cv32suf*)(neighbor_responses + i*k);
        float* dst;
        float r;
        if( _results || start+i == 0 )
wester committed
234
        {
wester committed
235
            if( regression )
wester committed
236
            {
wester committed
237 238 239 240
                double s = 0;
                for( j = 0; j < k1; j++ )
                    s += nr[j].f;
                r = (float)(s*inv_scale);
wester committed
241
            }
wester committed
242
            else
wester committed
243
            {
wester committed
244 245
                int prev_start = 0, best_count = 0, cur_count;
                Cv32suf best_val;
wester committed
246

wester committed
247 248 249 250
                for( j = 0; j < k1; j++ )
                    sort_buf[j].i = nr[j].i;

                for( j = k1-1; j > 0; j-- )
wester committed
251
                {
wester committed
252 253 254
                    bool swap_fl = false;
                    for( j1 = 0; j1 < j; j1++ )
                        if( sort_buf[j1].i > sort_buf[j1+1].i )
wester committed
255
                        {
wester committed
256 257 258
                            int t;
                            CV_SWAP( sort_buf[j1].i, sort_buf[j1+1].i, t );
                            swap_fl = true;
wester committed
259
                        }
wester committed
260 261 262
                    if( !swap_fl )
                        break;
                }
wester committed
263

wester committed
264 265 266
                best_val.i = 0;
                for( j = 1; j <= k1; j++ )
                    if( j == k1 || sort_buf[j].i != sort_buf[j-1].i )
wester committed
267
                    {
wester committed
268 269
                        cur_count = j - prev_start;
                        if( best_count < cur_count )
wester committed
270
                        {
wester committed
271 272
                            best_count = cur_count;
                            best_val.i = sort_buf[j-1].i;
wester committed
273
                        }
wester committed
274
                        prev_start = j;
wester committed
275
                    }
wester committed
276
                r = best_val.f;
wester committed
277 278
            }

wester committed
279 280
            if( start+i == 0 )
                result = r;
wester committed
281

wester committed
282 283
            if( _results )
                _results->data.fl[(start + i)*rstep] = r;
wester committed
284 285
        }

wester committed
286
        if( _neighbor_responses )
wester committed
287
        {
wester committed
288 289 290 291 292 293
            dst = (float*)(_neighbor_responses->data.ptr +
                (start + i)*_neighbor_responses->step);
            for( j = 0; j < k1; j++ )
                dst[j] = nr[j].f;
            for( ; j < k; j++ )
                dst[j] = 0.f;
wester committed
294 295
        }

wester committed
296
        if( _dist )
wester committed
297
        {
wester committed
298 299 300 301 302
            dst = (float*)(_dist->data.ptr + (start + i)*_dist->step);
            for( j = 0; j < k1; j++ )
                dst[j] = dist[j + i*k];
            for( ; j < k; j++ )
                dst[j] = 0.f;
wester committed
303 304 305
        }
    }

wester committed
306 307
    return result;
}
wester committed
308

wester committed
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
struct P1 : cv::ParallelLoopBody {
  P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
     int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
  {
    pointer = _pointer;
    k = _k;
    _samples = __samples;
    _neighbors = __neighbors;
    k1 = _k1;
    _results = __results;
    _neighbor_responses = __neighbor_responses;
    _dist = __dist;
    result = _result;
    buf_sz = _buf_sz;
  }

  const CvKNearest* pointer;
  int k;
  const CvMat* _samples;
  const float** _neighbors;
  int k1;
  CvMat* _results;
  CvMat* _neighbor_responses;
  CvMat* _dist;
  float* result;
  int buf_sz;

  void operator()( const cv::Range& range ) const
  {
    cv::AutoBuffer<float> buf(buf_sz);
    for(int i = range.start; i < range.end; i += 1 )
wester committed
340
    {
wester committed
341 342 343
        float* neighbor_responses = &buf[0];
        float* dist = neighbor_responses + 1*k;
        Cv32suf* sort_buf = (Cv32suf*)(dist + 1*k);
wester committed
344

wester committed
345 346
        pointer->find_neighbors_direct( _samples, k, i, i + 1,
                    neighbor_responses, _neighbors, dist );
wester committed
347

wester committed
348 349
        float r = pointer->write_results( k, k1, i, i + 1, neighbor_responses, dist,
                                 _results, _neighbor_responses, _dist, sort_buf );
wester committed
350

wester committed
351 352 353 354
        if( i == 0 )
            *result = r;
    }
  }
wester committed
355

wester committed
356
};
wester committed
357

wester committed
358 359 360 361 362
float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
    const float** _neighbors, CvMat* _neighbor_responses, CvMat* _dist ) const
{
    float result = 0.f;
    const int max_blk_count = 128, max_buf_sz = 1 << 12;
wester committed
363

wester committed
364 365
    if( !samples )
        CV_Error( CV_StsError, "The search tree must be constructed first using train method" );
wester committed
366

wester committed
367 368 369 370
    if( !CV_IS_MAT(_samples) ||
        CV_MAT_TYPE(_samples->type) != CV_32FC1 ||
        _samples->cols != var_count )
        CV_Error( CV_StsBadArg, "Input samples must be floating-point matrix (<num_samples>x<var_count>)" );
wester committed
371

wester committed
372 373 374 375 376
    if( _results && (!CV_IS_MAT(_results) ||
        (_results->cols != 1 && _results->rows != 1) ||
        _results->cols + _results->rows - 1 != _samples->rows) )
        CV_Error( CV_StsBadArg,
        "The results must be 1d vector containing as much elements as the number of samples" );
wester committed
377

wester committed
378 379 380 381
    if( _results && CV_MAT_TYPE(_results->type) != CV_32FC1 &&
        (CV_MAT_TYPE(_results->type) != CV_32SC1 || regression))
        CV_Error( CV_StsUnsupportedFormat,
        "The results must be floating-point or integer (in case of classification) vector" );
wester committed
382

wester committed
383 384
    if( k < 1 || k > max_k )
        CV_Error( CV_StsOutOfRange, "k must be within 1..max_k range" );
wester committed
385

wester committed
386
    if( _neighbor_responses )
wester committed
387
    {
wester committed
388 389 390 391
        if( !CV_IS_MAT(_neighbor_responses) || CV_MAT_TYPE(_neighbor_responses->type) != CV_32FC1 ||
            _neighbor_responses->rows != _samples->rows || _neighbor_responses->cols != k )
            CV_Error( CV_StsBadArg,
            "The neighbor responses (if present) must be floating-point matrix of <num_samples> x <k> size" );
wester committed
392
    }
wester committed
393 394

    if( _dist )
wester committed
395
    {
wester committed
396 397 398 399
        if( !CV_IS_MAT(_dist) || CV_MAT_TYPE(_dist->type) != CV_32FC1 ||
            _dist->rows != _samples->rows || _dist->cols != k )
            CV_Error( CV_StsBadArg,
            "The distances from the neighbors (if present) must be floating-point matrix of <num_samples> x <k> size" );
wester committed
400 401
    }

wester committed
402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
    int count = _samples->rows;
    int count_scale = k*2;
    int blk_count0 = MIN( count, max_blk_count );
    int buf_sz = MIN( blk_count0 * count_scale, max_buf_sz );
    blk_count0 = MAX( buf_sz/count_scale, 1 );
    blk_count0 += blk_count0 % 2;
    blk_count0 = MIN( blk_count0, count );
    buf_sz = blk_count0 * count_scale + k;
    int k1 = get_sample_count();
    k1 = MIN( k1, k );

    cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
                                             _results, _neighbor_responses, _dist, &result)
    );

    return result;
}
wester committed
419 420


wester committed
421
using namespace cv;
wester committed
422

wester committed
423 424 425 426 427 428
CvKNearest::CvKNearest( const Mat& _train_data, const Mat& _responses,
                       const Mat& _sample_idx, bool _is_regression, int _max_k )
{
    samples = 0;
    train(_train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}
wester committed
429

wester committed
430 431 432 433 434
bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
                        const Mat& _sample_idx, bool _is_regression,
                        int _max_k, bool _update_base )
{
    CvMat tdata = _train_data, responses = _responses, sidx = _sample_idx;
wester committed
435

wester committed
436 437 438 439 440 441 442 443 444 445 446
    return train(&tdata, &responses, sidx.data.ptr ? &sidx : 0, _is_regression, _max_k, _update_base );
}


float CvKNearest::find_nearest( const Mat& _samples, int k, Mat* _results,
                                const float** _neighbors, Mat* _neighbor_responses,
                                Mat* _dist ) const
{
    CvMat s = _samples, results, *presults = 0, nresponses, *pnresponses = 0, dist, *pdist = 0;

    if( _results )
wester committed
447
    {
wester committed
448 449 450 451 452 453
        if(!(_results->data && (_results->type() == CV_32F ||
            (_results->type() == CV_32S && regression)) &&
             (_results->cols == 1 || _results->rows == 1) &&
             _results->cols + _results->rows - 1 == _samples.rows) )
            _results->create(_samples.rows, 1, CV_32F);
        presults = &(results = *_results);
wester committed
454 455
    }

wester committed
456
    if( _neighbor_responses )
wester committed
457
    {
wester committed
458 459 460 461
        if(!(_neighbor_responses->data && _neighbor_responses->type() == CV_32F &&
             _neighbor_responses->cols == k && _neighbor_responses->rows == _samples.rows) )
            _neighbor_responses->create(_samples.rows, k, CV_32F);
        pnresponses = &(nresponses = *_neighbor_responses);
wester committed
462 463
    }

wester committed
464
    if( _dist )
wester committed
465
    {
wester committed
466 467 468 469
        if(!(_dist->data && _dist->type() == CV_32F &&
             _dist->cols == k && _dist->rows == _samples.rows) )
            _dist->create(_samples.rows, k, CV_32F);
        pdist = &(dist = *_dist);
wester committed
470 471
    }

wester committed
472
    return find_nearest(&s, k, presults, _neighbors, pnresponses, pdist );
wester committed
473 474
}

wester committed
475 476 477 478 479

float CvKNearest::find_nearest( const cv::Mat& _samples, int k, CV_OUT cv::Mat& results,
                                CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const
{
    return find_nearest(_samples, k, &results, 0, &neighborResponses, &dists);
wester committed
480 481 482
}

/* End of file */