/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
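
// A minimal usage sketch (illustrative only; the training/test matrices
// are assumed to be prepared by the caller):
//
//     int sz[] = { 2, 5, 1 };  // 2 inputs, 5 hidden neurons, 1 output
//     CvMat layer_sizes = cvMat( 1, 3, CV_32SC1, sz );
//     CvANN_MLP mlp( &layer_sizes, CvANN_MLP::SIGMOID_SYM, 0, 0 );
//     mlp.train( train_inputs, train_outputs, 0, 0, CvANN_MLP_TrainParams() );
//     mlp.predict( test_inputs, test_outputs );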

CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
    term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
    train_method = RPROP;
    bp_dw_scale = bp_moment_scale = 0.1;
    rp_dw0 = 0.1; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
}


CvANN_MLP_TrainParams::CvANN_MLP_TrainParams( CvTermCriteria _term_crit,
                                              int _train_method,
                                              double _param1, double _param2 )
{
    term_crit = _term_crit;
    train_method = _train_method;
    bp_dw_scale = bp_moment_scale = 0.1;
    rp_dw0 = 1.; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;

    if( train_method == RPROP )
    {
        rp_dw0 = _param1;
        if( rp_dw0 < FLT_EPSILON )
            rp_dw0 = 1.;
        rp_dw_min = _param2;
        rp_dw_min = MAX( rp_dw_min, 0 );
    }
    else if( train_method == BACKPROP )
    {
        bp_dw_scale = _param1;
        if( bp_dw_scale <= 0 )
            bp_dw_scale = 0.1;
        bp_dw_scale = MAX( bp_dw_scale, 1e-3 );
        bp_dw_scale = MIN( bp_dw_scale, 1 );
        bp_moment_scale = _param2;
        if( bp_moment_scale < 0 )
            bp_moment_scale = 0.1;
        bp_moment_scale = MIN( bp_moment_scale, 1 );
    }
    else
        train_method = RPROP;
}


CvANN_MLP_TrainParams::~CvANN_MLP_TrainParams()
{
}


CvANN_MLP::CvANN_MLP()
{
    layer_sizes = wbuf = 0;
    min_val = max_val = min_val1 = max_val1 = 0.;
    weights = 0;
    rng = &cv::theRNG();
    default_model_name = "my_nn";
    clear();
}


CvANN_MLP::CvANN_MLP( const CvMat* _layer_sizes,
                      int _activ_func,
                      double _f_param1, double _f_param2 )
{
    layer_sizes = wbuf = 0;
    min_val = max_val = min_val1 = max_val1 = 0.;
    weights = 0;
    rng = &cv::theRNG();
    default_model_name = "my_nn";
    create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
}


CvANN_MLP::~CvANN_MLP()
{
    clear();
}


void CvANN_MLP::clear()
{
    cvReleaseMat( &layer_sizes );
    cvReleaseMat( &wbuf );
    cvFree( &weights );
    activ_func = SIGMOID_SYM;
    f_param1 = f_param2 = 1;
    max_buf_sz = 1 << 12;
}


void CvANN_MLP::set_activ_func( int _activ_func, double _f_param1, double _f_param2 )
{
    CV_FUNCNAME( "CvANN_MLP::set_activ_func" );

    __BEGIN__;

    if( _activ_func < 0 || _activ_func > GAUSSIAN )
        CV_ERROR( CV_StsOutOfRange, "Unknown activation function" );

    activ_func = _activ_func;
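    // supported activation functions (alpha = f_param1, beta = f_param2):
    //   IDENTITY:    f(x) = x
    //   SIGMOID_SYM: f(x) = beta*(1 - exp(-alpha*x))/(1 + exp(-alpha*x)),
    //                with defaults alpha = 2/3, beta = 1.7159 (the classical
    //                recommendation for symmetric sigmoids)
    //   GAUSSIAN:    f(x) = beta*exp(-alpha^2*x^2)
    // min_val/max_val below bound the usable output range of the chosen
    // function; they are used when scaling the training outputs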

    switch( activ_func )
    {
    case SIGMOID_SYM:
        max_val = 0.95; min_val = -max_val;
        max_val1 = 0.98; min_val1 = -max_val1;
        if( fabs(_f_param1) < FLT_EPSILON )
            _f_param1 = 2./3;
        if( fabs(_f_param2) < FLT_EPSILON )
            _f_param2 = 1.7159;
        break;
    case GAUSSIAN:
        max_val = 1.; min_val = 0.05;
        max_val1 = 1.; min_val1 = 0.02;
        if( fabs(_f_param1) < FLT_EPSILON )
            _f_param1 = 1.;
        if( fabs(_f_param2) < FLT_EPSILON )
            _f_param2 = 1.;
        break;
    default:
        min_val = max_val = min_val1 = max_val1 = 0.;
        _f_param1 = 1.;
        _f_param2 = 0.;
    }

    f_param1 = _f_param1;
    f_param2 = _f_param2;

    __END__;
}


void CvANN_MLP::init_weights()
{
    int i, j, k;

    for( i = 1; i < layer_sizes->cols; i++ )
    {
        int n1 = layer_sizes->data.i[i-1];
        int n2 = layer_sizes->data.i[i];
        double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
        double* w = weights[i];

        // initialize weights using Nguyen-Widrow algorithm
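        // (the idea, sketched: draw weights uniformly from [-1,1], rescale
        // each hidden neuron's weight vector to unit L1 norm, then spread
        // the biases over [-G, G] with G = 0.7*pow(n1, 1./(n2-1)), so the
        // neurons' active regions cover the input range instead of
        // overlapping near the origin; the output layer stays unnormalized)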
        for( j = 0; j < n2; j++ )
        {
            double s = 0;
            for( k = 0; k <= n1; k++ )
            {
                val = rng->uniform(0., 1.)*2-1.;
                w[k*n2 + j] = val;
                s += fabs(val);
            }

            if( i < layer_sizes->cols - 1 )
            {
                s = 1./(s - fabs(val));
                for( k = 0; k <= n1; k++ )
                    w[k*n2 + j] *= s;
                w[n1*n2 + j] *= G*(-1+j*2./n2);
            }
        }
    }
}


void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func,
                        double _f_param1, double _f_param2 )
{
    CV_FUNCNAME( "CvANN_MLP::create" );

    __BEGIN__;

    int i, l_step, l_count, buf_sz = 0;
    int *l_src, *l_dst;

    clear();

    if( !CV_IS_MAT(_layer_sizes) ||
        (_layer_sizes->cols != 1 && _layer_sizes->rows != 1) ||
        CV_MAT_TYPE(_layer_sizes->type) != CV_32SC1 )
        CV_ERROR( CV_StsBadArg,
        "The array of layer neuron counters must be an integer vector" );

    CV_CALL( set_activ_func( _activ_func, _f_param1, _f_param2 ));

    l_count = _layer_sizes->rows + _layer_sizes->cols - 1;
    l_src = _layer_sizes->data.i;
    l_step = CV_IS_MAT_CONT(_layer_sizes->type) ? 1 :
                _layer_sizes->step / sizeof(l_src[0]);
    CV_CALL( layer_sizes = cvCreateMat( 1, l_count, CV_32SC1 ));
    l_dst = layer_sizes->data.i;

    max_count = 0;
    for( i = 0; i < l_count; i++ )
    {
        int n = l_src[i*l_step];
        if( n < 1 + (0 < i && i < l_count-1))
            CV_ERROR( CV_StsOutOfRange,
            "there should be at least one input and one output "
            "and every hidden layer must have more than 1 neuron" );
        l_dst[i] = n;
        max_count = MAX( max_count, n );
        if( i > 0 )
            buf_sz += (l_dst[i-1]+1)*n;
    }

    buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2;

    CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F ));
    CV_CALL( weights = (double**)cvAlloc( (l_count+2)*sizeof(weights[0]) ));
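    // layout of the flat weight buffer wbuf:
    //   weights[0]            - input scale, a (gain, shift) pair per input
    //   weights[1..l_count-1] - weight matrix of each layer, of size
    //                           (n_prev+1) x n, the extra row holding biases
    //   weights[l_count]      - output scale, 2 doubles per output
    //   weights[l_count+1]    - inverse output scale, 2 doubles per output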

    weights[0] = wbuf->data.db;
    weights[1] = weights[0] + l_dst[0]*2;
    for( i = 1; i < l_count; i++ )
        weights[i+1] = weights[i] + (l_dst[i-1] + 1)*l_dst[i];
    weights[l_count+1] = weights[l_count] + l_dst[l_count-1]*2;

    __END__;
}


float CvANN_MLP::predict( const CvMat* _inputs, CvMat* _outputs ) const
{
    int i, j, n, dn = 0, l_count, dn0, buf_sz, min_buf_sz;

    if( !layer_sizes )
        CV_Error( CV_StsError, "The network has not been initialized" );

    if( !CV_IS_MAT(_inputs) || !CV_IS_MAT(_outputs) ||
        !CV_ARE_TYPES_EQ(_inputs,_outputs) ||
        (CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_inputs->type) != CV_64FC1) ||
        _inputs->rows != _outputs->rows )
        CV_Error( CV_StsBadArg, "Both input and output must be floating-point matrices "
                                "of the same type and have the same number of rows" );

    if( _inputs->cols != layer_sizes->data.i[0] )
        CV_Error( CV_StsBadSize, "input matrix must have the same number of columns as "
                                 "the number of neurons in the input layer" );

    if( _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
        CV_Error( CV_StsBadSize, "output matrix must have the same number of columns as "
                                 "the number of neurons in the output layer" );
    n = dn0 = _inputs->rows;
    min_buf_sz = 2*max_count;
    buf_sz = n*min_buf_sz;

    if( buf_sz > max_buf_sz )
    {
        dn0 = max_buf_sz/min_buf_sz;
        dn0 = MAX( dn0, 1 );
        buf_sz = dn0*min_buf_sz;
    }

    cv::AutoBuffer<double> buf(buf_sz);
    l_count = layer_sizes->cols;
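    // process the samples in chunks of at most dn0 rows so the scratch
    // buffer stays within max_buf_sz; the two halves of buf alternate
    // between holding a layer's input and its output on each GEMM step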

    for( i = 0; i < n; i += dn )
    {
        CvMat hdr[2], _w, *layer_in = &hdr[0], *layer_out = &hdr[1], *temp;
        dn = MIN( dn0, n - i );

        cvGetRows( _inputs, layer_in, i, i + dn );
        cvInitMatHeader( layer_out, dn, layer_in->cols, CV_64F, &buf[0] );

        scale_input( layer_in, layer_out );
        CV_SWAP( layer_in, layer_out, temp );

        for( j = 1; j < l_count; j++ )
        {
            double* data = buf + (j&1 ? max_count*dn0 : 0);
            int cols = layer_sizes->data.i[j];

            cvInitMatHeader( layer_out, dn, cols, CV_64F, data );
            cvInitMatHeader( &_w, layer_in->cols, layer_out->cols, CV_64F, weights[j] );
            cvGEMM( layer_in, &_w, 1, 0, 0, layer_out );
            calc_activ_func( layer_out, _w.data.db + _w.rows*_w.cols );

            CV_SWAP( layer_in, layer_out, temp );
        }

        cvGetRows( _outputs, layer_out, i, i + dn );
        scale_output( layer_in, layer_out );
    }

    return 0.f;
}


void CvANN_MLP::scale_input( const CvMat* _src, CvMat* _dst ) const
{
    int i, j, cols = _src->cols;
    double* dst = _dst->data.db;
    const double* w = weights[0];
    int step = _src->step;

    if( CV_MAT_TYPE( _src->type ) == CV_32F )
    {
        const float* src = _src->data.fl;
        step /= sizeof(src[0]);

        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
    else
    {
        const double* src = _src->data.db;
        step /= sizeof(src[0]);

        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
}


void CvANN_MLP::scale_output( const CvMat* _src, CvMat* _dst ) const
{
    int i, j, cols = _src->cols;
    const double* src = _src->data.db;
    const double* w = weights[layer_sizes->cols];
    int step = _dst->step;

    if( CV_MAT_TYPE( _dst->type ) == CV_32F )
    {
        float* dst = _dst->data.fl;
        step /= sizeof(dst[0]);

        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
            for( j = 0; j < cols; j++ )
                dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
    }
    else
    {
        double* dst = _dst->data.db;
        step /= sizeof(dst[0]);

        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
}


void CvANN_MLP::calc_activ_func( CvMat* sums, const double* bias ) const
{
    int i, j, n = sums->rows, cols = sums->cols;
    double* data = sums->data.db;
    double scale = 0, scale2 = f_param2;

    switch( activ_func )
    {
    case IDENTITY:
        scale = 1.;
        break;
    case SIGMOID_SYM:
        scale = -f_param1;
        break;
    case GAUSSIAN:
        scale = -f_param1*f_param1;
        break;
    default:
        ;
    }

    assert( CV_IS_MAT_CONT(sums->type) );

    if( activ_func != GAUSSIAN )
    {
        for( i = 0; i < n; i++, data += cols )
            for( j = 0; j < cols; j++ )
                data[j] = (data[j] + bias[j])*scale;

        if( activ_func == IDENTITY )
            return;
    }
    else
    {
        for( i = 0; i < n; i++, data += cols )
            for( j = 0; j < cols; j++ )
            {
                double t = data[j] + bias[j];
                data[j] = t*t*scale;
            }
    }

    cvExp( sums, sums );

    n *= cols;
    data -= n;

    switch( activ_func )
    {
    case SIGMOID_SYM:
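        // after cvExp above, data[i] holds e = exp(-f_param1*(x + bias));
        // the unrolled loop computes f = f_param2*(1 - e)/(1 + e) for four
        // elements at a time, sharing a single division among the four
        // lanes (note 2 - (1+e) == 1 - e); the scalar loop below handles
        // the remaining tail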
        for( i = 0; i <= n - 4; i += 4 )
        {
            double x0 = 1.+data[i], x1 = 1.+data[i+1], x2 = 1.+data[i+2], x3 = 1.+data[i+3];
            double a = x0*x1, b = x2*x3, d = scale2/(a*b), t0, t1;
            a *= d; b *= d;
            t0 = (2 - x0)*b*x1; t1 = (2 - x1)*b*x0;
            data[i] = t0; data[i+1] = t1;
            t0 = (2 - x2)*a*x3; t1 = (2 - x3)*a*x2;
            data[i+2] = t0; data[i+3] = t1;
        }

        for( ; i < n; i++ )
        {
            double t = scale2*(1. - data[i])/(1. + data[i]);
            data[i] = t;
        }
        break;

    case GAUSSIAN:
        for( i = 0; i < n; i++ )
            data[i] = scale2*data[i];
        break;

    default:
        ;
    }
}


void CvANN_MLP::calc_activ_func_deriv( CvMat* _xf, CvMat* _df,
                                       const double* bias ) const
{
    int i, j, n = _xf->rows, cols = _xf->cols;
    double* xf = _xf->data.db;
    double* df = _df->data.db;
    double scale, scale2 = f_param2;
    assert( CV_IS_MAT_CONT( _xf->type & _df->type ) );

    if( activ_func == IDENTITY )
    {
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
            {
                xf[j] += bias[j];
                df[j] = 1;
            }
        return;
    }
    else if( activ_func == GAUSSIAN )
    {
        scale = -f_param1*f_param1;
        scale2 *= scale;
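        // f(t) = f_param2*exp(-(f_param1*t)^2), so
        // f'(t) = -2*f_param1^2*f_param2*t*exp(-(f_param1*t)^2);
        // built below as df = 2*t*scale2, xf = exp(t*t*scale), df *= xf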
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
            {
                double t = xf[j] + bias[j];
                df[j] = t*2*scale2;
                xf[j] = t*t*scale;
            }
        cvExp( _xf, _xf );

        n *= cols;
        xf -= n; df -= n;

        for( i = 0; i < n; i++ )
            df[i] *= xf[i];
    }
    else
    {
        scale = f_param1;
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
            {
                xf[j] = (xf[j] + bias[j])*scale;
                df[j] = -fabs(xf[j]);
            }

        cvExp( _df, _df );

        n *= cols;
        xf -= n; df -= n;

        // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
        // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
        // 2*a*exp(-ax)/(1+exp(-ax))^2
        scale *= 2*f_param2;
        for( i = 0; i < n; i++ )
        {
            int s0 = xf[i] > 0 ? 1 : -1;
            double t0 = 1./(1. + df[i]);
            double t1 = scale*df[i]*t0*t0;
            t0 *= scale2*(1. - df[i])*s0;
            df[i] = t1;
            xf[i] = t0;
        }
    }
}


void CvANN_MLP::calc_input_scale( const CvVectors* vecs, int flags )
{
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_INPUT_SCALE) != 0;
    double* scale = weights[0];
    int count = vecs->count;

    if( reset_weights )
    {
        int i, j, vcount = layer_sizes->data.i[0];
        int type = vecs->type;
        double a = no_scale ? 1. : 0.;

        for( j = 0; j < vcount; j++ )
            scale[2*j] = a, scale[j*2+1] = 0.;

        if( no_scale )
            return;

        for( i = 0; i < count; i++ )
        {
            const float* f = vecs->data.fl[i];
            const double* d = vecs->data.db[i];
            for( j = 0; j < vcount; j++ )
            {
                double t = type == CV_32F ? (double)f[j] : d[j];
                scale[j*2] += t;
                scale[j*2+1] += t*t;
            }
        }
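        // the pass above accumulated per-component sums and sums of
        // squares; convert them to the affine transform x' = (x - m)/sigma,
        // stored as scale[2j] = 1/sigma and scale[2j+1] = -m/sigma
        // (components with near-zero variance keep unit gain)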

        for( j = 0; j < vcount; j++ )
        {
            double s = scale[j*2], s2 = scale[j*2+1];
            double m = s/count, sigma2 = s2/count - m*m;
            scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
            scale[j*2+1] = -m*scale[j*2];
        }
    }
}


void CvANN_MLP::calc_output_scale( const CvVectors* vecs, int flags )
{
    int i, j, vcount = layer_sizes->data.i[layer_sizes->cols-1];
    int type = vecs->type;
    double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
    int l_count = layer_sizes->cols;
    double* scale = weights[l_count];
    double* inv_scale = weights[l_count+1];
    int count = vecs->count;
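    // when (re)setting the weights, the loops below first gather the
    // per-component min/max [mj, Mj] of the training outputs and then map
    // it onto the activation output range [m, M]: inv_scale transforms the
    // training targets into the network's range, while scale transforms
    // network outputs back into the target range; in update mode they only
    // verify that new targets stay within the previously learned range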

    CV_FUNCNAME( "CvANN_MLP::calc_output_scale" );

    __BEGIN__;

    if( reset_weights )
    {
        double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

        for( j = 0; j < vcount; j++ )
        {
            scale[2*j] = inv_scale[2*j] = a0;
            scale[j*2+1] = inv_scale[2*j+1] = b0;
        }

        if( no_scale )
            EXIT;
    }

    for( i = 0; i < count; i++ )
    {
        const float* f = vecs->data.fl[i];
        const double* d = vecs->data.db[i];

        for( j = 0; j < vcount; j++ )
        {
            double t = type == CV_32F ? (double)f[j] : d[j];

            if( reset_weights )
            {
                double mj = scale[j*2], Mj = scale[j*2+1];
                if( mj > t ) mj = t;
                if( Mj < t ) Mj = t;

                scale[j*2] = mj;
                scale[j*2+1] = Mj;
            }
            else
            {
                t = t*inv_scale[j*2] + inv_scale[2*j+1];
                if( t < m1 || t > M1 )
                    CV_ERROR( CV_StsOutOfRange,
                    "Some of new output training vector components run exceed the original range too much" );
            }
        }
    }

    if( reset_weights )
        for( j = 0; j < vcount; j++ )
        {
            // map mj..Mj to m..M
            double mj = scale[j*2], Mj = scale[j*2+1];
            double a, b;
            double delta = Mj - mj;
            if( delta < DBL_EPSILON )
                a = 1, b = (M + m - Mj - mj)*0.5;
            else
                a = (M - m)/delta, b = m - mj*a;
            inv_scale[j*2] = a; inv_scale[j*2+1] = b;
            a = 1./a; b = -b*a;
            scale[j*2] = a; scale[j*2+1] = b;
        }

    __END__;
}


bool CvANN_MLP::prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
            const CvMat* _sample_weights, const CvMat* _sample_idx,
            CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags )
{
    bool ok = false;
    CvMat* sample_idx = 0;
    CvVectors ivecs, ovecs;
    double* sw = 0;
    int count = 0;

    CV_FUNCNAME( "CvANN_MLP::prepare_to_train" );

    ivecs.data.ptr = ovecs.data.ptr = 0;
    assert( _ivecs && _ovecs );

    __BEGIN__;

    const int* sidx = 0;
    int i, sw_type = 0, sw_count = 0;
    int sw_step = 0;
    double sw_sum = 0;

    if( !layer_sizes )
        CV_ERROR( CV_StsError,
        "The network has not been created. Use method create or the appropriate constructor" );

    if( !CV_IS_MAT(_inputs) || (CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_inputs->type) != CV_64FC1) || _inputs->cols != layer_sizes->data.i[0] )
        CV_ERROR( CV_StsBadArg,
        "input training data should be a floating-point matrix with"
        "the number of rows equal to the number of training samples and "
        "the number of columns equal to the size of 0-th (input) layer" );

    if( !CV_IS_MAT(_outputs) || (CV_MAT_TYPE(_outputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_outputs->type) != CV_64FC1) ||
        _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
        CV_ERROR( CV_StsBadArg,
        "output training data should be a floating-point matrix with"
        "the number of rows equal to the number of training samples and "
        "the number of columns equal to the size of last (output) layer" );

    if( _inputs->rows != _outputs->rows )
        CV_ERROR( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );

    if( _sample_idx )
    {
        CV_CALL( sample_idx = cvPreprocessIndexArray( _sample_idx, _inputs->rows ));
        sidx = sample_idx->data.i;
        count = sample_idx->cols + sample_idx->rows - 1;
    }
    else
        count = _inputs->rows;

    if( _sample_weights )
    {
        if( !CV_IS_MAT(_sample_weights) )
            CV_ERROR( CV_StsBadArg, "sample_weights (if passed) must be a valid matrix" );

        sw_type = CV_MAT_TYPE(_sample_weights->type);
        sw_count = _sample_weights->cols + _sample_weights->rows - 1;

        if( (sw_type != CV_32FC1 && sw_type != CV_64FC1) ||
            (_sample_weights->cols != 1 && _sample_weights->rows != 1) ||
            (sw_count != count && sw_count != _inputs->rows) )
            CV_ERROR( CV_StsBadArg,
            "sample_weights must be 1d floating-point vector containing weights "
            "of all or selected training samples" );

        sw_step = CV_IS_MAT_CONT(_sample_weights->type) ? 1 :
            _sample_weights->step/CV_ELEM_SIZE(sw_type);

        CV_CALL( sw = (double*)cvAlloc( count*sizeof(sw[0]) ));
    }

    CV_CALL( ivecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ivecs.data.ptr[0]) ));
    CV_CALL( ovecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ovecs.data.ptr[0]) ));

    ivecs.type = CV_MAT_TYPE(_inputs->type);
    ovecs.type = CV_MAT_TYPE(_outputs->type);
    ivecs.count = ovecs.count = count;

    for( i = 0; i < count; i++ )
    {
        int idx = sidx ? sidx[i] : i;
        ivecs.data.ptr[i] = _inputs->data.ptr + idx*_inputs->step;
        ovecs.data.ptr[i] = _outputs->data.ptr + idx*_outputs->step;
        if( sw )
        {
            int si = sw_count == count ? i : idx;
            double w = sw_type == CV_32FC1 ?
                (double)_sample_weights->data.fl[si*sw_step] :
                _sample_weights->data.db[si*sw_step];
            sw[i] = w;
            if( w < 0 )
                CV_ERROR( CV_StsOutOfRange, "some of sample weights are negative" );
            sw_sum += w;
        }
    }

    // normalize weights
    if( sw )
    {
        sw_sum = sw_sum > DBL_EPSILON ? 1./sw_sum : 0;
        for( i = 0; i < count; i++ )
            sw[i] *= sw_sum;
    }

    calc_input_scale( &ivecs, _flags );
    CV_CALL( calc_output_scale( &ovecs, _flags ));

    ok = true;

    __END__;

    if( !ok )
    {
        cvFree( &ivecs.data.ptr );
        cvFree( &ovecs.data.ptr );
        cvFree( &sw );
    }

    cvReleaseMat( &sample_idx );
    *_ivecs = ivecs;
    *_ovecs = ovecs;
    *_sw = sw;

    return ok;
}


int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs,
                      const CvMat* _sample_weights, const CvMat* _sample_idx,
                      CvANN_MLP_TrainParams _params, int flags )
{
    const int MAX_ITER = 1000;
    const double DEFAULT_EPSILON = FLT_EPSILON;

    double* sw = 0;
    CvVectors x0, u;
    int iter = -1;

    x0.data.ptr = u.data.ptr = 0;

    CV_FUNCNAME( "CvANN_MLP::train" );

    __BEGIN__;

    int max_iter;
    double epsilon;

    params = _params;

    // initialize training data
    CV_CALL( prepare_to_train( _inputs, _outputs, _sample_weights,
                               _sample_idx, &x0, &u, &sw, flags ));

    // ... and link weights
    if( !(flags & UPDATE_WEIGHTS) )
        init_weights();

    max_iter = params.term_crit.type & CV_TERMCRIT_ITER ? params.term_crit.max_iter : MAX_ITER;
    max_iter = MAX( max_iter, 1 );

    epsilon = params.term_crit.type & CV_TERMCRIT_EPS ? params.term_crit.epsilon : DEFAULT_EPSILON;
    epsilon = MAX(epsilon, DBL_EPSILON);

    params.term_crit.type = CV_TERMCRIT_ITER + CV_TERMCRIT_EPS;
    params.term_crit.max_iter = max_iter;
    params.term_crit.epsilon = epsilon;

    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
    {
        CV_CALL( iter = train_backprop( x0, u, sw ));
    }
    else
    {
        CV_CALL( iter = train_rprop( x0, u, sw ));
    }

    __END__;

    cvFree( &x0.data.ptr );
    cvFree( &u.data.ptr );
    cvFree( &sw );

    return iter;
}


int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
{
    CvMat* dw = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    CvMat* _idx = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train_backprop" );

    __BEGIN__;

    int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, E = 0, epsilon;

    max_iter = params.term_crit.max_iter*count;
    epsilon = params.term_crit.epsilon*count;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i] + 1;

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dw );
    CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
    CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
    for( i = 0; i < count; i++ )
        _idx->data.i[i] = i;

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( j = 0; j < l_count; j++ )
    {
        x[j] = buf_ptr;
        df[j] = x[j] + layer_sizes->data.i[j];
        buf_ptr += (df[j] - x[j])*2;
    }

    // run back-propagation loop
    /*
        y_i = w_i*x_{i-1}
        x_i = f(y_i)
        E = 1/2*||u - x_N||^2
        grad_N = (x_N - u)*f'(y_N)
        dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
        w_i(t+1) = w_i(t) + dw_i(t)
        grad_{i-1} = w_i^t*grad_i
    */
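    // note: this is online (per-sample) gradient descent - each iteration
    // processes one sample from a reshuffled order, which is why max_iter
    // and epsilon were pre-multiplied by the sample count above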
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double* w = weights[0];
        double sweight = sw ? count*sw[idx] : 1.;
        CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;

        if( idx == 0 )
        {
            //printf("%d. E = %g\n", iter/count, E);
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i < count; i++ )
            {
                int tt;
                j = (*rng)(count);
                k = (*rng)(count);
                CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
            }
        }

        idx = _idx->data.i[idx];

        if( x0.type == CV_32F )
        {
            const float* x0data = x0.data.fl[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }
        else
        {
            const double* x0data = x0.data.db[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }

        cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
            cvGEMM( x1, &_w, 1, 0, 0, x2 );
            _df = *x2;
            _df.data.db = df[i];
            calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
            CV_SWAP( x1, x2, temp );
        }

        cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
        *grad2 = *grad1;
        grad2->data.db = buf_ptr + max_count;

        w = weights[l_count+1];

        // calculate error
        if( u.type == CV_32F )
        {
            const float* udata = u.data.fl[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E += t*t;
            }
        }
        else
        {
            const double* udata = u.data.db[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E += t*t;
            }
        }
        E *= sweight;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
            cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
            cvMul( grad1, &_df, grad1 );
            cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
            cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
            cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
            x[i-1][n1] = 1.;
            cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
            cvAdd( &_w, &_dw, &_w );
            if( i > 1 )
            {
                grad2->cols = n1;
                _w.rows = n1;
                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
            }
            CV_SWAP( grad1, grad2, temp );
        }
    }

    iter /= count;

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &buf );
    cvReleaseMat( &_idx );
    cvFree( &x );

    return iter;
}
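
// Parallel loop body used by train_rprop below: each worker runs the
// forward pass and gradient back-propagation over chunks of dcount0
// training samples, accumulating the derivatives into the shared dEdw
// matrix and the error sum E under a mutex.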

struct rprop_loop : cv::ParallelLoopBody {
  rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
     int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
     CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
  {
    point = _point;
    weights = _weights;
    count = _count;
    ivcount = _ivcount;
    x0 = _x0;
    l_count = _l_count;
    layer_sizes = _layer_sizes;
    ovcount = _ovcount;
    max_count = _max_count;
    u = _u;
    sw = _sw;
    inv_count = _inv_count;
    dEdw = _dEdw;
    dcount0 = _dcount0;
    E = _E;
    buf_sz = _buf_sz;
  }

  const CvANN_MLP* point;
  double** weights;
  int count;
  int ivcount;
  CvVectors* x0;
  int l_count;
  CvMat* layer_sizes;
  int ovcount;
  int max_count;
  CvVectors* u;
  const double* sw;
  double inv_count;
  CvMat* dEdw;
  int dcount0;
  double* E;
  int buf_sz;


  void operator()( const cv::Range& range ) const
  {
    double* buf_ptr;
    double** x = 0;
    double **df = 0;
    int total = 0;
    static cv::Mutex mutex;  // guards the shared dEdw matrix and error sum E

    for(int i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i];
    CvMat* buf;
    buf = cvCreateMat( 1, buf_sz, CV_64F );
    x = (double**)cvAlloc( total*2*sizeof(x[0]) );
    df = x + total;
    buf_ptr = buf->data.db;
    for(int i = 0; i < l_count; i++ )
    {
        x[i] = buf_ptr;
        df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
        buf_ptr += (df[i] - x[i])*2;
    }

    for(int si = range.start; si < range.end; si++ )
    {
        if (si % dcount0 != 0) continue;
        int n1, n2, k;
        double* w;
        CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1, *x2, *grad1, *grad2, *temp;
        int dcount = 0;

        dcount = MIN(count - si , dcount0 );
        w = weights[0];
        grad1 = &ghdr1; grad2 = &ghdr2;
        x1 = &hdr1; x2 = &hdr2;

        // grab and preprocess input data
        if( x0->type == CV_32F )
        {
            for(int i = 0; i < dcount; i++ )
            {
                const float* x0data = x0->data.fl[si+i];
                double* xdata = x[0]+i*ivcount;
                for(int j = 0; j < ivcount; j++ )
                    xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
            }
        }
        else
            for(int i = 0; i < dcount; i++ )
            {
                const double* x0data = x0->data.db[si+i];
                double* xdata = x[0]+i*ivcount;
                for(int j = 0; j < ivcount; j++ )
                    xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
            }
        cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for(int i = 1; i < l_count; i++ )
        {
            cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] );
            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
            cvGEMM( x1, &_w, 1, 0, 0, x2 );
            _df = *x2;
            _df.data.db = df[i];
            point->calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
            CV_SWAP( x1, x2, temp );
        }
        cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr );

        w = weights[l_count+1];
        grad2->data.db = buf_ptr + max_count*dcount;

        // calculate error
        if( u->type == CV_32F )
            for(int i = 0; i < dcount; i++ )
            {
                const float* udata = u->data.fl[si+i];
                const double* xdata = x[l_count-1] + i*ovcount;
                double* gdata = grad1->data.db + i*ovcount;
                double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                for(int j = 0; j < ovcount; j++ )
                {
                    double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                    gdata[j] = t*sweight;
                    E1 += t*t;
                }
                {
                    cv::AutoLock lock(mutex);  // E is shared between workers
                    *E += sweight*E1;
                }
            }
        else
            for(int i = 0; i < dcount; i++ )
            {
                const double* udata = u->data.db[si+i];
                const double* xdata = x[l_count-1] + i*ovcount;
                double* gdata = grad1->data.db + i*ovcount;
                double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                for(int j = 0; j < ovcount; j++ )
                {
                    double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                    gdata[j] = t*sweight;
                    E1 += t*t;
                }
                {
                    cv::AutoLock lock(mutex);  // E is shared between workers
                    *E += sweight*E1;
                }
            }

        // backward pass, update dEdw

        for(int i = l_count-1; i > 0; i-- )
        {
            n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
            cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
            cvMul( grad1, &_df, grad1 );

            {
                cv::AutoLock lock(mutex);
                cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
                cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
                cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );

                // update bias part of dEdw
                for( k = 0; k < dcount; k++ )
                {
                    double* dst = _dEdw.data.db + n1*n2;
                    const double* src = grad1->data.db + k*n2;
                    for(int j = 0; j < n2; j++ )
                        dst[j] += src[j];
                }

                if (i > 1)
                    cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
           }

           cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
           if( i > 1 )
               cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
           CV_SWAP( grad1, grad2, temp );
        }
    }
    cvFree(&x);
    cvReleaseMat( &buf );
}

};


int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
{
    const int max_buf_size = 1 << 16;
    CvMat* dw = 0;
    CvMat* dEdw = 0;
    CvMat* prev_dEdw_sign = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train" );

    __BEGIN__;

    int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, epsilon;
    double dw_plus, dw_minus, dw_min, dw_max;
    double inv_count;

    max_iter = params.term_crit.max_iter;
    epsilon = params.term_crit.epsilon;
    dw_plus = params.rp_dw_plus;
    dw_minus = params.rp_dw_minus;
    dw_min = params.rp_dw_min;
    dw_max = params.rp_dw_max;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i];

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvSet( dw, cvScalarAll(params.rp_dw0) );
    CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dEdw );
    CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 ));
    cvZero( prev_dEdw_sign );

    inv_count = 1./count;
    dcount0 = max_buf_size/(2*total);
    dcount0 = MAX( dcount0, 1 );
    dcount0 = MIN( dcount0, count );
    buf_sz = dcount0*(total + max_count)*2;

    CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F ));

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( i = 0; i < l_count; i++ )
    {
        x[i] = buf_ptr;
        df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
        buf_ptr += (df[i] - x[i])*2;
    }

    // run rprop loop
    /*
        y_i(t) = w_i(t)*x_{i-1}(t)
        x_i(t) = f(y_i(t))
        E = sum_over_all_samples(1/2*||u - x_N||^2)
        grad_N = (x_N - u)*f'(y_N)

                      MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
        dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                      dw_i{jk}(t-1) else

        if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
           dE/dw_i{jk}(t)<-0
        else
           w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
        grad_{i-1}(t) = w_i^t(t)*grad_i(t)
    */
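    // (a variant of RPROP without weight backtracking: when the derivative
    // changes sign, the step size shrinks and the stored sign is cleared,
    // so the next iteration treats that weight as a fresh direction)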
    for( iter = 0; iter < max_iter; iter++ )
    {
        int n1, n2, j, k;
        double E = 0;

        // first, iterate through all the samples and compute dEdw
        cv::parallel_for_(cv::Range(0, count),
            rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
                       ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
        );

        // now update weights
        for( i = 1; i < l_count; i++ )
        {
            n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
            for( k = 0; k <= n1; k++ )
            {
                double* wk = weights[i]+k*n2;
                size_t delta = wk - weights[0];
                double* dwk = dw->data.db + delta;
                double* dEdwk = dEdw->data.db + delta;
                char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta);

                for( j = 0; j < n2; j++ )
                {
                    double Eval = dEdwk[j];
                    double dval = dwk[j];
                    double wval = wk[j];
                    int s = CV_SIGN(Eval);
                    int ss = prevEk[j]*s;
                    if( ss > 0 )
                    {
                        dval *= dw_plus;
                        dval = MIN( dval, dw_max );
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else if( ss < 0 )
                    {
                        dval *= dw_minus;
                        dval = MAX( dval, dw_min );
                        prevEk[j] = 0;
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else
                    {
                        prevEk[j] = (char)s;
                        wk[j] = wval + dval*s;
                    }
                    dEdwk[j] = 0.;
                }
            }
        }

        //printf("%d. E = %g\n", iter, E);
        if( fabs(prev_E - E) < epsilon )
            break;
        prev_E = E;
        E = 0;
    }

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &dEdw );
    cvReleaseMat( &prev_dEdw_sign );
    cvReleaseMat( &buf );
    cvFree( &x );

    return iter;
}


void CvANN_MLP::write_params( CvFileStorage* fs ) const
{
    //CV_FUNCNAME( "CvANN_MLP::write_params" );

    __BEGIN__;
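
    // the parameters are written as a nested map; roughly, for a default
    // RPROP-trained SIGMOID_SYM network (YAML shown for illustration only):
    //   activation_function: SIGMOID_SYM
    //   f_param1: 0.6667,  f_param2: 1.7159
    //   min_val: -0.95, max_val: 0.95, min_val1: -0.98, max_val1: 0.98
    //   training_params:
    //     train_method: RPROP
    //     dw0: 0.1, dw_plus: 1.2, dw_minus: 0.5, dw_min: 1.19e-07, dw_max: 50.
    //     term_criteria: { epsilon: 0.01, iterations: 1000 }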

    const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                            activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
                            activ_func == GAUSSIAN ? "GAUSSIAN" : 0;

    if( activ_func_name )
        cvWriteString( fs, "activation_function", activ_func_name );
    else
        cvWriteInt( fs, "activation_function", activ_func );

    if( activ_func != IDENTITY )
    {
        cvWriteReal( fs, "f_param1", f_param1 );
        cvWriteReal( fs, "f_param2", f_param2 );
    }

    cvWriteReal( fs, "min_val", min_val );
    cvWriteReal( fs, "max_val", max_val );
    cvWriteReal( fs, "min_val1", min_val1 );
    cvWriteReal( fs, "max_val1", max_val1 );

    cvStartWriteStruct( fs, "training_params", CV_NODE_MAP );
    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
    {
        cvWriteString( fs, "train_method", "BACKPROP" );
        cvWriteReal( fs, "dw_scale", params.bp_dw_scale );
        cvWriteReal( fs, "moment_scale", params.bp_moment_scale );
    }
    else if( params.train_method == CvANN_MLP_TrainParams::RPROP )
    {
        cvWriteString( fs, "train_method", "RPROP" );
        cvWriteReal( fs, "dw0", params.rp_dw0 );
        cvWriteReal( fs, "dw_plus", params.rp_dw_plus );
        cvWriteReal( fs, "dw_minus", params.rp_dw_minus );
        cvWriteReal( fs, "dw_min", params.rp_dw_min );
        cvWriteReal( fs, "dw_max", params.rp_dw_max );
    }

    cvStartWriteStruct( fs, "term_criteria", CV_NODE_MAP + CV_NODE_FLOW );
    if( params.term_crit.type & CV_TERMCRIT_EPS )
        cvWriteReal( fs, "epsilon", params.term_crit.epsilon );
    if( params.term_crit.type & CV_TERMCRIT_ITER )
        cvWriteInt( fs, "iterations", params.term_crit.max_iter );
    cvEndWriteStruct( fs );

    cvEndWriteStruct( fs );

    __END__;
}


void CvANN_MLP::write( CvFileStorage* fs, const char* name ) const
{
    CV_FUNCNAME( "CvANN_MLP::write" );

    __BEGIN__;

    int i, l_count;

    if( !layer_sizes )
        CV_ERROR( CV_StsError, "The network has not been initialized" );

    l_count = layer_sizes->cols;

    cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_ANN_MLP );

    cvWrite( fs, "layer_sizes", layer_sizes );

    write_params( fs );

    cvStartWriteStruct( fs, "input_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[0], layer_sizes->data.i[0]*2, "d" );
    cvEndWriteStruct( fs );
wester committed
1437

wester committed
1438 1439 1440
    cvStartWriteStruct( fs, "output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[l_count], layer_sizes->data.i[l_count-1]*2, "d" );
    cvEndWriteStruct( fs );

    cvStartWriteStruct( fs, "inv_output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[l_count+1], layer_sizes->data.i[l_count-1]*2, "d" );
    cvEndWriteStruct( fs );

    cvStartWriteStruct( fs, "weights", CV_NODE_SEQ );
    for( i = 1; i < l_count; i++ )
    {
        cvStartWriteStruct( fs, 0, CV_NODE_SEQ + CV_NODE_FLOW );
        cvWriteRawData( fs, weights[i], (layer_sizes->data.i[i-1]+1)*layer_sizes->data.i[i], "d" );
        cvEndWriteStruct( fs );
    }

    cvEndWriteStruct( fs );

    __END__;
}


void CvANN_MLP::read_params( CvFileStorage* fs, CvFileNode* node )
{
    //CV_FUNCNAME( "CvANN_MLP::read_params" );

    __BEGIN__;

    const char* activ_func_name = cvReadStringByName( fs, node, "activation_function", 0 );
    CvFileNode* tparams_node;

    if( activ_func_name )
        activ_func = strcmp( activ_func_name, "SIGMOID_SYM" ) == 0 ? SIGMOID_SYM :
                     strcmp( activ_func_name, "IDENTITY" ) == 0 ? IDENTITY :
                     strcmp( activ_func_name, "GAUSSIAN" ) == 0 ? GAUSSIAN : 0;
    else
        activ_func = cvReadIntByName( fs, node, "activation_function" );

    f_param1 = cvReadRealByName( fs, node, "f_param1", 0 );
    f_param2 = cvReadRealByName( fs, node, "f_param2", 0 );

    set_activ_func( activ_func, f_param1, f_param2 );

    min_val = cvReadRealByName( fs, node, "min_val", 0. );
    max_val = cvReadRealByName( fs, node, "max_val", 1. );
    min_val1 = cvReadRealByName( fs, node, "min_val1", 0. );
    max_val1 = cvReadRealByName( fs, node, "max_val1", 1. );

    tparams_node = cvGetFileNodeByName( fs, node, "training_params" );
    params = CvANN_MLP_TrainParams();

    if( tparams_node )
    {
        const char* tmethod_name = cvReadStringByName( fs, tparams_node, "train_method", "" );
        CvFileNode* tcrit_node;

        if( strcmp( tmethod_name, "BACKPROP" ) == 0 )
        {
            params.train_method = CvANN_MLP_TrainParams::BACKPROP;
            params.bp_dw_scale = cvReadRealByName( fs, tparams_node, "dw_scale", 0 );
            params.bp_moment_scale = cvReadRealByName( fs, tparams_node, "moment_scale", 0 );
        }
        else if( strcmp( tmethod_name, "RPROP" ) == 0 )
        {
            params.train_method = CvANN_MLP_TrainParams::RPROP;
            params.rp_dw0 = cvReadRealByName( fs, tparams_node, "dw0", 0 );
            params.rp_dw_plus = cvReadRealByName( fs, tparams_node, "dw_plus", 0 );
            params.rp_dw_minus = cvReadRealByName( fs, tparams_node, "dw_minus", 0 );
            params.rp_dw_min = cvReadRealByName( fs, tparams_node, "dw_min", 0 );
            params.rp_dw_max = cvReadRealByName( fs, tparams_node, "dw_max", 0 );
        }

        tcrit_node = cvGetFileNodeByName( fs, tparams_node, "term_criteria" );
        if( tcrit_node )
        {
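            // Rebuild the criteria type from whichever of the two fields
            // were actually present in the file.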
            params.term_crit.epsilon = cvReadRealByName( fs, tcrit_node, "epsilon", -1 );
            params.term_crit.max_iter = cvReadIntByName( fs, tcrit_node, "iterations", -1 );
            params.term_crit.type = (params.term_crit.epsilon >= 0 ? CV_TERMCRIT_EPS : 0) +
                                   (params.term_crit.max_iter >= 0 ? CV_TERMCRIT_ITER : 0);
        }
    }

    __END__;
}


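// Loads a complete network: recreates the topology from "layer_sizes",
// restores the parameters, then reads the three scaling vectors and the
// per-layer weight matrices written by write().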
void CvANN_MLP::read( CvFileStorage* fs, CvFileNode* node )
{
    CvMat* _layer_sizes = 0;

    CV_FUNCNAME( "CvANN_MLP::read" );

    __BEGIN__;

    CvFileNode* w;
    CvSeqReader reader;
    int i, l_count;

    _layer_sizes = (CvMat*)cvReadByName( fs, node, "layer_sizes" );
    CV_CALL( create( _layer_sizes, SIGMOID_SYM, 0, 0 ));

    cvReleaseMat( &_layer_sizes );
    _layer_sizes = NULL;

    l_count = layer_sizes->cols;

    CV_CALL( read_params( fs, node ));

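    // Each stored vector is validated against the restored topology
    // before its raw doubles are read into the weight buffers.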
    w = cvGetFileNodeByName( fs, node, "input_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[0]*2 )
        CV_ERROR( CV_StsParseError, "input_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[0], "d" ));

    w = cvGetFileNodeByName( fs, node, "output_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
        CV_ERROR( CV_StsParseError, "output_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[l_count], "d" ));

    w = cvGetFileNodeByName( fs, node, "inv_output_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
        CV_ERROR( CV_StsParseError, "inv_output_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[l_count+1], "d" ));

    w = cvGetFileNodeByName( fs, node, "weights" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != l_count - 1 )
        CV_ERROR( CV_StsParseError, "weights tag is not found or is invalid" );

    cvStartReadSeq( w->data.seq, &reader );

    for( i = 1; i < l_count; i++ )
    {
        w = (CvFileNode*)reader.ptr;
        CV_CALL( cvReadRawData( fs, w, weights[i], "d" ));
        CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
    }

    __END__;
}

using namespace cv;

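// cv::Mat-based convenience wrappers around the CvMat interface above.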
CvANN_MLP::CvANN_MLP( const Mat& _layer_sizes, int _activ_func,
                      double _f_param1, double _f_param2 )
{
    layer_sizes = wbuf = 0;
    min_val = max_val = min_val1 = max_val1 = 0.;
    weights = 0;
    rng = &cv::theRNG();
    default_model_name = "my_nn";
    create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
}

void CvANN_MLP::create( const Mat& _layer_sizes, int _activ_func,
                       double _f_param1, double _f_param2 )
{
    CvMat cvlayer_sizes = _layer_sizes;
    create( &cvlayer_sizes, _activ_func, _f_param1, _f_param2 );
}

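// Empty optional matrices are forwarded as null pointers so that the
// CvMat overload applies its defaults.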
int CvANN_MLP::train( const Mat& _inputs, const Mat& _outputs,
                     const Mat& _sample_weights, const Mat& _sample_idx,
                     CvANN_MLP_TrainParams _params, int flags )
{
    CvMat inputs = _inputs, outputs = _outputs, sweights = _sample_weights, sidx = _sample_idx;
    return train(&inputs, &outputs, sweights.data.ptr ? &sweights : 0,
                 sidx.data.ptr ? &sidx : 0, _params, flags);
}

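// Allocates the output as one row per input sample and one column per
// output-layer neuron, then delegates to the CvMat version.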
float CvANN_MLP::predict( const Mat& _inputs, Mat& _outputs ) const
{
    CV_Assert(layer_sizes != 0);
    _outputs.create(_inputs.rows, layer_sizes->data.i[layer_sizes->cols-1], _inputs.type());
    CvMat inputs = _inputs, outputs = _outputs;

    return predict(&inputs, &outputs);
}

/* End of file. */