imgproc_calcHarris.cl 8.12 KB
Newer Older
wester committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////

wester committed
50 51 52 53 54 55 56
#if defined (DOUBLE_SUPPORT) && defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define FPTYPE double
#else
#define FPTYPE float
#endif

wester committed
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
#ifdef BORDER_CONSTANT
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(x, maxV) \
    { \
        x = max(min(x, maxV - 1), 0); \
    }
#elif defined BORDER_WRAP
#define EXTRAPOLATE(x, maxV) \
    { \
        if (x < 0) \
            x -= ((x - maxV + 1) / maxV) * maxV; \
        if (x >= maxV) \
            x %= maxV; \
    }
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT101)
#define EXTRAPOLATE_(x, maxV, delta) \
    { \
        if (maxV == 1) \
            x = 0; \
        else \
            do \
            { \
                if ( x < 0 ) \
                    x = -x - 1 + delta; \
                else \
                    x = maxV - 1 - (x - maxV) - delta; \
            } \
            while (x >= maxV || x < 0); \
    }
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
#else
#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
#endif
#else
#error No extrapolation method
#endif

#define THREADS 256

///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////calcHarris////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////

wester committed
101 102 103 104
__kernel void calcHarris(__global const float *Dx, __global const float *Dy, __global float *dst,
                         int dx_offset, int dx_whole_rows, int dx_whole_cols, int dx_step,
                         int dy_offset, int dy_whole_rows, int dy_whole_cols, int dy_step,
                         int dst_offset, int dst_rows, int dst_cols, int dst_step, float k)
wester committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
{
    int col = get_local_id(0);
    int gX = get_group_id(0);
    int gY = get_group_id(1);
    int gly = get_global_id(1);

    int dx_x_off = (dx_offset % dx_step) >> 2;
    int dx_y_off = dx_offset / dx_step;
    int dy_x_off = (dy_offset % dy_step) >> 2;
    int dy_y_off = dy_offset / dy_step;
    int dst_x_off = (dst_offset % dst_step) >> 2;
    int dst_y_off = dst_offset / dst_step;

    int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off;
    int dx_startY = (gY << 1) - anY + dx_y_off;
    int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off;
    int dy_startY = (gY << 1) - anY + dy_y_off;
    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
    int dst_startY = (gY << 1) + dst_y_off;

wester committed
125 126
    float dx_data[ksY+1],dy_data[ksY+1], data[3][ksY+1];
    __local FPTYPE temp[6][THREADS];
wester committed
127 128 129 130 131

#ifdef BORDER_CONSTANT
    for (int i=0; i < ksY+1; i++)
    {
        bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
wester committed
132
        int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
wester committed
133
        float dx_s = dx_con ? Dx[indexDx] : 0.0f;
wester committed
134
        dx_data[i] = dx_s;
wester committed
135 136

        bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
wester committed
137
        int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
wester committed
138
        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
wester committed
139
        dy_data[i] = dy_s;
wester committed
140

wester committed
141 142 143
        data[0][i] = dx_data[i] * dx_data[i];
        data[1][i] = dx_data[i] * dy_data[i];
        data[2][i] = dy_data[i] * dy_data[i];
wester committed
144 145 146 147 148 149 150 151
    }
#else
    int clamped_col = min(2*dst_cols, col);
    for (int i=0; i < ksY+1; i++)
    {
        int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col;
        EXTRAPOLATE(dx_selected_row, dx_whole_rows)
        EXTRAPOLATE(dx_selected_col, dx_whole_cols)
wester committed
152
        dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
wester committed
153 154 155 156

        int dy_selected_row = dy_startY+i, dy_selected_col = dy_startX+clamped_col;
        EXTRAPOLATE(dy_selected_row, dy_whole_rows)
        EXTRAPOLATE(dy_selected_col, dy_whole_cols)
wester committed
157
        dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
wester committed
158

wester committed
159 160 161
        data[0][i] = dx_data[i] * dx_data[i];
        data[1][i] = dx_data[i] * dy_data[i];
        data[2][i] = dy_data[i] * dy_data[i];
wester committed
162 163
    }
#endif
wester committed
164
    FPTYPE sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
wester committed
165 166 167 168 169 170 171
    for (int i=1; i < ksY; i++)
    {
        sum0 += data[0][i];
        sum1 += data[1][i];
        sum2 += data[2][i];
    }

wester committed
172 173
    FPTYPE sum01 = sum0 + data[0][0];
    FPTYPE sum02 = sum0 + data[0][ksY];
wester committed
174 175
    temp[0][col] = sum01;
    temp[1][col] = sum02;
wester committed
176 177
    FPTYPE sum11 = sum1 + data[1][0];
    FPTYPE sum12 = sum1 + data[1][ksY];
wester committed
178 179
    temp[2][col] = sum11;
    temp[3][col] = sum12;
wester committed
180 181
    FPTYPE sum21 = sum2 + data[2][0];
    FPTYPE sum22 = sum2 + data[2][ksY];
wester committed
182 183 184 185
    temp[4][col] = sum21;
    temp[5][col] = sum22;
    barrier(CLK_LOCAL_MEM_FENCE);

wester committed
186
    if (col < (THREADS- (ksX - 1)))
wester committed
187 188 189 190
    {
        col += anX;
        int posX = dst_startX - dst_x_off + col - anX;
        int posY = (gly << 1);
wester committed
191
        int till = (ksX + 1)%2;
wester committed
192 193 194
        float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
        for (int k=0; k<6; k++)
        {
wester committed
195
            FPTYPE temp_sum = 0;
wester committed
196
            for (int i=-anX; i<=anX - till; i++)
wester committed
197
            {
wester committed
198
                temp_sum += temp[k][col+i];
wester committed
199
            }
wester committed
200 201 202 203 204
            tmp_sum[k] = temp_sum;
        }

        if (posX < dst_cols && (posY) < dst_rows)
        {
wester committed
205
            dst[(dst_startY+0) * (dst_step>>2)+ dst_startX + col - anX] =
wester committed
206 207 208 209
                    tmp_sum[0] * tmp_sum[4] - tmp_sum[2] * tmp_sum[2] - k * (tmp_sum[0] + tmp_sum[4]) * (tmp_sum[0] + tmp_sum[4]);
        }
        if (posX < dst_cols && (posY + 1) < dst_rows)
        {
wester committed
210
            dst[(dst_startY+1) * (dst_step>>2)+ dst_startX + col - anX] =
wester committed
211 212 213 214
                    tmp_sum[1] * tmp_sum[5] - tmp_sum[3] * tmp_sum[3] - k * (tmp_sum[1] + tmp_sum[5]) * (tmp_sum[1] + tmp_sum[5]);
        }
    }
}