/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifdef DOUBLE_SUPPORT #ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable #elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif #if DATA_DEPTH == 0 #define BASE_TYPE uchar #elif DATA_DEPTH == 1 #error data_depth char, use uchar datatype instead #elif DATA_DEPTH == 2 #define BASE_TYPE ushort #elif DATA_DEPTH == 3 #error data_depth short, use ushort datatype instead #elif DATA_DEPTH == 4 #define BASE_TYPE int #elif DATA_DEPTH == 5 #define BASE_TYPE float #elif DATA_DEPTH == 6 #define BASE_TYPE double #else #error data_depth #endif #if DATA_CHAN == 2 #define SRC_VEC_SIZE 2 #elif DATA_CHAN == 3 #define SRC_VEC_SIZE 4 // C3 is stored as C4 #elif DATA_CHAN == 4 #define SRC_VEC_SIZE 4 #else #error data_chan #endif #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) #define uchar1 uchar #define char1 char #define ushort1 ushort #define short1 short #define int1 int #define float1 float #define double1 double #define TYPE BASE_TYPE #define SRC_TYPE CAT(BASE_TYPE, SRC_VEC_SIZE) #define DST_VEC_TYPE CAT(BASE_TYPE, VEC_SIZE) #define vstore1 vstore #define VSTORE CAT(vstore, VEC_SIZE) #define VSTORE_ALIGNED(ptr, v) *((__global DST_VEC_TYPE*)(ptr)) = (v) #define VSTORE_UNALIGNED(ptr, v) VSTORE((v), 0, (__global TYPE*)(ptr)) #ifdef DST0_ALIGNED #define VSTORE_dst0 VSTORE_ALIGNED #else #define VSTORE_dst0 VSTORE_UNALIGNED #endif #ifdef DST1_ALIGNED #define VSTORE_dst1 VSTORE_ALIGNED #else #define VSTORE_dst1 VSTORE_UNALIGNED #endif #ifdef DST2_ALIGNED #define VSTORE_dst2 VSTORE_ALIGNED #else #define VSTORE_dst2 VSTORE_UNALIGNED #endif #ifdef DST3_ALIGNED #define VSTORE_dst3 VSTORE_ALIGNED #else #define VSTORE_dst3 VSTORE_UNALIGNED #endif __kernel void split_vector( __global SRC_TYPE* src, int srcStepBytes, int2 srcOffset, // offset.x in bytes __global TYPE* dst0, int dst0StepBytes, int2 dst0Offset, __global TYPE* dst1, int dst1StepBytes, int2 dst1Offset, #if DATA_CHAN > 2 __global TYPE* dst2, int dst2StepBytes, int2 dst2Offset, #endif #if DATA_CHAN > 3 __global TYPE* dst3, int dst3StepBytes, int2 dst3Offset, #endif int2 size) { int x = get_global_id(0) * VEC_SIZE; int y = get_global_id(1); if (x < size.x && y < size.y) { SRC_TYPE srcData[VEC_SIZE]; int xOffsetLimitBytes = srcOffset.x + size.x * sizeof(SRC_TYPE); int xOffsetBytes = srcOffset.x + x * sizeof(SRC_TYPE); int yOffsetBytes = (srcOffset.y + y) * srcStepBytes; #pragma unroll for (int i = 0; i < VEC_SIZE; i++, xOffsetBytes += sizeof(SRC_TYPE)) { srcData[i] = (xOffsetBytes >= xOffsetLimitBytes) ? (SRC_TYPE)0 : *(__global SRC_TYPE*)((__global char*)src + yOffsetBytes + xOffsetBytes); } #if VEC_SIZE == 1 TYPE dstC0 = srcData[0].s0; TYPE dstC1 = srcData[0].s1; #if DATA_CHAN > 2 TYPE dstC2 = srcData[0].s2; #endif #if DATA_CHAN > 3 TYPE dstC3 = srcData[0].s3; #endif # define VEC_TO_ARRAY(v, a) TYPE a[1] = {v}; #elif VEC_SIZE == 2 DST_VEC_TYPE dstC0 = (DST_VEC_TYPE)(srcData[0].s0, srcData[1].s0); DST_VEC_TYPE dstC1 = (DST_VEC_TYPE)(srcData[0].s1, srcData[1].s1); #if DATA_CHAN > 2 DST_VEC_TYPE dstC2 = (DST_VEC_TYPE)(srcData[0].s2, srcData[1].s2); #endif #if DATA_CHAN > 3 DST_VEC_TYPE dstC3 = (DST_VEC_TYPE)(srcData[0].s3, srcData[1].s3); #endif # define VEC_TO_ARRAY(v, a) TYPE a[2] = {v.s0, v.s1}; #elif VEC_SIZE == 4 DST_VEC_TYPE dstC0 = (DST_VEC_TYPE)(srcData[0].s0, srcData[1].s0, srcData[2].s0, srcData[3].s0); DST_VEC_TYPE dstC1 = (DST_VEC_TYPE)(srcData[0].s1, srcData[1].s1, srcData[2].s1, srcData[3].s1); #if DATA_CHAN > 2 DST_VEC_TYPE dstC2 = (DST_VEC_TYPE)(srcData[0].s2, srcData[1].s2, srcData[2].s2, srcData[3].s2); #endif #if DATA_CHAN > 3 DST_VEC_TYPE dstC3 = (DST_VEC_TYPE)(srcData[0].s3, srcData[1].s3, srcData[2].s3, srcData[3].s3); #endif # define VEC_TO_ARRAY(v, a) TYPE a[4] = {v.s0, v.s1, v.s2, v.s3}; #endif #ifndef BYPASS_VSTORE #define BYPASS_VSTORE false #endif #define WRITE_VEC_DST(dst, vecValue) \ { \ int dst ## xOffsetLimitBytes = dst ## Offset.x + size.x * sizeof(TYPE); \ int dst ## xOffsetBytes = dst ## Offset.x + x * sizeof(TYPE); \ int dst ## yOffsetBytes = (dst ## Offset.y + y) * dst ## StepBytes; \ if (!BYPASS_VSTORE && dst ## xOffsetBytes + (int)sizeof(DST_VEC_TYPE) <= dst ## xOffsetLimitBytes) \ { \ VSTORE_ ## dst(((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes), vecValue); \ } \ else \ { \ VEC_TO_ARRAY(vecValue, vecValue##Array); \ for (int i = 0; i < VEC_SIZE; i++, dst ## xOffsetBytes += sizeof(TYPE)) \ { \ if (dst ## xOffsetBytes + (int)sizeof(TYPE) <= dst ## xOffsetLimitBytes) \ *(__global TYPE*)((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes) = vecValue##Array[i]; \ else \ break; \ } \ } \ } WRITE_VEC_DST(dst0, dstC0); WRITE_VEC_DST(dst1, dstC1); #if DATA_CHAN > 2 WRITE_VEC_DST(dst2, dstC2); #endif #if DATA_CHAN > 3 WRITE_VEC_DST(dst3, dstC3); #endif } }