Commit 7ae7a078 by Kai Westerkamp

A3

parent 449a502c
...@@ -40,21 +40,26 @@ bool CAssignment3::DoCompute() ...@@ -40,21 +40,26 @@ bool CAssignment3::DoCompute()
} }
cout<<endl<<"########################################"<<endl; cout<<endl<<"########################################"<<endl;
cout<<"Task 2: Separable convolution"<<endl<<endl; cout<<"Task 2: Separable convolution"<<endl<<endl;
{ {
size_t HGroupSize[2] = {32, 16}; size_t HGroupSize[2] = { 32, 16 };
size_t VGroupSize[2] = {32, 16}; size_t VGroupSize[2] = { 32, 16 };
//for (int steps = 1; steps < 6; steps++) {
int steps = 4;
cout << "Stepsize = " << steps << endl;
{ {
//simple box filter //simple box filter
float ConvKernel[9]; float ConvKernel[9];
for(int i = 0; i < 9; i++) for (int i = 0; i < 9; i++)
ConvKernel[i] = 1.0f / 9.0f; ConvKernel[i] = 1.0f / 9.0f;
CConvolutionSeparableTask convTask("box_4x4", "Images/input.pfm", HGroupSize, VGroupSize, CConvolutionSeparableTask convTask("box_4x4", "Images/input.pfm", HGroupSize, VGroupSize,
4, 4, 4, ConvKernel, ConvKernel); steps, steps, 4, ConvKernel, ConvKernel);
// note: the last argument is ignored, but our framework requires it // note: the last argument is ignored, but our framework requires it
// for the horizontal and vertical passes different local sizes might be used // for the horizontal and vertical passes different local sizes might be used
RunComputeTask(convTask, HGroupSize); RunComputeTask(convTask, HGroupSize);
...@@ -63,11 +68,11 @@ bool CAssignment3::DoCompute() ...@@ -63,11 +68,11 @@ bool CAssignment3::DoCompute()
{ {
//simple box filter //simple box filter
float ConvKernel[17]; float ConvKernel[17];
for(int i = 0; i < 17; i++) for (int i = 0; i < 17; i++)
ConvKernel[i] = 1.0f / 17.0f; ConvKernel[i] = 1.0f / 17.0f;
CConvolutionSeparableTask convTask("box_8x8", "Images/input.pfm", HGroupSize, VGroupSize, CConvolutionSeparableTask convTask("box_8x8", "Images/input.pfm", HGroupSize, VGroupSize,
4, 4, 8, ConvKernel, ConvKernel); steps, steps, 8, ConvKernel, ConvKernel);
RunComputeTask(convTask, HGroupSize); RunComputeTask(convTask, HGroupSize);
} }
...@@ -77,24 +82,28 @@ bool CAssignment3::DoCompute() ...@@ -77,24 +82,28 @@ bool CAssignment3::DoCompute()
0.000817774f, 0.0286433f, 0.235018f, 0.471041f, 0.235018f, 0.0286433f, 0.000817774f 0.000817774f, 0.0286433f, 0.235018f, 0.471041f, 0.235018f, 0.0286433f, 0.000817774f
}; };
CConvolutionSeparableTask convTask("gauss_3x3", "Images/input.pfm", HGroupSize, VGroupSize, CConvolutionSeparableTask convTask("gauss_3x3", "Images/input.pfm", HGroupSize, VGroupSize,
4, 4, 3, ConvKernel, ConvKernel); steps, steps, 3, ConvKernel, ConvKernel);
RunComputeTask(convTask, HGroupSize); RunComputeTask(convTask, HGroupSize);
} }
//}
} }
cout<<endl<<"########################################"<<endl;
cout<<"Task 3: Separable bilateral convolution"<<endl<<endl;
{
size_t HGroupSize[2] = {32, 4};
size_t VGroupSize[2] = {32, 4};
float ConvKernel[9] = {0.010284844f, 0.0417071f, 0.113371652f, 0.206576619f, 0.252313252f, 0.206576619f, 0.113371652f, 0.0417071f, 0.010284844f};
CConvolutionBilateralTask convTask("Images/color.pfm", "Images/normals.pfm", "Images/depth.pfm", HGroupSize, VGroupSize, //cout<<endl<<"########################################"<<endl;
4, 4, 4, ConvKernel, ConvKernel); //cout<<"Task 3: Separable bilateral convolution"<<endl<<endl;
RunComputeTask(convTask, HGroupSize); //{
} // size_t HGroupSize[2] = {32, 4};
// size_t VGroupSize[2] = {32, 4};
// float ConvKernel[9] = {0.010284844f, 0.0417071f, 0.113371652f, 0.206576619f, 0.252313252f, 0.206576619f, 0.113371652f, 0.0417071f, 0.010284844f};
// CConvolutionBilateralTask convTask("Images/color.pfm", "Images/normals.pfm", "Images/depth.pfm", HGroupSize, VGroupSize,
// 4, 4, 4, ConvKernel, ConvKernel);
// RunComputeTask(convTask, HGroupSize);
//}
cout<<endl<<"########################################"<<endl; cout<<endl<<"########################################"<<endl;
cout<<"Task 4: Histogram"<<endl<<endl; cout<<"Task 4: Histogram"<<endl<<endl;
......
...@@ -229,7 +229,7 @@ double CConvolutionSeparableTask::ConvolutionChannelGPU(unsigned int Channel, cl ...@@ -229,7 +229,7 @@ double CConvolutionSeparableTask::ConvolutionChannelGPU(unsigned int Channel, cl
{ {
cl_int clErr; cl_int clErr;
clErr = clSetKernelArg(m_HorizontalKernel, 0, sizeof(cl_mem), (void*)&m_dGPUWorkingBuffer); clErr = clSetKernelArg(m_HorizontalKernel, 0, sizeof(cl_mem), (void*)&m_dGPUWorkingBuffer); //m_dGPUWorkingBuffer
clErr |= clSetKernelArg(m_HorizontalKernel, 1, sizeof(cl_mem), (void*)&m_dSourceChannels[Channel]); clErr |= clSetKernelArg(m_HorizontalKernel, 1, sizeof(cl_mem), (void*)&m_dSourceChannels[Channel]);
V_RETURN_0_CL(clErr, "Error setting horizontal kernel arguments"); V_RETURN_0_CL(clErr, "Error setting horizontal kernel arguments");
......
...@@ -12,6 +12,15 @@ to be the multiple of the given tile-size. ...@@ -12,6 +12,15 @@ to be the multiple of the given tile-size.
#define TILE_Y 16 #define TILE_Y 16
#define load(X, Y) _load(d_Src, X, Y, Width, Height, Pitch)
inline float _load(__global const float* d_Src, uint x, uint y, uint Width, uint Height, uint Pitch) {
if (x < Width && y < Height) {
return d_Src[y * Pitch + x];
} else {
return 0.0f;
}
}
// d_Dst is the convolution of d_Src with the kernel c_Kernel // d_Dst is the convolution of d_Src with the kernel c_Kernel
// c_Kernel is assumed to be a float[11] array of the 3x3 convolution constants, one multiplier (for normalization) and an offset (in this order!) // c_Kernel is assumed to be a float[11] array of the 3x3 convolution constants, one multiplier (for normalization) and an offset (in this order!)
// With & Height are the image dimensions (should be multiple of the tile size) // With & Height are the image dimensions (should be multiple of the tile size)
...@@ -30,15 +39,71 @@ void Convolution( ...@@ -30,15 +39,71 @@ void Convolution(
// the size of the local memory necessary for the convolution is the tile size + the halo area // the size of the local memory necessary for the convolution is the tile size + the halo area
__local float tile[TILE_Y + 2][TILE_X + 2]; __local float tile[TILE_Y + 2][TILE_X + 2];
// TO DO... const uint2 gid = (uint2)(get_global_id(0), get_global_id(1));
const uint2 lid = (uint2)(get_local_id(0), get_local_id(1));
// Fill the halo with zeros // TO DO...
// Fill the halo with zeros -> laod function
// Load main filtered area from d_Src // Load main filtered area from d_Src
// Load halo regions from d_Src (edges and corners separately), check for image bounds! // Load halo regions from d_Src (edges and corners separately), check for image bounds!
tile[lid.y + 1][lid.x + 1] = load(gid.x, gid.y);
if (lid.x == 0) {
tile[lid.y + 1][0] = load(gid.x - 1, gid.y);
if(lid.y == 0)
{
tile[0][0] = load(gid.x - 1, gid.y - 1);
}
if(lid.y == TILE_Y - 1)
{
tile[TILE_Y + 1][0] = load(gid.x - 1, gid.y + 1);
}
}
if (lid.x == TILE_X - 1) {
tile[lid.y + 1][lid.x + 2] = load(gid.x + 1, gid.y);
if (lid.y == 0) {
tile[0][TILE_X + 1] = load(gid.x + 1, gid.y - 1);
}
if(lid.y == TILE_Y - 1)
{
tile[TILE_Y + 1][TILE_X + 1] = load(gid.x + 1, gid.y + 1);
}
}
if (lid.y == 0) {
tile[0][lid.x + 1] = load(gid.x, gid.y - 1);
}
if (lid.y == TILE_Y - 1) {
tile[lid.y + 2][lid.x + 1] = load(gid.x, gid.y + 1);
}
// Sync threads // Sync threads
barrier(CLK_LOCAL_MEM_FENCE);
// Perform the convolution and store the convolved signal to d_Dst. // Perform the convolution and store the convolved signal to d_Dst.
float tmp;
tmp = c_Kernel[0] * tile[lid.y+0][lid.x+0];
tmp += c_Kernel[1] * tile[lid.y+0][lid.x+1];
tmp += c_Kernel[2] * tile[lid.y+0][lid.x+2];
tmp += c_Kernel[3] * tile[lid.y+1][lid.x+0];
tmp += c_Kernel[4] * tile[lid.y+1][lid.x+1];
tmp += c_Kernel[5] * tile[lid.y+1][lid.x+2];
tmp += c_Kernel[6] * tile[lid.y+2][lid.x+0];
tmp += c_Kernel[7] * tile[lid.y+2][lid.x+1];
tmp += c_Kernel[8] * tile[lid.y+2][lid.x+2];
tmp = c_Kernel[9] * tmp + c_Kernel[10];
d_Dst[gid.y * Pitch + gid.x] = tmp;
} }
\ No newline at end of file
...@@ -52,18 +52,37 @@ void ConvHorizontal( ...@@ -52,18 +52,37 @@ void ConvHorizontal(
__local float tile[H_GROUPSIZE_Y][(H_RESULT_STEPS + 2) * H_GROUPSIZE_X]; __local float tile[H_GROUPSIZE_Y][(H_RESULT_STEPS + 2) * H_GROUPSIZE_X];
// TODO: // TODO:
//const int baseX = ... const int baseX = (get_group_id(0) * H_RESULT_STEPS - 1) * H_GROUPSIZE_X + get_local_id(0);
//const int baseY = ... const int baseY = get_group_id(1) * H_GROUPSIZE_Y + get_local_id(1);
//const int offset = ... //const int offset = ...
// Load left halo (check for left bound) // Load left halo (check for left bound)
// Load main data + right halo (check for right bound) // Load main data + right halo (check for right bound)
// for (int tileID = 1; tileID < ...) // for (int tileID = 1; tileID < ...)
tile[get_local_id(1)][get_local_id(0)] = (baseX > 0) ? d_Src[baseY * Pitch + baseX ] : 0.0f;
#pragma unroll
for (int i = 1; i < H_RESULT_STEPS + 2; i++) {
tile[get_local_id(1)][get_local_id(0) + i * H_GROUPSIZE_X] = (baseX + i * H_GROUPSIZE_X < Width) ? d_Src[baseY * Pitch + baseX + i * H_GROUPSIZE_X] : 0.0f;
}
// Sync the work-items after loading // Sync the work-items after loading
barrier(CLK_LOCAL_MEM_FENCE);
// Convolve and store the result // Convolve and store the result
#pragma unroll
for (int i = 1; i < H_RESULT_STEPS + 1; i++) {
if (baseX + i * H_GROUPSIZE_X < Width) {
float result = 0.0f;
#pragma unroll
for (int x = -KERNEL_RADIUS; x <= KERNEL_RADIUS; x++) {
result += c_Kernel[KERNEL_RADIUS + x] * tile[get_local_id(1)][get_local_id(0) + i * H_GROUPSIZE_X + x];
}
d_Dst[baseY * Pitch + baseX + i * H_GROUPSIZE_X] = result;
}
}
} }
...@@ -86,7 +105,31 @@ void ConvVertical( ...@@ -86,7 +105,31 @@ void ConvVertical(
// Conceptually similar to ConvHorizontal // Conceptually similar to ConvHorizontal
// Load top halo + main data + bottom halo // Load top halo + main data + bottom halo
// Compute and store results const int baseX = get_group_id(0) * V_GROUPSIZE_X + get_local_id(0);
const int baseY = (get_group_id(1) * V_RESULT_STEPS - 1) * V_GROUPSIZE_Y + get_local_id(1);
tile[get_local_id(1)][get_local_id(0)] = (baseY >= 0) ? d_Src[baseY * Pitch + baseX] : 0.0f;
#pragma unroll
for (int i = 1; i < V_RESULT_STEPS + 1; i++) {
tile[get_local_id(1) + i * V_GROUPSIZE_Y][get_local_id(0)] = (baseY + i * V_GROUPSIZE_Y < Height) ? d_Src[(baseY + i * V_GROUPSIZE_Y) * Pitch + baseX] : 0.0f;
}
tile[get_local_id(1) + (V_RESULT_STEPS + 1) * V_GROUPSIZE_Y][get_local_id(0)] = (baseY + (V_RESULT_STEPS + 1) * V_GROUPSIZE_Y < Height) ? d_Src[(baseY + (V_RESULT_STEPS + 1) * V_GROUPSIZE_Y) * Pitch + baseX] : 0.0f;
// Compute and store results
barrier(CLK_LOCAL_MEM_FENCE);
#pragma unroll
for (int i = 1; i < V_RESULT_STEPS + 1; i++) {
if (baseY + i * V_GROUPSIZE_Y < Height) {
float result = 0.0f;
#pragma unroll
for (int y = -KERNEL_RADIUS; y <= KERNEL_RADIUS; y++) {
result += c_Kernel[KERNEL_RADIUS + y] * tile[get_local_id(1) + i * V_GROUPSIZE_Y + y][get_local_id(0)];
}
d_Dst[(baseY + i * V_GROUPSIZE_Y) * Pitch + baseX] = result;
}
}
} }
...@@ -22,6 +22,12 @@ compute_histogram( ...@@ -22,6 +22,12 @@ compute_histogram(
) )
{ {
// Insert your kernel code here // Insert your kernel code here
const uint2 gid = (uint2)(get_global_id(0), get_global_id(1));
if (gid.x < width && gid.y < height) {
float pixel = img[gid.y * pitch + gid.x];
int bin = min(num_hist_bins - 1, max(0, (int)(pixel * ((float) num_hist_bins))));
atomic_inc(&(histogram[bin]));
}
} }
__kernel void __kernel void
...@@ -35,5 +41,28 @@ compute_histogram_local_memory( ...@@ -35,5 +41,28 @@ compute_histogram_local_memory(
__local int *local_hist __local int *local_hist
) )
{ {
// Insert your kernel code here const uint index = get_local_id(1) * get_local_size(0) + get_local_id(0);
const uint2 gid = (uint2)(get_global_id(0), get_global_id(1));
// init 0
if (index < num_hist_bins) {
local_hist[index] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (gid.x < width && gid.y < height) {
float pixel = img[gid.y * pitch + gid.x];
int bin = min(num_hist_bins - 1, max(0, (int)(pixel * ((float) num_hist_bins))));
atomic_inc(&(local_hist[bin]));
}
barrier(CLK_LOCAL_MEM_FENCE);
//write back
if (index < num_hist_bins) {
atomic_add(&(histogram[index]), local_hist[index]);
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment