Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
GPGPU
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
CI / CD
CI / CD
Pipelines
Schedules
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Kai Westerkamp
GPGPU
Commits
7ae7a078
Commit
7ae7a078
authored
Nov 27, 2016
by
Kai Westerkamp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
A3
parent
449a502c
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
202 additions
and
55 deletions
+202
-55
CAssignment3.cpp
Assignment3/Assignment3/CAssignment3.cpp
+55
-46
CConvolutionSeparableTask.cpp
Assignment3/Assignment3/CConvolutionSeparableTask.cpp
+1
-1
Convolution3x3.cl
Assignment3/Assignment3/Convolution3x3.cl
+69
-3
ConvolutionSeparable.cl
Assignment3/Assignment3/ConvolutionSeparable.cl
+47
-4
Times.pdf
Assignment3/Assignment3/Images/Times.pdf
+0
-0
Times.xlsx
Assignment3/Assignment3/Images/Times.xlsx
+0
-0
histogram.cl
Assignment3/Assignment3/histogram.cl
+30
-1
No files found.
Assignment3/Assignment3/CAssignment3.cpp
View file @
7ae7a078
...
...
@@ -38,63 +38,72 @@ bool CAssignment3::DoCompute()
CConvolution3x3Task
convTask
(
"Images/input.pfm"
,
TileSize
,
ConvKernel
,
true
,
0.0
f
);
RunComputeTask
(
convTask
,
TileSize
);
}
cout
<<
endl
<<
"########################################"
<<
endl
;
cout
<<
"Task 2: Separable convolution"
<<
endl
<<
endl
;
{
size_t
HGroupSize
[
2
]
=
{
32
,
16
};
size_t
VGroupSize
[
2
]
=
{
32
,
16
};
size_t
HGroupSize
[
2
]
=
{
32
,
16
};
size_t
VGroupSize
[
2
]
=
{
32
,
16
};
//for (int steps = 1; steps < 6; steps++) {
int
steps
=
4
;
cout
<<
"Stepsize = "
<<
steps
<<
endl
;
{
//simple box filter
float
ConvKernel
[
9
];
for
(
int
i
=
0
;
i
<
9
;
i
++
)
ConvKernel
[
i
]
=
1.0
f
/
9.0
f
;
CConvolutionSeparableTask
convTask
(
"box_4x4"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
steps
,
steps
,
4
,
ConvKernel
,
ConvKernel
);
// note: the last argument is ignored, but our framework requires it
// for the horizontal and vertical passes different local sizes might be used
RunComputeTask
(
convTask
,
HGroupSize
);
}
{
//simple box filter
float
ConvKernel
[
17
];
for
(
int
i
=
0
;
i
<
17
;
i
++
)
ConvKernel
[
i
]
=
1.0
f
/
17.0
f
;
CConvolutionSeparableTask
convTask
(
"box_8x8"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
steps
,
steps
,
8
,
ConvKernel
,
ConvKernel
);
RunComputeTask
(
convTask
,
HGroupSize
);
}
{
// Gaussian blur
float
ConvKernel
[
7
]
=
{
0.000817774
f
,
0.0286433
f
,
0.235018
f
,
0.471041
f
,
0.235018
f
,
0.0286433
f
,
0.000817774
f
};
CConvolutionSeparableTask
convTask
(
"gauss_3x3"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
steps
,
steps
,
3
,
ConvKernel
,
ConvKernel
);
RunComputeTask
(
convTask
,
HGroupSize
);
}
//}
}
{
//simple box filter
float
ConvKernel
[
9
];
for
(
int
i
=
0
;
i
<
9
;
i
++
)
ConvKernel
[
i
]
=
1.0
f
/
9.0
f
;
CConvolutionSeparableTask
convTask
(
"box_4x4"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
4
,
4
,
4
,
ConvKernel
,
ConvKernel
);
// note: the last argument is ignored, but our framework requires it
// for the horizontal and vertical passes different local sizes might be used
RunComputeTask
(
convTask
,
HGroupSize
);
}
{
//simple box filter
float
ConvKernel
[
17
];
for
(
int
i
=
0
;
i
<
17
;
i
++
)
ConvKernel
[
i
]
=
1.0
f
/
17.0
f
;
CConvolutionSeparableTask
convTask
(
"box_8x8"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
4
,
4
,
8
,
ConvKernel
,
ConvKernel
);
RunComputeTask
(
convTask
,
HGroupSize
);
}
{
// Gaussian blur
float
ConvKernel
[
7
]
=
{
0.000817774
f
,
0.0286433
f
,
0.235018
f
,
0.471041
f
,
0.235018
f
,
0.0286433
f
,
0.000817774
f
};
CConvolutionSeparableTask
convTask
(
"gauss_3x3"
,
"Images/input.pfm"
,
HGroupSize
,
VGroupSize
,
4
,
4
,
3
,
ConvKernel
,
ConvKernel
);
RunComputeTask
(
convTask
,
HGroupSize
);
}
}
//cout<<endl<<"########################################"<<endl;
//cout<<"Task 3: Separable bilateral convolution"<<endl<<endl;
//{
// size_t HGroupSize[2] = {32, 4};
// size_t VGroupSize[2] = {32, 4};
cout
<<
endl
<<
"########################################"
<<
endl
;
cout
<<
"Task 3: Separable bilateral convolution"
<<
endl
<<
endl
;
{
size_t
HGroupSize
[
2
]
=
{
32
,
4
};
size_t
VGroupSize
[
2
]
=
{
32
,
4
};
float
ConvKernel
[
9
]
=
{
0.010284844
f
,
0.0417071
f
,
0.113371652
f
,
0.206576619
f
,
0.252313252
f
,
0.206576619
f
,
0.113371652
f
,
0.0417071
f
,
0.010284844
f
};
// float ConvKernel[9] = {0.010284844f, 0.0417071f, 0.113371652f, 0.206576619f, 0.252313252f, 0.206576619f, 0.113371652f, 0.0417071f, 0.010284844f};
CConvolutionBilateralTask
convTask
(
"Images/color.pfm"
,
"Images/normals.pfm"
,
"Images/depth.pfm"
,
HGroupSize
,
VGroupSize
,
4
,
4
,
4
,
ConvKernel
,
ConvKernel
);
RunComputeTask
(
convTask
,
HGroupSize
);
}
//
CConvolutionBilateralTask convTask("Images/color.pfm", "Images/normals.pfm", "Images/depth.pfm", HGroupSize, VGroupSize,
//
4, 4, 4, ConvKernel, ConvKernel);
//
RunComputeTask(convTask, HGroupSize);
//
}
cout
<<
endl
<<
"########################################"
<<
endl
;
cout
<<
"Task 4: Histogram"
<<
endl
<<
endl
;
...
...
Assignment3/Assignment3/CConvolutionSeparableTask.cpp
View file @
7ae7a078
...
...
@@ -229,7 +229,7 @@ double CConvolutionSeparableTask::ConvolutionChannelGPU(unsigned int Channel, cl
{
cl_int
clErr
;
clErr
=
clSetKernelArg
(
m_HorizontalKernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dGPUWorkingBuffer
);
clErr
=
clSetKernelArg
(
m_HorizontalKernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dGPUWorkingBuffer
);
//m_dGPUWorkingBuffer
clErr
|=
clSetKernelArg
(
m_HorizontalKernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dSourceChannels
[
Channel
]);
V_RETURN_0_CL
(
clErr
,
"Error setting horizontal kernel arguments"
);
...
...
Assignment3/Assignment3/Convolution3x3.cl
View file @
7ae7a078
...
...
@@ -12,6 +12,15 @@ to be the multiple of the given tile-size.
#
define
TILE_Y
16
#
define
load
(
X,
Y
)
_load
(
d_Src,
X,
Y,
Width,
Height,
Pitch
)
inline
float
_load
(
__global
const
float*
d_Src,
uint
x,
uint
y,
uint
Width,
uint
Height,
uint
Pitch
)
{
if
(
x
<
Width
&&
y
<
Height
)
{
return
d_Src[y
*
Pitch
+
x]
;
}
else
{
return
0.0f
;
}
}
//
d_Dst
is
the
convolution
of
d_Src
with
the
kernel
c_Kernel
//
c_Kernel
is
assumed
to
be
a
float[11]
array
of
the
3x3
convolution
constants,
one
multiplier
(
for
normalization
)
and
an
offset
(
in
this
order!
)
//
With
&
Height
are
the
image
dimensions
(
should
be
multiple
of
the
tile
size
)
...
...
@@ -30,15 +39,71 @@ void Convolution(
//
the
size
of
the
local
memory
necessary
for
the
convolution
is
the
tile
size
+
the
halo
area
__local
float
tile[TILE_Y
+
2][TILE_X
+
2]
;
//
TO
DO...
const
uint2
gid
=
(
uint2
)(
get_global_id
(
0
)
,
get_global_id
(
1
))
;
const
uint2
lid
=
(
uint2
)(
get_local_id
(
0
)
,
get_local_id
(
1
))
;
//
Fill
the
halo
with
zeros
//
TO
DO...
//
Fill
the
halo
with
zeros
->
laod
function
//
Load
main
filtered
area
from
d_Src
//
Load
halo
regions
from
d_Src
(
edges
and
corners
separately
)
,
check
for
image
bounds!
tile[lid.y
+
1][lid.x
+
1]
=
load
(
gid.x,
gid.y
)
;
if
(
lid.x
==
0
)
{
tile[lid.y
+
1][0]
=
load
(
gid.x
-
1
,
gid.y
)
;
if
(
lid.y
==
0
)
{
tile[0][0]
=
load
(
gid.x
-
1
,
gid.y
-
1
)
;
}
if
(
lid.y
==
TILE_Y
-
1
)
{
tile[TILE_Y
+
1][0]
=
load
(
gid.x
-
1
,
gid.y
+
1
)
;
}
}
if
(
lid.x
==
TILE_X
-
1
)
{
tile[lid.y
+
1][lid.x
+
2]
=
load
(
gid.x
+
1
,
gid.y
)
;
if
(
lid.y
==
0
)
{
tile[0][TILE_X
+
1]
=
load
(
gid.x
+
1
,
gid.y
-
1
)
;
}
if
(
lid.y
==
TILE_Y
-
1
)
{
tile[TILE_Y
+
1][TILE_X
+
1]
=
load
(
gid.x
+
1
,
gid.y
+
1
)
;
}
}
if
(
lid.y
==
0
)
{
tile[0][lid.x
+
1]
=
load
(
gid.x,
gid.y
-
1
)
;
}
if
(
lid.y
==
TILE_Y
-
1
)
{
tile[lid.y
+
2][lid.x
+
1]
=
load
(
gid.x,
gid.y
+
1
)
;
}
//
Sync
threads
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Perform
the
convolution
and
store
the
convolved
signal
to
d_Dst.
float
tmp
;
tmp
=
c_Kernel[0]
*
tile[lid.y+0][lid.x+0]
;
tmp
+=
c_Kernel[1]
*
tile[lid.y+0][lid.x+1]
;
tmp
+=
c_Kernel[2]
*
tile[lid.y+0][lid.x+2]
;
tmp
+=
c_Kernel[3]
*
tile[lid.y+1][lid.x+0]
;
tmp
+=
c_Kernel[4]
*
tile[lid.y+1][lid.x+1]
;
tmp
+=
c_Kernel[5]
*
tile[lid.y+1][lid.x+2]
;
tmp
+=
c_Kernel[6]
*
tile[lid.y+2][lid.x+0]
;
tmp
+=
c_Kernel[7]
*
tile[lid.y+2][lid.x+1]
;
tmp
+=
c_Kernel[8]
*
tile[lid.y+2][lid.x+2]
;
tmp
=
c_Kernel[9]
*
tmp
+
c_Kernel[10]
;
d_Dst[gid.y
*
Pitch
+
gid.x]
=
tmp
;
}
\ No newline at end of file
Assignment3/Assignment3/ConvolutionSeparable.cl
View file @
7ae7a078
...
...
@@ -52,18 +52,37 @@ void ConvHorizontal(
__local
float
tile[H_GROUPSIZE_Y][
(
H_RESULT_STEPS
+
2
)
*
H_GROUPSIZE_X]
;
//
TODO:
//const
int
baseX
=
...
//const
int
baseY
=
...
const
int
baseX
=
(
get_group_id
(
0
)
*
H_RESULT_STEPS
-
1
)
*
H_GROUPSIZE_X
+
get_local_id
(
0
)
;
const
int
baseY
=
get_group_id
(
1
)
*
H_GROUPSIZE_Y
+
get_local_id
(
1
)
;
//const
int
offset
=
...
//
Load
left
halo
(
check
for
left
bound
)
//
Load
main
data
+
right
halo
(
check
for
right
bound
)
//
for
(
int
tileID
=
1
; tileID < ...)
tile[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
(
baseX
>
0
)
?
d_Src[baseY
*
Pitch
+
baseX
]
:
0.0f
;
#
pragma
unroll
for
(
int
i
=
1
; i < H_RESULT_STEPS + 2; i++) {
tile[get_local_id
(
1
)
][get_local_id
(
0
)
+
i
*
H_GROUPSIZE_X]
=
(
baseX
+
i
*
H_GROUPSIZE_X
<
Width
)
?
d_Src[baseY
*
Pitch
+
baseX
+
i
*
H_GROUPSIZE_X]
:
0.0f
;
}
//
Sync
the
work-items
after
loading
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Convolve
and
store
the
result
#
pragma
unroll
for
(
int
i
=
1
; i < H_RESULT_STEPS + 1; i++) {
if
(
baseX
+
i
*
H_GROUPSIZE_X
<
Width
)
{
float
result
=
0.0f
;
#
pragma
unroll
for
(
int
x
=
-KERNEL_RADIUS
; x <= KERNEL_RADIUS; x++) {
result
+=
c_Kernel[KERNEL_RADIUS
+
x]
*
tile[get_local_id
(
1
)
][get_local_id
(
0
)
+
i
*
H_GROUPSIZE_X
+
x]
;
}
d_Dst[baseY
*
Pitch
+
baseX
+
i
*
H_GROUPSIZE_X]
=
result
;
}
}
}
...
...
@@ -86,7 +105,31 @@ void ConvVertical(
//
Conceptually
similar
to
ConvHorizontal
//
Load
top
halo
+
main
data
+
bottom
halo
//
Compute
and
store
results
const
int
baseX
=
get_group_id
(
0
)
*
V_GROUPSIZE_X
+
get_local_id
(
0
)
;
const
int
baseY
=
(
get_group_id
(
1
)
*
V_RESULT_STEPS
-
1
)
*
V_GROUPSIZE_Y
+
get_local_id
(
1
)
;
tile[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
(
baseY
>=
0
)
?
d_Src[baseY
*
Pitch
+
baseX]
:
0.0f
;
#
pragma
unroll
for
(
int
i
=
1
; i < V_RESULT_STEPS + 1; i++) {
tile[get_local_id
(
1
)
+
i
*
V_GROUPSIZE_Y][get_local_id
(
0
)
]
=
(
baseY
+
i
*
V_GROUPSIZE_Y
<
Height
)
?
d_Src[
(
baseY
+
i
*
V_GROUPSIZE_Y
)
*
Pitch
+
baseX]
:
0.0f
;
}
tile[get_local_id
(
1
)
+
(
V_RESULT_STEPS
+
1
)
*
V_GROUPSIZE_Y][get_local_id
(
0
)
]
=
(
baseY
+
(
V_RESULT_STEPS
+
1
)
*
V_GROUPSIZE_Y
<
Height
)
?
d_Src[
(
baseY
+
(
V_RESULT_STEPS
+
1
)
*
V_GROUPSIZE_Y
)
*
Pitch
+
baseX]
:
0.0f
;
//
Compute
and
store
results
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
pragma
unroll
for
(
int
i
=
1
; i < V_RESULT_STEPS + 1; i++) {
if
(
baseY
+
i
*
V_GROUPSIZE_Y
<
Height
)
{
float
result
=
0.0f
;
#
pragma
unroll
for
(
int
y
=
-KERNEL_RADIUS
; y <= KERNEL_RADIUS; y++) {
result
+=
c_Kernel[KERNEL_RADIUS
+
y]
*
tile[get_local_id
(
1
)
+
i
*
V_GROUPSIZE_Y
+
y][get_local_id
(
0
)
]
;
}
d_Dst[
(
baseY
+
i
*
V_GROUPSIZE_Y
)
*
Pitch
+
baseX]
=
result
;
}
}
}
Assignment3/Assignment3/Images/Times.pdf
0 → 100644
View file @
7ae7a078
File added
Assignment3/Assignment3/Images/Times.xlsx
0 → 100644
View file @
7ae7a078
File added
Assignment3/Assignment3/histogram.cl
View file @
7ae7a078
...
...
@@ -22,6 +22,12 @@ compute_histogram(
)
{
//
Insert
your
kernel
code
here
const
uint2
gid
=
(
uint2
)(
get_global_id
(
0
)
,
get_global_id
(
1
))
;
if
(
gid.x
<
width
&&
gid.y
<
height
)
{
float
pixel
=
img[gid.y
*
pitch
+
gid.x]
;
int
bin
=
min
(
num_hist_bins
-
1
,
max
(
0
,
(
int
)(
pixel
*
((
float
)
num_hist_bins
))))
;
atomic_inc
(
&
(
histogram[bin]
))
;
}
}
__kernel
void
...
...
@@ -35,5 +41,28 @@ compute_histogram_local_memory(
__local
int
*local_hist
)
{
//
Insert
your
kernel
code
here
const
uint
index
=
get_local_id
(
1
)
*
get_local_size
(
0
)
+
get_local_id
(
0
)
;
const
uint2
gid
=
(
uint2
)(
get_global_id
(
0
)
,
get_global_id
(
1
))
;
//
init
0
if
(
index
<
num_hist_bins
)
{
local_hist[index]
=
0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
gid.x
<
width
&&
gid.y
<
height
)
{
float
pixel
=
img[gid.y
*
pitch
+
gid.x]
;
int
bin
=
min
(
num_hist_bins
-
1
,
max
(
0
,
(
int
)(
pixel
*
((
float
)
num_hist_bins
))))
;
atomic_inc
(
&
(
local_hist[bin]
))
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//write
back
if
(
index
<
num_hist_bins
)
{
atomic_add
(
&
(
histogram[index]
)
,
local_hist[index]
)
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment