Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
GPGPU
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
CI / CD
CI / CD
Pipelines
Schedules
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Kai Westerkamp
GPGPU
Commits
a752caf0
Commit
a752caf0
authored
Nov 07, 2016
by
Kai Westerkamp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
interleaved, + sequential
parent
e1738542
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
97 additions
and
15 deletions
+97
-15
CAssignment2.cpp
Assignment2/Assignment2/CAssignment2.cpp
+2
-2
CReductionTask.cpp
Assignment2/Assignment2/CReductionTask.cpp
+62
-10
CReductionTask.h
Assignment2/Assignment2/CReductionTask.h
+2
-0
Reduction.cl
Assignment2/Assignment2/Reduction.cl
+31
-3
ALL_BUILD.vcxproj
Assignment2/buildVS15/ALL_BUILD.vcxproj
+0
-0
Assignment.vcxproj
Assignment2/buildVS15/Assignment.vcxproj
+0
-0
ZERO_CHECK.vcxproj
Assignment2/buildVS15/ZERO_CHECK.vcxproj
+0
-0
No files found.
Assignment2/Assignment2/CAssignment2.cpp
View file @
a752caf0
...
@@ -31,8 +31,8 @@ bool CAssignment2::DoCompute()
...
@@ -31,8 +31,8 @@ bool CAssignment2::DoCompute()
cout
<<
"Running parallel prefix sum task..."
<<
endl
<<
endl
;
cout
<<
"Running parallel prefix sum task..."
<<
endl
<<
endl
;
{
{
size_t
LocalWorkSize
[
3
]
=
{
256
,
1
,
1
};
size_t
LocalWorkSize
[
3
]
=
{
256
,
1
,
1
};
CScanTask
scan
(
1024
*
1024
*
64
,
LocalWorkSize
[
0
]);
//
CScanTask scan(1024 * 1024 * 64, LocalWorkSize[0]);
RunComputeTask
(
scan
,
LocalWorkSize
);
//
RunComputeTask(scan, LocalWorkSize);
}
}
...
...
Assignment2/Assignment2/CReductionTask.cpp
View file @
a752caf0
...
@@ -38,11 +38,12 @@ bool CReductionTask::InitResources(cl_device_id Device, cl_context Context)
...
@@ -38,11 +38,12 @@ bool CReductionTask::InitResources(cl_device_id Device, cl_context Context)
{
{
//CPU resources
//CPU resources
m_hInput
=
new
unsigned
int
[
m_N
];
m_hInput
=
new
unsigned
int
[
m_N
];
m_hDebug
=
new
unsigned
int
[
m_N
];
//fill the array with some values
//fill the array with some values
for
(
unsigned
int
i
=
0
;
i
<
m_N
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
m_N
;
i
++
)
//
m_hInput[i] = 1; // Use this for debugging
m_hInput
[
i
]
=
1
;
// Use this for debugging
m_hInput
[
i
]
=
rand
()
&
15
;
//
m_hInput[i] = rand() & 15;
//device resources
//device resources
cl_int
clError
,
clError2
;
cl_int
clError
,
clError2
;
...
@@ -141,26 +142,75 @@ bool CReductionTask::ValidateResults()
...
@@ -141,26 +142,75 @@ bool CReductionTask::ValidateResults()
void
CReductionTask
::
Reduction_InterleavedAddressing
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
void
CReductionTask
::
Reduction_InterleavedAddressing
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
{
{
//cl_int clErr;
cl_int
clErr
;
//size_t globalWorkSize[1];
size_t
globalWorkSize
[
1
];
//size_t localWorkSize[1];
size_t
localWorkSize
[
1
];
//unsigned int stride = ...;
clErr
=
clSetKernelArg
(
m_InterleavedAddressingKernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dPingArray
);
V_RETURN_CL
(
clErr
,
"Error setting Kernel Arg 1"
);
unsigned
int
stride
=
1
;
for
(
unsigned
int
i
=
m_N
/
2
;
i
>
0
;
i
/=
2
)
{
clErr
|=
clSetKernelArg
(
m_InterleavedAddressingKernel
,
1
,
sizeof
(
cl_uint
),
(
void
*
)
&
stride
);
V_RETURN_CL
(
clErr
,
"Error setting Kernel Arg 2"
);
// TO DO: Implement reduction with interleaved addressing
localWorkSize
[
0
]
=
min
(
LocalWorkSize
[
0
],
i
);
globalWorkSize
[
0
]
=
CLUtil
::
GetGlobalWorkSize
(
i
,
localWorkSize
[
0
]);
//for (...) {
clErr
=
clEnqueueNDRangeKernel
(
CommandQueue
,
m_InterleavedAddressingKernel
,
1
,
NULL
,
globalWorkSize
,
localWorkSize
,
0
,
NULL
,
NULL
);
//}
V_RETURN_CL
(
clErr
,
"Error running Kernel"
);
stride
*=
2
;
}
}
}
void
CReductionTask
::
Reduction_SequentialAddressing
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
void
CReductionTask
::
Reduction_SequentialAddressing
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
{
{
// TO DO: Implement reduction with sequential addressing
cl_int
clErr
;
size_t
globalWorkSize
[
1
];
size_t
localWorkSize
[
1
];
clErr
=
clSetKernelArg
(
m_SequentialAddressingKernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dPingArray
);
V_RETURN_CL
(
clErr
,
"Error setting Kernel Arg 1"
);
for
(
unsigned
int
i
=
m_N
/
2
;
i
>
0
;
i
/=
2
)
{
clErr
|=
clSetKernelArg
(
m_SequentialAddressingKernel
,
1
,
sizeof
(
cl_uint
),
(
void
*
)
&
i
);
V_RETURN_CL
(
clErr
,
"Error setting Kernel Arg 2"
);
localWorkSize
[
0
]
=
min
(
LocalWorkSize
[
0
],
i
);
globalWorkSize
[
0
]
=
CLUtil
::
GetGlobalWorkSize
(
i
,
localWorkSize
[
0
]);
clErr
=
clEnqueueNDRangeKernel
(
CommandQueue
,
m_SequentialAddressingKernel
,
1
,
NULL
,
globalWorkSize
,
localWorkSize
,
0
,
NULL
,
NULL
);
V_RETURN_CL
(
clErr
,
"Error running Kernel"
);
}
}
}
void
CReductionTask
::
Reduction_Decomp
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
void
CReductionTask
::
Reduction_Decomp
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
])
{
{
cl_int
clErr
;
size_t
globalWorkSize
[
1
];
size_t
localWorkSize
[
1
];
localWorkSize
[
0
]
=
LocalWorkSize
[
0
];
for
(
unsigned
int
i
=
m_N
/
2
;
i
>
1
;
i
/=
2
*
localWorkSize
[
0
])
{
localWorkSize
[
0
]
=
min
(
localWorkSize
[
0
],
i
);
clErr
=
clSetKernelArg
(
m_DecompKernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dPingArray
);
clErr
|=
clSetKernelArg
(
m_DecompKernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
m_dPongArray
);
clErr
|=
clSetKernelArg
(
m_DecompKernel
,
2
,
sizeof
(
cl_uint
),
(
void
*
)
&
i
);
clErr
|=
clSetKernelArg
(
m_DecompKernel
,
3
,
localWorkSize
[
0
]
*
sizeof
(
cl_uint
),
NULL
);
V_RETURN_CL
(
clErr
,
"Error setting Kernel args"
);
globalWorkSize
[
0
]
=
CLUtil
::
GetGlobalWorkSize
(
i
,
localWorkSize
[
0
]);
clErr
=
clEnqueueNDRangeKernel
(
CommandQueue
,
m_DecompKernel
,
1
,
NULL
,
globalWorkSize
,
localWorkSize
,
0
,
NULL
,
NULL
);
V_RETURN_CL
(
clErr
,
"Error running Kernel"
);
swap
(
m_dPingArray
,
m_dPongArray
);
}
// TO DO: Implement reduction with kernel decomposition
// TO DO: Implement reduction with kernel decomposition
...
@@ -214,6 +264,8 @@ void CReductionTask::ExecuteTask(cl_context Context, cl_command_queue CommandQue
...
@@ -214,6 +264,8 @@ void CReductionTask::ExecuteTask(cl_context Context, cl_command_queue CommandQue
void
CReductionTask
::
TestPerformance
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
],
unsigned
int
Task
)
void
CReductionTask
::
TestPerformance
(
cl_context
Context
,
cl_command_queue
CommandQueue
,
size_t
LocalWorkSize
[
3
],
unsigned
int
Task
)
{
{
//return; //TODO
cout
<<
"Testing performance of task "
<<
g_kernelNames
[
Task
]
<<
endl
;
cout
<<
"Testing performance of task "
<<
g_kernelNames
[
Task
]
<<
endl
;
//write input data to the GPU
//write input data to the GPU
...
...
Assignment2/Assignment2/CReductionTask.h
View file @
a752caf0
...
@@ -59,6 +59,8 @@ protected:
...
@@ -59,6 +59,8 @@ protected:
// input data
// input data
unsigned
int
*
m_hInput
;
unsigned
int
*
m_hInput
;
unsigned
int
*
m_hDebug
;
// results
// results
unsigned
int
m_resultCPU
;
unsigned
int
m_resultCPU
;
unsigned
int
m_resultGPU
[
4
];
unsigned
int
m_resultGPU
[
4
];
...
...
Assignment2/Assignment2/Reduction.cl
View file @
a752caf0
...
@@ -2,21 +2,49 @@
...
@@ -2,21 +2,49 @@
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
Reduction_InterleavedAddressing
(
__global
uint*
array,
uint
stride
)
__kernel
void
Reduction_InterleavedAddressing
(
__global
uint*
array,
uint
stride
)
{
{
//
TO
DO:
Kernel
implementation
int
GID
=
get_global_id
(
0
)
;
int
pos1
=
GID*2*stride
;
int
pos2
=
pos1+stride
;
array[pos1]
=
array[pos1]
+
array[pos2]
;
array[pos2]
=
0
;
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
Reduction_SequentialAddressing
(
__global
uint*
array,
uint
stride
)
__kernel
void
Reduction_SequentialAddressing
(
__global
uint*
array,
uint
stride
)
{
{
//
TO
DO:
Kernel
implementation
int
GID
=
get_global_id
(
0
)
;
int
pos2
=
GID
+
stride
;
array[GID]
=
array[GID]
+
array[pos2]
;
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
Reduction_Decomp
(
const
__global
uint*
inArray,
__global
uint*
outArray,
uint
N,
__local
uint*
localBlock
)
__kernel
void
Reduction_Decomp
(
const
__global
uint*
inArray,
__global
uint*
outArray,
uint
N,
__local
uint*
localBlock
)
{
{
//
TO
DO:
Kernel
implementation
int
ls
=
get_local_size
(
0
)
;
int
gs
=
get_global_size
(
0
)
;
int
gid
=
get_global_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
groupID
=
get_group_id
(
0
)
;
localBlock[lid]
=
inArray[gid]
+
inArray[gid
+
gs]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
uint
stride
=
ls
/
2
; stride > 0; ls /= 2){
if
(
lid
<
stride
)
{
localBlock[lid]
=
inArray[lid]
+
inArray[lid
+
stride]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
=
0
)
outArray[groupID]
=
localBlock[0]
;
}
}
...
...
Assignment2/buildVS15/ALL_BUILD.vcxproj
View file @
a752caf0
This diff is collapsed.
Click to expand it.
Assignment2/buildVS15/Assignment.vcxproj
View file @
a752caf0
This diff is collapsed.
Click to expand it.
Assignment2/buildVS15/ZERO_CHECK.vcxproj
View file @
a752caf0
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment