Commit d365cd44 by Philipp Adolf

Enable executing NDRange kernels

parent 2e0bd273
......@@ -34,7 +34,7 @@ It is mainly developed and tested on Linux and with Intel and Nvidia GPUs. It sh
- [x] creating and releasing kernels
- [x] setting arguments
- [ ] querying kernel, work group and argument info
- [ ] executing kernels
- [x] executing kernels
- [ ] event objects
- [ ] creating and releasing user events (not planned for now)
- [ ] query event info
......
......@@ -86,3 +86,33 @@ func (q CommandQueue) EnqueueWriteBuffer(memory Memory, offset uintptr, buffer [
}
return nil
}
// EnqueueNDRangeKernel enqueues kernel on the command queue q by calling
// clEnqueueNDRangeKernel.
//
// globalWorkOffset, globalWorkSize and localWorkSize must all have the same,
// non-zero length; that length is passed to OpenCL as the number of work
// dimensions and element i of each slice describes dimension i. waitList is
// forwarded as the event wait list of the call and may be empty, in which
// case a NULL list is passed.
//
// On success the event produced by the call is returned. An error is returned
// if the slice lengths are inconsistent or zero, or if OpenCL reports anything
// other than CL_SUCCESS.
func (q CommandQueue) EnqueueNDRangeKernel(kernel Kernel, globalWorkOffset, globalWorkSize, localWorkSize []uintptr, waitList []Event) (*Event, error) {
	if len(globalWorkOffset) != len(globalWorkSize) || len(globalWorkOffset) != len(localWorkSize) {
		return nil, fmt.Errorf("globalWorkOffset, globalWorkSize and localWorkSize have to have the same length")
	}
	// Taking the address of element 0 below would panic on empty slices, so
	// reject a zero-dimensional range up front instead of crashing the caller.
	if len(globalWorkSize) == 0 {
		return nil, fmt.Errorf("globalWorkOffset, globalWorkSize and localWorkSize have to contain at least one dimension")
	}

	dim := C.cl_uint(len(globalWorkOffset))

	// Copy the Go values into C.size_t slices so their elements have the
	// exact type the C function expects.
	clGlobalWorkOffset := make([]C.size_t, dim)
	clGlobalWorkSize := make([]C.size_t, dim)
	clLocalWorkSize := make([]C.size_t, dim)
	for i := range globalWorkOffset {
		clGlobalWorkOffset[i] = C.size_t(globalWorkOffset[i])
		clGlobalWorkSize[i] = C.size_t(globalWorkSize[i])
		clLocalWorkSize[i] = C.size_t(localWorkSize[i])
	}

	// An empty wait list is passed as a nil pointer together with a count of 0.
	var clEventsPtr *C.cl_event
	if len(waitList) > 0 {
		clEventsPtr = &asCLEventList(waitList)[0]
	}

	var event Event
	err := C.clEnqueueNDRangeKernel(q.queue, kernel.kernel, dim, &clGlobalWorkOffset[0], &clGlobalWorkSize[0], &clLocalWorkSize[0], C.cl_uint(len(waitList)), clEventsPtr, &event.event)
	if err != C.CL_SUCCESS {
		return nil, fmt.Errorf("failed to enqueue kernel: %d", err)
	}
	return &event, nil
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment