diff options
Diffstat (limited to 'src/core/events.cpp')
-rw-r--r-- | src/core/events.cpp | 1519 |
1 files changed, 1519 insertions, 0 deletions
diff --git a/src/core/events.cpp b/src/core/events.cpp new file mode 100644 index 0000000..629a0c9 --- /dev/null +++ b/src/core/events.cpp @@ -0,0 +1,1519 @@ +/* + * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> + * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/ + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the copyright holder nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file events.cpp + * \brief Events inheriting \c Coal::Event + */ + +#include "events.h" +#include "commandqueue.h" +#include "memobject.h" +#include "kernel.h" +#include "deviceinterface.h" + +#include <cstdlib> +#include <cstring> +#include <iostream> + +using namespace Coal; + +/* + * Read/Write buffers + */ + +BufferEvent::BufferEvent(CommandQueue *parent, + MemObject *buffer, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret), + p_buffer(buffer) +{ + clRetainMemObject((cl_mem) p_buffer); + + if (*errcode_ret != CL_SUCCESS) return; + + // Correct buffer + if (!buffer) + { + *errcode_ret = CL_INVALID_MEM_OBJECT; + return; + } + + // Buffer's context must match the CommandQueue one + Context *ctx = 0; + *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *), &ctx, 0); + + if (*errcode_ret != CL_SUCCESS) return; + + if ((Context *)buffer->parent() != ctx) + { + *errcode_ret = CL_INVALID_CONTEXT; + return; + } + + // Alignment of SubBuffers + DeviceInterface *device = 0; + *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), + &device, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if (!isSubBufferAligned(buffer, device)) + { + *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET; + return; + } + + // Allocate the buffer for the device + if (!buffer->allocate(device)) + { + *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE; + return; + } +} + +BufferEvent::~BufferEvent() +{ + clReleaseMemObject((cl_mem) p_buffer); +} + +MemObject *BufferEvent::buffer() const +{ + return p_buffer; +} + +bool BufferEvent::isSubBufferAligned(const MemObject *buffer, + const DeviceInterface *device) +{ + cl_uint align; + cl_int rs; + + if (buffer->type() != MemObject::SubBuffer) + return true; + + rs = device->info(CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), + &align, 0); + + if (rs != CL_SUCCESS) + return false; + + size_t mask = 0; + if (align != 0) mask = align - 1; + + if (((SubBuffer *)buffer)->offset() & mask) + return false; + + return true; +} + +ReadWriteBufferEvent::ReadWriteBufferEvent(CommandQueue *parent, + MemObject *buffer, + size_t offset, + size_t cb, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret), + p_offset(offset), p_cb(cb), p_ptr(ptr) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check for out-of-bounds reads + if (!ptr) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (offset + cb > buffer->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +size_t ReadWriteBufferEvent::offset() const +{ + return p_offset; +} + +size_t ReadWriteBufferEvent::cb() const +{ + return p_cb; +} + +void *ReadWriteBufferEvent::ptr() const +{ + return p_ptr; +} + +ReadBufferEvent::ReadBufferEvent(CommandQueue *parent, + MemObject *buffer, + size_t offset, + size_t cb, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteBufferEvent(parent, buffer, offset, cb, ptr, num_events_in_wait_list, + event_wait_list, errcode_ret) +{} + +Event::Type ReadBufferEvent::type() const +{ + return Event::ReadBuffer; +} + +WriteBufferEvent::WriteBufferEvent(CommandQueue *parent, + MemObject *buffer, + size_t offset, + size_t cb, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteBufferEvent(parent, buffer, offset, cb, ptr, num_events_in_wait_list, + event_wait_list, errcode_ret) +{} + +Event::Type WriteBufferEvent::type() const +{ + return Event::WriteBuffer; +} + +MapBufferEvent::MapBufferEvent(CommandQueue *parent, + MemObject *buffer, + size_t offset, + size_t cb, + cl_map_flags map_flags, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret), + p_offset(offset), p_cb(cb), p_map_flags(map_flags) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check flags + if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check for out-of-bounds values + if (offset + cb > buffer->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +Event::Type MapBufferEvent::type() const +{ + return Event::MapBuffer; +} + +size_t MapBufferEvent::offset() const +{ + return p_offset; +} + +size_t MapBufferEvent::cb() const +{ + return p_cb; +} + +cl_map_flags MapBufferEvent::flags() const +{ + return p_map_flags; +} + +void *MapBufferEvent::ptr() const +{ + return p_ptr; +} + +void MapBufferEvent::setPtr(void *ptr) +{ + p_ptr = ptr; +} + +MapImageEvent::MapImageEvent(CommandQueue *parent, + Image2D *image, + cl_map_flags map_flags, + const size_t origin[3], + const size_t region[3], + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent (parent, image, num_events_in_wait_list, event_wait_list, errcode_ret) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check flags + if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Copy the vectors + if (origin) + std::memcpy(&p_origin, origin, 3 * sizeof(size_t)); + else + std::memset(&p_origin, 0, 3 * sizeof(size_t)); + + for (unsigned int i=0; i<3; ++i) + { + if (!region[i]) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_region[i] = region[i]; + } + + // Multiply the elements (for images) + p_region[0] *= image->pixel_size(); + p_origin[0] *= image->pixel_size(); + + // Check for overflow + if (image->type() == MemObject::Image2D && + (origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check for out-of-bounds + if ((p_origin[0] + p_region[0]) > image->row_pitch() || + (p_origin[1] + p_region[1]) * image->row_pitch() > image->slice_pitch() || + (p_origin[2] + p_region[2]) * image->slice_pitch() > image->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +Event::Type MapImageEvent::type() const +{ + return Event::MapImage; +} + + +cl_map_flags MapImageEvent::flags() const +{ + return p_map_flags; +} + +size_t MapImageEvent::origin (unsigned int index) const +{ + return p_origin[index]; +} + +size_t MapImageEvent::region (unsigned int index) const +{ + return p_region[index]; +} + +size_t MapImageEvent::row_pitch() const +{ + return p_row_pitch; +} + +size_t MapImageEvent::slice_pitch() const +{ + return p_slice_pitch; +} + +void *MapImageEvent::ptr() const +{ + return p_ptr; +} + +void MapImageEvent::setRowPitch (size_t row_pitch) +{ + p_row_pitch = row_pitch; +} + +void MapImageEvent::setSlicePitch (size_t slice_pitch) +{ + p_slice_pitch = slice_pitch; +} + +void MapImageEvent::setPtr (void *ptr) +{ + p_ptr = ptr; +} + +UnmapBufferEvent::UnmapBufferEvent(CommandQueue *parent, + MemObject *buffer, + void *mapped_addr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret), + p_mapping(mapped_addr) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // TODO: Check that p_mapping is ok (will be done in the drivers) + if (!mapped_addr) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +Event::Type UnmapBufferEvent::type() const +{ + return Event::UnmapMemObject; +} + +void *UnmapBufferEvent::mapping() const +{ + return p_mapping; +} + +CopyBufferEvent::CopyBufferEvent(CommandQueue *parent, + MemObject *source, + MemObject *destination, + size_t src_offset, + size_t dst_offset, + size_t cb, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent(parent, source, num_events_in_wait_list, event_wait_list, + errcode_ret), p_destination(destination), p_src_offset(src_offset), + p_dst_offset(dst_offset), p_cb(cb) +{ + clRetainMemObject((cl_mem) p_destination); + + if (*errcode_ret != CL_SUCCESS) return; + + if (!destination) + { + *errcode_ret = CL_INVALID_MEM_OBJECT; + return; + } + + // Check for out-of-bounds + if (src_offset + cb > source->size() || + dst_offset + cb > destination->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check for overlap + if (source == destination) + { + if ((src_offset < dst_offset && src_offset + cb > dst_offset) || + (dst_offset < src_offset && dst_offset + cb > src_offset)) + { + *errcode_ret = CL_MEM_COPY_OVERLAP; + return; + } + } + + // Check alignement of destination + DeviceInterface *device = 0; + *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), + &device, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if (!isSubBufferAligned(destination, device)) + { + *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET; + return; + } + + // Allocate the buffer for the device + if (!destination->allocate(device)) + { + *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE; + return; + } +} + +CopyBufferEvent::~CopyBufferEvent() +{ + clReleaseMemObject((cl_mem) p_destination); +} + +MemObject *CopyBufferEvent::source() const +{ + return buffer(); +} + +MemObject *CopyBufferEvent::destination() const +{ + return p_destination; +} + +size_t CopyBufferEvent::src_offset() const +{ + return p_src_offset; +} + +size_t CopyBufferEvent::dst_offset() const +{ + return p_dst_offset; +} + +size_t CopyBufferEvent::cb() const +{ + return p_cb; +} + +Event::Type CopyBufferEvent::type() const +{ + return Event::CopyBuffer; +} + +/* + * Native kernel + */ +NativeKernelEvent::NativeKernelEvent(CommandQueue *parent, + void (*user_func)(void *), + void *args, + size_t cb_args, + cl_uint num_mem_objects, + const MemObject **mem_list, + const void **args_mem_loc, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: Event (parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret), + p_user_func((void *)user_func), p_args(0) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Parameters sanity + if (!user_func) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (!args && (cb_args || num_mem_objects)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (args && !cb_args) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (num_mem_objects && (!mem_list || !args_mem_loc)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (!num_mem_objects && (mem_list || args_mem_loc)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check that the device can execute a native kernel + DeviceInterface *device; + cl_device_exec_capabilities caps; + + *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), + &device, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + *errcode_ret = device->info(CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(cl_device_exec_capabilities), &caps, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if ((caps & CL_EXEC_NATIVE_KERNEL) == 0) + { + *errcode_ret = CL_INVALID_OPERATION; + return; + } + + // Copy the arguments in a new list + if (cb_args) + { + p_args = std::malloc(cb_args); + + if (!p_args) + { + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return; + } + + std::memcpy((void *)p_args, (void *)args, cb_args); + + // Replace memory objects with global pointers + for (cl_uint i=0; i<num_mem_objects; ++i) + { + const MemObject *buffer = mem_list[i]; + const char *loc = (const char *)args_mem_loc[i]; + + if (!buffer) + { + *errcode_ret = CL_INVALID_MEM_OBJECT; + return; + } + + // We need to do relocation : loc is in args, we need it in p_args + size_t delta = (char *)p_args - (char *)args; + loc += delta; + + *(void **)loc = buffer->deviceBuffer(device)->nativeGlobalPointer(); + } + } +} + +NativeKernelEvent::~NativeKernelEvent() +{ + if (p_args) + std::free((void *)p_args); +} + +Event::Type NativeKernelEvent::type() const +{ + return Event::NativeKernel; +} + +void *NativeKernelEvent::function() const +{ + return p_user_func; +} + +void *NativeKernelEvent::args() const +{ + return p_args; +} + +/* + * Kernel event + */ +KernelEvent::KernelEvent(CommandQueue *parent, + Kernel *kernel, + cl_uint work_dim, + const size_t *global_work_offset, + const size_t *global_work_size, + const size_t *local_work_size, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret), + p_work_dim(work_dim), p_kernel(kernel) +{ + clRetainKernel((cl_kernel) p_kernel); + + if (*errcode_ret != CL_SUCCESS) return; + + *errcode_ret = CL_SUCCESS; + + // Sanity checks + if (!kernel) + { + *errcode_ret = CL_INVALID_KERNEL; + return; + } + + // Check that the kernel was built for parent's device. + DeviceInterface *device; + Context *k_ctx, *q_ctx; + size_t max_work_group_size; + cl_uint max_dims = 0; + + *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), + &device, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *), &q_ctx, 0); + *errcode_ret |= kernel->info(CL_KERNEL_CONTEXT, sizeof(Context *), &k_ctx, 0); + *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), + &max_work_group_size, 0); + *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(size_t), + &max_dims, 0); + *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES, + max_dims * sizeof(size_t), p_max_work_item_sizes, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + p_dev_kernel = kernel->deviceDependentKernel(device); + + if (!p_dev_kernel) + { + *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE; + return; + } + + // Check that contexts match + if (k_ctx != q_ctx) + { + *errcode_ret = CL_INVALID_CONTEXT; + return; + } + + // Check args + if (!kernel->argsSpecified()) + { + *errcode_ret = CL_INVALID_KERNEL_ARGS; + return; + } + + // Check dimension + if (work_dim == 0 || work_dim > max_dims) + { + *errcode_ret = CL_INVALID_WORK_DIMENSION; + return; + } + + // Populate work_offset, work_size and local_work_size + size_t work_group_size = 1; + boost::tuple <uint,uint,uint> reqd_work_group_size( + kernel->reqdWorkGroupSize(kernel->deviceDependentModule(device))); + + uint reqd_x = reqd_work_group_size.get<0>(); + uint reqd_y = reqd_work_group_size.get<1>(); + uint reqd_z = reqd_work_group_size.get<2>(); + bool reqd_any = reqd_x > 0 || reqd_y > 0 || reqd_z > 0; + + if (reqd_any) + { + // if __attribute__((reqd_work_group_size(X, Y, Z))) is set and local size not specified + if (!local_work_size) + { + *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; + return; + } + + // if __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match + else + { + if (( local_work_size[0] != reqd_x) || + (work_dim > 1 && local_work_size[1] != reqd_y) || + (work_dim > 2 && local_work_size[2] != reqd_z)) + { + *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; + return; + } + } + } + + cl_uint i; + for (i=0; i<work_dim; ++i) + { + if (global_work_offset) + { + p_global_work_offset[i] = global_work_offset[i]; + } + else + { + p_global_work_offset[i] = 0; + } + + if (!global_work_size || !global_work_size[i]) + { + *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE; + } + p_global_work_size[i] = global_work_size[i]; + + if (!local_work_size) + { + // Guess the best value according to the device + p_local_work_size[i] = + p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]); + } + else + { + // Check divisibility + if ((global_work_size[i] % local_work_size[i]) != 0) + { + *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; + return; + } + + // Not too big ? + if (local_work_size[i] > p_max_work_item_sizes[i]) + { + *errcode_ret = CL_INVALID_WORK_ITEM_SIZE; + return; + } + + p_local_work_size[i] = local_work_size[i]; + work_group_size *= local_work_size[i]; + } + } + // initialize missing dimensions + for (; i < max_dims; i++) + { + p_global_work_offset[i] = 0; + p_global_work_size[i] = 1; + p_local_work_size[i] = 1; + } + + // Check we don't ask too much to the device + if (work_group_size > max_work_group_size) + { + *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; + return; + } + + // Check arguments (buffer alignment, image size, ...) + for (unsigned int i=0; i<kernel->numArgs(); ++i) + { + const Kernel::Arg *a = kernel->arg(i); + + if (a->kind() == Kernel::Arg::Buffer && a->file() != Kernel::Arg::Local) + { + const MemObject *buffer = *(const MemObject **)(a->value(0)); + + if (!BufferEvent::isSubBufferAligned(buffer, device)) + { + *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET; + return; + } + } + else if (a->kind() == Kernel::Arg::Image2D) + { + const Image2D *image = *(const Image2D **)(a->value(0)); + size_t maxWidth, maxHeight; + + *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(size_t), &maxWidth, 0); + *errcode_ret |= device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(size_t), &maxHeight, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if (image->width() > maxWidth || image->height() > maxHeight) + { + *errcode_ret = CL_INVALID_IMAGE_SIZE; + return; + } + } + else if (a->kind() == Kernel::Arg::Image3D) + { + const Image3D *image = *(const Image3D **)a->value(0); + size_t maxWidth, maxHeight, maxDepth; + + *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(size_t), &maxWidth, 0); + *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(size_t), &maxHeight, 0); + *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(size_t), &maxDepth, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if (image->width() > maxWidth || image->height() > maxHeight || + image->depth() > maxDepth) + { + *errcode_ret = CL_INVALID_IMAGE_SIZE; + return; + } + } + } +} + +KernelEvent::~KernelEvent() +{ + clReleaseKernel((cl_kernel) p_kernel); +} + +cl_uint KernelEvent::work_dim() const +{ + return p_work_dim; +} + +size_t KernelEvent::global_work_offset(cl_uint dim) const +{ + return p_global_work_offset[dim]; +} + +size_t KernelEvent::global_work_size(cl_uint dim) const +{ + return p_global_work_size[dim]; +} + +size_t KernelEvent::local_work_size(cl_uint dim) const +{ + return p_local_work_size[dim]; +} + +Kernel *KernelEvent::kernel() const +{ + return p_kernel; +} + +DeviceKernel *KernelEvent::deviceKernel() const +{ + return p_dev_kernel; +} + +Event::Type KernelEvent::type() const +{ + return Event::NDRangeKernel; +} + +static size_t one = 1; + +TaskEvent::TaskEvent(CommandQueue *parent, + Kernel *kernel, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: KernelEvent(parent, kernel, 1, 0, &one, &one, num_events_in_wait_list, + event_wait_list, errcode_ret) +{ + // TODO: CL_INVALID_WORK_GROUP_SIZE if + // __attribute__((reqd_work_group_size(X, Y, Z))) != (1, 1, 1) +} + +Event::Type TaskEvent::type() const +{ + return Event::TaskKernel; +} + +/* + * User event + */ +UserEvent::UserEvent(Context *context, cl_int *errcode_ret) +: Event(0, Submitted, 0, 0, errcode_ret), p_context(context) +{} + +Event::Type UserEvent::type() const +{ + return Event::User; +} + +Context *UserEvent::context() const +{ + return p_context; +} + +/* + * ReadWriteBufferRectEvent + */ +ReadWriteCopyBufferRectEvent::ReadWriteCopyBufferRectEvent(CommandQueue *parent, + MemObject *source, + const size_t src_origin[3], + const size_t dst_origin[3], + const size_t region[3], + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + unsigned int bytes_per_element, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: BufferEvent (parent, source, num_events_in_wait_list, event_wait_list, + errcode_ret) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Copy the vectors + if (src_origin) + std::memcpy(&p_src_origin, src_origin, 3 * sizeof(size_t)); + else + std::memset(&p_src_origin, 0, 3 * sizeof(size_t)); + + if (dst_origin) + std::memcpy(&p_dst_origin, dst_origin, 3 * sizeof(size_t)); + else + std::memset(&p_dst_origin, 0, 3 * sizeof(size_t)); + + for (unsigned int i=0; i<3; ++i) + { + if (!region[i]) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_region[i] = region[i]; + } + + // Multiply the elements (for images) + p_region[0] *= bytes_per_element; + p_src_origin[0] *= bytes_per_element; + p_dst_origin[0] *= bytes_per_element; + + // Compute the pitches + p_src_row_pitch = p_region[0]; + + if (src_row_pitch) + { + if (src_row_pitch < p_src_row_pitch) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_src_row_pitch = src_row_pitch; + } + + p_src_slice_pitch = p_region[1] * p_src_row_pitch; + + if (src_slice_pitch) + { + if (src_slice_pitch < p_src_slice_pitch) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_src_slice_pitch = src_slice_pitch; + } + + p_dst_row_pitch = p_region[0]; + + if (dst_row_pitch) + { + if (dst_row_pitch < p_dst_row_pitch) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_dst_row_pitch = dst_row_pitch; + } + + p_dst_slice_pitch = p_region[1] * p_dst_row_pitch; + + if (dst_slice_pitch) + { + if (dst_slice_pitch < p_dst_slice_pitch) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + p_dst_slice_pitch = dst_slice_pitch; + } +} + +size_t ReadWriteCopyBufferRectEvent::src_origin(unsigned int index) const +{ + return p_src_origin[index]; +} + +size_t ReadWriteCopyBufferRectEvent::dst_origin(unsigned int index) const +{ + return p_dst_origin[index]; +} + +size_t ReadWriteCopyBufferRectEvent::region(unsigned int index) const +{ + return p_region[index]; +} + +size_t ReadWriteCopyBufferRectEvent::src_row_pitch() const +{ + return p_src_row_pitch; +} + +size_t ReadWriteCopyBufferRectEvent::src_slice_pitch() const +{ + return p_src_slice_pitch; +} + +size_t ReadWriteCopyBufferRectEvent::dst_row_pitch() const +{ + return p_dst_row_pitch; +} + +size_t ReadWriteCopyBufferRectEvent::dst_slice_pitch() const +{ + return p_dst_slice_pitch; +} + +MemObject *ReadWriteCopyBufferRectEvent::source() const +{ + return buffer(); +} + +CopyBufferRectEvent::CopyBufferRectEvent(CommandQueue *parent, + MemObject *source, + MemObject *destination, + const size_t src_origin[3], + const size_t dst_origin[3], + const size_t region[3], + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + unsigned int bytes_per_element, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteCopyBufferRectEvent(parent, source, src_origin, dst_origin, region, + src_row_pitch, src_slice_pitch, dst_row_pitch, + dst_slice_pitch, bytes_per_element, + num_events_in_wait_list, event_wait_list, errcode_ret), + p_destination(destination) +{ + if (*errcode_ret != CL_SUCCESS) return; + + if (!destination) + { + *errcode_ret = CL_INVALID_MEM_OBJECT; + return; + } + + // Check for out-of-bounds + if ((p_src_origin[0] + p_region[0]) > p_src_row_pitch || + (p_src_origin[1] + p_region[1]) * p_src_row_pitch > p_src_slice_pitch || + (p_src_origin[2] + p_region[2]) * p_src_slice_pitch > source->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if ((p_dst_origin[0] + p_region[0]) > p_dst_row_pitch || + (p_dst_origin[1] + p_region[1]) * p_dst_row_pitch > p_dst_slice_pitch || + (p_dst_origin[2] + p_region[2]) * p_dst_slice_pitch > destination->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check for overlapping + if (source == destination) + { + unsigned char overlapping_dimensions = 0; + + for (unsigned int i=0; i<3; ++i) + { + if ((p_dst_origin[i] < p_src_origin[i] && p_dst_origin[i] + p_region[i] > p_src_origin[i]) || + (p_src_origin[i] < p_dst_origin[i] && p_src_origin[i] + p_region[i] > p_dst_origin[i])) + overlapping_dimensions++; + } + + if (overlapping_dimensions == 3) + { + // If all the dimensions are overlapping, the region is overlapping + *errcode_ret = CL_MEM_COPY_OVERLAP; + return; + } + } + + // Check alignment of destination (source already checked by BufferEvent) + DeviceInterface *device = 0; + *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), + &device, 0); + + if (*errcode_ret != CL_SUCCESS) + return; + + if (!isSubBufferAligned(destination, device)) + { + *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET; + return; + } + + // Allocate the buffer for the device + if (!destination->allocate(device)) + { + *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE; + return; + } +} + +Event::Type CopyBufferRectEvent::type() const +{ + return Event::CopyBufferRect; +} + +MemObject *CopyBufferRectEvent::destination() const +{ + return p_destination; +} + +ReadWriteBufferRectEvent::ReadWriteBufferRectEvent(CommandQueue *parent, + MemObject *buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + unsigned int bytes_per_element, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteCopyBufferRectEvent(parent, buffer, buffer_origin, host_origin, region, + buffer_row_pitch, buffer_slice_pitch, + host_row_pitch, host_slice_pitch, bytes_per_element, + num_events_in_wait_list, event_wait_list, errcode_ret), + p_ptr(ptr) +{ + if (*errcode_ret != CL_SUCCESS) return; + + if (!ptr) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check for out-of-bounds + if ((p_src_origin[0] + p_region[0]) > p_src_row_pitch || + (p_src_origin[1] + p_region[1]) * p_src_row_pitch > p_src_slice_pitch || + (p_src_origin[2] + p_region[2]) * p_src_slice_pitch > buffer->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +void *ReadWriteBufferRectEvent::ptr() const +{ + return p_ptr; +} + +ReadBufferRectEvent::ReadBufferRectEvent (CommandQueue *parent, + MemObject *buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteBufferRectEvent(parent, buffer, buffer_origin, host_origin, region, + buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, ptr, 1, num_events_in_wait_list, + event_wait_list, errcode_ret) +{ +} + +Event::Type ReadBufferRectEvent::type() const +{ + return ReadBufferRect; +} + +WriteBufferRectEvent::WriteBufferRectEvent (CommandQueue *parent, + MemObject *buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteBufferRectEvent (parent, buffer, buffer_origin, host_origin, region, + buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, ptr, 1, num_events_in_wait_list, + event_wait_list, errcode_ret) +{ +} + +Event::Type WriteBufferRectEvent::type() const +{ + return WriteBufferRect; +} + +ReadWriteImageEvent::ReadWriteImageEvent (CommandQueue *parent, + Image2D *image, + const size_t origin[3], + const size_t region[3], + size_t row_pitch, + size_t slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteBufferRectEvent(parent, image, origin, 0, region, image->row_pitch(), + image->slice_pitch(), row_pitch, slice_pitch, ptr, + image->pixel_size(), num_events_in_wait_list, + event_wait_list, errcode_ret) +{ + if (*errcode_ret != CL_SUCCESS) return; + + if (image->type() == MemObject::Image2D && + (origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +ReadImageEvent::ReadImageEvent(CommandQueue *parent, + Image2D *image, + const size_t origin[3], + const size_t region[3], + size_t row_pitch, + size_t slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteImageEvent(parent, image, origin, region, row_pitch, slice_pitch, ptr, + num_events_in_wait_list, event_wait_list, errcode_ret) +{} + +Event::Type ReadImageEvent::type() const +{ + return Event::ReadImage; +} + +WriteImageEvent::WriteImageEvent(CommandQueue *parent, + Image2D *image, + const size_t origin[3], + const size_t region[3], + size_t row_pitch, + size_t slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: ReadWriteImageEvent (parent, image, origin, region, row_pitch, slice_pitch, ptr, + num_events_in_wait_list, event_wait_list, errcode_ret) +{} + +Event::Type WriteImageEvent::type() const +{ + return Event::WriteImage; +} + +static bool operator!=(const cl_image_format &a, const cl_image_format &b) +{ + return (a.image_channel_data_type != b.image_channel_data_type) || + (a.image_channel_order != b.image_channel_order); +} + +CopyImageEvent::CopyImageEvent(CommandQueue *parent, + Image2D *source, + Image2D *destination, + const size_t src_origin[3], + const size_t dst_origin[3], + const size_t region[3], + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: CopyBufferRectEvent (parent, source, destination, src_origin, dst_origin, + region, source->row_pitch(), source->slice_pitch(), + destination->row_pitch(), destination->slice_pitch(), + source->pixel_size(), num_events_in_wait_list, + event_wait_list, errcode_ret) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check bounds + if (source->type() == MemObject::Image2D && + (src_origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + if (destination->type() == MemObject::Image2D && + (dst_origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Formats must match + if (source->format() != destination->format()) + { + *errcode_ret = CL_IMAGE_FORMAT_MISMATCH; + return; + } +} + +Event::Type CopyImageEvent::type() const +{ + return Event::CopyImage; +} + +CopyImageToBufferEvent::CopyImageToBufferEvent(CommandQueue *parent, + Image2D *source, + MemObject *destination, + const size_t src_origin[3], + const size_t region[3], + size_t dst_offset, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: CopyBufferRectEvent(parent, source, destination, src_origin, 0, region, + source->row_pitch(), source->slice_pitch(), 0, 0, + source->pixel_size(), num_events_in_wait_list, + event_wait_list, errcode_ret), + p_offset(dst_offset) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check for buffer overflow + size_t dst_cb = region[2] * region[1] * region[0] * source->pixel_size(); + + if (dst_offset + dst_cb > destination->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check validity + if (source->type() == MemObject::Image2D && + (src_origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +size_t CopyImageToBufferEvent::offset() const +{ + return p_offset; +} + +Event::Type CopyImageToBufferEvent::type() const +{ + return Event::CopyImageToBuffer; +} + +CopyBufferToImageEvent::CopyBufferToImageEvent(CommandQueue *parent, + MemObject *source, + Image2D *destination, + size_t src_offset, + const size_t dst_origin[3], + const size_t region[3], + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: CopyBufferRectEvent(parent, source, destination, 0, dst_origin, region, 0, 0, + destination->row_pitch(), destination->slice_pitch(), + destination->pixel_size(), num_events_in_wait_list, + event_wait_list, errcode_ret), + p_offset(src_offset) +{ + if (*errcode_ret != CL_SUCCESS) return; + + // Check for buffer overflow + size_t src_cb = region[2] * region[1] * region[0] * destination->pixel_size(); + + if (src_offset + src_cb > source->size()) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } + + // Check validity + if (destination->type() == MemObject::Image2D && + (dst_origin[2] != 0 || region[2] != 1)) + { + *errcode_ret = CL_INVALID_VALUE; + return; + } +} + +size_t CopyBufferToImageEvent::offset() const +{ + return p_offset; +} + +Event::Type CopyBufferToImageEvent::type() const +{ + return Event::CopyBufferToImage; +} + +/* + * Barrier + */ + +BarrierEvent::BarrierEvent(CommandQueue *parent, cl_int *errcode_ret) +: Event(parent, Queued, 0, 0, errcode_ret) +{} + +Event::Type BarrierEvent::type() const +{ + return Event::Barrier; +} + +/* + * WaitForEvents + */ + +WaitForEventsEvent::WaitForEventsEvent(CommandQueue *parent, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret) +{} + +Event::Type WaitForEventsEvent::type() const +{ + return Event::WaitForEvents; +} + +/* + * Marker + */ +MarkerEvent::MarkerEvent(CommandQueue *parent, + cl_uint num_events_in_wait_list, + const Event **event_wait_list, + cl_int *errcode_ret) +: WaitForEventsEvent(parent, num_events_in_wait_list, event_wait_list, errcode_ret) +{} + +Event::Type MarkerEvent::type() const +{ + return Event::Marker; +} |