From 339e5752735eafd65a1cd72106391dcb635c2678 Mon Sep 17 00:00:00 2001
From: Gil Pitney
Date: Fri, 12 Jun 2015 23:14:58 +0000
Subject: Allow CPUBuffers for CPUDevices to share copied host pointer data

Device data allocated for Buffer objects allocated via clCreateBuffer()
using the CL_MEM_COPY_HOST_PTR flag for CPUDevices should be allocated only
once in global device memory, and shared between the CPUDevices.

Previously, shamrock was creating a brand new allocation for each device
buffer, for the same MemObject.

This was causing the test_device_partition Khronos test to fail (for device
fission).

This is now fixed, by enabling sharing of device data.

Signed-off-by: Gil Pitney
---
 src/core/cpu/buffer.cpp | 36 +++++++++++++++++++++++-------------
 src/core/memobject.cpp  | 17 +++++++++++++----
 src/core/memobject.h    |  4 +++-
 3 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp
index 00d9279..97c61d9 100644
--- a/src/core/cpu/buffer.cpp
+++ b/src/core/cpu/buffer.cpp
@@ -90,25 +90,35 @@ bool CPUBuffer::allocate()
 {
     size_t buf_size = p_buffer->size();
     int retval;
+    void *shared_ptr = p_buffer->shared_ptr();
 
     if (buf_size == 0)
         // Something went wrong...
         return false;
 
-    if (!p_data)
-    {
-        // We don't use a host ptr, we need to allocate a buffer
-        retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size.
-        if (retval)
-            return false;
-
-        p_data_malloced = true;
+    if (!shared_ptr) {
+        if (!p_data)
+        {
+            // We don't use a host ptr, we need to allocate a buffer
+            retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size.
+            if (retval)
+                return false;
+
+            p_data_malloced = true;
+
+            // Now set the shared data pointer, so we need not allocate again for this MemObject:
+            p_buffer->setSharedPtr(p_data);
+        }
+
+        if (p_buffer->type() != MemObject::SubBuffer &&
+            p_buffer->flags() & CL_MEM_COPY_HOST_PTR)
+        {
+            std::memcpy(p_data, p_buffer->host_ptr(), buf_size);
+        }
     }
-
-    if (p_buffer->type() != MemObject::SubBuffer &&
-        p_buffer->flags() & CL_MEM_COPY_HOST_PTR)
-    {
-        std::memcpy(p_data, p_buffer->host_ptr(), buf_size);
+    else {
+        // If the CPUBuffer data has already been allocated by the first device, use it:
+        if (!p_data) p_data = shared_ptr;
     }
 
     // Say to the memobject that we are allocated
diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp
index 3912740..bd8736f 100644
--- a/src/core/memobject.cpp
+++ b/src/core/memobject.cpp
@@ -51,7 +51,7 @@ using namespace Coal;
 MemObject::MemObject(Context *ctx, cl_mem_flags flags, void *host_ptr,
                      cl_int *errcode_ret)
 : Object(Object::T_MemObject, ctx), p_num_devices(0), p_flags(flags),
-  p_host_ptr(host_ptr), p_devicebuffers(0), p_dtor_callback_stack()
+  p_host_ptr(host_ptr), p_devicebuffers(0), p_dtor_callback_stack(), p_shared_ptr(NULL)
 {
     // Check the flags value
     const cl_mem_flags all_flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY |
@@ -168,8 +168,7 @@ cl_int MemObject::init()
 
     // If we have more than one device, the allocation on the devices is
     // defered to first use, so host_ptr can become invalid. So, copy it in
-    // a RAM location and keep it. Also, set a flag telling CPU devices that
-    // they don't need to reallocate and re-copy host_ptr
+    // a RAM location and keep it.
     // SubBuffer should simply reuse Buffer data
     if (p_num_devices > 1 && (p_flags & CL_MEM_COPY_HOST_PTR) &&
         type() != SubBuffer)
@@ -215,7 +214,7 @@ cl_int MemObject::init()
     std::free((void *)devices);
     devices = 0;
 
-    // If we have only one device, already allocate the buffer
+    // If we have only one device, pre-allocate the buffer
     if (p_num_devices == 1)
     {
         if (!p_devicebuffers[0]->allocate())
@@ -259,6 +258,16 @@ void *MemObject::host_ptr() const
     }
 }
 
+void *MemObject::shared_ptr() const
+{
+    return p_shared_ptr;
+}
+
+void MemObject::setSharedPtr(void *ptr)
+{
+    p_shared_ptr = ptr;
+}
+
 DeviceBuffer *MemObject::deviceBuffer(DeviceInterface *device) const
 {
     for (unsigned int i=0; i dtor_callback_t;
     concurrent_stack p_dtor_callback_stack;
-- 
cgit v1.2.3