From cc4318ce5886355ad1b06d5489820db6a3a2a044 Mon Sep 17 00:00:00 2001 From: Gil Pitney Date: Wed, 19 Nov 2014 10:40:13 -0800 Subject: clCreateBuffer(): Ensure allocation meets minimum alignment for double16 type Previously, clCreateBuffer() was using malloc to allocate buffers, which caused havoc with NEON instructions expecting 128 bit alignment for float4 vectors. Now, use posix_memalign() to ensure alignment meets requirements of largest OpenCL data type (double16). Also, update clGetDeviceInfo()'s CL_DEVICE_MEM_BASE_ADDR_ALIGN and CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE accordingly. Signed-off-by: Gil Pitney --- src/core/cpu/buffer.cpp | 6 +++--- src/core/cpu/device.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp index 9125872..00d9279 100644 --- a/src/core/cpu/buffer.cpp +++ b/src/core/cpu/buffer.cpp @@ -89,6 +89,7 @@ void *CPUBuffer::nativeGlobalPointer() const bool CPUBuffer::allocate() { size_t buf_size = p_buffer->size(); + int retval; if (buf_size == 0) // Something went wrong... @@ -97,9 +98,8 @@ bool CPUBuffer::allocate() if (!p_data) { // We don't use a host ptr, we need to allocate a buffer - p_data = std::malloc(buf_size); - - if (!p_data) + retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size. + if (retval) return false; p_data_malloced = true; diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp index eb3fcb1..e444deb 100644 --- a/src/core/cpu/device.cpp +++ b/src/core/cpu/device.cpp @@ -460,12 +460,12 @@ cl_int CPUDevice::info(cl_device_info param_name, SIMPLE_ASSIGN(cl_uint, 0); //images not supported break; - case CL_DEVICE_MEM_BASE_ADDR_ALIGN: - SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(long16)*8) */); // 128 byte + case CL_DEVICE_MEM_BASE_ADDR_ALIGN: // in bits! 
+ SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(double16)*8) */); // 128 byte break; - case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: - SIMPLE_ASSIGN(cl_uint, 16); + case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: // in bytes! + SIMPLE_ASSIGN(cl_uint, 128 /* sizeof(double16) */); break; case CL_DEVICE_SINGLE_FP_CONFIG: -- cgit v1.2.3