From 183e63662bf50e5cb18e49db7a122f38c6a43f7b Mon Sep 17 00:00:00 2001 From: Gil Pitney Date: Fri, 21 Nov 2014 18:16:58 -0800 Subject: Fixed kernel stub argument marshalling code to ensure proper alignment Previously, shamrock was generating load instructions in the kernel stub with a strict type alignment, which was not being followed by the argument marshalling code. This resulted in a NEON vld1 instruction failing in the kernel stub, and trashing a base register which was not 16-byte aligned. Now the marshalling code calculates the proper alignment for each argument, and is based on a buffer aligned to double16 to begin with. With this patch, all the vector sub-tests of the Khronos basic_parameter_types test now pass. Signed-off-by: Gil Pitney --- src/core/cpu/kernel.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp index e81391f..49e4dcc 100644 --- a/src/core/cpu/kernel.cpp +++ b/src/core/cpu/kernel.cpp @@ -290,10 +290,11 @@ size_t CPUKernel::typeOffset(size_t &offset, size_t type_len) // Align offset to stype_len type_len = next_power_of_two(type_len); - size_t mask = ~(type_len - 1); + size_t mask = (type_len - 1); - while (rs & mask != rs) - rs++; + if (rs&mask) { + rs += (type_len - rs%type_len); + } // Where to try to place the next value offset = rs + type_len; @@ -566,9 +567,8 @@ void *CPUKernelWorkGroup::callArgs(std::vector &locals_to_free) CPUKernel::typeOffset(args_size, arg->valueSize() * arg->vecDim()); } - rs = std::malloc(args_size); - - if (!rs) + int retval = posix_memalign(&rs, 128, args_size); // align for type double16 size. + if (retval || !rs) return NULL; size_t arg_offset = 0; @@ -649,6 +649,10 @@ bool CPUKernelWorkGroup::run() std::vector locals_to_free; llvm::Function *kernel_func = p_kernel->callFunction(); +#if 0 // Let's see the stub's IR: + kernel_func->dump(); +#endif + if (!kernel_func) return false; -- cgit v1.2.3