path: root/src/core/cpu/sampler.cpp
author    Gil Pitney <gil.pitney@linaro.org>    2014-10-28 18:00:42 -0700
committer Gil Pitney <gil.pitney@linaro.org>    2014-10-28 18:00:42 -0700
commit    61b2c94d9e64758e55730be6a3fc9006c171db85 (patch)
tree      f564f09ebf93ba293dfa225bd374df6f1f37aa01 /src/core/cpu/sampler.cpp
Initial Commit: Based on TI OpenCL v0.8, originally based on clover. (shamrock_v0.8)
This is a continuation of the clover OpenCL project (http://people.freedesktop.org/~steckdenis/clover), based on the contributions from Texas Instruments for the Keystone II DSP device (git.ti.com/opencl), with contributions from Linaro added for ARM CPU-only support. See README.txt for more info and build instructions.

Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
Diffstat (limited to 'src/core/cpu/sampler.cpp')
-rw-r--r--   src/core/cpu/sampler.cpp   769
1 file changed, 769 insertions, 0 deletions
diff --git a/src/core/cpu/sampler.cpp b/src/core/cpu/sampler.cpp
new file mode 100644
index 0000000..893e66e
--- /dev/null
+++ b/src/core/cpu/sampler.cpp
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/sampler.cpp
+ * \brief OpenCL C image access functions
+ *
+ * It is recommended to compile this file with Clang, whose
+ * \c __builtin_shufflevector() built-in allows the swizzling code below to
+ * be compiled to SSE- or NEON-accelerated instructions.
+ */
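+
+/*
+ * These functions back the OpenCL C image built-ins; a kernel-side call such
+ * as the following (illustrative only, not part of this file) ends up here:
+ *
+ *     const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP |
+ *                           CLK_FILTER_NEAREST;
+ *     float4 px = read_imagef(img, smp, (int2)(x, y));
+ *
+ * The sampler word packs its fields into separate nibbles, as the masks used
+ * throughout this file suggest: bits 0x0f hold the CLK_NORMALIZED_COORDS_*
+ * flag, bits 0xf0 the CLK_ADDRESS_* mode and bits 0xf00 the CLK_FILTER_* mode.
+ */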
+
+#include "../memobject.h"
+#include "../sampler.h"
+#include "kernel.h"
+#include "buffer.h"
+#include "builtins.h"
+
+#include <cstdlib>
+#include <cstring>   // std::memcpy
+#include <cstdint>
+#include <cmath>
+// ASW #include <immintrin.h>
+
+using namespace Coal;
+
+/*
+ * Helper functions
+ */
+
+static int clamp(int a, int b, int c)
+{
+ return (a < b) ? b : ((a > c) ? c : a);
+}
+
+static int min(int a, int b)
+{
+ return (a < b ? a : b);
+}
+
+static int max(int a, int b)
+{
+ return (a > b ? a : b);
+}
+
+static float frac(float x)
+{
+ return x - std::floor(x);
+}
+
+// Round to the nearest integer, as needed by the mirrored-repeat addressing
+// mode below.
+static float round(float x)
+{
+    return std::floor(x + 0.5f);
+}
+
+static bool handle_address_mode(Image2D *image, int &x, int &y, int &z,
+ uint32_t sampler)
+{
+ bool is_3d = (image->type() == MemObject::Image3D);
+ int w = image->width(),
+ h = image->height(),
+ d = (is_3d ? ((Image3D *)image)->depth() : 1);
+
+ if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE)
+ {
+ x = clamp(x, 0, w - 1);
+ y = clamp(y, 0, h - 1);
+ if (is_3d) z = clamp(z, 0, d - 1);
+ }
+    else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP)
+    {
+        // Clamp to [-1, size] so that coordinates outside the image on either
+        // side are detected below and mapped to the border color.
+        x = clamp(x, -1, w);
+        y = clamp(y, -1, h);
+        if (is_3d) z = clamp(z, -1, d);
+    }
+
+    return (x < 0 || x == w || y < 0 || y == h || z < 0 || z == d);
+}
+
+/*
+ * Macros or functions used to accelerate the functions
+ */
+#ifndef __has_builtin
+ #define __has_builtin(x) 0
+#endif
+
+static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b,
+ int x, int y, int z, int w)
+{
+ rs[0] = (x < 4 ? a[x] : b[x - 4]);
+ rs[1] = (y < 4 ? a[y] : b[y - 4]);
+ rs[2] = (z < 4 ? a[z] : b[z - 4]);
+ rs[3] = (w < 4 ? a[w] : b[w - 4]);
+}
+
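+/*
+ * The convert_* helpers translate between the image's storage format and the
+ * 32-bit per-channel representation used by the kernels: normalized integer
+ * formats are scaled to or from [0, 1] (UNORM) and [-1, 1] (SNORM), so a
+ * CL_UNORM_INT8 value of 255 reads back as 1.0f; plain integer formats are
+ * narrowed or widened; CL_FLOAT data is copied as is.
+ */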
+static void convert_to_format(void *dest, float *data,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the float color to the image's channel data type
+ if (type == CL_FLOAT)
+ std::memcpy(dest, data, channels * sizeof(float));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SNORM_INT8:
+            ((int8_t *)dest)[i] = data[i] * 127.0f;
+ break;
+ case CL_SNORM_INT16:
+ ((int16_t *)dest)[i] = data[i] * 32767.0f;
+ break;
+ case CL_UNORM_INT8:
+ ((uint8_t *)dest)[i] = data[i] * 255.0f;
+ break;
+ case CL_UNORM_INT16:
+ ((uint16_t *)dest)[i] = data[i] * 65535.0f;
+ break;
+ }
+ }
+}
+
+static void convert_from_format(float *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the stored pixel back to float
+ if (type == CL_FLOAT)
+ std::memcpy(data, source, channels * sizeof(float));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SNORM_INT8:
+ data[i] = (float)((int8_t *)source)[i] / 127.0f;
+ break;
+ case CL_SNORM_INT16:
+ data[i] = (float)((int16_t *)source)[i] / 32767.0f;
+ break;
+            case CL_UNORM_INT8:
+                data[i] = (float)((uint8_t *)source)[i] / 255.0f;
+                break;
+            case CL_UNORM_INT16:
+                data[i] = (float)((uint16_t *)source)[i] / 65535.0f;
+                break;
+ }
+ }
+}
+
+static void convert_to_format(void *dest, int *data,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the signed integer color to the image's channel data type
+ if (type == CL_SIGNED_INT32)
+ std::memcpy(dest, data, channels * sizeof(int32_t));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SIGNED_INT8:
+ ((int8_t *)dest)[i] = data[i];
+ break;
+ case CL_SIGNED_INT16:
+ ((int16_t *)dest)[i] = data[i];
+ break;
+ }
+ }
+}
+
+static void convert_from_format(int32_t *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the stored pixel back to a signed 32-bit integer
+ if (type == CL_SIGNED_INT32)
+ std::memcpy(data, source, channels * sizeof(int32_t));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SIGNED_INT8:
+ data[i] = ((int8_t *)source)[i];
+ break;
+ case CL_SIGNED_INT16:
+ data[i] = ((int16_t *)source)[i];
+ break;
+ }
+ }
+}
+
+static void convert_to_format(void *dest, uint32_t *data,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the unsigned integer color to the image's
+    // channel data type
+    if (type == CL_UNSIGNED_INT32)
+        std::memcpy(dest, data, channels * sizeof(uint32_t));
+
+    for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_UNSIGNED_INT8:
+ ((uint8_t *)dest)[i] = data[i];
+ break;
+ case CL_UNSIGNED_INT16:
+ ((uint16_t *)dest)[i] = data[i];
+ break;
+ }
+ }
+}
+
+static void convert_from_format(uint32_t *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+    // Convert each component of the stored pixel back to an unsigned 32-bit integer
+ if (type == CL_UNSIGNED_INT32)
+ std::memcpy(data, source, channels * sizeof(uint32_t));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_UNSIGNED_INT8:
+ data[i] = ((uint8_t *)source)[i];
+ break;
+ case CL_UNSIGNED_INT16:
+ data[i] = ((uint16_t *)source)[i];
+ break;
+ }
+ }
+}
+
+template<typename T>
+static void vec4_scalar_mul(T *vec, float val)
+{
+ for (unsigned int i=0; i<4; ++i)
+ vec[i] *= val;
+}
+
+template<typename T>
+static void vec4_add(T *vec1, T *vec2)
+{
+ for (unsigned int i=0; i<4; ++i)
+ vec1[i] += vec2[i];
+}
+
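+/*
+ * Trilinear filtering: the result is the weighted sum of the eight texels
+ * around the sample point, each weight being a product of the fractional
+ * coordinates, e.g. texel (i0, j0, k0) is weighted by
+ * (1 - a) * (1 - b) * (1 - c) and texel (i1, j1, k1) by a * b * c.
+ */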
+template<typename T>
+void CPUKernelWorkGroup::linear3D(T *result, float a, float b, float c,
+ int i0, int j0, int k0, int i1, int j1, int k1,
+ Image3D *image) const
+{
+ T accum[4];
+
+ readImageImplI<T>(result, image, i0, j0, k0, 0);
+ vec4_scalar_mul(result, (1.0f - a) * (1.0f - b) * (1.0f - c ));
+
+ readImageImplI<T>(accum, image, i1, j0, k0, 0);
+ vec4_scalar_mul(accum, a * (1.0f - b) * (1.0f - c ));
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i0, j1, k0, 0);
+ vec4_scalar_mul(accum, (1.0f - a) * b * (1.0f - c ));
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i1, j1, k0, 0);
+    vec4_scalar_mul(accum, a * b * (1.0f - c));
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i0, j0, k1, 0);
+ vec4_scalar_mul(accum, (1.0f - a) * (1.0f - b) * c);
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i1, j0, k1, 0);
+ vec4_scalar_mul(accum, a * (1.0f - b) * c);
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i0, j1, k1, 0);
+ vec4_scalar_mul(accum, (1.0f - a) * b * c);
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i1, j1, k1, 0);
+ vec4_scalar_mul(accum, a * b * c);
+ vec4_add(result, accum);
+}
+
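+/*
+ * Bilinear filtering: same scheme restricted to the four texels of the 2D
+ * neighbourhood; the c parameter is accepted for symmetry but unused.
+ */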
+template<typename T>
+void CPUKernelWorkGroup::linear2D(T *result, float a, float b, float c, int i0, int j0,
+ int i1, int j1, Image2D *image) const
+{
+ T accum[4];
+
+ readImageImplI<T>(result, image, i0, j0, 0, 0);
+ vec4_scalar_mul(result, (1.0f - a) * (1.0f - b));
+
+ readImageImplI<T>(accum, image, i1, j0, 0, 0);
+ vec4_scalar_mul(accum, a * (1.0f - b));
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i0, j1, 0, 0);
+ vec4_scalar_mul(accum, (1.0f - a) * b);
+ vec4_add(result, accum);
+
+ readImageImplI<T>(accum, image, i1, j1, 0, 0);
+ vec4_scalar_mul(accum, a * b);
+ vec4_add(result, accum);
+}
+
+#if __has_builtin(__builtin_shufflevector)
+    /* <immintrin.h> is not included here, so declare the 4 x float vector
+     * type used by the accelerated path locally. */
+    typedef float __v4sf __attribute__((vector_size(16)));
+
+    #define shuffle4(rs, a, b, x, y, z, w) \
+        *(__v4sf *)rs = __builtin_shufflevector(*(__v4sf *)a, *(__v4sf *)b, \
+                                                x, y, z, w)
+#else
+ #define shuffle4(rs, a, b, x, y, z, w) \
+ slow_shuffle4(rs, a, b, x, y, z, w)
+#endif
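+
+/*
+ * shuffle4() follows __builtin_shufflevector's indexing convention: selector
+ * values 0-3 pick a component of a, values 4-7 pick a component of b.
+ */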
+
+static void swizzle(uint32_t *target, uint32_t *source,
+ cl_channel_order order, bool reading, uint32_t t_max)
+{
+ uint32_t special[4] = {0, t_max, 0, 0 };
+
+ if (reading)
+ {
+ switch (order)
+ {
+ case CL_R:
+ case CL_Rx:
+ // target = {source->x, 0, 0, t_max}
+ shuffle4(target, source, special, 0, 4, 4, 5);
+ break;
+ case CL_A:
+ // target = {0, 0, 0, source->x}
+ shuffle4(target, source, special, 4, 4, 4, 0);
+ break;
+ case CL_INTENSITY:
+ // target = {source->x, source->x, source->x, source->x}
+ shuffle4(target, source, source, 0, 0, 0, 0);
+ break;
+ case CL_LUMINANCE:
+ // target = {source->x, source->x, source->x, t_max}
+ shuffle4(target, source, special, 0, 0, 0, 5);
+ break;
+ case CL_RG:
+ case CL_RGx:
+ // target = {source->x, source->y, 0, t_max}
+ shuffle4(target, source, special, 0, 1, 4, 5);
+ break;
+ case CL_RA:
+ // target = {source->x, 0, 0, source->y}
+ shuffle4(target, source, special, 0, 4, 4, 1);
+ break;
+ case CL_RGB:
+ case CL_RGBx:
+ case CL_RGBA:
+ // Nothing to do, already the good order
+ std::memcpy(target, source, 16);
+ break;
+ case CL_ARGB:
+ // target = {source->y, source->z, source->w, source->x}
+ shuffle4(target, source, source, 1, 2, 3, 0);
+ break;
+ case CL_BGRA:
+ // target = {source->z, source->y, source->x, source->w}
+ shuffle4(target, source, source, 2, 1, 0, 3);
+ break;
+ }
+ }
+ else
+ {
+ switch (order)
+ {
+ case CL_A:
+ // target = {source->w, undef, undef, undef}
+ shuffle4(target, source, source, 3, 3, 3, 3);
+ break;
+ case CL_RA:
+ // target = {source->x, source->w, undef, undef}
+ shuffle4(target, source, source, 0, 3, 3, 3);
+ break;
+ case CL_ARGB:
+ // target = {source->w, source->x, source->y, source->z}
+ shuffle4(target, source, source, 3, 0, 1, 2);
+ break;
+ case CL_BGRA:
+ // target = {source->z, source->y, source->x, source->w}
+ shuffle4(target, source, source, 2, 1, 0, 3);
+ break;
+ default:
+ std::memcpy(target, source, 16);
+ }
+ }
+}
+
+/*
+ * Actual implementation of the built-ins
+ */
+
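+// getImageData() maps an (x, y, z) texel coordinate to its address inside the
+// CPUBuffer backing the image, using the image's row pitch, slice pitch and
+// pixel size.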
+void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
+{
+ CPUBuffer *buffer =
+ (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device());
+
+ return imageData((unsigned char *)buffer->data(),
+ x, y, z,
+ image->row_pitch(),
+ image->slice_pitch(),
+ image->pixel_size());
+}
+
+template<typename T>
+void CPUKernelWorkGroup::writeImageImpl(Image2D *image, int x, int y, int z,
+ T *color) const
+{
+ T converted[4];
+
+    // Swizzle to the storage channel order (float, int and uint are all
+    // 32-bit, so the element type does not matter here)
+ swizzle((uint32_t *)converted, (uint32_t *)color,
+ image->format().image_channel_order, false, 0);
+
+ // Get a pointer in the image where to write the data
+ void *target = getImageData(image, x, y, z);
+
+ // Convert color to the correct format
+ convert_to_format(target,
+ converted,
+ image->format().image_channel_data_type,
+ image->channels());
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+ float *color) const
+{
+ writeImageImpl<float>(image, x, y, z, color);
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+ int32_t *color) const
+{
+ writeImageImpl<int32_t>(image, x, y, z, color);
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+ uint32_t *color) const
+{
+ writeImageImpl<uint32_t>(image, x, y, z, color);
+}
+
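+/*
+ * type_max_value() returns, as a raw 32-bit pattern, the value swizzle()
+ * stores into channels it has to synthesize (typically alpha): 1.0f for
+ * float images, the largest representable value for signed and unsigned
+ * integer images.
+ */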
+template<typename T>
+uint32_t type_max_value()
+{
+ return 0;
+}
+
+template<>
+uint32_t type_max_value<float>()
+{
+    return 0x3f800000; // bit pattern of 1.0f
+}
+
+template<>
+uint32_t type_max_value<int32_t>()
+{
+ return 0x7fffffff;
+}
+
+template<>
+uint32_t type_max_value<uint32_t>()
+{
+ return 0xffffffff;
+}
+
+template<typename T>
+void CPUKernelWorkGroup::readImageImplI(T *result, Image2D *image, int x, int y,
+ int z, uint32_t sampler) const
+{
+ // Handle the addressing mode of the sampler
+ if (handle_address_mode(image, x, y, z, sampler))
+ {
+ // Border color
+ result[0] = 0.0f;
+ result[1] = 0.0f;
+ result[2] = 0.0f;
+
+ switch (image->format().image_channel_order)
+ {
+ case CL_R:
+ case CL_RG:
+ case CL_RGB:
+ case CL_LUMINANCE:
+ result[3] = 1.0f;
+ break;
+ default:
+ result[3] = 0.0f;
+ }
+
+ return;
+ }
+
+ // Load the data from the image, converting it
+ void *source = getImageData(image, x, y, z);
+ T converted[4];
+
+ convert_from_format(converted,
+ source,
+ image->format().image_channel_data_type,
+ image->channels());
+
+ // Swizzle the pixel just read and place it in result
+ swizzle((uint32_t *)result, (uint32_t *)converted,
+ image->format().image_channel_order, true, type_max_value<T>());
+}
+
+void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y,
+ int z, uint32_t sampler) const
+{
+ readImageImplI<float>(result, image, x, y, z, sampler);
+}
+
+void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y,
+ int z, uint32_t sampler) const
+{
+ readImageImplI<int32_t>(result, image, x, y, z, sampler);
+}
+
+void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y,
+ int z, uint32_t sampler) const
+{
+ readImageImplI<uint32_t>(result, image, x, y, z, sampler);
+}
+
+template<typename T>
+void CPUKernelWorkGroup::readImageImplF(T *result, Image2D *image, float x,
+ float y, float z, uint32_t sampler) const
+{
+ bool is_3d = (image->type() == MemObject::Image3D);
+ Image3D *image3d = (Image3D *)image;
+
+ int w = image->width(),
+ h = image->height(),
+ d = (is_3d ? image3d->depth() : 1);
+
+ switch (sampler & 0xf0)
+ {
+ case CLK_ADDRESS_NONE:
+ case CLK_ADDRESS_CLAMP:
+ case CLK_ADDRESS_CLAMP_TO_EDGE:
+ /* De-normalize coordinates */
+ if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE)
+ {
+ x *= (float)w;
+ y *= (float)h;
+ if (is_3d) z *= (float)d;
+ }
+
+ switch (sampler & 0xf00)
+ {
+ case CLK_FILTER_NEAREST:
+ {
+ readImageImplI<T>(result, image, std::floor(x),
+ std::floor(y), std::floor(z), sampler);
+            }
+            break;
+ case CLK_FILTER_LINEAR:
+ {
+ float a, b, c;
+
+ a = frac(x - 0.5f);
+ b = frac(y - 0.5f);
+ c = frac(z - 0.5f);
+
+ if (is_3d)
+ {
+ linear3D<T>(result, a, b, c,
+ std::floor(x - 0.5f),
+ std::floor(y - 0.5f),
+ std::floor(z - 0.5f),
+ std::floor(x - 0.5f) + 1,
+ std::floor(y - 0.5f) + 1,
+ std::floor(z - 0.5f) + 1,
+ image3d);
+ }
+ else
+ {
+ linear2D<T>(result, a, b, c,
+ std::floor(x - 0.5f),
+ std::floor(y - 0.5f),
+ std::floor(x - 0.5f) + 1,
+ std::floor(y - 0.5f) + 1,
+ image);
+ }
+ }
+ }
+ break;
+ case CLK_ADDRESS_REPEAT:
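+        // CLK_ADDRESS_REPEAT assumes normalized coordinates: wrap them into
+        // [0, 1) with s - floor(s), scale by the image size, then fold
+        // indices that land past the last texel back to the start.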
+ switch (sampler & 0xf00)
+ {
+ case CLK_FILTER_NEAREST:
+ {
+                int i, j, k = 0;   // k stays 0 for 2D images
+
+ x = (x - std::floor(x)) * (float)w;
+ i = std::floor(x);
+ if (i > w - 1)
+ i = i - w;
+
+ y = (y - std::floor(y)) * (float)h;
+ j = std::floor(y);
+ if (j > h - 1)
+ j = j - h;
+
+ if (is_3d)
+ {
+ z = (z - std::floor(z)) * (float)d;
+ k = std::floor(z);
+ if (k > d - 1)
+ k = k - d;
+ }
+
+ readImageImplI<T>(result, image, i, j, k, sampler);
+            }
+            break;
+ case CLK_FILTER_LINEAR:
+ {
+ float a, b, c;
+ int i0, i1, j0, j1, k0, k1;
+
+ x = (x - std::floor(x)) * (float)w;
+ i0 = std::floor(x - 0.5f);
+ i1 = i0 + 1;
+ if (i0 < 0)
+ i0 = w + i0;
+ if (i1 > w - 1)
+ i1 = i1 - w;
+
+ y = (y - std::floor(y)) * (float)h;
+ j0 = std::floor(y - 0.5f);
+ j1 = j0 + 1;
+ if (j0 < 0)
+ j0 = h + j0;
+ if (j1 > h - 1)
+ j1 = j1 - h;
+
+ if (is_3d)
+ {
+ z = (z - std::floor(z)) * (float)d;
+ k0 = std::floor(z - 0.5f);
+ k1 = k0 + 1;
+ if (k0 < 0)
+ k0 = d + k0;
+ if (k1 > d - 1)
+ k1 = k1 - d;
+ }
+
+ a = frac(x - 0.5f);
+ b = frac(y - 0.5f);
+ c = frac(z - 0.5f);
+
+ if (is_3d)
+ {
+ linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
+ image3d);
+ }
+ else
+ {
+ linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
+ }
+ }
+ }
+ break;
+ case CLK_ADDRESS_MIRRORED_REPEAT:
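+        // CLK_ADDRESS_MIRRORED_REPEAT reflects the normalized coordinate
+        // back into [0, 1] with |s - 2 * round(0.5 * s)| before scaling by
+        // the image size.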
+ switch (sampler & 0xf00)
+ {
+ case CLK_FILTER_NEAREST:
+ {
+ x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
+ y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
+ if (is_3d)
+ z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
+
+ readImageImplI<T>(result, image,
+ min(std::floor(x), w - 1),
+ min(std::floor(y), h - 1),
+ min(std::floor(z), d - 1),
+ sampler);
+            }
+            break;
+ case CLK_FILTER_LINEAR:
+ {
+ float a, b, c;
+ int i0, i1, j0, j1, k0, k1;
+
+ x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
+ i0 = std::floor(x - 0.5f);
+ i1 = i0 + 1;
+ i0 = max(i0, 0);
+ i1 = min(i1, w - 1);
+
+ y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
+ j0 = std::floor(y - 0.5f);
+ j1 = j0 + 1;
+ j0 = max(j0, 0);
+ j1 = min(j1, h - 1);
+
+ if (is_3d)
+ {
+ z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
+ k0 = std::floor(z - 0.5f);
+ k1 = k0 + 1;
+ k0 = max(k0, 0);
+ k1 = min(k1, d - 1);
+ }
+
+ a = frac(x - 0.5f);
+ b = frac(y - 0.5f);
+ c = frac(z - 0.5f);
+
+ if (is_3d)
+ {
+ linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
+ image3d);
+ }
+ else
+ {
+ linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
+ }
+ }
+ }
+ break;
+ }
+}
+
+void CPUKernelWorkGroup::readImage(float *result, Image2D *image, float x,
+ float y, float z, uint32_t sampler) const
+{
+ readImageImplF<float>(result, image, x, y, z, sampler);
+}
+
+void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, float x,
+ float y, float z, uint32_t sampler) const
+{
+ readImageImplF<int32_t>(result, image, x, y, z, sampler);
+}
+
+void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, float x,
+ float y, float z, uint32_t sampler) const
+{
+ readImageImplF<uint32_t>(result, image, x, y, z, sampler);
+}