diff options
author | Gil Pitney <gil.pitney@linaro.org> | 2015-04-29 16:55:55 +0000 |
---|---|---|
committer | Gil Pitney <gil.pitney@linaro.org> | 2015-04-29 16:55:55 +0000 |
commit | 7dd00e516e90d5ad84aa6ebedf7fc2bfea25247f (patch) | |
tree | 733ef242e3cca9d00279b3e451629b9b86e6e0ce | |
parent | 6e94d7f24bf1d4c15fc21003289ad968a240d8b3 (diff) |
Added printf builtin for v1.2
This adds a C implementation of the OpenCL printf builtin (printf.c), taken from pocl, with some minor tweaks.
Per the Khronos v1.2 test_printf test case, this enables all of the 57
sub tests to pass, with two exceptions:
*** Testing printf for vector ***
0)testing printf("%2.2v4hlf",(1.0f,2.0f,3.0f,4.0f))
*** FAILED ***
4)testing printf("%v2ld",(12345678,98765432))
*** FAILED ***
Some debugging indicates a possible issue involving va_arg and floating-point
types, which becomes apparent when passing vectors of floats to a
variadic function.
Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r-- | include/cpu.h | 3 | ||||
-rw-r--r-- | src/builtins/CMakeLists.txt | 29 | ||||
-rw-r--r-- | src/builtins/Makefile | 16 | ||||
-rw-r--r-- | src/builtins/_kernel_c.h | 315 | ||||
-rw-r--r-- | src/builtins/pocl_types.h | 75 | ||||
-rw-r--r-- | src/builtins/printf.c | 476 | ||||
-rw-r--r-- | src/core/cpu/builtins.cpp | 14 |
7 files changed, 919 insertions, 9 deletions
diff --git a/include/cpu.h b/include/cpu.h index 1e8380f..0f74f1f 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -267,7 +267,8 @@ TERNARY_VEC_DECL(uint, uint, mad_sat) TERNARY_VEC_DECL(long, long, mad_sat) TERNARY_VEC_DECL(ulong, ulong, mad_sat) -int printf(__constant char* _format, ...); +int _cl_printf(__constant char* restrict _format, ...); +#define printf _cl_printf void *memcpy(void *dst, const void * src, uint size); _CLC_DECL size_t get_local_id (uint dim); diff --git a/src/builtins/CMakeLists.txt b/src/builtins/CMakeLists.txt index d91c775..078dc20 100644 --- a/src/builtins/CMakeLists.txt +++ b/src/builtins/CMakeLists.txt @@ -1,9 +1,13 @@ if (SHAMROCK_BUILD) -set(CUSTOM_COMMAND ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off ) +set(CUSTOM_COMMAND_C ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off ) + +set(CUSTOM_COMMAND ${CUSTOM_COMMAND_C} -x cl ) + FILE(GLOB CL_SOURCES ${CLC_BUILTINS_DIR}/*.cl) -#MESSAGE(STATUS "CL_SOURCES: ${CL_SOURCES}" ) +FILE(GLOB C_SOURCES ${CLC_BUILTINS_DIR}/*.c) +#MESSAGE(STATUS "C_SOURCES: ${C_SOURCES}" ) set(BC_SOURCES) foreach(f ${CL_SOURCES}) @@ -18,6 +22,27 @@ foreach(f ${CL_SOURCES}) COMMENT "Generating ${bc}") list(APPEND BC_SOURCES ${bc}) endforeach() + + +MESSAGE( STATUS "LLVM_LIB_DIR: ${LLVM_LIB_DIR}") +MESSAGE( STATUS "LLVM_VERSION: ${LLVM_VERSION}") +set(CLANG_INCLUDE_DIR "${LLVM_LIB_DIR}/clang/${LLVM_VERSION}/include" ) +MESSAGE( STATUS "CLANG_INCLUDE_DIR: ${CLANG_INCLUDE_DIR}") + +foreach(f ${C_SOURCES}) + get_filename_component(fn ${f} NAME_WE) + #MESSAGE(STATUS "C_SOURCE: ${f}" ) + set(bc ${CMAKE_CURRENT_BINARY_DIR}/${fn}.bc) + add_custom_command(OUTPUT ${bc} + COMMAND ${CUSTOM_COMMAND_C} + -I${OCL_BUILTINS_DIR}/include + -I${CLANG_INCLUDE_DIR} + -o ${bc} ${f} + DEPENDS ${f} + COMMENT "Generating ${bc}") + 
list(APPEND BC_SOURCES ${bc}) +endforeach() + #MESSAGE( STATUS "BC_SOURCES: ${BC_SOURCES}") add_custom_target(generate_bc_files DEPENDS ${BC_SOURCES}) diff --git a/src/builtins/Makefile b/src/builtins/Makefile index 1d3349b..b9191dd 100644 --- a/src/builtins/Makefile +++ b/src/builtins/Makefile @@ -1,21 +1,29 @@ CLANG = clang -CLANG_CFLAGS = -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc +CLANG_CFLAGS = -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc CLANG_CFLAGS += -Fvisibility=protected -cl-std=CL1.2 -ffp-contract=off -CLANG_CFLAGS += -I../../include +CLANG_CFLAGS += -I../../include -I/opt/llvm/lib/clang/3.6.0/include + +CLANG_CL_FLAGS += $(CLANG_CFLAGS) -x cl CL_FILES = $(wildcard *.cl) +C_FILES = $(wildcard *.c) BYTECODE := ${CL_FILES:.cl=.bc} +BYTECODE_FROM_C := ${C_FILES:.c=.bc} all: builtins.lib -builtins.lib: $(BYTECODE) +builtins.lib: $(BYTECODE) $(BYTECODE_FROM_C) @echo $@ Linking bytecode modules llvm-link -o $@ $^ -%.bc: %.cl +%.bc: %.c @echo $< Parsing @$(CLANG) $(CLANG_CFLAGS) $< -o $@ +%.bc: %.cl + @echo $< Parsing + @$(CLANG) $(CLANG_CL_FLAGS) $< -o $@ + %.ll: %.bc @echo $< Disassembling llvm-dis $< diff --git a/src/builtins/_kernel_c.h b/src/builtins/_kernel_c.h new file mode 100644 index 0000000..dc03bd8 --- /dev/null +++ b/src/builtins/_kernel_c.h @@ -0,0 +1,315 @@ +/* pocl/_kernel_c.h - C compatible OpenCL types and runtime library + functions declarations. 
+ + Copyright (c) 2011 Universidad Rey Juan Carlos + Copyright (c) 2011-2013 Pekka Jääskeläinen / TUT + Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca> + Perimeter Institute for Theoretical Physics + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ +/** + * Header that can be implemented in C compiled implementations of + * built-in functions to introduce the OpenCL C compatible types etc. 
+ */ +#ifndef _KERNEL_C_H +#define _KERNEL_C_H + +#include "pocl_types.h" + +/* Function/type attributes supported by Clang/SPIR */ +#if __has_attribute(__always_inline__) +# define _CL_ALWAYSINLINE __attribute__((__always_inline__)) +#else +# define _CL_ALWAYSINLINE +#endif +#if __has_attribute(__noinline__) +# define _CL_NOINLINE __attribute__((__noinline__)) +#else +# define _CL_NOINLINE +#endif +#if __has_attribute(__overloadable__) +# define _CL_OVERLOADABLE __attribute__((__overloadable__)) +#else +# define _CL_OVERLOADABLE +#endif +#if (__clang_major__ == 3) && (__clang_minor__ >= 2) +/* This causes an error with Clang 3.1: */ +/* #if __has_attribute(__const__) */ +# define _CL_READNONE __attribute__((__const__)) +#else +# define _CL_READNONE +#endif +#if __has_attribute(__pure__) +# define _CL_READONLY __attribute__((__pure__)) +#else +# define _CL_READONLY +#endif +#if __has_attribute(__unavailable__) +# define _CL_UNAVAILABLE __attribute__((__unavailable__)) +#else +# define _CL_UNAVAILABLE +#endif + +typedef char char2 __attribute__((__ext_vector_type__(2))); +typedef char char3 __attribute__((__ext_vector_type__(3))); +typedef char char4 __attribute__((__ext_vector_type__(4))); +typedef char char8 __attribute__((__ext_vector_type__(8))); +typedef char char16 __attribute__((__ext_vector_type__(16))); + +typedef uchar uchar2 __attribute__((__ext_vector_type__(2))); +typedef uchar uchar3 __attribute__((__ext_vector_type__(3))); +typedef uchar uchar4 __attribute__((__ext_vector_type__(4))); +typedef uchar uchar8 __attribute__((__ext_vector_type__(8))); +typedef uchar uchar16 __attribute__((__ext_vector_type__(16))); + +typedef short short2 __attribute__((__ext_vector_type__(2))); +typedef short short3 __attribute__((__ext_vector_type__(3))); +typedef short short4 __attribute__((__ext_vector_type__(4))); +typedef short short8 __attribute__((__ext_vector_type__(8))); +typedef short short16 __attribute__((__ext_vector_type__(16))); + +typedef ushort ushort2 
__attribute__((__ext_vector_type__(2))); +typedef ushort ushort3 __attribute__((__ext_vector_type__(3))); +typedef ushort ushort4 __attribute__((__ext_vector_type__(4))); +typedef ushort ushort8 __attribute__((__ext_vector_type__(8))); +typedef ushort ushort16 __attribute__((__ext_vector_type__(16))); + +typedef int int2 __attribute__((__ext_vector_type__(2))); +typedef int int3 __attribute__((__ext_vector_type__(3))); +typedef int int4 __attribute__((__ext_vector_type__(4))); +typedef int int8 __attribute__((__ext_vector_type__(8))); +typedef int int16 __attribute__((__ext_vector_type__(16))); + +typedef uint uint2 __attribute__((__ext_vector_type__(2))); +typedef uint uint3 __attribute__((__ext_vector_type__(3))); +typedef uint uint4 __attribute__((__ext_vector_type__(4))); +typedef uint uint8 __attribute__((__ext_vector_type__(8))); +typedef uint uint16 __attribute__((__ext_vector_type__(16))); + +#if defined(__CBUILD__) && defined(cl_khr_fp16) +/* NOTE: the Clang's __fp16 does not work robustly in C mode, + it might produce invalid code at least with half vectors. + Using the native 'half' type in OpenCL C mode works better. 
*/ +typedef __fp16 half; +#endif + +#ifdef cl_khr_fp16 +typedef half half2 __attribute__((__ext_vector_type__(2))); +typedef half half3 __attribute__((__ext_vector_type__(3))); +typedef half half4 __attribute__((__ext_vector_type__(4))); +typedef half half8 __attribute__((__ext_vector_type__(8))); +typedef half half16 __attribute__((__ext_vector_type__(16))); +#endif + +typedef float float2 __attribute__((__ext_vector_type__(2))); +typedef float float3 __attribute__((__ext_vector_type__(3))); +typedef float float4 __attribute__((__ext_vector_type__(4))); +typedef float float8 __attribute__((__ext_vector_type__(8))); +typedef float float16 __attribute__((__ext_vector_type__(16))); + +#ifdef cl_khr_fp64 +# ifndef __CBUILD__ +# pragma OPENCL EXTENSION cl_khr_fp64 : enable +# endif +typedef double double2 __attribute__((__ext_vector_type__(2))); +typedef double double3 __attribute__((__ext_vector_type__(3))); +typedef double double4 __attribute__((__ext_vector_type__(4))); +typedef double double8 __attribute__((__ext_vector_type__(8))); +typedef double double16 __attribute__((__ext_vector_type__(16))); +#endif + +#ifdef cl_khr_int64 +typedef long long2 __attribute__((__ext_vector_type__(2))); +typedef long long3 __attribute__((__ext_vector_type__(3))); +typedef long long4 __attribute__((__ext_vector_type__(4))); +typedef long long8 __attribute__((__ext_vector_type__(8))); +typedef long long16 __attribute__((__ext_vector_type__(16))); + +typedef ulong ulong2 __attribute__((__ext_vector_type__(2))); +typedef ulong ulong3 __attribute__((__ext_vector_type__(3))); +typedef ulong ulong4 __attribute__((__ext_vector_type__(4))); +typedef ulong ulong8 __attribute__((__ext_vector_type__(8))); +typedef ulong ulong16 __attribute__((__ext_vector_type__(16))); +#endif + +/* Image support */ + +/* Starting from Clang 3.3 the image and sampler are detected + as opaque types by the frontend. 
In order to define + the default builtins we use C functions which require + the typedefs to the actual underlying types. Clang 3.2 + the typedefs throughout as the types are not detected + by the frontend. */ +#if !defined(_CL_HAS_IMAGE_ACCESS) +typedef int sampler_t; + +/* Since some built-ins have different return types + * (e.g. get_image_dim returns an int2 for 2D images and arrays, + * but an int4 for 3D images) we want each image type to + * point to a different type which is actually always the same. + * We do this by making it pointer to structs whose only element is a + * dev_image_t. The structs are not anonymous to allow identification + * by name. + */ +typedef struct _pocl_image2d_t { dev_image_t base; }* image2d_t; +typedef struct _pocl_image3d_t { dev_image_t base; }* image3d_t; +typedef struct _pocl_image1d_t { dev_image_t base; }* image1d_t; +typedef struct _pocl_image1d_buffer_t { dev_image_t base; }* image1d_buffer_t; +typedef struct _pocl_image2d_array_t { dev_image_t base; }* image2d_array_t; +typedef struct _pocl_image1d_array_t { dev_image_t base; }* image1d_array_t; +#endif + + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define 
CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF + +/* cl_addressing _mode */ +#define CLK_ADDRESS_NONE 0x00 +#define CLK_ADDRESS_MIRRORED_REPEAT 0x01 +#define CLK_ADDRESS_REPEAT 0x02 +#define CLK_ADDRESS_CLAMP_TO_EDGE 0x03 +#define CLK_ADDRESS_CLAMP 0x04 + +/* cl_sampler_info */ +#define CLK_NORMALIZED_COORDS_FALSE 0x00 +#define CLK_NORMALIZED_COORDS_TRUE 0x08 + +/* filter_mode */ +#define CLK_FILTER_NEAREST 0x00 +#define CLK_FILTER_LINEAR 0x10 + +//#ifdef _CL_HAS_IMAGE_ACCESS + +float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler, + int2 coord); + +float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler, + float2 coord); + +uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler, + int2 coord); + +uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler, + int4 coord); + +uint4 _CL_OVERLOADABLE read_imageui (image3d_t image, sampler_t sampler, + int4 coord); + +int4 _CL_OVERLOADABLE read_imagei (image2d_t image, sampler_t sampler, + int2 coord); + + +void _CL_OVERLOADABLE write_imagei (image2d_t image, int2 coord, int4 color); + +void _CL_OVERLOADABLE write_imageui (image2d_t image, int2 coord, uint4 color); + + + +void _CL_OVERLOADABLE write_imagef (image2d_t image, int2 coord, + float4 color); +/* not implemented +void _CL_OVERLOADABLE write_imagef (image2d_array_t image, int4 coord, + float4 color); + +void _CL_OVERLOADABLE write_imagei (image2d_array_t image, int4 coord, + int4 color); + +void _CL_OVERLOADABLE write_imageui (image2d_array_t image, int4 coord, + uint4 color); + +void _CL_OVERLOADABLE write_imagef (image1d_t image, int coord, + float4 color); + +void _CL_OVERLOADABLE write_imagei (image1d_t image, int coord, + int4 color); + +void _CL_OVERLOADABLE write_imageui (image1d_t image, int coord, + uint4 color); + +void _CL_OVERLOADABLE write_imagef (image1d_buffer_t image, int coord, + float4 color); + +void _CL_OVERLOADABLE write_imagei (image1d_buffer_t image, int coord, + int4 
color); + +void _CL_OVERLOADABLE write_imageui (image1d_buffer_t image, int coord, + uint4 color); + +void _CL_OVERLOADABLE write_imagef (image1d_array_t image, int2 coord, + float4 color); + +void _CL_OVERLOADABLE write_imagei (image1d_array_t image, int2 coord, + int4 color); + +void _CL_OVERLOADABLE write_imageui (image1d_array_t image, int2 coord, + uint4 color); + +void _CL_OVERLOADABLE write_imageui (image3d_t image, int4 coord, + uint4 color); +*/ +int _CL_OVERLOADABLE get_image_width (image1d_t image); +int _CL_OVERLOADABLE get_image_width (image2d_t image); +int _CL_OVERLOADABLE get_image_width (image3d_t image); + +int _CL_OVERLOADABLE get_image_height (image1d_t image); +int _CL_OVERLOADABLE get_image_height (image2d_t image); +int _CL_OVERLOADABLE get_image_height (image3d_t image); + +int _CL_OVERLOADABLE get_image_depth (image1d_t image); +int _CL_OVERLOADABLE get_image_depth (image2d_t image); +int _CL_OVERLOADABLE get_image_depth (image3d_t image); + +int2 _CL_OVERLOADABLE get_image_dim (image2d_t image); +int2 _CL_OVERLOADABLE get_image_dim (image2d_array_t image); +int4 _CL_OVERLOADABLE get_image_dim (image3d_t image); + +#endif diff --git a/src/builtins/pocl_types.h b/src/builtins/pocl_types.h new file mode 100644 index 0000000..3f280cf --- /dev/null +++ b/src/builtins/pocl_types.h @@ -0,0 +1,75 @@ +// Scalar type definitions + +//#include "pocl_features.h" + +#if 0 // GP: ??? +#if defined cl_khr_fp64 && !defined cl_khr_int64 +# error "cl_khr_fp64 requires cl_khr_int64" +#endif +#endif + + +/* Disable undefined datatypes */ + +/* The definitions below intentionally lead to errors if these types + are used when they are not available in the language. This prevents + accidentally using them if the compiler does not disable these + types, but only e.g. 
defines them with an incorrect size.*/ + +#ifndef cl_khr_int64 +typedef struct error_undefined_type_long error_undefined_type_long; +# define long error_undefined_type_long +typedef struct error_undefined_type_ulong error_undefined_type_ulong; +# define ulong error_undefined_type_ulong +#endif + +#ifndef cl_khr_fp16 +typedef struct error_undefined_type_half error_undefined_type_half; +# define half error_undefined_type_half +#endif + +#ifndef cl_khr_fp64 +typedef struct error_undefined_type_double error_undefined_type_double; +# define double error_undefined_type_double +#endif + + +/* Define unsigned datatypes */ + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +#ifdef cl_khr_int64 +typedef unsigned long ulong; +#endif + +/* Define pointer helper types */ + +typedef __SIZE_TYPE__ size_t; +typedef __PTRDIFF_TYPE__ ptrdiff_t; +typedef ptrdiff_t intptr_t; +typedef size_t uintptr_t; + + +/* Image types. + * Note: there is a duplicate definition in + * lib/CL/devices/dev_image.h - keep in sync? 
+ */ +typedef int dev_sampler_t; + +typedef struct dev_image_t { + void* data; + int width; + int height; + int depth; + int image_array_size; + int row_pitch; + int slice_pitch; + int num_mip_levels; /* maybe not needed */ + int num_samples; /* maybe not needed */ + int order; + int data_type; + int num_channels; + int elem_size; +} dev_image_t; + diff --git a/src/builtins/printf.c b/src/builtins/printf.c new file mode 100644 index 0000000..196373c --- /dev/null +++ b/src/builtins/printf.c @@ -0,0 +1,476 @@ +/* OpenCL built-in library: printf() + + Copyright (c) 2013 Erik Schnetter <eschnetter@perimeterinstitute.ca> + Perimeter Institute for Theoretical Physics + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +// Make the C99 printf visible again +#undef printf + +// GP: Add the OpenCL types for "C": +#define cl_khr_fp64 +#include "_kernel_c.h" + +#include <limits.h> +#include <stdarg.h> +#include <stdbool.h> + +// We implement the OpenCL printf by calling the C99 printf. This is +// not very efficient, but is easy to implement. +int printf(const char* restrict fmt, ...); +int snprintf(char* restrict str, size_t size, const char* restrict fmt, ...); + +// For debugging +void debug_ptr(void * arg); + +// Use as: DEBUG_PRINTF((fmt, args...)) -- note double parentheses! +//#define DEBUG_PRINTF(args) printf args +#define DEBUG_PRINTF(args) ((void)0) + +// Conversion flags +typedef struct { + bool left; + bool plus; + bool space; + bool alt; + bool zero; +} flags_t; + + + +// Helper routines to output integers + +#define INT_CONV_char "hh" +#define INT_CONV_short "h" +#define INT_CONV_int "" +#define INT_CONV_long "ll" // C99 printf uses "ll" for int64_t + +#define DEFINE_PRINT_INTS(WIDTH) \ + void _cl_print_ints_##WIDTH(flags_t flags, int field_width, int precision, \ + char conv, const void* vals, int n) \ + { \ + DEBUG_PRINTF(("[printf:ints:n=%df]\n", n)); \ + char outfmt[1000]; \ + snprintf(outfmt, sizeof outfmt, \ + "%%%s%s%s%s%s%.0d%s%.0d" INT_CONV_##WIDTH "%c", \ + flags.left ? "-" : "", \ + flags.plus ? "+" : "", \ + flags.space ? " " : "", \ + flags.alt ? "#" : "", \ + flags.zero ? "0" : "", \ + field_width, \ + precision != -1 ? "." : "", \ + precision != -1 ? 
precision : 0, \ + conv); \ + DEBUG_PRINTF(("[printf:ints:outfmt=%s]\n", outfmt)); \ + for (int d=0; d<n; ++d) { \ + DEBUG_PRINTF(("[printf:ints:d=%d]\n", d)); \ + if (d != 0) printf(","); \ + printf(outfmt, ((const WIDTH*)vals)[d]); \ + } \ + DEBUG_PRINTF(("[printf:ints:done]\n")); \ + } + +DEFINE_PRINT_INTS(char) +DEFINE_PRINT_INTS(short) +DEFINE_PRINT_INTS(int) +#ifdef cl_khr_int64 +DEFINE_PRINT_INTS(long) +#endif + +#undef DEFINE_PRINT_INTS + + + +// Helper routines to output floats + +// Defined in OpenCL +float __attribute__((overloadable)) vload_half(size_t offset, const half *p); + +// Note: To simplify implementation, we print double values with %lf, +// although %f would suffice as well +#define FLOAT_CONV_half "h" +#define FLOAT_CONV_float "" +#define FLOAT_CONV_double "l" +#define FLOAT_GET_half(ptr) vload_half(0, ptr) +#define FLOAT_GET_float(ptr) (*(ptr)) +#define FLOAT_GET_double(ptr) (*(ptr)) + +#define DEFINE_PRINT_FLOATS(WIDTH) \ + void _cl_print_floats_##WIDTH(flags_t flags, int field_width, int precision, \ + char conv, const void* vals, int n) \ + { \ + char outfmt[1000]; \ + DEBUG_PRINTF(("[printf:floats:n=%dd]\n", n)); \ + snprintf(outfmt, sizeof outfmt, \ + "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c", \ + flags.left ? "-" : "", \ + flags.plus ? "+" : "", \ + flags.space ? " " : "", \ + flags.alt ? "#" : "", \ + flags.zero ? "0" : "", \ + field_width, \ + precision != -1 ? "." : "", \ + precision != -1 ? 
precision : 0, \ + conv); \ + DEBUG_PRINTF(("[printf:floats:outfmt=%s]\n", outfmt)); \ + debug_ptr((void *)outfmt); \ + for (int d=0; d<n; ++d) { \ + DEBUG_PRINTF(("[printf:floats:d=%d]\n", d)); \ + if (d != 0) printf(","); \ + debug_ptr((void *)((const WIDTH*)vals+d)); \ + printf(outfmt, FLOAT_GET_##WIDTH((const WIDTH*)vals+d)); \ + } \ + DEBUG_PRINTF(("[printf:floats:done]\n")); \ + } + +#ifdef cl_khr_fp16 +DEFINE_PRINT_FLOATS(half) +#endif +DEFINE_PRINT_FLOATS(float) +#ifdef cl_khr_fp64 +DEFINE_PRINT_FLOATS(double) +#endif + +#undef DEFINE_PRINT_FLOATS + + + +// Helper routines to output characters, strings, and pointers + +void _cl_print_char(flags_t flags, int field_width, int val) +{ + DEBUG_PRINTF(("[printf:char]\n")); + char outfmt[1000]; + snprintf(outfmt, sizeof outfmt, + "%%%s%.0dc", + flags.left ? "-" : "", + field_width); + DEBUG_PRINTF(("[printf:char:outfmt=%s]\n", outfmt)); + printf(outfmt, val); + DEBUG_PRINTF(("[printf:char:done]\n")); +} + +void _cl_print_string(flags_t flags, int field_width, int precision, const char* val) +{ + DEBUG_PRINTF(("[printf:char]\n")); + char outfmt[1000]; + snprintf(outfmt, sizeof outfmt, + precision < 0 ? "%%%s%.0ds" : "%%%s.%.0ds", + flags.left ? "-" : "", + precision < 0 ? field_width : precision); + DEBUG_PRINTF(("[printf:char:outfmt=%s]\n", outfmt)); + debug_ptr((void *)outfmt); + printf(outfmt, val); + DEBUG_PRINTF(("[printf:char:done]\n")); +} + +void _cl_print_pointer(flags_t flags, int field_width, const void* val) +{ + DEBUG_PRINTF(("[printf:char]\n")); + char outfmt[1000]; + snprintf(outfmt, sizeof outfmt, + "%%%s%.0dp", + flags.left ? "-" : "", + field_width); + DEBUG_PRINTF(("[printf:char:outfmt=%s]\n", outfmt)); + printf(outfmt, val); + DEBUG_PRINTF(("[printf:char:done]\n")); +} + + + +// The OpenCL printf routine. 
+ +// The implementation is straightforward: +// - walk through the format string +// - when a variable should be output, parse flags, field width, +// precision, vector specifier, length, and conversion specifier +// - call a helper routine to perform the actual output +// - the helper routine is based on calling C99 printf, and constructs +// a format string via snprintf +// - if there is an error during parsing, a "goto error" aborts the +// routine, returning -1 + +#define OCL_CONSTANT_AS __attribute__((address_space(3))) +int _cl_printf(const OCL_CONSTANT_AS char* restrict format, ...) +{ + DEBUG_PRINTF(("[printf:format=%s]\n", format)); + va_list ap; + va_start(ap, format); + + char ch = *format; + while (ch) { + if (ch == '%') { + ch = *++format; + + if (ch == '%') { + DEBUG_PRINTF(("[printf:%%]\n")); + printf("%%"); // literal % + ch = *++format; + } else { + DEBUG_PRINTF(("[printf:arg]\n")); + // Flags + flags_t flags; + flags.left = false; + flags.plus = false; + flags.space = false; + flags.alt = false; + flags.zero = false; + for (;;) { + switch (ch) { + case '-': if (flags.left) goto error; flags.left = true; break; + case '+': if (flags.plus) goto error; flags.plus = true; break; + case ' ': if (flags.space) goto error; flags.space = true; break; + case '#': if (flags.alt) goto error; flags.alt = true; break; + case '0': if (flags.zero) goto error; flags.zero = true; break; + default: goto flags_done; + } + ch = *++format; + } + flags_done:; + DEBUG_PRINTF(("[printf:flags:left=%d,plus=%d,space=%d,alt=%d,zero=%d]\n", + flags.left, flags.plus, flags.space, flags.alt, flags.zero)); + + // Field width + int field_width = 0; + while (ch >= '0' && ch <= '9') { + if (ch == '0' && field_width == 0) goto error; + if (field_width > (INT_MAX - 9) / 10) goto error; + field_width = 10 * field_width + (ch - '0'); + ch = *++format; + } + DEBUG_PRINTF(("[printf:width=%d]\n", field_width)); + + // Precision + int precision = -1; + if (ch == '.') { + ch = *++format; + 
precision = 0; + while (ch >= '0' && ch <= '9') { + if (precision > (INT_MAX - 9) / 10) goto error; + precision = 10 * precision + (ch - '0'); + ch = *++format; + } + } + DEBUG_PRINTF(("[printf:precision=%d]\n", precision)); + + // Vector specifier + int vector_length = 0; + if (ch == 'v') { + ch = *++format; + while (ch >= '0' && ch <= '9') { + if (ch == '0' && vector_length == 0) goto error; + if (vector_length > (INT_MAX - 9) / 10) goto error; + vector_length = 10 * vector_length + (ch - '0'); + ch = *++format; + } + if (! (vector_length == 2 || + vector_length == 3 || + vector_length == 4 || + vector_length == 8 || + vector_length == 16)) goto error; + } + DEBUG_PRINTF(("[printf:vector_length=%d]\n", vector_length)); + + // Length modifier + int length = 0; // default + if (ch == 'h') { + ch = *++format; + if (ch == 'h') { + ch = *++format; + length = 1; // "hh" -> char + } else if (ch == 'l') { + ch = *++format; + length = 4; // "hl" -> int or float + } else { + length = 2; // "h" -> short + } + } else if (ch == 'l') { + ch = *++format; + length = 8; // "l" -> long + } + if (vector_length > 0 && length == 0) goto error; + if (vector_length == 0 && length == 4) goto error; + if (vector_length == 0) vector_length = 1; + DEBUG_PRINTF(("[printf:length=%d]\n", length)); + + // Conversion specifier + switch (ch) { + + // Output integers + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + +#define CALL_PRINT_INTS(WIDTH, PROMOTED_WIDTH) \ + { \ + WIDTH##16 val; \ + switch (vector_length) { \ + default: __builtin_unreachable(); \ + case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break; \ + case 2: val.s01 = va_arg(ap, WIDTH##2); break; \ + case 3: val.s012 = va_arg(ap, WIDTH##3); break; \ + case 4: val.s0123 = va_arg(ap, WIDTH##4); break; \ + case 8: val.lo = va_arg(ap, WIDTH##8); break; \ + case 16: val = va_arg(ap, WIDTH##16); break; \ + } \ + _cl_print_ints_##WIDTH(flags, field_width, precision, \ + ch, &val, vector_length); \ + } + + 
DEBUG_PRINTF(("[printf:int:conversion=%c]\n", ch)); + switch (length) { + default: __builtin_unreachable(); + case 1: CALL_PRINT_INTS(char, int); break; + case 2: CALL_PRINT_INTS(short, int); break; + case 0: + case 4: CALL_PRINT_INTS(int, int); break; +#ifdef cl_khr_int64 + case 8: CALL_PRINT_INTS(long, long); break; +#endif + } + +#undef CALL_PRINT_INTS + + break; + + // Output floats + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + +#define CALL_PRINT_FLOATS(WIDTH, PROMOTED_WIDTH) \ + { \ + WIDTH##16 val; \ + switch (vector_length) { \ + default: __builtin_unreachable(); \ + case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break; \ + case 2: val.s01 = va_arg(ap, WIDTH##2); break; \ + case 3: val.s012 = va_arg(ap, WIDTH##3); break; \ + case 4: val.s0123 = va_arg(ap, WIDTH##4); break; \ + case 8: val.lo = va_arg(ap, WIDTH##8); break; \ + case 16: val = va_arg(ap, WIDTH##16); break; \ + } \ + float tmp;\ + tmp = val.s0; \ + debug_ptr((void *)&tmp); \ + tmp = val.s1; \ + debug_ptr((void *)&tmp); \ + tmp = val.s2; \ + debug_ptr((void *)&tmp); \ + tmp = val.s3; \ + debug_ptr((void *)&tmp); \ + _cl_print_floats_##WIDTH(flags, field_width, precision, \ + ch, &val, vector_length); \ + } + + DEBUG_PRINTF(("[printf:float:conversion=%c]\n", ch)); + switch (length) { + default: __builtin_unreachable(); +#ifdef cl_khr_fp16 + // case 2: CALL_PRINT_FLOATS(half, double); break; + case 2: goto error; // not yet implemented +#endif + case 0: + // Note: width 0 cleverly falls through to float if double + // is not supported +#ifdef cl_khr_fp64 + case 8: CALL_PRINT_FLOATS(double, double); break; + case 4: CALL_PRINT_FLOATS(float, double); break; +#else + break; +#endif + } + +#undef CALL_PRINT_FLOATS + + break; + + // Output a character + case 'c': { + DEBUG_PRINTF(("[printf:char]\n")); + if (flags.plus || flags.space || flags.alt || flags.zero) goto error; + DEBUG_PRINTF(("[printf:char1]\n")); + if (precision != -1) goto error; + 
DEBUG_PRINTF(("[printf:char2]\n")); + if (vector_length != 1) goto error; + DEBUG_PRINTF(("[printf:char3]\n")); + if (length != 0) goto error; + DEBUG_PRINTF(("[printf:char4]\n")); + int val = va_arg(ap, int); + _cl_print_char(flags, field_width, val); + break; + } + + // Output a string + case 's': { + if (flags.plus || flags.space || flags.alt || flags.zero) goto error; + if (vector_length != 1) goto error; + if (length != 0) goto error; + const char* val = va_arg(ap, const char*); + // GP: Note: v1.2 Khronos test_printf tests for "%.1s", so need to check precision + _cl_print_string(flags, field_width, precision, val); + break; + } + + // Output a pointer + case 'p': { + if (flags.plus || flags.space || flags.alt || flags.zero) goto error; + if (precision != -1) goto error; + if (vector_length != 1) goto error; + if (length != 0) goto error; + const void* val = va_arg(ap, const void*); + _cl_print_pointer(flags, field_width, val); + break; + } + + default: goto error; + } + ch = *++format; + + } // not a literal % + + } else { + DEBUG_PRINTF(("[printf:literal]\n")); + printf("%c", ch); + ch = *++format; + } + } + + va_end(ap); + DEBUG_PRINTF(("[printf:done]\n")); + return 0; + + error:; + va_end(ap); + DEBUG_PRINTF(("[printf:error]\n")); + printf("(printf format string error)"); + return -1; +} diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp index 137d34e..df8fd3c 100644 --- a/src/core/cpu/builtins.cpp +++ b/src/core/cpu/builtins.cpp @@ -411,6 +411,14 @@ static void unimplemented_stub() { } +void debug_ptr(void * arg) +{ + char *s = (char *)arg; + float f = *(float *)arg; + double d = *(double *)arg; + int i = *(int *)arg; +} + void *getBuiltin(const std::string &name) { if (name == "get_global_id") @@ -465,8 +473,10 @@ void *getBuiltin(const std::string &name) else if (name == "__cpu_read_imageuif") return (void *)&read_imageuif; - else if (name == "debug") - return (void *)&printf; + // Generic hook to set debugger bpt to inspect stack 
variable passed as (void *) + else if (name == "debug_ptr") + return(void *)&debug_ptr; + else if (name == "__aeabi_unwind_cpp_pr0") return (void *)&dummy_fxn; else if (name == "__aeabi_unwind_cpp_pr1") |