Added printf builtin for v1.2

This adds an OpenCL C file taken from pocl, with some minor tweaks. Per the Khronos v1.2 test_printf test case, this enables all of the 57 sub tests to pass, with two exceptions: *** Testing printf for vector *** 0)testing printf("%2.2v4hlf",(1.0f,2.0f,3.0f,4.0f)) *** FAILED *** 4)testing printf("%v2ld",(12345678,98765432)) *** FAILED *** Some debugging indicates a possible issue involving va_args and floating point types, which becomes apparent when passing vectors of floats to a variadic function. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
author: Gil Pitney <gil.pitney@linaro.org> 2015-04-29 16:55:55 +0000
committer: Gil Pitney <gil.pitney@linaro.org> 2015-04-29 16:55:55 +0000
commit: 7dd00e516e90d5ad84aa6ebedf7fc2bfea25247f (patch)
tree: 733ef242e3cca9d00279b3e451629b9b86e6e0ce
parent: 6e94d7f24bf1d4c15fc21003289ad968a240d8b3 (diff)
7 files changed, 919 insertions, 9 deletions
diff --git a/include/cpu.h b/include/cpu.h
index 1e8380f..0f74f1f 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -267,7 +267,8 @@ TERNARY_VEC_DECL(uint, uint, mad_sat)
 TERNARY_VEC_DECL(long, long, mad_sat)
 TERNARY_VEC_DECL(ulong, ulong, mad_sat)
 
-int printf(__constant char* _format, ...);
+int _cl_printf(__constant char* restrict _format, ...); /* OpenCL printf entry point; defined in src/builtins/printf.c */
+#define printf _cl_printf /* kernel code that calls printf() is redirected to _cl_printf() */
 void *memcpy(void *dst, const void * src, uint size);
 
 _CLC_DECL  size_t  get_local_id     (uint dim);
diff --git a/src/builtins/CMakeLists.txt b/src/builtins/CMakeLists.txt
index d91c775..078dc20 100644
--- a/src/builtins/CMakeLists.txt
+++ b/src/builtins/CMakeLists.txt
@@ -1,9 +1,13 @@
 if (SHAMROCK_BUILD)
 
-set(CUSTOM_COMMAND ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc  -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off )
+set(CUSTOM_COMMAND_C ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc  -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off ) # flags shared by the C and OpenCL C builds. NOTE(review): "-Fvisibility=protected" has a capital F; confirm clang -cc1 does not take it as "-F <dir>" instead of a visibility option
+
+set(CUSTOM_COMMAND ${CUSTOM_COMMAND_C} -x cl ) # OpenCL C builds add "-x cl" on top of the shared flags
+
 
 FILE(GLOB CL_SOURCES ${CLC_BUILTINS_DIR}/*.cl)
-#MESSAGE(STATUS "CL_SOURCES: ${CL_SOURCES}" )
+FILE(GLOB C_SOURCES  ${CLC_BUILTINS_DIR}/*.c)
+#MESSAGE(STATUS "C_SOURCES: ${C_SOURCES}" )
 
 set(BC_SOURCES)
 foreach(f ${CL_SOURCES})
@@ -18,6 +22,27 @@ foreach(f ${CL_SOURCES})
           COMMENT "Generating ${bc}")
     list(APPEND BC_SOURCES ${bc})
 endforeach()
+
+
+message(STATUS "LLVM_LIB_DIR: ${LLVM_LIB_DIR}")
+message(STATUS "LLVM_VERSION: ${LLVM_VERSION}")
+set(CLANG_INCLUDE_DIR "${LLVM_LIB_DIR}/clang/${LLVM_VERSION}/include")
+message(STATUS "CLANG_INCLUDE_DIR: ${CLANG_INCLUDE_DIR}")
+# Compile every plain-C builtin to its own bitcode file; -nobuiltininc means clang's headers must be added by hand.
+foreach(f ${C_SOURCES})
+    get_filename_component(fn ${f} NAME_WE)
+    # The shared headers are listed in DEPENDS so that editing them regenerates the bitcode as well.
+    set(bc ${CMAKE_CURRENT_BINARY_DIR}/${fn}.bc)
+    add_custom_command(OUTPUT ${bc}
+          COMMAND ${CUSTOM_COMMAND_C}
+            -I${OCL_BUILTINS_DIR}/include
+            -I${CLANG_INCLUDE_DIR}
+            -o ${bc} ${f}
+          DEPENDS ${f} ${CLC_BUILTINS_DIR}/_kernel_c.h ${CLC_BUILTINS_DIR}/pocl_types.h
+          COMMENT "Generating ${bc}" VERBATIM)
+    list(APPEND BC_SOURCES ${bc})
+endforeach()
+
 #MESSAGE( STATUS "BC_SOURCES: ${BC_SOURCES}")
 
 add_custom_target(generate_bc_files DEPENDS ${BC_SOURCES})
diff --git a/src/builtins/Makefile b/src/builtins/Makefile
index 1d3349b..b9191dd 100644
--- a/src/builtins/Makefile
+++ b/src/builtins/Makefile
@@ -1,21 +1,29 @@
 CLANG   = clang
-CLANG_CFLAGS =  -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc
+CLANG_CFLAGS =  -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc
 CLANG_CFLAGS += -Fvisibility=protected -cl-std=CL1.2 -ffp-contract=off 
-CLANG_CFLAGS += -I../../include 
+CLANG_INCLUDE_DIR ?= /opt/llvm/lib/clang/3.6.0/include
+CLANG_CFLAGS += -I../../include -I$(CLANG_INCLUDE_DIR) # clang's own headers (<stdarg.h>, <limits.h>, <stdbool.h>); override CLANG_INCLUDE_DIR for other installs
+CLANG_CL_FLAGS += $(CLANG_CFLAGS) -x cl
 
 CL_FILES = $(wildcard *.cl)
+C_FILES = $(wildcard *.c)
 BYTECODE := ${CL_FILES:.cl=.bc}
+BYTECODE_FROM_C := ${C_FILES:.c=.bc}
 
 all: builtins.lib
 
-builtins.lib: $(BYTECODE)
+builtins.lib: $(BYTECODE) $(BYTECODE_FROM_C)
 	@echo $@ Linking bytecode modules 
 	llvm-link -o $@ $^
 
-%.bc: %.cl
+%.bc: %.c
 	@echo $< Parsing
 	@$(CLANG) $(CLANG_CFLAGS) $< -o $@
 
+%.bc: %.cl
+	@echo $< Parsing
+	@$(CLANG) $(CLANG_CL_FLAGS) $< -o $@
+
 %.ll: %.bc
 	@echo $< Disassembling
 	llvm-dis $<
diff --git a/src/builtins/_kernel_c.h b/src/builtins/_kernel_c.h
new file mode 100644
index 0000000..dc03bd8
--- /dev/null
+++ b/src/builtins/_kernel_c.h
@@ -0,0 +1,315 @@
+/* pocl/_kernel_c.h - C compatible OpenCL types and runtime library
+   functions declarations.
+
+   Copyright (c) 2011 Universidad Rey Juan Carlos
+   Copyright (c) 2011-2013 Pekka Jääskeläinen / TUT
+   Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
+                           Perimeter Institute for Theoretical Physics
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+/**
+ * Header that can be included in C compiled implementations of
+ * built-in functions to introduce the OpenCL C compatible types etc.
+ */
+#ifndef _KERNEL_C_H
+#define _KERNEL_C_H
+
+#include "pocl_types.h"
+
+/* Function/type attributes supported by Clang/SPIR */
+#if __has_attribute(__always_inline__)
+#  define _CL_ALWAYSINLINE __attribute__((__always_inline__))
+#else
+#  define _CL_ALWAYSINLINE
+#endif
+#if __has_attribute(__noinline__)
+#  define _CL_NOINLINE __attribute__((__noinline__))
+#else
+#  define _CL_NOINLINE
+#endif
+#if __has_attribute(__overloadable__)
+#  define _CL_OVERLOADABLE __attribute__((__overloadable__))
+#else
+#  define _CL_OVERLOADABLE
+#endif
+#if (__clang_major__ > 3) || ((__clang_major__ == 3) && (__clang_minor__ >= 2))
+/* __has_attribute(__const__) causes an error with Clang 3.1, so the compiler version is tested */
+/* instead; every Clang from 3.2 on, including 4.x and later, gets the attribute. */
+#  define _CL_READNONE __attribute__((__const__))
+#else
+#  define _CL_READNONE
+#endif
+#if __has_attribute(__pure__)
+#  define _CL_READONLY __attribute__((__pure__))
+#else
+#  define _CL_READONLY
+#endif
+#if __has_attribute(__unavailable__)
+#  define _CL_UNAVAILABLE __attribute__((__unavailable__))
+#else
+#  define _CL_UNAVAILABLE
+#endif
+
+typedef char char2  __attribute__((__ext_vector_type__(2)));
+typedef char char3  __attribute__((__ext_vector_type__(3)));
+typedef char char4  __attribute__((__ext_vector_type__(4)));
+typedef char char8  __attribute__((__ext_vector_type__(8)));
+typedef char char16 __attribute__((__ext_vector_type__(16)));
+
+typedef uchar uchar2  __attribute__((__ext_vector_type__(2)));
+typedef uchar uchar3  __attribute__((__ext_vector_type__(3)));
+typedef uchar uchar4  __attribute__((__ext_vector_type__(4)));
+typedef uchar uchar8  __attribute__((__ext_vector_type__(8)));
+typedef uchar uchar16 __attribute__((__ext_vector_type__(16)));
+
+typedef short short2  __attribute__((__ext_vector_type__(2)));
+typedef short short3  __attribute__((__ext_vector_type__(3)));
+typedef short short4  __attribute__((__ext_vector_type__(4)));
+typedef short short8  __attribute__((__ext_vector_type__(8)));
+typedef short short16 __attribute__((__ext_vector_type__(16)));
+
+typedef ushort ushort2  __attribute__((__ext_vector_type__(2)));
+typedef ushort ushort3  __attribute__((__ext_vector_type__(3)));
+typedef ushort ushort4  __attribute__((__ext_vector_type__(4)));
+typedef ushort ushort8  __attribute__((__ext_vector_type__(8)));
+typedef ushort ushort16 __attribute__((__ext_vector_type__(16)));
+
+typedef int int2  __attribute__((__ext_vector_type__(2)));
+typedef int int3  __attribute__((__ext_vector_type__(3)));
+typedef int int4  __attribute__((__ext_vector_type__(4)));
+typedef int int8  __attribute__((__ext_vector_type__(8)));
+typedef int int16 __attribute__((__ext_vector_type__(16)));
+
+typedef uint uint2  __attribute__((__ext_vector_type__(2)));
+typedef uint uint3  __attribute__((__ext_vector_type__(3)));
+typedef uint uint4  __attribute__((__ext_vector_type__(4)));
+typedef uint uint8  __attribute__((__ext_vector_type__(8)));
+typedef uint uint16 __attribute__((__ext_vector_type__(16)));
+
+#if defined(__CBUILD__) && defined(cl_khr_fp16)
+/* NOTE: the Clang's __fp16 does not work robustly in C mode, 
+   it might produce invalid code at least with half vectors.
+   Using the native 'half' type in OpenCL C mode works better. */
+typedef __fp16 half;
+#endif
+
+#ifdef cl_khr_fp16
+typedef half half2  __attribute__((__ext_vector_type__(2)));
+typedef half half3  __attribute__((__ext_vector_type__(3)));
+typedef half half4  __attribute__((__ext_vector_type__(4)));
+typedef half half8  __attribute__((__ext_vector_type__(8)));
+typedef half half16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+typedef float float2  __attribute__((__ext_vector_type__(2)));
+typedef float float3  __attribute__((__ext_vector_type__(3)));
+typedef float float4  __attribute__((__ext_vector_type__(4)));
+typedef float float8  __attribute__((__ext_vector_type__(8)));
+typedef float float16 __attribute__((__ext_vector_type__(16)));
+
+#ifdef cl_khr_fp64
+#  ifndef __CBUILD__
+#    pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#  endif
+typedef double double2  __attribute__((__ext_vector_type__(2)));
+typedef double double3  __attribute__((__ext_vector_type__(3)));
+typedef double double4  __attribute__((__ext_vector_type__(4)));
+typedef double double8  __attribute__((__ext_vector_type__(8)));
+typedef double double16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+#ifdef cl_khr_int64
+typedef long long2  __attribute__((__ext_vector_type__(2)));
+typedef long long3  __attribute__((__ext_vector_type__(3)));
+typedef long long4  __attribute__((__ext_vector_type__(4)));
+typedef long long8  __attribute__((__ext_vector_type__(8)));
+typedef long long16 __attribute__((__ext_vector_type__(16)));
+
+typedef ulong ulong2  __attribute__((__ext_vector_type__(2)));
+typedef ulong ulong3  __attribute__((__ext_vector_type__(3)));
+typedef ulong ulong4  __attribute__((__ext_vector_type__(4)));
+typedef ulong ulong8  __attribute__((__ext_vector_type__(8)));
+typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+/* Image support */
+
+/* Starting from Clang 3.3 the image and sampler are detected
+   as opaque types by the frontend. In order to define
+   the default builtins we use C functions which require 
+   the typedefs to the actual underlying types. Clang 3.2
+   needs the typedefs throughout as the types are not detected
+   by the frontend. */
+#if !defined(_CL_HAS_IMAGE_ACCESS)
+typedef int sampler_t;
+
+/* Since some built-ins have different return types
+ * (e.g. get_image_dim returns an int2 for 2D images and arrays,
+ *  but an int4 for 3D images) we want each image type to
+ * point to a different type which is actually always the same.
+ * We do this by making it pointer to structs whose only element is a
+ * dev_image_t. The structs are not anonymous to allow identification
+ * by name.
+ */
+typedef struct _pocl_image2d_t { dev_image_t base; }* image2d_t;
+typedef struct _pocl_image3d_t { dev_image_t base; }* image3d_t;
+typedef struct _pocl_image1d_t { dev_image_t base; }* image1d_t;
+typedef struct _pocl_image1d_buffer_t { dev_image_t base; }* image1d_buffer_t;
+typedef struct _pocl_image2d_array_t { dev_image_t base; }* image2d_array_t;
+typedef struct _pocl_image1d_array_t { dev_image_t base; }* image1d_array_t;
+#endif
+
+
+/* cl_channel_order */
+#define CL_R                                        0x10B0
+#define CL_A                                        0x10B1
+#define CL_RG                                       0x10B2
+#define CL_RA                                       0x10B3
+#define CL_RGB                                      0x10B4
+#define CL_RGBA                                     0x10B5
+#define CL_BGRA                                     0x10B6
+#define CL_ARGB                                     0x10B7
+#define CL_INTENSITY                                0x10B8
+#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC
+#define CL_DEPTH                                    0x10BD
+#define CL_DEPTH_STENCIL                            0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8                               0x10D0
+#define CL_SNORM_INT16                              0x10D1
+#define CL_UNORM_INT8                               0x10D2
+#define CL_UNORM_INT16                              0x10D3
+#define CL_UNORM_SHORT_565                          0x10D4
+#define CL_UNORM_SHORT_555                          0x10D5
+#define CL_UNORM_INT_101010                         0x10D6
+#define CL_SIGNED_INT8                              0x10D7
+#define CL_SIGNED_INT16                             0x10D8
+#define CL_SIGNED_INT32                             0x10D9
+#define CL_UNSIGNED_INT8                            0x10DA
+#define CL_UNSIGNED_INT16                           0x10DB
+#define CL_UNSIGNED_INT32                           0x10DC
+#define CL_HALF_FLOAT                               0x10DD
+#define CL_FLOAT                                    0x10DE
+#define CL_UNORM_INT24                              0x10DF
+
+/* cl_addressing_mode */
+#define CLK_ADDRESS_NONE                            0x00
+#define CLK_ADDRESS_MIRRORED_REPEAT                 0x01
+#define CLK_ADDRESS_REPEAT                          0x02
+#define CLK_ADDRESS_CLAMP_TO_EDGE                   0x03
+#define CLK_ADDRESS_CLAMP                           0x04
+
+/* cl_sampler_info */
+#define CLK_NORMALIZED_COORDS_FALSE                 0x00
+#define CLK_NORMALIZED_COORDS_TRUE                  0x08
+
+/* filter_mode */
+#define CLK_FILTER_NEAREST                          0x00
+#define CLK_FILTER_LINEAR                           0x10
+
+//#ifdef _CL_HAS_IMAGE_ACCESS
+
+float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler,
+                                     int2 coord);
+
+float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler,
+                                     float2 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler, 
+                                     int2 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler, 
+                                     int4 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image3d_t image, sampler_t sampler, 
+                                     int4 coord);
+
+int4 _CL_OVERLOADABLE read_imagei (image2d_t image, sampler_t sampler, 
+                                   int2 coord);
+
+
+void _CL_OVERLOADABLE write_imagei (image2d_t image, int2 coord, int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image2d_t image, int2 coord, uint4 color);
+
+
+
+void _CL_OVERLOADABLE write_imagef (image2d_t image, int2 coord,
+                                    float4 color);
+/* not implemented 
+void _CL_OVERLOADABLE write_imagef (image2d_array_t image, int4 coord,
+                                    float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image2d_array_t image, int4 coord,
+                                    int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image2d_array_t image, int4 coord,
+                                     uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_t image, int coord,
+                                    float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_t image, int coord,
+                                    int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_t image, int coord, 
+                                     uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_buffer_t image, int coord, 
+                                    float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_buffer_t image, int coord,
+                                     int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_buffer_t image, int coord,
+                                     uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_array_t image, int2 coord,
+                                    float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_array_t image, int2 coord,
+                                    int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_array_t image, int2 coord,
+                                     uint4 color);
+
+void _CL_OVERLOADABLE write_imageui (image3d_t image, int4 coord,
+                                     uint4 color);
+*/
+int _CL_OVERLOADABLE get_image_width (image1d_t image);
+int _CL_OVERLOADABLE get_image_width (image2d_t image);
+int _CL_OVERLOADABLE get_image_width (image3d_t image);
+
+int _CL_OVERLOADABLE get_image_height (image1d_t image);
+int _CL_OVERLOADABLE get_image_height (image2d_t image);
+int _CL_OVERLOADABLE get_image_height (image3d_t image);
+
+int _CL_OVERLOADABLE get_image_depth (image1d_t image);
+int _CL_OVERLOADABLE get_image_depth (image2d_t image);
+int _CL_OVERLOADABLE get_image_depth (image3d_t image);
+
+int2 _CL_OVERLOADABLE get_image_dim (image2d_t image);
+int2 _CL_OVERLOADABLE get_image_dim (image2d_array_t image);
+int4 _CL_OVERLOADABLE get_image_dim (image3d_t image);
+
+#endif
diff --git a/src/builtins/pocl_types.h b/src/builtins/pocl_types.h
new file mode 100644
index 0000000..3f280cf
--- /dev/null
+++ b/src/builtins/pocl_types.h
@@ -0,0 +1,75 @@
+// Scalar type definitions
+
+//#include "pocl_features.h"
+
+#if 0 // GP: check disabled. NOTE(review): presumably because printf.c defines cl_khr_fp64 without cl_khr_int64 -- confirm
+#if defined cl_khr_fp64 && !defined cl_khr_int64
+#  error "cl_khr_fp64 requires cl_khr_int64"
+#endif
+#endif
+
+
+/* Disable undefined datatypes */
+
+/* The definitions below intentionally lead to errors if these types
+   are used when they are not available in the language. This prevents
+   accidentally using them if the compiler does not disable these
+   types, but only e.g. defines them with an incorrect size.*/
+
+#ifndef cl_khr_int64
+typedef struct error_undefined_type_long error_undefined_type_long;
+#  define long error_undefined_type_long
+typedef struct error_undefined_type_ulong error_undefined_type_ulong;
+#  define ulong error_undefined_type_ulong
+#endif
+
+#ifndef cl_khr_fp16
+typedef struct error_undefined_type_half error_undefined_type_half;
+#  define half error_undefined_type_half
+#endif
+
+#ifndef cl_khr_fp64
+typedef struct error_undefined_type_double error_undefined_type_double;
+#  define double error_undefined_type_double
+#endif
+
+
+/* Define unsigned datatypes */
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+#ifdef cl_khr_int64
+typedef unsigned long ulong;
+#endif
+
+/* Define pointer helper types */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef ptrdiff_t intptr_t;
+typedef size_t uintptr_t;
+
+
+/* Image types.
+ * Note: there is a duplicate definition in
+ * lib/CL/devices/dev_image.h - keep in sync?
+ */
+typedef int dev_sampler_t;
+
+typedef struct dev_image_t {
+  void* data;
+  int width;
+  int height;
+  int depth;
+  int image_array_size;
+  int row_pitch;
+  int slice_pitch;
+  int num_mip_levels; /* maybe not needed */
+  int num_samples; /* maybe not needed */
+  int order;
+  int data_type;
+  int num_channels;
+  int elem_size;
+} dev_image_t;
+
diff --git a/src/builtins/printf.c b/src/builtins/printf.c
new file mode 100644
index 0000000..196373c
--- /dev/null
+++ b/src/builtins/printf.c
@@ -0,0 +1,476 @@
+/* OpenCL built-in library: printf()
+
+   Copyright (c) 2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
+                      Perimeter Institute for Theoretical Physics
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+// Make the C99 printf visible again
+#undef printf
+
+// GP: Add the OpenCL types for "C". NOTE(review): only cl_khr_fp64 is defined, so the cl_khr_int64-guarded long/ulong code is compiled out -- confirm this is intended.
+#define cl_khr_fp64
+#include "_kernel_c.h"
+
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+
+// We implement the OpenCL printf by calling the C99 printf. This is
+// not very efficient, but is easy to implement.
+int printf(const char* restrict fmt, ...);
+int snprintf(char* restrict str, size_t size, const char* restrict fmt, ...);
+
+// For debugging
+void debug_ptr(void * arg);
+
+// Use as: DEBUG_PRINTF((fmt, args...)) -- note double parentheses!
+//#define DEBUG_PRINTF(args) printf args
+#define DEBUG_PRINTF(args) ((void)0)
+
+// Conversion flags parsed from one format directive
+typedef struct {
+  bool left;   // '-' flag seen
+  bool plus;   // '+' flag seen
+  bool space;  // ' ' flag seen
+  bool alt;    // '#' flag seen
+  bool zero;   // '0' flag seen
+} flags_t;
+
+
+
+// Helper routines to output integers: DEFINE_PRINT_INTS(T) emits _cl_print_ints_T(), which builds a C99 format
+// "%[flags][width][.precision]<len><conv>" with snprintf and prints the n values at vals, separated by commas.
+#define INT_CONV_char  "hh"
+#define INT_CONV_short "h"
+#define INT_CONV_int   ""
+#define INT_CONV_long  "ll"     // C99 printf uses "ll" for int64_t
+
+#define DEFINE_PRINT_INTS(WIDTH)                                        \
+  void _cl_print_ints_##WIDTH(flags_t flags, int field_width, int precision, \
+                              char conv, const void* vals, int n)       \
+  {                                                                     \
+    DEBUG_PRINTF(("[printf:ints:n=%d]\n", n));                          \
+    char outfmt[1000];                                                  \
+    snprintf(outfmt, sizeof outfmt,                                     \
+             "%%%s%s%s%s%s%.0d%s%.0d" INT_CONV_##WIDTH "%c",            \
+             flags.left ? "-" : "",                                     \
+             flags.plus ? "+" : "",                                     \
+             flags.space ? " " : "",                                    \
+             flags.alt ? "#" : "",                                      \
+             flags.zero ? "0" : "",                                     \
+             field_width,                                               \
+             precision != -1 ? "." : "",                                \
+             precision != -1 ? precision : 0,                           \
+             conv);                                                     \
+    DEBUG_PRINTF(("[printf:ints:outfmt=%s]\n", outfmt));                \
+    for (int d=0; d<n; ++d) {                                           \
+      DEBUG_PRINTF(("[printf:ints:d=%d]\n", d));                        \
+      if (d != 0) printf(",");                                          \
+      printf(outfmt, ((const WIDTH*)vals)[d]);                          \
+    }                                                                   \
+    DEBUG_PRINTF(("[printf:ints:done]\n"));                             \
+  }
+
+DEFINE_PRINT_INTS(char)
+DEFINE_PRINT_INTS(short)
+DEFINE_PRINT_INTS(int)
+#ifdef cl_khr_int64
+DEFINE_PRINT_INTS(long)
+#endif
+
+#undef DEFINE_PRINT_INTS
+
+
+
+// Helper routines to output floats: DEFINE_PRINT_FLOATS(T) emits _cl_print_floats_T(), printing n comma-separated values.
+
+// Defined in OpenCL
+float __attribute__((overloadable)) vload_half(size_t offset, const half *p);
+
+// Note: double values are printed with %lf although %f would suffice as well. half values are widened to
+// float by vload_half and so take no length modifier: C99 defines "h" only for integer conversions.
+#define FLOAT_CONV_half   ""
+#define FLOAT_CONV_float  ""
+#define FLOAT_CONV_double "l"
+#define FLOAT_GET_half(ptr)   vload_half(0, ptr)
+#define FLOAT_GET_float(ptr)  (*(ptr))
+#define FLOAT_GET_double(ptr) (*(ptr))
+
+#define DEFINE_PRINT_FLOATS(WIDTH)                                      \
+  void _cl_print_floats_##WIDTH(flags_t flags, int field_width, int precision, \
+                                char conv, const void* vals, int n)     \
+  {                                                                     \
+    char outfmt[1000];                                                  \
+    DEBUG_PRINTF(("[printf:floats:n=%d]\n", n));                        \
+    snprintf(outfmt, sizeof outfmt,                                     \
+             "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c",          \
+             flags.left ? "-" : "",                                     \
+             flags.plus ? "+" : "",                                     \
+             flags.space ? " " : "",                                    \
+             flags.alt ? "#" : "",                                      \
+             flags.zero ? "0" : "",                                     \
+             field_width,                                               \
+             precision != -1 ? "." : "",                                \
+             precision != -1 ? precision : 0,                           \
+             conv);                                                     \
+    DEBUG_PRINTF(("[printf:floats:outfmt=%s]\n", outfmt));              \
+    debug_ptr((void *)outfmt);                                          \
+    for (int d=0; d<n; ++d) {                                           \
+      DEBUG_PRINTF(("[printf:floats:d=%d]\n", d));                      \
+      if (d != 0) printf(",");                                          \
+      debug_ptr((void *)((const WIDTH*)vals+d));                        \
+      printf(outfmt, FLOAT_GET_##WIDTH((const WIDTH*)vals+d));          \
+    }                                                                   \
+    DEBUG_PRINTF(("[printf:floats:done]\n"));                           \
+  }
+
+#ifdef cl_khr_fp16
+DEFINE_PRINT_FLOATS(half)
+#endif
+DEFINE_PRINT_FLOATS(float)
+#ifdef cl_khr_fp64
+DEFINE_PRINT_FLOATS(double)
+#endif
+
+#undef DEFINE_PRINT_FLOATS
+
+
+
+// Helper routines to output characters, strings, and pointers
+
+// Emits the character val through the C99 printf using a "%[-][width]c" directive.
+void _cl_print_char(flags_t flags, int field_width, int val)
+{
+  DEBUG_PRINTF(("[printf:char]\n"));
+  const char* left_flag = flags.left ? "-" : "";
+  char directive[1000];
+  // "%.0d" renders a zero field width as nothing, which leaves the width out of the directive.
+  snprintf(directive, sizeof directive, "%%%s%.0dc", left_flag, field_width);
+  DEBUG_PRINTF(("[printf:char:outfmt=%s]\n", directive));
+  printf(directive, val);
+  DEBUG_PRINTF(("[printf:char:done]\n"));
+}
+
+// Prints val as "%[-][width][.precision]s" via the C99 printf; "%.0d" drops a zero width from the built format.
+void _cl_print_string(flags_t flags, int field_width, int precision, const char* val)
+{
+  DEBUG_PRINTF(("[printf:string]\n"));
+  char outfmt[1000];
+  snprintf(outfmt, sizeof outfmt, "%%%s%.0d%s%.0ds",
+           flags.left ? "-" : "", field_width,  // the width is kept even when a precision is given
+           precision >= 0 ? "." : "", precision >= 0 ? precision : 0);
+  DEBUG_PRINTF(("[printf:string:outfmt=%s]\n", outfmt));
+  debug_ptr((void *)outfmt);
+  printf(outfmt, val);
+  DEBUG_PRINTF(("[printf:string:done]\n"));
+}
+
+// Prints the pointer val through the C99 printf using "%[-][width]p"; of the flags only '-' is used.
+void _cl_print_pointer(flags_t flags, int field_width, const void* val)
+{
+  DEBUG_PRINTF(("[printf:pointer]\n"));
+  char outfmt[1000];
+  // "%.0d" prints nothing for 0, so an unset field width is simply left out of the format.
+  snprintf(outfmt, sizeof outfmt, "%%%s%.0dp",
+           flags.left ? "-" : "", field_width);
+  DEBUG_PRINTF(("[printf:pointer:outfmt=%s]\n", outfmt));
+  printf(outfmt, val);
+  DEBUG_PRINTF(("[printf:pointer:done]\n"));
+}
+
+
+
+// The OpenCL printf routine.
+
+// The implementation is straightforward:
+// - walk through the format string
+// - when a variable should be output, parse flags, field width,
+//   precision, vector specifier, length, and conversion specifier
+// - call a helper routine to perform the actual output
+// - the helper routine is based on calling C99 printf, and constructs
+//   a format string via snprintf
+// - if there is an error during parsing, a "goto error" aborts the
+//   routine, returning -1
+
+#define OCL_CONSTANT_AS __attribute__((address_space(3)))
+int _cl_printf(const OCL_CONSTANT_AS char* restrict format, ...)
+{
+  DEBUG_PRINTF(("[printf:format=%s]\n", format));
+  va_list ap;
+  va_start(ap, format);
+  
+  char ch = *format;
+  while (ch) {
+    if (ch == '%') {
+      ch = *++format;
+      
+      if (ch == '%') {
+        DEBUG_PRINTF(("[printf:%%]\n"));
+        printf("%%");           // literal %
+        ch = *++format;
+      } else {
+        DEBUG_PRINTF(("[printf:arg]\n"));
+        // Flags
+        flags_t flags;
+        flags.left = false;
+        flags.plus = false;
+        flags.space = false;
+        flags.alt = false;
+        flags.zero = false;
+        for (;;) {
+          switch (ch) {
+          case '-': if (flags.left) goto error; flags.left = true; break;
+          case '+': if (flags.plus) goto error; flags.plus = true; break;
+          case ' ': if (flags.space) goto error; flags.space = true; break;
+          case '#': if (flags.alt) goto error; flags.alt = true; break;
+          case '0': if (flags.zero) goto error; flags.zero = true; break;
+          default: goto flags_done;
+          }
+          ch = *++format;
+        }
+      flags_done:;
+        DEBUG_PRINTF(("[printf:flags:left=%d,plus=%d,space=%d,alt=%d,zero=%d]\n",
+                      flags.left, flags.plus, flags.space, flags.alt, flags.zero));
+        
+        // Field width
+        int field_width = 0;
+        while (ch >= '0' && ch <= '9') {
+          if (ch == '0' && field_width == 0) goto error;
+          if (field_width > (INT_MAX - 9) / 10) goto error;
+          field_width = 10 * field_width + (ch - '0');
+          ch = *++format;
+        }
+        DEBUG_PRINTF(("[printf:width=%d]\n", field_width));
+        
+        // Precision
+        int precision = -1;
+        if (ch == '.') {
+          ch = *++format;
+          precision = 0;
+          while (ch >= '0' && ch <= '9') {
+            if (precision > (INT_MAX - 9) / 10) goto error;
+            precision = 10 * precision + (ch - '0');
+            ch = *++format;
+          }
+        }
+        DEBUG_PRINTF(("[printf:precision=%d]\n", precision));
+        
+        // Vector specifier
+        int vector_length = 0;
+        if (ch == 'v') {
+          ch = *++format;
+          while (ch >= '0' && ch <= '9') {
+            if (ch == '0' && vector_length == 0) goto error;
+            if (vector_length > (INT_MAX - 9) / 10) goto error;
+            vector_length = 10 * vector_length + (ch - '0');
+            ch = *++format;
+          }
+          if (! (vector_length == 2 ||
+                 vector_length == 3 ||
+                 vector_length == 4 ||
+                 vector_length == 8 ||
+                 vector_length == 16)) goto error;
+        }
+        DEBUG_PRINTF(("[printf:vector_length=%d]\n", vector_length));
+        
+        // Length modifier
+        int length = 0;           // default
+        if (ch == 'h') {
+          ch = *++format;
+          if (ch == 'h') {
+            ch = *++format;
+            length = 1;           // "hh" -> char
+          } else if (ch == 'l') {
+            ch = *++format;
+            length = 4;           // "hl" -> int or float
+          } else {
+            length = 2;           // "h" -> short
+          }
+        } else if (ch == 'l') {
+          ch = *++format;
+          length = 8;             // "l" -> long
+        }
+        if (vector_length > 0 && length == 0) goto error;
+        if (vector_length == 0 && length == 4) goto error;
+        if (vector_length == 0) vector_length = 1;
+        DEBUG_PRINTF(("[printf:length=%d]\n", length));
+        
+        // Conversion specifier
+        switch (ch) {
+          
+          // Output integers
+        case 'd':
+        case 'i':
+        case 'o':
+        case 'u':
+        case 'x':
+        case 'X':
+          
+#define CALL_PRINT_INTS(WIDTH, PROMOTED_WIDTH)                          \
+          {                                                             \
+            WIDTH##16 val;                                              \
+            switch (vector_length) {                                    \
+            default: __builtin_unreachable();                           \
+            case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break;         \
+            case 2: val.s01 = va_arg(ap, WIDTH##2); break;              \
+            case 3: val.s012 = va_arg(ap, WIDTH##3); break;             \
+            case 4: val.s0123 = va_arg(ap, WIDTH##4); break;            \
+            case 8: val.lo = va_arg(ap, WIDTH##8); break;               \
+            case 16: val = va_arg(ap, WIDTH##16); break;                \
+            }                                                           \
+            _cl_print_ints_##WIDTH(flags, field_width, precision,       \
+                                   ch, &val, vector_length);            \
+          }
+          
+          DEBUG_PRINTF(("[printf:int:conversion=%c]\n", ch));
+          switch (length) {
+          default: __builtin_unreachable();
+          case 1: CALL_PRINT_INTS(char, int); break;
+          case 2: CALL_PRINT_INTS(short, int); break;
+          case 0:
+          case 4: CALL_PRINT_INTS(int, int); break;
+#ifdef cl_khr_int64
+          case 8: CALL_PRINT_INTS(long, long); break;
+#endif
+          }
+
+#undef CALL_PRINT_INTS
+          
+          break;
+          
+          // Output floats
+        case 'f':
+        case 'F':
+        case 'e':
+        case 'E':
+        case 'g':
+        case 'G':
+        case 'a':
+        case 'A':
+          
+#define CALL_PRINT_FLOATS(WIDTH, PROMOTED_WIDTH)                        \
+          {                                                             \
+            WIDTH##16 val;                                              \
+            switch (vector_length) {                                    \
+            default: __builtin_unreachable();                           \
+            case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break;         \
+            case 2: val.s01 = va_arg(ap, WIDTH##2); break;              \
+            case 3: val.s012 = va_arg(ap, WIDTH##3); break;             \
+            case 4: val.s0123 = va_arg(ap, WIDTH##4); break;            \
+            case 8: val.lo = va_arg(ap, WIDTH##8); break;               \
+            case 16: val = va_arg(ap, WIDTH##16); break;                \
+            }                                                           \
+            float tmp;\
+	    tmp = val.s0; \
+            debug_ptr((void *)&tmp);				\
+	    tmp = val.s1; \
+            debug_ptr((void *)&tmp);				\
+	    tmp = val.s2; \
+            debug_ptr((void *)&tmp);				\
+	    tmp = val.s3; \
+            debug_ptr((void *)&tmp);				\
+            _cl_print_floats_##WIDTH(flags, field_width, precision,     \
+                                     ch, &val, vector_length);          \
+          }
+          
+          DEBUG_PRINTF(("[printf:float:conversion=%c]\n", ch));
+          switch (length) {
+          default: __builtin_unreachable();
+#ifdef cl_khr_fp16
+            // case 2: CALL_PRINT_FLOATS(half, double); break;
+          case 2: goto error;   // not yet implemented
+#endif
+          case 0:
+            // Note: with cl_khr_fp64, width 0 shares the double case below.
+            // NOTE(review): without it, width 0 reaches a bare break and prints nothing.
+#ifdef cl_khr_fp64
+          case 8: CALL_PRINT_FLOATS(double, double); break;
+          case 4: CALL_PRINT_FLOATS(float, double); break;
+#else
+              break;
+#endif
+          }
+          
+#undef CALL_PRINT_FLOATS
+          
+          break;
+          
+          // Output a character
+        case 'c': {
+          DEBUG_PRINTF(("[printf:char]\n"));
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          DEBUG_PRINTF(("[printf:char1]\n"));
+          if (precision != -1) goto error;
+          DEBUG_PRINTF(("[printf:char2]\n"));
+          if (vector_length != 1) goto error;
+          DEBUG_PRINTF(("[printf:char3]\n"));
+          if (length != 0) goto error;
+          DEBUG_PRINTF(("[printf:char4]\n"));
+          int val = va_arg(ap, int);
+          _cl_print_char(flags, field_width, val);
+          break;
+        }
+          
+          // Output a string
+        case 's': {
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          if (vector_length != 1) goto error;
+          if (length != 0) goto error;
+          const char* val = va_arg(ap, const char*);
+          // GP: Note: v1.2 Khronos test_printf tests for "%.1s", so need to check precision
+          _cl_print_string(flags, field_width, precision, val);
+          break;
+        }
+          
+          // Output a pointer
+        case 'p': {
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          if (precision != -1) goto error;
+          if (vector_length != 1) goto error;
+          if (length != 0) goto error;
+          const void* val = va_arg(ap, const void*);
+          _cl_print_pointer(flags, field_width, val);
+          break;
+        }
+          
+        default: goto error;
+        }
+        ch = *++format;
+        
+      } // not a literal %
+
+    } else {
+      DEBUG_PRINTF(("[printf:literal]\n"));
+      printf("%c", ch);
+      ch = *++format;
+    }
+  }
+  
+  va_end(ap);
+  DEBUG_PRINTF(("[printf:done]\n"));
+  return 0;
+  
+ error:;
+  va_end(ap);
+  DEBUG_PRINTF(("[printf:error]\n"));
+  printf("(printf format string error)");
+  return -1;
+}
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 137d34e..df8fd3c 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -411,6 +411,14 @@ static void unimplemented_stub()
 {
 }
 
+void debug_ptr(void * arg)  // Debugger hook: break here to inspect *arg; arg is the address of a caller variable.
+{
+  // No double view: callers may pass the address of a 4-byte float, so an 8-byte load would over-read.
+  char *s =   (char *)arg;   // raw byte view of the pointed-to storage
+  float f = *(float *)arg;   // pointed-to value viewed as float
+  int    i = *(int *)arg;    // pointed-to value viewed as int
+}
+
 void *getBuiltin(const std::string &name)
 {
     if (name == "get_global_id")
@@ -465,8 +473,10 @@ void *getBuiltin(const std::string &name)
     else if (name == "__cpu_read_imageuif")
         return (void *)&read_imageuif;
 
-    else if (name == "debug")
-        return (void *)&printf;
+    // Generic hook: set a debugger breakpoint on debug_ptr to inspect a stack variable passed as (void *)
+    else if (name == "debug_ptr")
+        return(void *)&debug_ptr;
+
     else if (name == "__aeabi_unwind_cpp_pr0")
         return (void *)&dummy_fxn;
     else if (name == "__aeabi_unwind_cpp_pr1")
author	Gil Pitney <gil.pitney@linaro.org>	2015-04-29 16:55:55 +0000
committer	Gil Pitney <gil.pitney@linaro.org>	2015-04-29 16:55:55 +0000
commit	7dd00e516e90d5ad84aa6ebedf7fc2bfea25247f (patch)
tree	733ef242e3cca9d00279b3e451629b9b86e6e0ce
parent	6e94d7f24bf1d4c15fc21003289ad968a240d8b3 (diff)