aboutsummaryrefslogtreecommitdiff
path: root/include/cpu.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/cpu.h')
-rw-r--r--include/cpu.h262
1 files changed, 262 insertions, 0 deletions
diff --git a/include/cpu.h b/include/cpu.h
new file mode 100644
index 0000000..6fa0d90
--- /dev/null
+++ b/include/cpu.h
@@ -0,0 +1,262 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _CPU_CLC_H_
+#define _CPU_CLC_H_
+
+#include "clc.h"
+
+#define PREFETCH_VECTORIZE(PRIM_TYPE) \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE *p, size_t num_gentypes); \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##2 *p, size_t num_gentypes); \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##3 *p, size_t num_gentypes); \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##4 *p, size_t num_gentypes); \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##8 *p, size_t num_gentypes); \
+ _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##16 *p, size_t num_gentypes); \
+
+PREFETCH_VECTORIZE(char)
+PREFETCH_VECTORIZE(uchar)
+PREFETCH_VECTORIZE(short)
+PREFETCH_VECTORIZE(ushort)
+PREFETCH_VECTORIZE(int)
+PREFETCH_VECTORIZE(uint)
+PREFETCH_VECTORIZE(long)
+PREFETCH_VECTORIZE(ulong)
+PREFETCH_VECTORIZE(float)
+PREFETCH_VECTORIZE(double)
+
+/*-----------------------------------------------------------------------------
+* This can be empty since our copy routines are currently synchronous. When
+* the copy routines are improved to be asynchronous, then this function will
+* need a real implementation.
+*----------------------------------------------------------------------------*/
+#define wait_group_events(num_events, event_list)
+
+#define CROSS_SIZES(type) \
+ TEMPLATE(type) \
+ TEMPLATE(_VEC_TYPE(type,2)) \
+ TEMPLATE(_VEC_TYPE(type,3)) \
+ TEMPLATE(_VEC_TYPE(type,4)) \
+ TEMPLATE(_VEC_TYPE(type,8)) \
+ TEMPLATE(_VEC_TYPE(type,16)) \
+
+#define TEMPLATE(gentype) \
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(local gentype *dst, const global gentype *src, \
+ size_t num_gentypes, event_t event); \
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(global gentype *dst, const local gentype *src, \
+ size_t num_gentypes, event_t event); \
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(global gentype *dst, const global gentype *src, \
+ size_t num_gentypes, event_t event); \
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(local gentype *dst, const global gentype *src, \
+ size_t num_gentypes, size_t src_stride, event_t event); \
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(global gentype *dst, const local gentype *src, \
+ size_t num_gentypes, size_t dst_stride, event_t event); \
+
+CROSS_SIZES(char)
+CROSS_SIZES(uchar)
+CROSS_SIZES(short)
+CROSS_SIZES(ushort)
+CROSS_SIZES(int)
+CROSS_SIZES(uint)
+CROSS_SIZES(long)
+CROSS_SIZES(ulong)
+CROSS_SIZES(float)
+CROSS_SIZES(double)
+
+#undef CROSS_SIZES
+#undef TEMPLATE
+
+_CLC_OVERLOAD _CLC_DECL char rotate(char v, char i);
+_CLC_OVERLOAD _CLC_DECL uchar rotate(uchar v, uchar i);
+_CLC_OVERLOAD _CLC_DECL short rotate(short v, short i);
+_CLC_OVERLOAD _CLC_DECL ushort rotate(ushort v, ushort i);
+_CLC_OVERLOAD _CLC_DECL long rotate(long v, long i);
+_CLC_OVERLOAD _CLC_DECL ulong rotate(ulong v, ulong i);
+_CLC_OVERLOAD _CLC_DECL int rotate(int v, int i);
+_CLC_OVERLOAD _CLC_DECL uint rotate(uint v, uint i);
+
+BINARY_VEC_DECL(char, char, rotate)
+BINARY_VEC_DECL(uchar, uchar, rotate)
+BINARY_VEC_DECL(short, short, rotate)
+BINARY_VEC_DECL(ushort, ushort, rotate)
+BINARY_VEC_DECL(int, int, rotate)
+BINARY_VEC_DECL(uint, uint, rotate)
+BINARY_VEC_DECL(long, long, rotate)
+BINARY_VEC_DECL(ulong, ulong, rotate)
+
+_CLC_OVERLOAD _CLC_DECL char clz(char v) ;
+_CLC_OVERLOAD _CLC_DECL uchar clz(uchar v) ;
+_CLC_OVERLOAD _CLC_DECL short clz(short v) ;
+_CLC_OVERLOAD _CLC_DECL ushort clz(ushort v) ;
+_CLC_OVERLOAD _CLC_DECL int clz(int v) ;
+_CLC_OVERLOAD _CLC_DECL uint clz(uint v) ;
+_CLC_OVERLOAD _CLC_DECL long clz(long v) ;
+_CLC_OVERLOAD _CLC_DECL ulong clz(ulong v);
+
+UNARY_VEC_DECL(char, char, clz)
+UNARY_VEC_DECL(uchar, uchar, clz)
+UNARY_VEC_DECL(short, short, clz)
+UNARY_VEC_DECL(ushort, ushort, clz)
+UNARY_VEC_DECL(int, int, clz)
+UNARY_VEC_DECL(uint, uint, clz)
+UNARY_VEC_DECL(long, long, clz)
+UNARY_VEC_DECL(ulong, ulong, clz)
+
+_CLC_OVERLOAD _CLC_DECL uchar abs(char x) ;
+_CLC_OVERLOAD _CLC_DECL ushort abs(short x) ;
+_CLC_OVERLOAD _CLC_DECL uint abs(int x) ;
+_CLC_OVERLOAD _CLC_DECL ulong abs(long x) ;
+_CLC_OVERLOAD _CLC_DECL uchar abs(uchar x) ;
+_CLC_OVERLOAD _CLC_DECL ushort abs(ushort x) ;
+_CLC_OVERLOAD _CLC_DECL uint abs(uint x) ;
+_CLC_OVERLOAD _CLC_DECL ulong abs(ulong x) ;
+
+UNARY_VEC_DECL(char, uchar, abs)
+UNARY_VEC_DECL(short, ushort, abs)
+UNARY_VEC_DECL(int, uint, abs)
+UNARY_VEC_DECL(long, ulong, abs)
+
+/*-----------------------------------------------------------------------------
+* ABS for unsigned types is straightforward
+*----------------------------------------------------------------------------*/
+#define DEFINE(type, utype) \
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,2) abs(_VEC_TYPE(utype,2) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,3) abs(_VEC_TYPE(utype,3) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,4) abs(_VEC_TYPE(utype,4) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,8) abs(_VEC_TYPE(utype,8) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,16) abs(_VEC_TYPE(utype,16) x) {return x;}\
+
+DEFINE(uchar, uchar)
+DEFINE(ushort, ushort)
+DEFINE(uint, uint)
+DEFINE(ulong, ulong)
+
+#undef DEFINE
+
+_CLC_OVERLOAD _CLC_DECL long mul_hi(long x, long y);
+_CLC_OVERLOAD _CLC_DECL ulong mul_hi(ulong x, ulong y);
+_CLC_OVERLOAD _CLC_DECL char mul_hi(char x, char y);
+_CLC_OVERLOAD _CLC_DECL uchar mul_hi(uchar x, uchar y);
+_CLC_OVERLOAD _CLC_DECL short mul_hi(short x, short y);
+_CLC_OVERLOAD _CLC_DECL ushort mul_hi(ushort x, ushort y);
+_CLC_OVERLOAD _CLC_DECL int mul_hi(int x, int y);
+_CLC_OVERLOAD _CLC_DECL uint mul_hi(uint x, uint y);
+
+BINARY_VEC_DECL(char, char, mul_hi)
+BINARY_VEC_DECL(uchar, uchar, mul_hi)
+BINARY_VEC_DECL(short, short, mul_hi)
+BINARY_VEC_DECL(ushort, ushort, mul_hi)
+BINARY_VEC_DECL(int, int, mul_hi)
+BINARY_VEC_DECL(uint, uint, mul_hi)
+BINARY_VEC_DECL(long, long, mul_hi)
+BINARY_VEC_DECL(ulong, ulong, mul_hi)
+
+
+_CLC_OVERLOAD _CLC_DECL char add_sat(char x, char y) ;
+_CLC_OVERLOAD _CLC_DECL uchar add_sat(uchar x, uchar y) ;
+_CLC_OVERLOAD _CLC_DECL short add_sat(short x, short y) ;
+_CLC_OVERLOAD _CLC_DECL ushort add_sat(ushort x, ushort y) ;
+_CLC_OVERLOAD _CLC_DECL int add_sat(int x, int y) ;
+_CLC_OVERLOAD _CLC_DECL uint add_sat(uint x, uint y) ;
+_CLC_OVERLOAD _CLC_DECL long add_sat(long x, long y) ;
+_CLC_OVERLOAD _CLC_DECL ulong add_sat(ulong x, ulong y) ;
+
+BINARY_VEC_DECL(char, char, add_sat)
+BINARY_VEC_DECL(uchar, uchar, add_sat)
+BINARY_VEC_DECL(short, short, add_sat)
+BINARY_VEC_DECL(ushort, ushort, add_sat)
+BINARY_VEC_DECL(int, int, add_sat)
+BINARY_VEC_DECL(uint, uint, add_sat)
+BINARY_VEC_DECL(long, long, add_sat)
+BINARY_VEC_DECL(ulong, ulong, add_sat)
+
+
+_CLC_OVERLOAD _CLC_DECL char sub_sat(char x, char y) ;
+_CLC_OVERLOAD _CLC_DECL uchar sub_sat(uchar x, uchar y) ;
+_CLC_OVERLOAD _CLC_DECL short sub_sat(short x, short y) ;
+_CLC_OVERLOAD _CLC_DECL ushort sub_sat(ushort x, ushort y) ;
+_CLC_OVERLOAD _CLC_DECL int sub_sat(int x, int y) ;
+_CLC_OVERLOAD _CLC_DECL uint sub_sat(uint x, uint y) ;
+_CLC_OVERLOAD _CLC_DECL long sub_sat(long x, long y) ;
+_CLC_OVERLOAD _CLC_DECL ulong sub_sat(ulong x, ulong y) ;
+
+BINARY_VEC_DECL(char, char, sub_sat)
+BINARY_VEC_DECL(uchar, uchar, sub_sat)
+BINARY_VEC_DECL(short, short, sub_sat)
+BINARY_VEC_DECL(ushort, ushort, sub_sat)
+BINARY_VEC_DECL(int, int, sub_sat)
+BINARY_VEC_DECL(uint, uint, sub_sat)
+BINARY_VEC_DECL(long, long, sub_sat)
+BINARY_VEC_DECL(ulong, ulong, sub_sat)
+
+_CLC_OVERLOAD _CLC_DECL short upsample(char x, uchar y) ;
+_CLC_OVERLOAD _CLC_DECL ushort upsample(uchar x, uchar y) ;
+_CLC_OVERLOAD _CLC_DECL int upsample(short x, ushort y) ;
+_CLC_OVERLOAD _CLC_DECL uint upsample(ushort x, ushort y) ;
+_CLC_OVERLOAD _CLC_DECL long upsample(int x, uint y) ;
+_CLC_OVERLOAD _CLC_DECL ulong upsample(uint x, uint y) ;
+
+BINARY_VEC_DECL_ALT(char, short, uchar, upsample)
+BINARY_VEC_DECL_ALT(uchar, ushort, uchar, upsample)
+BINARY_VEC_DECL_ALT(short, int, ushort, upsample)
+BINARY_VEC_DECL_ALT(ushort, uint, ushort, upsample)
+BINARY_VEC_DECL_ALT(int, long, uint, upsample)
+BINARY_VEC_DECL_ALT(uint, ulong, uint, upsample)
+
+_CLC_OVERLOAD _CLC_DECL char mad_sat(char a, char b, char c);
+_CLC_OVERLOAD _CLC_DECL uchar mad_sat(uchar a, uchar b, uchar c);
+_CLC_OVERLOAD _CLC_DECL short mad_sat(short a, short b, short c);
+_CLC_OVERLOAD _CLC_DECL ushort mad_sat(ushort a, ushort b, ushort c);
+_CLC_OVERLOAD _CLC_DECL int mad_sat(int a, int b, int c);
+_CLC_OVERLOAD _CLC_DECL uint mad_sat(uint a, uint b, uint c);
+_CLC_OVERLOAD _CLC_DECL long mad_sat(long a, long b, long c);
+_CLC_OVERLOAD _CLC_DECL ulong mad_sat(ulong a, ulong b, ulong c);
+
+TERNARY_VEC_DECL(char, char, mad_sat)
+TERNARY_VEC_DECL(uchar, uchar, mad_sat)
+TERNARY_VEC_DECL(short, short, mad_sat)
+TERNARY_VEC_DECL(ushort, ushort, mad_sat)
+TERNARY_VEC_DECL(int, int, mad_sat)
+TERNARY_VEC_DECL(uint, uint, mad_sat)
+TERNARY_VEC_DECL(long, long, mad_sat)
+TERNARY_VEC_DECL(ulong, ulong, mad_sat)
+
+int printf(const char* _format, ...);
+void *memcpy(void *dst, const void * src, uint size);
+
+_CLC_DECL size_t get_local_id (uint dim);
+_CLC_DECL uint get_work_dim (void) ;
+_CLC_DECL size_t get_global_size (uint dim) ;
+_CLC_DECL size_t get_local_size (uint dim) ;
+_CLC_DECL size_t get_global_offset(uint dim) ;
+_CLC_DECL size_t __get_global_first(uint dim) ;
+_CLC_DECL size_t get_num_groups (uint dim) ;
+_CLC_DECL size_t get_global_id (uint dim) ;
+_CLC_DECL size_t get_group_id (uint dim) ;
+
+#endif //_CPU_CLC_H_
+