aboutsummaryrefslogtreecommitdiff
path: root/include/dsp.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/dsp.h')
-rw-r--r--include/dsp.h490
1 files changed, 490 insertions, 0 deletions
diff --git a/include/dsp.h b/include/dsp.h
new file mode 100644
index 0000000..b4fe9d5
--- /dev/null
+++ b/include/dsp.h
@@ -0,0 +1,490 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _DSP_CLC_H_
+#define _DSP_CLC_H_
+
+#include "clc.h"
+
+void __touch(const __global char *p, uint32_t size);
+
+#define PREFETCH_VECTORIZE(PRIM_TYPE) \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##2 *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##3 *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##4 *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##8 *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+ _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##16 *p, size_t num_gentypes) \
+ { __touch((const __global char*)p, (uint32_t)(num_gentypes * sizeof(*p))); } \
+
+#define PREFETCH_TYPES() \
+ PREFETCH_VECTORIZE(char) \
+ PREFETCH_VECTORIZE(uchar) \
+ PREFETCH_VECTORIZE(short) \
+ PREFETCH_VECTORIZE(ushort) \
+ PREFETCH_VECTORIZE(int) \
+ PREFETCH_VECTORIZE(uint) \
+ PREFETCH_VECTORIZE(long) \
+ PREFETCH_VECTORIZE(ulong) \
+ PREFETCH_VECTORIZE(float) \
+ PREFETCH_VECTORIZE(double) \
+
+PREFETCH_TYPES()
+
+_CLC_DECL size_t get_local_id (uint dim);
+
+void *memcpy(void *dst, const void * src, uint size);
+
+/*-----------------------------------------------------------------------------
+* This can be empty since our copy routines are currently synchronous. When
+* the copy routines are improved to be asynchronous, then this function will
+* need a real implementation.
+*----------------------------------------------------------------------------*/
+#define wait_group_events(num_events, event_list)
+
+#define VEC_TYPE(type,sz) type##sz
+
+#define CROSS_SIZES(type) \
+ TEMPLATE(type) \
+ TEMPLATE(VEC_TYPE(type,2)) \
+ TEMPLATE(VEC_TYPE(type,3)) \
+ TEMPLATE(VEC_TYPE(type,4)) \
+ TEMPLATE(VEC_TYPE(type,8)) \
+ TEMPLATE(VEC_TYPE(type,16)) \
+
+#define CROSS_TYPES() \
+ CROSS_SIZES(char) \
+ CROSS_SIZES(uchar) \
+ CROSS_SIZES(short) \
+ CROSS_SIZES(ushort) \
+ CROSS_SIZES(int) \
+ CROSS_SIZES(uint) \
+ CROSS_SIZES(long) \
+ CROSS_SIZES(ulong) \
+ CROSS_SIZES(float) \
+ CROSS_SIZES(double) \
+
+#define TEMPLATE(gentype) \
+_CLC_OVERLOAD _CLC_INLINE event_t async_work_group_copy(local gentype *dst, const global gentype *src, \
+ size_t num_gentypes, event_t event) \
+{ \
+ if ((get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0) \
+ memcpy((char*)dst, (const char*) src, num_gentypes * sizeof(gentype)); \
+ return 0; \
+} \
+_CLC_OVERLOAD _CLC_INLINE event_t async_work_group_copy(global gentype *dst, const local gentype *src, \
+ size_t num_gentypes, event_t event) \
+{ \
+ if ((get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0) \
+ memcpy((char*)dst, (const char*) src, num_gentypes * sizeof(gentype)); \
+ return 0; \
+} \
+_CLC_OVERLOAD _CLC_INLINE event_t async_work_group_copy(global gentype *dst, const global gentype *src, \
+ size_t num_gentypes, event_t event) \
+{ \
+ if ((get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0) \
+ memcpy((char*)dst, (const char*) src, num_gentypes * sizeof(gentype)); \
+ return 0; \
+} \
+
+CROSS_TYPES()
+
+#undef TEMPLATE
+#define TEMPLATE(gentype) \
+_CLC_OVERLOAD _CLC_INLINE event_t async_work_group_strided_copy(local gentype *dst, const global gentype *src, \
+ size_t num_gentypes, size_t src_stride, event_t event) \
+{ int i; \
+ if ((get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0) \
+ for (i=0; i < num_gentypes; ++i) dst[i] = src[i*src_stride]; \
+ return 0; \
+} \
+_CLC_OVERLOAD _CLC_INLINE event_t async_work_group_strided_copy(global gentype *dst, const local gentype *src, \
+ size_t num_gentypes, size_t dst_stride, event_t event) \
+{ int i; \
+ if ((get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0) \
+ for (i=0; i < num_gentypes; ++i) dst[i*dst_stride] = src[i]; \
+ return 0; \
+} \
+
+CROSS_TYPES()
+
+#undef VEC_TYPE
+#undef CROSS_SIZES
+#undef CROSS_TYPES
+#undef TEMPLATE
+
+
+_CLC_OVERLOAD _CLC_DECL char rotate(char v, char i);
+_CLC_OVERLOAD _CLC_DECL uchar rotate(uchar v, uchar i);
+_CLC_OVERLOAD _CLC_DECL short rotate(short v, short i);
+_CLC_OVERLOAD _CLC_DECL ushort rotate(ushort v, ushort i);
+_CLC_OVERLOAD _CLC_INLINE int rotate(int v, int i) { return _rotl(v,i); }
+_CLC_OVERLOAD _CLC_INLINE uint rotate(uint v, uint i) { return _rotl(v,i); }
+_CLC_OVERLOAD _CLC_DECL long rotate(long v, long i);
+_CLC_OVERLOAD _CLC_DECL ulong rotate(ulong v, ulong i);
+
+BINARY_VEC_DECL(char, char, rotate)
+BINARY_VEC_DECL(uchar, uchar, rotate)
+BINARY_VEC_DECL(short, short, rotate)
+BINARY_VEC_DECL(ushort, ushort, rotate)
+BINARY_VEC_DECL(int, int, rotate)
+BINARY_VEC_DECL(uint, uint, rotate)
+BINARY_VEC_DECL(long, long, rotate)
+BINARY_VEC_DECL(ulong, ulong, rotate)
+
+_CLC_OVERLOAD _CLC_INLINE char clz(char v) { return v<0?0: _lmbd(1,v)-24; }
+_CLC_OVERLOAD _CLC_INLINE uchar clz(uchar v) { return _lmbd(1, v) - 24; }
+_CLC_OVERLOAD _CLC_INLINE short clz(short v) { return v<0?0: _lmbd(1,v)-16; }
+_CLC_OVERLOAD _CLC_INLINE ushort clz(ushort v) { return _lmbd(1, v) - 16; }
+_CLC_OVERLOAD _CLC_INLINE int clz(int v) { return _lmbd(1, v); }
+_CLC_OVERLOAD _CLC_INLINE uint clz(uint v) { return _lmbd(1, v); }
+
+_CLC_OVERLOAD _CLC_INLINE long clz(long v)
+{
+ uint2 tmp = as_uint2(v);
+ return tmp.hi ? _lmbd(1, tmp.hi) : _lmbd(1, tmp.lo) + 32;
+}
+
+_CLC_OVERLOAD _CLC_INLINE ulong clz(ulong v)
+{
+ uint2 tmp = as_uint2(v);
+ return tmp.hi ? _lmbd(1, tmp.hi) : _lmbd(1, tmp.lo) + 32;
+}
+
+UNARY_VEC_DECL(char, char, clz)
+UNARY_VEC_DECL(uchar, uchar, clz)
+UNARY_VEC_DECL(short, short, clz)
+UNARY_VEC_DECL(ushort, ushort, clz)
+UNARY_VEC_DECL(int, int, clz)
+UNARY_VEC_DECL(uint, uint, clz)
+UNARY_VEC_DECL(long, long, clz)
+UNARY_VEC_DECL(ulong, ulong, clz)
+
+
+_CLC_OVERLOAD _CLC_INLINE uchar abs(char x) { return _abs(x); }
+_CLC_OVERLOAD _CLC_INLINE ushort abs(short x) { return _abs(x); }
+_CLC_OVERLOAD _CLC_INLINE uint abs(int x) { return _abs(x); }
+_CLC_OVERLOAD _CLC_INLINE ulong abs(long x) { if (x < 0) x = -x; return x; }
+
+_CLC_OVERLOAD _CLC_INLINE uchar abs(uchar x) { return x; }
+_CLC_OVERLOAD _CLC_INLINE ushort abs(ushort x) { return x; }
+_CLC_OVERLOAD _CLC_INLINE uint abs(uint x) { return x; }
+_CLC_OVERLOAD _CLC_INLINE ulong abs(ulong x) { return x; }
+
+UNARY_VEC_DECL(char, uchar, abs)
+UNARY_VEC_DECL(short, ushort, abs)
+UNARY_VEC_DECL(int, uint, abs)
+UNARY_VEC_DECL(long, ulong, abs)
+
+/*-----------------------------------------------------------------------------
+* ABS for unsigned types is straightforward
+*----------------------------------------------------------------------------*/
+#define DEFINE(type, utype) \
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,2) abs(_VEC_TYPE(utype,2) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,3) abs(_VEC_TYPE(utype,3) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,4) abs(_VEC_TYPE(utype,4) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,8) abs(_VEC_TYPE(utype,8) x) {return x;}\
+ _CLC_OVERLOAD _CLC_INLINE _VEC_TYPE(utype,16) abs(_VEC_TYPE(utype,16) x) {return x;}\
+
+DEFINE(uchar, uchar)
+DEFINE(ushort, ushort)
+DEFINE(uint, uint)
+DEFINE(ulong, ulong)
+
+#undef DEFINE
+
+_CLC_OVERLOAD _CLC_DECL long mul_hi(long x, long y);
+_CLC_OVERLOAD _CLC_DECL ulong mul_hi(ulong x, ulong y);
+
+_CLC_OVERLOAD _CLC_INLINE char mul_hi(char x, char y)
+{ return _mpy(x,y) >> 8; }
+
+_CLC_OVERLOAD _CLC_INLINE uchar mul_hi(uchar x, uchar y)
+{ return _mpyu(x,y) >> 8; }
+
+_CLC_OVERLOAD _CLC_INLINE short mul_hi(short x, short y)
+{ return _mpy(x,y) >> 16; }
+
+_CLC_OVERLOAD _CLC_INLINE ushort mul_hi(ushort x, ushort y)
+{ return _mpyu(x,y) >> 16; }
+
+_CLC_OVERLOAD _CLC_INLINE int mul_hi(int x, int y)
+{ return ((long)x * (long)y) >> 32; }
+
+_CLC_OVERLOAD _CLC_INLINE uint mul_hi(uint x, uint y)
+{ return ((ulong)x * (ulong)y) >> 32; }
+
+BINARY_VEC_DECL(char, char, mul_hi)
+BINARY_VEC_DECL(uchar, uchar, mul_hi)
+BINARY_VEC_DECL(short, short, mul_hi)
+BINARY_VEC_DECL(ushort, ushort, mul_hi)
+BINARY_VEC_DECL(int, int, mul_hi)
+BINARY_VEC_DECL(uint, uint, mul_hi)
+BINARY_VEC_DECL(long, long, mul_hi)
+BINARY_VEC_DECL(ulong, ulong, mul_hi)
+
+_CLC_OVERLOAD _CLC_INLINE char add_sat(char x, char y)
+{ return _sadd(x<<24, y<<24)>>24; }
+
+_CLC_OVERLOAD _CLC_INLINE uchar add_sat(uchar x, uchar y)
+{ return _saddu4(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE short add_sat(short x, short y)
+{ return _sadd2(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE ushort add_sat(ushort x, ushort y)
+{
+ int tmp = x + y;
+ if (tmp >> 16) return USHRT_MAX;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE int add_sat(int x, int y)
+{ return _sadd(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE uint add_sat(uint x, uint y)
+{
+ ulong tmp = (ulong)x + (ulong)y;
+ if (tmp >> 32) return UINT_MAX;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE long add_sat(long x, long y)
+{
+ if (x > 0 && y > (LONG_MAX-x)) return LONG_MAX;
+ if (x < 0 && y < (LONG_MIN-x)) return LONG_MIN;
+ return x + y;
+}
+
+_CLC_OVERLOAD _CLC_INLINE ulong add_sat(ulong x, ulong y)
+{
+ if (y > (ULONG_MAX-x)) return ULONG_MAX;
+ return x + y;
+}
+
+BINARY_VEC_DECL(char, char, add_sat)
+BINARY_VEC_DECL(uchar, uchar, add_sat)
+BINARY_VEC_DECL(short, short, add_sat)
+BINARY_VEC_DECL(ushort, ushort, add_sat)
+BINARY_VEC_DECL(int, int, add_sat)
+BINARY_VEC_DECL(uint, uint, add_sat)
+BINARY_VEC_DECL(long, long, add_sat)
+BINARY_VEC_DECL(ulong, ulong, add_sat)
+
+
+_CLC_OVERLOAD _CLC_INLINE char sub_sat(char x, char y)
+{ return _ssub(x<<24, y<<24)>>24; }
+
+_CLC_OVERLOAD _CLC_INLINE uchar sub_sat(uchar x, uchar y)
+{
+ if (y > x) return 0;
+ return x-y;
+}
+
+_CLC_OVERLOAD _CLC_INLINE short sub_sat(short x, short y)
+{ return _ssub2(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE ushort sub_sat(ushort x, ushort y)
+{
+ if (y > x) return 0;
+ return x-y;
+}
+
+_CLC_OVERLOAD _CLC_INLINE int sub_sat(int x, int y)
+{ return _ssub(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE uint sub_sat(uint x, uint y)
+{
+ if (y > x) return 0;
+ return x-y;
+}
+
+_CLC_OVERLOAD _CLC_INLINE long sub_sat(long x, long y)
+{
+ if (x > 0 && -y > (LONG_MAX-x)) return LONG_MAX;
+ if (x < 0 && -y < (LONG_MIN-x)) return LONG_MIN;
+
+ return x - y;
+}
+
+_CLC_OVERLOAD _CLC_INLINE ulong sub_sat(ulong x, ulong y)
+{
+ if (y > x) return 0;
+ return x-y;
+}
+
+BINARY_VEC_DECL(char, char, sub_sat)
+BINARY_VEC_DECL(uchar, uchar, sub_sat)
+BINARY_VEC_DECL(short, short, sub_sat)
+BINARY_VEC_DECL(ushort, ushort, sub_sat)
+BINARY_VEC_DECL(int, int, sub_sat)
+BINARY_VEC_DECL(uint, uint, sub_sat)
+BINARY_VEC_DECL(long, long, sub_sat)
+BINARY_VEC_DECL(ulong, ulong, sub_sat)
+
+
+_CLC_OVERLOAD _CLC_INLINE short upsample(char x, uchar y)
+{ return (short)x << 8 | y; }
+
+_CLC_OVERLOAD _CLC_INLINE ushort upsample(uchar x, uchar y)
+{ return (ushort)x << 8 | y; }
+
+_CLC_OVERLOAD _CLC_INLINE int upsample(short x, ushort y)
+{ return (int) _pack2(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE uint upsample(ushort x, ushort y)
+{ return (uint) _pack2(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE long upsample(int x, uint y)
+{ return (long) _itoll(x,y); }
+
+_CLC_OVERLOAD _CLC_INLINE ulong upsample(uint x, uint y)
+{ return (ulong) _itoll(x,y); }
+
+BINARY_VEC_DECL_ALT(char, short, uchar, upsample)
+BINARY_VEC_DECL_ALT(uchar, ushort, uchar, upsample)
+BINARY_VEC_DECL_ALT(short, int, ushort, upsample)
+BINARY_VEC_DECL_ALT(ushort, uint, ushort, upsample)
+BINARY_VEC_DECL_ALT(int, long, uint, upsample)
+BINARY_VEC_DECL_ALT(uint, ulong, uint, upsample)
+
+
+_CLC_OVERLOAD _CLC_INLINE char mad_sat(char a, char b, char c)
+{
+ int tmp = _mpy32(a,b);
+ tmp += c;
+
+ if (tmp > (int)CHAR_MAX) return CHAR_MAX;
+ if (tmp < (int)CHAR_MIN) return CHAR_MIN;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE uchar mad_sat(uchar a, uchar b, uchar c)
+{
+ uint tmp = _mpy32u(a,b);
+ tmp += c;
+
+ if (tmp > (uint)UCHAR_MAX) return UCHAR_MAX;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE short mad_sat(short a, short b, short c)
+{
+ int tmp = _mpy32(a,b);
+ tmp += c;
+
+ if (tmp > (int)SHRT_MAX) return SHRT_MAX;
+ if (tmp < (int)SHRT_MIN) return SHRT_MIN;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE ushort mad_sat(ushort a, ushort b, ushort c)
+{
+ uint tmp = _mpy32u(a,b);
+ tmp += c;
+
+ if (tmp > (uint)USHRT_MAX) return USHRT_MAX;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE int mad_sat(int a, int b, int c)
+{
+ long tmp = (long)a * (long)b + (long)c;
+ if (tmp > (long)INT_MAX) return INT_MAX;
+ if (tmp < (long)INT_MIN) return INT_MIN;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE uint mad_sat(uint a, uint b, uint c)
+{
+ ulong tmp = _mpy32u(a,b);
+ tmp += c;
+
+ if (tmp > (ulong)UINT_MAX) return UINT_MAX;
+ return tmp;
+}
+
+_CLC_OVERLOAD _CLC_INLINE long mad_sat(long a, long b, long c)
+{
+ if (a > 0 && b > 0 && a > (LONG_MAX/b)) return LONG_MAX;
+ if (a > 0 && b < 0 && b < (LONG_MIN/a)) return LONG_MIN;
+ if (a < 0 && b > 0 && a < (LONG_MIN/b)) return LONG_MIN;
+ if (a < 0 && b < 0 && b < (LONG_MAX/a)) return LONG_MAX;
+
+ return add_sat(a*b, c);
+}
+
+_CLC_OVERLOAD _CLC_INLINE ulong mad_sat(ulong a, ulong b, ulong c)
+{
+ if (a > (ULONG_MAX/b)) return ULONG_MAX;
+ return add_sat(a*b, c);
+}
+
+TERNARY_VEC_DECL(char, char, mad_sat)
+TERNARY_VEC_DECL(uchar, uchar, mad_sat)
+TERNARY_VEC_DECL(short, short, mad_sat)
+TERNARY_VEC_DECL(ushort, ushort, mad_sat)
+TERNARY_VEC_DECL(int, int, mad_sat)
+TERNARY_VEC_DECL(uint, uint, mad_sat)
+TERNARY_VEC_DECL(long, long, mad_sat)
+TERNARY_VEC_DECL(ulong, ulong, mad_sat)
+
+
+int printf(const char* _format, ...);
+
+uint32_t __core_num (void);
+uint32_t __clock (void);
+uint64_t __clock64 (void);
+void __cycle_delay (uint64_t cyclesToDelay);
+void __mfence (void);
+
+void __ocl_cache_l1d_off (void);
+void __ocl_cache_l1d_std (void);
+void __ocl_cache_l1d_half (void);
+void __ocl_cache_l1d_wbinv_all (void);
+
+extern constant const uint kernel_config_l2[32];
+
+_CLC_DECL size_t get_local_id (uint dim);
+
+_CLC_INLINE uint get_work_dim (void) { return kernel_config_l2[0]; }
+_CLC_INLINE size_t get_global_size (uint dim) { return kernel_config_l2[1+dim]; }
+_CLC_INLINE size_t get_local_size (uint dim) { return kernel_config_l2[4+dim]; }
+_CLC_INLINE size_t get_global_offset(uint dim) { return kernel_config_l2[7+dim]; }
+_CLC_INLINE size_t __get_global_first(uint dim) { return kernel_config_l2[10+dim]; }
+_CLC_INLINE size_t get_num_groups (uint dim) { return get_global_size(dim) / get_local_size(dim); }
+_CLC_INLINE size_t get_global_id (uint dim) { return __get_global_first(dim) + get_local_id(dim); }
+_CLC_INLINE size_t get_group_id (uint dim)
+ { return (__get_global_first(dim) - get_global_offset(dim)) / get_local_size(dim); }
+
+#endif //_DSP_CLC_H_
+