aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBalaji V. Iyer <balaji.v.iyer@intel.com>2012-12-10 19:39:34 +0000
committerBalaji V. Iyer <balaji.v.iyer@intel.com>2012-12-10 19:39:34 +0000
commitaa72dc62d3401a8128645bc2bc99ea39f37a9b21 (patch)
tree0b5ece678cf98ed2555cda14d9cd3dfe17371911
parentfaa2abc0b28c7f98aec82c34ac1363820db8ab2a (diff)
Added several Elemental function changes for C (from patch to branch).
gcc/c-family/ChangeLog.cilkplus +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * c-cpp-elem-function.c: New file. + gcc/testsuite/ChangeLog.cilkplus +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * gcc.dg/cilk-plus/elem_fn_tests/test1.c: Remove. + * gcc.dg/cilk-plus/elem_fn_tests/test2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test3.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test4.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test5.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test6.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test7.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test8.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test9.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test10.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test11.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test12.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test13.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test14.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test15.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test16.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test17.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test18.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c: New test. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c: Likewise. 
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c: Likewise. + gcc/ChangeLog.cilkplus +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * doc/tm.texi (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added + documentation for this hook. + (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_CODE): Likewise. + * doc/tm.texi.in (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added + hook. + (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC): Likewise. + * targhooks.c (default_builtin_map_processor_to_attr): New function. + (default_builtin_find_processor_code): Likewise. + (default_builtin_find_vlength_code): Likewise. 
+ (default_builtin_find_isa_code): Likewise. + * c/c-parser.c (c_parser_elem_fn_processor_clause): Fixed syntax issue + in error message. Added several checks and error reporting for invalid + values. + (c_parser_elem_fn_uniform_clause): Likewise. + (c_parser_elem_fn_linear_clause): Likewise. + (c_parser_elem_fn_vlength_clause): Likewise. + * c/c-decl.c (c_builtin_function_ext_scope): Added a check if external + scope is non-null. + * target.def (TARGET_CILKPLUS): Added this hook and several definitions + such as builtin_map_processor_to_attr, builtin_find_processor_code, + builtin_find_vlength_for_proc, builtin_find_isa_code. + * tree.h (enum elem_fn_parm_type): Moved this to c-common. + * cilk.h (elem_fn_info): Added some extra fields into struct. + * elem-function-common.c (find_processor_code): Called the target + dependent version. Also made several changes to satisfy the new + elemental function mangling format. Finally, fixed up header comments. + (find_suffix): Likewise. + (extract_elem_fn_values): Likewise. + * tree-vect-stmts.c (elem_fn_vect_get_vec_def_for_operand): Return + scalar version for the uniform or linear parameters. + (vectorizable_call): Set the function type correctly. + * tree-inline.c (remap_var_for_cilk): Remove. + (tree_elem_fn_versioning): Fix up header function and reflected changes + in tree_function_versioning into this function. + (copy_tree_body_r): Removed a call for remap_var_for_cilk. + (elem_fn_copy_arguments_for_versioning): Fixed header comments. + * Makefile.in (C_COMMON_OBJS): Added c-family/c-cpp-elem-function.o. + * config/i386/i386.c (type_natural_mode): Added a check for cilk flag + before emitting a note. + (ix86_frame_pointer_required): Added a flag_enable_cilk check. + (ix86_cilkplus_map_proc_to_attr): New function. + (ix86_cilkplus_find_proc_code): Likewise. + (ix86_cilkplus_find_isa_code): Likewise. + (ix86_builtin_find_vlength_for_proc): Likewise. 
+ git-svn-id: https://gcc.gnu.org/svn/gcc/branches/cilkplus@194366 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.cilkplus51
-rw-r--r--gcc/Makefile.in5
-rw-r--r--gcc/c-family/ChangeLog.cilkplus4
-rw-r--r--gcc/c-family/c-cpp-elem-function.c814
-rw-r--r--gcc/c/c-decl.c5
-rw-r--r--gcc/c/c-parser.c74
-rw-r--r--gcc/cilk.h15
-rw-r--r--gcc/config/i386/i386.c232
-rw-r--r--gcc/doc/tm.texi24
-rw-r--r--gcc/doc/tm.texi.in9
-rw-r--r--gcc/elem-function-common.c178
-rw-r--r--gcc/target.def40
-rw-r--r--gcc/targhooks.c33
-rw-r--r--gcc/testsuite/ChangeLog.cilkplus53
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c30
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test10.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test11.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test12.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test7.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test8.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test9.c)4
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c)1
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test1.c)10
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test13.c)1
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test14.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test15.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test16.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test17.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test18.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test2.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test3.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test4.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test5.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c (renamed from gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test6.c)2
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp53
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c24
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c33
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c29
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c30
-rw-r--r--gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c45
-rw-r--r--gcc/tree-inline.c62
-rw-r--r--gcc/tree-vect-stmts.c8
-rw-r--r--gcc/tree.c5
-rw-r--r--gcc/tree.h9
46 files changed, 1703 insertions, 222 deletions
diff --git a/gcc/ChangeLog.cilkplus b/gcc/ChangeLog.cilkplus
index ff6c2c9113f..5bbf9287353 100644
--- a/gcc/ChangeLog.cilkplus
+++ b/gcc/ChangeLog.cilkplus
@@ -1,3 +1,54 @@
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * doc/tm.texi (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added
+ documentation for this hook.
+ (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_CODE): Likewise.
+ * doc/tm.texi.in (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added
+ hook.
+ (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC): Likewise.
+ * targhooks.c (default_builtin_map_processor_to_attr): New function.
+ (default_builtin_find_processor_code): Likewise.
+ (default_builtin_find_vlength_code): Likewise.
+ (default_builtin_find_isa_code): Likewise.
+ * c/c-parser.c (c_parser_elem_fn_processor_clause): Fixed syntax issue
+ in error message. Added several checks and error reporting for invalid
+ values.
+ (c_parser_elem_fn_uniform_clause): Likewise.
+ (c_parser_elem_fn_linear_clause): Likewise.
+ (c_parser_elem_fn_vlength_clause): Likewise.
+ * c/c-decl.c (c_builtin_function_ext_scope): Added a check if external
+ scope is non-null.
+ * target.def (TARGET_CILKPLUS): Added this hook and several definitions
+ such as builtin_map_processor_to_attr, builtin_find_processor_code,
+ builtin_find_vlength_for_proc, builtin_find_isa_code.
+ * tree.h (enum elem_fn_parm_type): Moved this to c-common.
+ * cilk.h (elem_fn_info): Added some extra fields into struct.
+ * elem-function-common.c (find_processor_code): Called the target
+ dependent version. Also made several changes to satisfy the new
+ elemental function mangling format. Finally, fixed up header comments.
+ (find_suffix): Likewise.
+ (extract_elem_fn_values): Likewise.
+ * tree-vect-stmts.c (elem_fn_vect_get_vec_def_for_operand): Return
+ scalar version for the uniform or linear parameters.
+ (vectorizable_call): Set the function type correctly.
+ * tree-inline.c (remap_var_for_cilk): Remove.
+ (tree_elem_fn_versioning): Fix up header function and reflected changes
+ in tree_function_versioning into this function.
+ (copy_tree_body_r): Removed a call for remap_var_for_cilk.
+ (elem_fn_copy_arguments_for_versioning): Fixed header comments.
+ * Makefile.in (C_COMMON_OBJS): Added c-family/c-cpp-elem-function.o.
+ * config/i386/i386.c (type_natural_mode): Added a check for cilk flag
+ before emitting a note.
+ (ix86_frame_pointer_required): Added a flag_enable_cilk check.
+ (ix86_cilkplus_map_proc_to_attr): New function.
+ (ix86_cilkplus_find_proc_code): Likewise.
+ (ix86_cilkplus_find_isa_code): Likewise.
+ (ix86_builtin_find_vlength_for_proc): Likewise.
+
2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com>
* array-notation-common.c: Delete.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 0c434bf58ad..4bdfa7fdaa6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1140,7 +1140,8 @@ C_COMMON_OBJS = c-family/c-common.o c-family/c-cppbuiltin.o c-family/c-dump.o \
c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \
c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \
c-family/c-semantics.o c-family/c-ada-spec.o tree-mudflap.o \
- c-family/array-notation-common.o c-family/c-cilk.o c-family/elem-function.o
+ c-family/array-notation-common.o c-family/c-cilk.o \
+ c-family/c-cpp-elem-function.o
# Language-independent object files.
# We put the insn-*.o files first so that a parallel make will build
@@ -1216,6 +1217,7 @@ OBJS = \
dwarf2asm.o \
dwarf2cfi.o \
dwarf2out.o \
+ elem-function-common.o \
emit-rtl.o \
et-forest.o \
except.o \
@@ -1462,7 +1464,6 @@ OBJS = \
web.o \
xcoffout.o \
cilk.o \
- elem-function-common.o \
$(out_object_file) \
$(EXTRA_OBJS) \
$(host_hook_obj)
diff --git a/gcc/c-family/ChangeLog.cilkplus b/gcc/c-family/ChangeLog.cilkplus
index 939b54ffa28..82e4d39a70a 100644
--- a/gcc/c-family/ChangeLog.cilkplus
+++ b/gcc/c-family/ChangeLog.cilkplus
@@ -1,3 +1,7 @@
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * c-cpp-elem-function.c: New file.
+
2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com>
* c-common.def (ARRAY_NOTATION_REF): New expression definiton.
diff --git a/gcc/c-family/c-cpp-elem-function.c b/gcc/c-family/c-cpp-elem-function.c
new file mode 100644
index 00000000000..117e5a1569b
--- /dev/null
+++ b/gcc/c-family/c-cpp-elem-function.c
@@ -0,0 +1,814 @@
+/* This file is part of the Intel(R) Cilk(TM) Plus support
+ This file contains C/C++ specific functions for elemental
+ functions.
+
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ Written by Balaji V. Iyer <balaji.v.iyer@intel.com>,
+ Intel Corporation
+
+ Many Thanks to Karthik Kumar for advice on the basic technique
+ about cloning functions.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "langhooks.h"
+#include "cilk.h"
+#include "basic-block.h"
+#include "output.h"
+#include "c-family/c-common.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "flags.h"
+#include "c/c-tree.h"
+#include "tree-inline.h"
+#include "cgraph.h"
+#include "ipa-prop.h"
+#include "opts.h"
+#include "tree-iterator.h"
+#include "options.h"
+#include "intl.h"
+#include "vec.h"
+#include "target.h"
+
+/* Forward declarations for static helpers that are defined further down in
+ this file. */
+static tree create_optimize_attribute (int);
+static tree create_processor_attribute (elem_fn_info *, tree *);
+static tree elem_fn_build_array (tree base_var, tree index);
+
+/* Classification of a parameter slot, as assigned by
+ segment_params_for_reg_size when it compares each parameter's size against
+ the widest vector register size. */
+enum elem_fn_parm_size {
+ /* Parameter whose type is not a VECTOR_TYPE. */
+ SCALAR = 0,
+ /* Piece of a vector parameter that is larger than the register size. */
+ VECTOR_SEGMENTED = 1,
+ /* Vector parameter that is not larger than the register size. */
+ VECTOR_NONSEGMENTED = 2
+};
+
+/* Payload handed to the replace_parm_decl tree walker: every PARM_DECL whose
+ DECL_NAME equals that of ARG is overwritten with REPLACEMENT. */
+typedef struct
+{
+ tree arg; /* Parameter to look for (matched by DECL_NAME). */
+ tree replacement; /* Tree that takes the place of each match. */
+} args_data;
+
+/* Pairs a variable with a vector of trees.
+ NOTE(review): no use of this type is visible in this part of the file;
+ presumably SUBSTITUTE_VARS lists the variables that stand in for VAR_NAME
+ after a parameter is split up -- confirm against its users. */
+typedef struct
+{
+ tree var_name;
+ vec<tree, va_gc> *substitute_vars;
+} var_expand_struct;
+
+/* Builds the __target__ attribute that corresponds to the processor recorded
+   in ELEM_FN_VALUES->proc_type by delegating to the target hook
+   targetm.cilkplus.builtin_map_processor_to_attr.  The hook also fills in
+   *OPPOSITE_ATTR, the complementary attribute intended for the scalar
+   function.
+
+   If ELEM_FN_VALUES is NULL (which is not expected to happen), both the
+   returned attribute and *OPPOSITE_ATTR are NULL_TREE.  */
+
+static tree
+create_processor_attribute (elem_fn_info *elem_fn_values, tree *opposite_attr)
+{
+  if (!elem_fn_values)
+    {
+      /* Nothing to base the attribute on: reset the whole processor
+	 attribute rather than guessing.  */
+      *opposite_attr = NULL_TREE;
+      return NULL_TREE;
+    }
+
+  return targetm.cilkplus.builtin_map_processor_to_attr
+    (elem_fn_values->proc_type, opposite_attr);
+}
+
+/* Walks the parameters of FNDECL and, for every parameter that is named in
+   the uniform or linear variable lists of ELEM_FN_VALUES and whose type is
+   currently a VECTOR_TYPE, replaces the vector type by its element type
+   (both TREE_TYPE and DECL_ARG_TYPE are updated).
+
+   Parameters that are unnamed or that are not vectors are left untouched, so
+   a parameter that was never vectorized is not damaged, and a name listed
+   as both uniform and linear is only scalarized once.  Does nothing when
+   FNDECL or ELEM_FN_VALUES is NULL.  */
+
+static void
+scalarize_uniform_linear_params (tree fndecl, elem_fn_info *elem_fn_values)
+{
+  tree parm;
+
+  if (!elem_fn_values || !fndecl)
+    return;
+
+  for (parm = DECL_ARGUMENTS (fndecl); parm; parm = DECL_CHAIN (parm))
+    {
+      size_t ii;
+      bool listed = false;
+      const char *parm_name;
+
+      /* Only a named, vector-typed parameter can need scalarizing.  Taking
+	 TREE_TYPE of any other type would yield a pointee type or
+	 NULL_TREE instead of a scalar.  */
+      if (!DECL_NAME (parm)
+	  || !TREE_TYPE (parm)
+	  || TREE_CODE (TREE_TYPE (parm)) != VECTOR_TYPE)
+	continue;
+
+      parm_name = IDENTIFIER_POINTER (DECL_NAME (parm));
+
+      for (ii = 0; !listed && ii < (size_t) elem_fn_values->no_uvars; ii++)
+	listed = !strcmp (elem_fn_values->uniform_vars[ii], parm_name);
+
+      for (ii = 0; !listed && ii < (size_t) elem_fn_values->no_lvars; ii++)
+	listed = !strcmp (elem_fn_values->linear_vars[ii], parm_name);
+
+      if (listed)
+	{
+	  tree elem_type = TREE_TYPE (TREE_TYPE (parm));
+	  TREE_TYPE (parm) = elem_type;
+	  DECL_ARG_TYPE (parm) = elem_type;
+	}
+    }
+}
+
+
+/* Returns an "optimize" attribute (a TREE_LIST whose purpose is the
+   identifier "optimize" and whose value is a one-element list holding the
+   string "O<n>") for the optimization level given by OPTION.
+
+   OPTION is clamped to the range 0..3, so values below 0 yield "O0" and
+   values above 3 yield "O3"; this guarantees the level character is always
+   initialized before it is copied into the string constant.  */
+
+static tree
+create_optimize_attribute (int option)
+{
+  tree opt_attr;
+  vec<tree, va_gc> *opt_vec;
+  char optimization[2];
+
+  if (option < 0)
+    option = 0;
+  else if (option > 3)
+    option = 3;
+
+  optimization[0] = 'O';
+  optimization[1] = (char) ('0' + option);
+
+  vec_alloc (opt_vec, 4);
+  /* build_string copies exactly the two characters given here.  */
+  vec_safe_push (opt_vec, build_string (2, optimization));
+  opt_attr = build_tree_list_vec (opt_vec);
+  vec_safe_truncate (opt_vec, 0);
+  opt_attr = build_tree_list (get_identifier ("optimize"), opt_attr);
+  return opt_attr;
+}
+
+/* walk_tree callback.  DATA points to an args_data.  If *TP is a PARM_DECL
+   whose DECL_NAME is the same as that of DATA->arg, *TP is replaced by
+   DATA->replacement.  *WALK_SUBTREES is cleared for every PARM_DECL so that
+   its subtrees are not stepped through.  Always returns NULL_TREE.
+
+   Both TP and *TP are checked before *TP is examined, matching the other
+   tree walkers in this file.  */
+
+static tree
+replace_parm_decl (tree *tp, int *walk_subtrees, void *data)
+{
+  if (!tp || !*tp)
+    return NULL_TREE;
+
+  if (TREE_CODE (*tp) == PARM_DECL)
+    {
+      args_data *value = (args_data *) data;
+      if (DECL_NAME (*tp) == DECL_NAME (value->arg))
+	*tp = value->replacement;
+
+      *walk_subtrees = 0;
+    }
+  return NULL_TREE;
+}
+
+
+/* walk_tree callback.  DATA is the temporary variable (a tree) that collects
+   the function's return value.  A RETURN_EXPR in *TP of the form
+   "return <result> = <value>" is replaced by the assignment
+   "DATA = <value>".  *WALK_SUBTREES is cleared for every RETURN_EXPR so the
+   walk does not descend into it.  Always returns NULL_TREE.
+
+   A RETURN_EXPR without an operand (a bare "return;") or whose operand is
+   not an assignment carries no value to store and is left unchanged.  */
+
+static tree
+replace_return_with_new_var (tree *tp, int *walk_subtrees, void *data)
+{
+  if (!tp || !*tp)
+    return NULL_TREE;
+
+  if (TREE_CODE (*tp) == RETURN_EXPR)
+    {
+      tree return_var = (tree) data;
+      tree ret_value = TREE_OPERAND (*tp, 0);
+
+      *walk_subtrees = 0;
+
+      /* Operand 1 may only be taken from an assignment; anything else
+	 (including a missing operand) has no right-hand side.  */
+      if (ret_value
+	  && (TREE_CODE (ret_value) == MODIFY_EXPR
+	      || TREE_CODE (ret_value) == INIT_EXPR))
+	*tp = build2 (MODIFY_EXPR, TREE_TYPE (return_var), return_var,
+		      TREE_OPERAND (ret_value, 1));
+    }
+  return NULL_TREE;
+}
+
+/* Returns the expression BASE_VAR[INDEX], built with build_array_ref at the
+   source location of BASE_VAR.  */
+
+static tree
+elem_fn_build_array (tree base_var, tree index)
+{
+  location_t loc = EXPR_LOCATION (base_var);
+  tree array_ref = build_array_ref (loc, base_var, index);
+
+  return array_ref;
+}
+
+/* walk_tree callback. DATA points to a fn_vect_elements describing the
+ function being rewritten. Only VAR_DECLs and PARM_DECLs in *TP are
+ examined:
+
+ - If the DECL_NAME of *TP matches one of DATA->arguments and its type is a
+ VECTOR_TYPE, *TP becomes an array reference indexed by
+ DATA->induction_var.
+ - If it matches an argument that is not a vector and its name is listed in
+ DATA->linear_vars, *TP becomes
+ *TP + (type of *TP) (induction_var * linear step).
+ - Otherwise, if its DECL_NAME matches DATA->return_var, *TP becomes an
+ array reference indexed by DATA->induction_var.
+
+ *WALK_SUBTREES is cleared whenever an argument or the return variable is
+ matched. DATA->induction_var must be non-NULL. Always returns
+ NULL_TREE. */
+
+static tree
+replace_array_ref_for_vec (tree *tp, int *walk_subtrees, void *data)
+{
+ size_t ii = 0;
+ tree ii_var, add_expr = NULL_TREE, mult_expr = NULL_TREE;
+ fn_vect_elements *func_data;
+ if (!*tp)
+ return NULL_TREE;
+
+ if (TREE_CODE (*tp) == VAR_DECL || TREE_CODE (*tp) == PARM_DECL)
+ {
+ func_data = (fn_vect_elements *) data;
+ gcc_assert (func_data->induction_var);
+ /* First see whether *TP names one of the function's arguments. */
+ for (ii_var = func_data->arguments; ii_var; ii_var = DECL_CHAIN (ii_var))
+ {
+ if (DECL_NAME (ii_var) == DECL_NAME (*tp))
+ {
+ /* If the TREE_CODE of the TREE_TYPE is not a vector, then it
+ means that the variable is a UNIFORM or LINEAR and thus we do
+ not need to break it up into array. */
+ if (TREE_CODE (TREE_TYPE (*tp)) == VECTOR_TYPE)
+ *tp = elem_fn_build_array (*tp, func_data->induction_var);
+ else
+ /* Now we go through all the linear variable list and
+ if we have a hit, then we multiply the induction var with
+ step-size and add it to the variable. */
+ for (ii = 0; ii < (size_t) func_data->no_lvars; ii++)
+ if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (*tp)),
+ func_data->linear_vars[ii]))
+ {
+ mult_expr = fold_build2
+ (MULT_EXPR, TREE_TYPE (func_data->induction_var),
+ func_data->induction_var,
+ build_int_cst (integer_type_node,
+ func_data->linear_steps[ii]));
+ /* The product is cast to the variable's own type before it is
+ added to the variable. */
+ mult_expr = build_c_cast (EXPR_LOCATION (*tp),
+ TREE_TYPE (*tp), mult_expr);
+ add_expr = fold_build2 (PLUS_EXPR, TREE_TYPE (*tp),
+ *tp, mult_expr);
+ *tp = add_expr;
+ }
+
+ /* An argument was matched (whether or not it was rewritten): do not
+ descend further and skip the return-variable check below. */
+ *walk_subtrees = 0;
+ return NULL_TREE;
+ }
+ }
+ /* Not an argument; it may still be the temporary holding the return
+ value. */
+ if (func_data->return_var
+ && (DECL_NAME (*tp) == DECL_NAME (func_data->return_var)))
+ {
+ *tp = elem_fn_build_array (*tp, func_data->induction_var);
+ *walk_subtrees = 0;
+ }
+ }
+ return NULL_TREE;
+}
+
+/* Moves the return of function FNDECL to the end of the function body.
+
+   A temporary "elem_fn_ret" of the function's result type is created and
+   zero-initialized; every "return <value>" in the body is turned into an
+   assignment to that temporary; references to vector arguments, linear
+   arguments and the temporary are then rewritten in terms of INDUCTION_VAR
+   (see replace_array_ref_for_vec); finally a single
+   "return <result> = elem_fn_ret" is appended to the body when the saved
+   tree is a BIND_EXPR with a non-empty body.
+
+   ELEM_FN_VALUES supplies the linear variable names and their steps.
+   Nothing is done when FNDECL or ELEM_FN_VALUES is NULL, when FNDECL has no
+   saved body, or when it returns void.  */
+
+static void
+fix_elem_fn_return_value (tree fndecl, elem_fn_info *elem_fn_values,
+			  tree induction_var)
+{
+  size_t ii = 0;
+  fn_vect_elements data;
+  tree old_fndecl;
+  tree new_var, new_var_init, new_body = NULL_TREE;
+  tree ret_expr, ret_stmt = NULL_TREE;
+
+  if (!fndecl || !elem_fn_values || !DECL_SAVED_TREE (fndecl))
+    return;
+
+  if (TREE_TYPE (DECL_RESULT (fndecl)) == void_type_node)
+    return;
+
+  old_fndecl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+  current_function_decl = fndecl;
+
+  new_var = create_tmp_var (TREE_TYPE (DECL_RESULT (fndecl)), "elem_fn_ret");
+  new_var_init =
+    build_vector_from_val
+    (TREE_TYPE (DECL_RESULT (fndecl)),
+     build_zero_cst (TREE_TYPE (TREE_TYPE (DECL_RESULT (fndecl)))));
+  DECL_INITIAL (new_var) = new_var_init;
+  walk_tree (&DECL_SAVED_TREE (fndecl), replace_return_with_new_var,
+	     (void *) new_var, NULL);
+
+  data.return_var = new_var;
+  data.arguments = DECL_ARGUMENTS (fndecl);
+  data.induction_var = induction_var;
+  for (ii = 0; ii < (size_t) elem_fn_values->no_lvars; ii++)
+    {
+      data.linear_vars[ii] = xstrdup (elem_fn_values->linear_vars[ii]);
+      data.linear_steps[ii] = elem_fn_values->linear_steps[ii];
+    }
+  data.no_lvars = elem_fn_values->no_lvars;
+  walk_tree (&DECL_SAVED_TREE (fndecl), replace_array_ref_for_vec,
+	     (void *) &data, NULL);
+
+  /* The copied names are only needed while the walk above runs; release
+     them so they are not leaked when DATA goes out of scope.  */
+  for (ii = 0; ii < (size_t) data.no_lvars; ii++)
+    free (CONST_CAST (char *, data.linear_vars[ii]));
+
+  ret_expr = build2 (MODIFY_EXPR, TREE_TYPE (new_var),
+		     DECL_RESULT (fndecl), new_var);
+
+  ret_stmt = build1 (RETURN_EXPR, TREE_TYPE (ret_expr), ret_expr);
+  if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+    {
+      if (!BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)))
+	;
+      else if (TREE_CODE (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl))) !=
+	       TREE_LIST)
+	{
+	  append_to_statement_list_force
+	    (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)), &new_body);
+	  append_to_statement_list_force (ret_stmt, &new_body);
+	}
+      else
+	{
+	  new_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
+	  append_to_statement_list_force (ret_stmt, &new_body);
+	}
+      BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = new_body;
+    }
+
+  pop_cfun ();
+  current_function_decl = old_fndecl;
+  return;
+}
+
+/* Wraps the body of FNDECL in a counted loop that is built out of labels and
+ gotos. With "ii" standing for the new temporary "ii_elem_fn_vec_val" of
+ type size_type_node, the resulting body is:
+
+ ii = 0;
+ if_lab:
+ if (ii < VLENGTH) goto body_label; else goto exit_label;
+ body_label:
+ <original body>
+ ii = ii + 1;
+ goto if_lab;
+ exit_label:
+
+ The new statement list replaces BIND_EXPR_BODY when the saved tree is a
+ BIND_EXPR and the whole saved tree otherwise. cfun and
+ current_function_decl are switched to FNDECL for the duration and restored
+ afterwards.
+
+ Returns the loop counter variable, or NULL_TREE when FNDECL is NULL or has
+ no saved body. */
+
+static tree
+add_elem_fn_loop (tree fndecl, int vlength)
+{
+ tree exit_label = NULL_TREE, if_label = NULL_TREE, body_label = NULL_TREE;
+ tree fn_body, loop = NULL_TREE, loop_var, mod_var, incr_expr, cond_expr;
+ tree cmp_expr, old_fndecl;
+
+ if (!fndecl)
+ return NULL_TREE;
+
+ if (!DECL_SAVED_TREE (fndecl))
+ return NULL_TREE;
+
+ old_fndecl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+ current_function_decl = fndecl;
+
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
+ else
+ fn_body = DECL_SAVED_TREE (fndecl);
+
+ loop = alloc_stmt_list ();
+
+ /* ii = 0; */
+ loop_var = create_tmp_var (size_type_node, "ii_elem_fn_vec_val");
+ mod_var = build2 (MODIFY_EXPR, void_type_node, loop_var,
+ build_int_cst (size_type_node, 0));
+ append_to_statement_list_force (mod_var, &loop);
+
+ /* The three labels of the loop, all owned by FNDECL. */
+ if_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL,
+ get_identifier ("if_lab"), void_type_node);
+ DECL_CONTEXT (if_label) = fndecl;
+ DECL_ARTIFICIAL (if_label) = 0;
+ DECL_IGNORED_P (if_label) = 1;
+
+ exit_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL,
+ get_identifier ("exit_label"), void_type_node);
+ DECL_CONTEXT (exit_label) = fndecl;
+ DECL_ARTIFICIAL (exit_label) = 0;
+ DECL_IGNORED_P (exit_label) = 1;
+
+ body_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL,
+ get_identifier ("body_label"), void_type_node);
+ DECL_CONTEXT (body_label) = fndecl;
+ DECL_ARTIFICIAL (body_label) = 0;
+ DECL_IGNORED_P (body_label) = 1;
+ /* if_lab: if (ii < vlength) goto body_label; else goto exit_label; */
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ if_label), &loop);
+ cmp_expr = build2 (LT_EXPR, boolean_type_node, loop_var,
+ build_int_cst (size_type_node, vlength));
+ cond_expr = build3 (COND_EXPR, void_type_node, cmp_expr,
+ build1 (GOTO_EXPR, void_type_node, body_label),
+ build1 (GOTO_EXPR, void_type_node, exit_label));
+
+ append_to_statement_list_force (cond_expr, &loop);
+ /* body_label: <original body> */
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ body_label), &loop);
+ append_to_statement_list_force (fn_body, &loop);
+
+ /* ii = ii + 1; goto if_lab; exit_label: */
+ incr_expr = build2 (MODIFY_EXPR, void_type_node, loop_var,
+ build2 (PLUS_EXPR, TREE_TYPE (loop_var), loop_var,
+ build_int_cst (size_type_node, 1)));
+
+ append_to_statement_list_force (incr_expr, &loop);
+ append_to_statement_list_force (build1 (GOTO_EXPR, void_type_node, if_label),
+ &loop);
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ exit_label), &loop);
+
+ /* Install the loop where the original body was taken from. */
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = loop;
+ else
+ DECL_SAVED_TREE (fndecl) = loop;
+
+ pop_cfun ();
+ current_function_decl = old_fndecl;
+
+ return loop_var;
+}
+
+/* Adds a mask if-statement to function FNDECL: the body becomes
+   "if (<mask> != 0) <original body>", where <mask> is the last parameter of
+   FNDECL, which must be named "__elem_fn_mask".
+
+   The conditional replaces BIND_EXPR_BODY when the saved tree is a
+   BIND_EXPR and the whole saved tree otherwise.  Nothing is done when FNDECL
+   is NULL or has no saved body; these checks are made before cfun is
+   switched so that every push_cfun is balanced by a pop_cfun.  */
+
+static void
+add_elem_fn_mask (tree fndecl)
+{
+  tree mask_arg;
+  tree cond_expr, cmp_expr, old_fndecl;
+  tree fn_body = NULL_TREE;
+
+  if (!fndecl || !DECL_SAVED_TREE (fndecl))
+    return;
+
+  /* A masked function always carries at least the mask parameter.  */
+  gcc_assert (DECL_ARGUMENTS (fndecl));
+
+  old_fndecl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+  current_function_decl = fndecl;
+
+  /* The mask is the last parameter in the chain.  */
+  for (mask_arg = DECL_ARGUMENTS (fndecl); DECL_CHAIN (mask_arg);
+       mask_arg = DECL_CHAIN (mask_arg))
+    ;
+  gcc_assert (DECL_NAME (mask_arg) == get_identifier ("__elem_fn_mask"));
+
+  if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+    fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
+  else
+    fn_body = DECL_SAVED_TREE (fndecl); /* Not sure if we ever get here.  */
+
+  cmp_expr = fold_build2 (NE_EXPR, TREE_TYPE (mask_arg), mask_arg,
+			  build_int_cst (TREE_TYPE (TREE_TYPE (mask_arg)), 0));
+  cond_expr = fold_build3 (COND_EXPR, void_type_node, cmp_expr, fn_body,
+			   build_empty_stmt (EXPR_LOCATION (fndecl)));
+
+  if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+    BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = cond_expr;
+  else
+    DECL_SAVED_TREE (fndecl) = cond_expr;
+
+  pop_cfun ();
+  current_function_decl = old_fndecl;
+}
+
+/* Inserts the tree expression EXPR as the first statement of the function
+   FNDECL.
+
+   If the function body (the BIND_EXPR_BODY when the saved tree is a
+   BIND_EXPR, the saved tree itself otherwise) is a STATEMENT_LIST, EXPR is
+   linked in front of its first statement.  Otherwise a new statement list
+   holding EXPR followed by the old body is created and installed in the
+   place the old body came from.
+
+   Nothing is done when EXPR or FNDECL is NULL_TREE or when FNDECL has no
+   saved body.  */
+
+static void
+insert_as_first_stmt (tree expr, tree fndecl)
+{
+  tree body = NULL_TREE, new_body = NULL_TREE;
+
+  if (fndecl == NULL_TREE || expr == NULL_TREE)
+    return;
+
+  body = DECL_SAVED_TREE (fndecl);
+  if (!body)
+    return;
+
+  if (TREE_CODE (body) == BIND_EXPR)
+    body = BIND_EXPR_BODY (body);
+
+  if (TREE_CODE (body) == STATEMENT_LIST)
+    {
+      tree_stmt_iterator tsi = tsi_start (body);
+      tsi_link_before (&tsi, expr, TSI_CONTINUE_LINKING);
+    }
+  else
+    {
+      new_body = alloc_stmt_list ();
+      append_to_statement_list_force (expr, &new_body);
+      append_to_statement_list_force (body, &new_body);
+      /* Install the new list wherever the old body lived; without the
+	 second arm EXPR would be silently dropped for a function whose
+	 saved tree is not a BIND_EXPR.  */
+      if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+	BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = new_body;
+      else
+	DECL_SAVED_TREE (fndecl) = new_body;
+    }
+}
+
+/* Segments all the vector parameters of FNDECL into the sizes of largest
+ vector register possible. */
+
+static void
+segment_params_for_reg_size (tree fndecl)
+{
+ size_t ii = 0, jj = 0, kk = 0, ll = 0, ii_narg = 0, nargs_reqd = 0;
+ enum elem_fn_parm_size *param_array;
+ unsigned HOST_WIDE_INT biggest_vec_reg_size = BIGGEST_ALIGNMENT;
+ tree p = NULL_TREE, new_expr = NULL_TREE;
+ tree *param_var_array = NULL, *parm_type_array = NULL, *new_parm_var = NULL;
+ unsigned HOST_WIDE_INT *param_index = NULL;
+ unsigned HOST_WIDE_INT param_no = 0;
+ int *param_length = NULL;
+ struct function *f = DECL_STRUCT_FUNCTION (fndecl);
+
+ gcc_assert (f);
+ push_cfun (f);
+
+ for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p))
+ param_no++;
+
+ param_length = XNEWVEC (int, param_no);
+ gcc_assert (param_length);
+ ii = 0;
+ for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p))
+ {
+ unsigned HOST_WIDE_INT p_size = 0;
+ if (TREE_TYPE (p) && TREE_CODE (TREE_TYPE (p)) == VECTOR_TYPE)
+ p_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (p)), 1);
+ if (p_size > biggest_vec_reg_size)
+ {
+ nargs_reqd += p_size / biggest_vec_reg_size;
+ param_length[ii] = p_size / biggest_vec_reg_size;
+ }
+ else
+ {
+ nargs_reqd++;
+ param_length[ii] = 1;
+ }
+ ii++;
+ }
+ if (nargs_reqd == 0)
+ return;
+
+ param_array = XNEWVEC (enum elem_fn_parm_size, nargs_reqd);
+ gcc_assert (param_array != NULL);
+
+ param_var_array = XNEWVEC (tree, nargs_reqd);
+ gcc_assert (param_var_array != NULL);
+
+ parm_type_array = XNEWVEC (tree, nargs_reqd);
+ gcc_assert (parm_type_array != NULL);
+
+ param_index = XNEWVEC (unsigned HOST_WIDE_INT, nargs_reqd);
+ gcc_assert (param_index != NULL);
+ param_no = 0;
+ for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p))
+ {
+ unsigned HOST_WIDE_INT p_size = 0;
+ if (TREE_TYPE (p) && TREE_CODE (TREE_TYPE (p)) == VECTOR_TYPE)
+ {
+ p_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (p)), 1);
+ if (p_size > biggest_vec_reg_size)
+ {
+ for (ii = 0; ii < (size_t) ((int) (p_size/biggest_vec_reg_size));
+ ii++)
+ {
+ unsigned HOST_WIDE_INT var_size = 0;
+ param_array[ii + ii_narg] = VECTOR_SEGMENTED;
+ var_size = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (p)),
+ 1);
+ parm_type_array[ii + ii_narg] =
+ build_vector_type (TREE_TYPE (TREE_TYPE (p)),
+ (biggest_vec_reg_size / var_size));
+ param_index[ii + ii_narg] = param_no;
+ }
+ ii_narg += (int) (p_size / biggest_vec_reg_size);
+ }
+ else
+ {
+ param_array[ii_narg] = VECTOR_NONSEGMENTED;
+ parm_type_array[ii_narg] = TREE_TYPE (p);
+ param_index[ii_narg] = param_no;
+ ii_narg++;
+ }
+ }
+ else
+ {
+ param_array[ii_narg] = SCALAR;
+ parm_type_array[ii_narg] = TREE_TYPE (p);
+ param_index[ii_narg] = param_no;
+ ii_narg++;
+ }
+ param_no++;
+ }
+
+ new_parm_var = XNEWVEC (tree, param_no);
+ gcc_assert (new_parm_var != NULL);
+
+ ii = 0;
+ for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), ii++)
+ new_parm_var[ii] = create_tmp_var (TREE_TYPE (p),
+ IDENTIFIER_POINTER (DECL_NAME (p)));
+
+
+ for (ii = 0; ii < (size_t) nargs_reqd; ii++)
+ {
+ param_var_array[ii] = build_decl (EXPR_LOCATION (fndecl), PARM_DECL,
+ NULL_TREE, parm_type_array[ii]);
+ DECL_ARG_TYPE (param_var_array[ii]) = parm_type_array[ii];
+ DECL_CONTEXT (param_var_array[ii]) = fndecl;
+ DECL_ARTIFICIAL (param_var_array[ii]) = 1;
+ lang_hooks.dup_lang_specific_decl (param_var_array[ii]);
+ }
+
+ for (ii = 1; ii < nargs_reqd; ii++)
+ TREE_CHAIN (param_var_array[ii-1]) = param_var_array[ii];
+
+ ii = 0;
+ for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p))
+ {
+ args_data d;
+ d.arg = p;
+ d.replacement = new_parm_var[ii];
+ walk_tree (&DECL_SAVED_TREE (fndecl), replace_parm_decl, (void *) &d,
+ NULL);
+ ii++;
+ }
+
+ DECL_ARGUMENTS (fndecl) = param_var_array[0];
+
+ /* Now we have to do group the split up register value into one big variable.
+ For Example, let's say we split vector(8) int x into:
+ vector(4) int D124 and vector(4) int D123
+ We have to regroup them into the following:
+
+ x (8, 7, 6, 5) = D124
+ x (4, 3, 2, 1) = D123
+
+ So, the final thing will look something like this:
+
+ data_type foo (vec(8) int x)
+ |
+ |
+ V
+ data_type foo (vec (4) int D124, vec (4) int D123)
+ {
+ x = {D124, D123}
+ < REST OF FUNCTION BODY >
+ }
+ */
+
+ ii = 0;
+ while (ii < nargs_reqd)
+ {
+ if (param_array[ii] == SCALAR || param_array[ii] == VECTOR_NONSEGMENTED)
+ {
+ new_expr = build2 (MODIFY_EXPR, parm_type_array[ii],
+ new_parm_var[jj], param_var_array[ii]);
+ insert_as_first_stmt (new_expr, fndecl);
+ ii++;
+ }
+ else
+ for (kk = 0; kk < (size_t) param_length[jj]; kk++)
+ {
+ for (ll = 0; ll < (size_t) param_length[jj]; ll++)
+ {
+ tree m_type = TREE_TYPE (TREE_TYPE (new_parm_var[jj]));
+ tree lhs_array =
+ elem_fn_build_array
+ (new_parm_var[jj], build_int_cst
+ (size_type_node, kk * param_length[jj] + ll));
+ tree rhs_array =
+ elem_fn_build_array (param_var_array[ii],
+ build_int_cst (size_type_node, ll));
+ new_expr = build2 (MODIFY_EXPR, m_type, lhs_array, rhs_array);
+ insert_as_first_stmt (new_expr, fndecl);
+ }
+ ii++;
+ }
+ jj++;
+ }
+ pop_cfun ();
+ return;
+}
+
+/* Registers FNDECL with the call graph.  FNDECL's struct function is
+   pushed as the current function (taking over the current function's
+   curr_properties when there is a current function) while
+   cgraph_add_new_function and cgraph_finalize_function are called on
+   FNDECL.  The previous function context and current_function_decl are
+   restored before returning.  */
+
+static void
+call_graph_add_fn (tree fndecl)
+{
+  struct function *fn = DECL_STRUCT_FUNCTION (fndecl);
+  const tree saved_fndecl = current_function_decl;
+
+  /* Carry over the properties of the function being compiled, if any.  */
+  if (cfun != NULL)
+    fn->curr_properties = cfun->curr_properties;
+
+  push_cfun (fn);
+  current_function_decl = fndecl;
+
+  cgraph_add_new_function (fndecl, false);
+  cgraph_finalize_function (fndecl, true);
+
+  pop_cfun ();
+  current_function_decl = saved_fndecl;
+}
+
+/* Clones the function FNDECL into its elemental (vector) versions.  The
+   elemental-function information is read from FNDECL with
+   extract_elem_fn_values; depending on its mask field a masked clone, an
+   unmasked clone, or both are created.  Nothing is done when FNDECL is
+   NULL or when no elemental-function information can be extracted.  */
+
+void
+elem_fn_create_fn (tree fndecl)
+{
+  tree new_masked_fn = NULL_TREE, new_unmasked_fn = NULL_TREE;
+  tree induction_var = NULL_TREE;
+  elem_fn_info *elem_fn_values = NULL;
+  char *masked_suffix = NULL, *unmasked_suffix = NULL;
+  tree proc_attr = NULL_TREE, opp_proc_attr = NULL_TREE, opt_attr = NULL_TREE;
+
+  if (!fndecl)
+    return;
+
+  elem_fn_values = extract_elem_fn_values (fndecl);
+  if (!elem_fn_values)
+    return;
+
+  /* Decide which clones are wanted; a NULL suffix means "do not create".  */
+  if (elem_fn_values->mask == USE_MASK)
+    masked_suffix = find_suffix (elem_fn_values, true);
+  else if (elem_fn_values->mask == USE_NOMASK)
+    unmasked_suffix = find_suffix (elem_fn_values, false);
+  else
+    {
+      masked_suffix = find_suffix (elem_fn_values, true);
+      unmasked_suffix = find_suffix (elem_fn_values, false);
+    }
+
+  if (masked_suffix)
+    {
+      new_masked_fn = copy_node (fndecl);
+      new_masked_fn = rename_elem_fn (new_masked_fn, masked_suffix);
+      SET_DECL_RTL (new_masked_fn, NULL);
+      TREE_SYMBOL_REFERENCED (DECL_NAME (new_masked_fn)) = 1;
+      tree_elem_fn_versioning (fndecl, new_masked_fn, NULL, false, NULL, false,
+                               NULL, NULL, elem_fn_values->vectorlength, true);
+      scalarize_uniform_linear_params (new_masked_fn, elem_fn_values);
+
+      /* The clone gets the processor attribute; the scalar function gets
+         the opposite one.  */
+      proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+      if (proc_attr)
+        decl_attributes (&new_masked_fn, proc_attr, 0);
+      if (opp_proc_attr)
+        decl_attributes (&fndecl, opp_proc_attr, 0);
+
+      opt_attr = create_optimize_attribute (3); /* Turn vectorizer on.  */
+      if (opt_attr)
+        decl_attributes (&new_masked_fn, opt_attr, 0);
+
+      DECL_ATTRIBUTES (new_masked_fn) =
+        remove_attribute ("vector", DECL_ATTRIBUTES (new_masked_fn));
+
+      add_elem_fn_mask (new_masked_fn);
+      induction_var = add_elem_fn_loop (new_masked_fn,
+                                        elem_fn_values->vectorlength);
+      fix_elem_fn_return_value (new_masked_fn, elem_fn_values, induction_var);
+      segment_params_for_reg_size (new_masked_fn);
+      call_graph_add_fn (new_masked_fn);
+      SET_DECL_ASSEMBLER_NAME (new_masked_fn, DECL_NAME (new_masked_fn));
+      DECL_ELEM_FN_ALREADY_CLONED (new_masked_fn) = true;
+      if (DECL_STRUCT_FUNCTION (new_masked_fn))
+        DECL_STRUCT_FUNCTION (new_masked_fn)->elem_fn_already_cloned = true;
+    }
+
+  if (unmasked_suffix)
+    {
+      new_unmasked_fn = copy_node (fndecl);
+      new_unmasked_fn = rename_elem_fn (new_unmasked_fn, unmasked_suffix);
+      SET_DECL_RTL (new_unmasked_fn, NULL);
+      TREE_SYMBOL_REFERENCED (DECL_NAME (new_unmasked_fn)) = 1;
+      tree_elem_fn_versioning (fndecl, new_unmasked_fn, NULL, false, NULL,
+                               false, NULL, NULL,
+                               elem_fn_values->vectorlength, false);
+      scalarize_uniform_linear_params (new_unmasked_fn, elem_fn_values);
+
+      proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+      if (proc_attr)
+        decl_attributes (&new_unmasked_fn, proc_attr, 0);
+      if (opp_proc_attr)
+        decl_attributes (&fndecl, opp_proc_attr, 0);
+
+      opt_attr = create_optimize_attribute (3); /* Turn vectorizer on.  */
+      if (opt_attr)
+        decl_attributes (&new_unmasked_fn, opt_attr, 0);
+
+      DECL_ATTRIBUTES (new_unmasked_fn) =
+        remove_attribute ("vector", DECL_ATTRIBUTES (new_unmasked_fn));
+      induction_var = add_elem_fn_loop (new_unmasked_fn,
+                                        elem_fn_values->vectorlength);
+      fix_elem_fn_return_value (new_unmasked_fn, elem_fn_values,
+                                induction_var);
+      segment_params_for_reg_size (new_unmasked_fn);
+      call_graph_add_fn (new_unmasked_fn);
+      SET_DECL_ASSEMBLER_NAME (new_unmasked_fn, DECL_NAME (new_unmasked_fn));
+      DECL_ELEM_FN_ALREADY_CLONED (new_unmasked_fn) = true;
+      if (DECL_STRUCT_FUNCTION (new_unmasked_fn))
+        DECL_STRUCT_FUNCTION (new_unmasked_fn)->elem_fn_already_cloned = true;
+    }
+
+  /* find_suffix hands back heap-allocated strings, and rename_elem_fn
+     concatenates the suffix into a newly allocated name, so the suffixes
+     are no longer needed here.  Releasing a NULL suffix is harmless.  */
+  XDELETEVEC (masked_suffix);
+  XDELETEVEC (unmasked_suffix);
+  XDELETEVEC (elem_fn_values);
+}
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 6b8ff154ccc..1240f940aee 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -3641,8 +3641,9 @@ c_builtin_function_ext_scope (tree decl)
/* Should never be called on a symbol with a preexisting meaning. */
gcc_assert (!I_SYMBOL_BINDING (id));
- bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
- UNKNOWN_LOCATION);
+ if (external_scope)
+ bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
+ UNKNOWN_LOCATION);
/* Builtins in the implementation namespace are made visible without
needing to be explicitly declared. See push_file_scope. */
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index af84f9cffc6..25ee5a381dc 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -12094,7 +12094,7 @@ c_parser_elem_fn_processor_clause (c_parser *parser)
token = c_parser_peek_token (parser);
if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN))
{
- c_parser_error (parser, "expected %<)%>");
+ c_parser_error (parser, "expected %<(%>");
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL_TREE;
}
@@ -12147,8 +12147,16 @@ c_parser_elem_fn_processor_clause (c_parser *parser)
build_string (strlen ("core_i7_sse4_2"),
"core_i7_sse4_2"));
}
+ else if (!token->value || TREE_CODE (token->value) != IDENTIFIER_NODE)
+ {
+ c_parser_error (parser, "expected processor-name");
+ }
else
- sorry ("Processor type not supported");
+ {
+ c_parser_consume_token (parser);
+ error_at (input_location, "processor %s not supported",
+ IDENTIFIER_POINTER (token->value));
+ }
if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN))
c_parser_consume_token (parser);
@@ -12165,7 +12173,7 @@ c_parser_elem_fn_processor_clause (c_parser *parser)
return proc_tree_list;
}
-/* This function parses the uniform clause of Cilk Plus elemental functions. */
+/* This function parses "uniform" clause of Cilk Plus elemental functions. */
static tree
c_parser_elem_fn_uniform_clause (c_parser *parser)
@@ -12173,7 +12181,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser)
c_token *token;
tree uniform_tree;
tree str_token = NULL_TREE;
- vec<tree,va_gc> *uniform_vec = NULL;
+ vec<tree, va_gc> *uniform_vec = NULL;
if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN))
{
@@ -12215,7 +12223,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser)
}
else
{
- c_parser_error (parser, "expected number or comma");
+ c_parser_error (parser, "expected variable-name");
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL_TREE;
}
@@ -12227,6 +12235,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser)
return uniform_tree;
}
+
/* This function parses the linear clause of Cilk Plus Elemental functions. */
static tree
@@ -12260,10 +12269,22 @@ c_parser_elem_fn_linear_clause (c_parser *parser)
c_parser_consume_token (parser);
token = c_parser_peek_token (parser);
if (token->value && token->type == CPP_NUMBER)
- step_size = token->value;
+ {
+ step_size = token->value;
+ if (TREE_TYPE (step_size)
+ && TREE_CODE (TREE_TYPE (step_size)) == REAL_TYPE)
+ {
+ error_at (input_location, "step-size must be an integer "
+ "constant expression");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
+ NULL);
+ return NULL_TREE;
+ }
+ }
else
{
c_parser_error (parser, "expected step-size");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL_TREE;
}
c_parser_consume_token (parser);
@@ -12316,8 +12337,7 @@ c_parser_elem_fn_vlength_clause (c_parser *parser)
if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN))
{
- c_parser_error (parser, "expected %<)%>");
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ c_parser_skip_until_found (parser, CPP_COMMA, "expected %<(%>");
return NULL_TREE;
}
else
@@ -12329,29 +12349,39 @@ c_parser_elem_fn_vlength_clause (c_parser *parser)
token = c_parser_peek_token (parser);
if (token->value && token->type == CPP_NUMBER)
{
- vec_safe_push (vlength_vec, token->value);
- c_parser_consume_token (parser);
- if (c_parser_next_token_is (parser, CPP_COMMA))
+ if (TREE_TYPE (token->value)
+ && TREE_CODE (TREE_TYPE (token->value)) == REAL_TYPE)
{
- c_parser_consume_token (parser);
- if (c_parser_next_token_is_not (parser, CPP_NUMBER))
- {
- c_parser_error (parser, "expected vectorlength after %<,%>");
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
- return NULL_TREE;
- }
+ error_at (input_location, "vectorlength must be an integer.");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ return NULL_TREE;
}
- else if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN))
+ if (!integer_pow2p (token->value))
{
- c_parser_error (parser,
- "expected %<,%> or %<)%> after vectorlength");
+ error_at (input_location, "vectorlength must be a power of 2.");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ return NULL_TREE;
+ }
+ else if (compare_tree_int (token->value, 8) == 1
+ || compare_tree_int (token->value, 2) == -1)
+ {
+ error_at (input_location,
+ "vectorlength must be between 2 and 8.");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ return NULL_TREE;
+ }
+ vec_safe_push (vlength_vec, token->value);
+ c_parser_consume_token (parser);
+ if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN))
+ {
+ c_parser_error (parser, "expected %<)%> after vectorlength");
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL_TREE;
}
}
else
{
- c_parser_error (parser, "expected number or comma");
+ c_parser_error (parser, "expected number");
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL_TREE;
}
diff --git a/gcc/cilk.h b/gcc/cilk.h
index c32efab5e0a..c14b8e6910c 100644
--- a/gcc/cilk.h
+++ b/gcc/cilk.h
@@ -213,8 +213,9 @@ enum mask_options {
typedef struct
{
char *proc_type;
+ char *isa_type;
enum mask_options mask;
- int vectorlength[MAX_VARS];
+ int vectorlength;
int no_vlengths;
char *uniform_vars[MAX_VARS];
int no_uvars;
@@ -230,13 +231,23 @@ typedef struct
} elem_fn_info;
/* This data structure will hold all the arguments in the function. */
-typedef struct
+typedef struct
{
tree induction_var;
tree arguments;
tree return_var;
+ int no_lvars;
+ char *linear_vars[MAX_VARS];
+ int linear_steps[MAX_VARS];
} fn_vect_elements;
+enum elem_fn_parm_type
+{
+ TYPE_NONE = 0,
+ TYPE_UNIFORM = 1,
+ TYPE_LINEAR = 2
+};
+
/* Offset of fields in the Cilk frame descriptor.
Index is same as for cilk_trees. If the index
does not correspond to a field of the Cilk frame
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a61a0352971..216f8e1799e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5780,20 +5780,16 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
static bool warnedavx;
+ /* In Cilk Plus you can create code for a processor that
+ is enabled with elemental functions. */
if (cum
+ && !flag_enable_cilk
&& !warnedavx
&& cum->warn_avx)
{
- /* For Cilk Plus with elemental functions, the user
- can generate code for a hardware that is not the
- target hardware. So, this warning is not valid for
- us. */
- if (!flag_enable_cilk)
- {
- warnedavx = true;
- warning (0, "AVX vector argument without AVX "
- "enabled changes the ABI");
- }
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX "
+ "enabled changes the ABI");
}
return TYPE_MODE (type);
}
@@ -5802,6 +5798,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
static bool warnedsse;
if (cum
+ && !flag_enable_cilk
&& !warnedsse
&& cum->warn_sse)
{
@@ -7182,18 +7179,12 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
&& !warned
&& align != ix86_compat_function_arg_boundary (mode, type,
saved_align))
- {
- /* For Cilk Plus with elemental functions, the user can generate
- code for a hardware that is not the target hardware. So, this
- warning is not valid for Cilk Plus. */
- if (!flag_enable_cilk)
- {
- warned = true;
- inform (input_location,
- "The ABI for passing parameters with %d-byte"
- " alignment has changed in GCC 4.6",
- align / BITS_PER_UNIT);
- }
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI for passing parameters with %d-byte"
+ " alignment has changed in GCC 4.6",
+ align / BITS_PER_UNIT);
}
}
@@ -8517,7 +8508,7 @@ static bool
ix86_frame_pointer_required (void)
{
/* For all Cilk specific functions, we frame pointer is required. */
- if (cfun->is_cilk_function == 1)
+ if (flag_enable_cilk && cfun->is_cilk_function == 1)
return true;
/* If we accessed previous frames, then the generated code expects
@@ -42139,6 +42130,187 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
return val;
}
+/* Return the "__target__" attribute for the processor named by PROC_NAME
+   of an elemental function in Cilk Plus.  If OPPOSITE_ATTR is non-NULL,
+   *OPPOSITE_ATTR is set to the opposite attribute (same arch, with the
+   vector feature turned off) for the scalar function.
+
+   Returns NULL_TREE, leaving *OPPOSITE_ATTR untouched, when PROC_NAME is
+   NULL or names a processor that is not supported; the latter case is
+   also reported with sorry.  */
+
+static tree
+ix86_cilkplus_map_proc_to_attr (char *proc_name, tree *opposite_attr)
+{
+  /* You will need the opposite attribute for the scalar code part.  */
+  tree proc_attr, opp_proc_attr;
+  vec<tree, va_gc> *proc_vec_list, *opp_proc_vec_list;
+
+  /* Check before allocating so that the early return allocates nothing.  */
+  if (!proc_name)
+    return NULL_TREE;
+
+  vec_alloc (proc_vec_list, 4);
+  vec_alloc (opp_proc_vec_list, 4);
+
+  if (!strcmp (proc_name, "pentium_4"))
+    {
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("arch=pentium4"), "arch=pentium4"));
+      vec_safe_push (proc_vec_list, build_string (strlen ("mmx"), "mmx"));
+      if (opposite_attr)
+        {
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("no-mmx"), "no-mmx"));
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("arch=pentium4"),
+                                       "arch=pentium4"));
+        }
+    }
+  else if (!strcmp (proc_name, "pentium_4_sse3"))
+    {
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("arch=pentium4"), "arch=pentium4"));
+      vec_safe_push (proc_vec_list, build_string (strlen ("sse3"), "sse3"));
+      if (opposite_attr)
+        {
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("arch=pentium4"),
+                                       "arch=pentium4"));
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("no-sse3"), "no-sse3"));
+        }
+    }
+  else if (!strcmp (proc_name, "core2_duo_sse3"))
+    {
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("arch=core2"), "arch=core2"));
+      vec_safe_push (proc_vec_list, build_string (strlen ("sse3"), "sse3"));
+      if (opposite_attr)
+        {
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("arch=core2"), "arch=core2"));
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("no-sse3"), "no-sse3"));
+        }
+    }
+  else if (!strcmp (proc_name, "core_2_duo_sse_4_1"))
+    {
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("arch=core2"), "arch=core2"));
+      vec_safe_push (proc_vec_list, build_string (strlen ("sse4.1"), "sse4.1"));
+      if (opposite_attr)
+        {
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("arch=core2"), "arch=core2"));
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("no-sse4.1"), "no-sse4.1"));
+        }
+    }
+  else if (!strcmp (proc_name, "core_i7_sse4_2"))
+    {
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("arch=corei7"), "arch=corei7"));
+      vec_safe_push (proc_vec_list,
+                     build_string (strlen ("sse4.2"), "sse4.2"));
+      vec_safe_push (proc_vec_list, build_string (strlen ("avx"), "avx"));
+      if (opposite_attr)
+        {
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("arch=corei7"), "arch=corei7"));
+          vec_safe_push (opp_proc_vec_list,
+                         build_string (strlen ("no-sse4.2"), "no-sse4.2"));
+        }
+    }
+  else
+    {
+      /* Do not build a "__target__" attribute with an empty argument list
+         for a processor we know nothing about.  */
+      sorry ("Processor type not supported.");
+      return NULL_TREE;
+    }
+
+  proc_attr = build_tree_list_vec (proc_vec_list);
+  vec_safe_truncate (proc_vec_list, 0);
+  proc_attr = build_tree_list (get_identifier ("__target__"), proc_attr);
+
+  if (opposite_attr)
+    {
+      opp_proc_attr = build_tree_list_vec (opp_proc_vec_list);
+      vec_safe_truncate (opp_proc_vec_list, 0);
+      opp_proc_attr = build_tree_list (get_identifier ("__target__"),
+                                       opp_proc_attr);
+      *opposite_attr = opp_proc_attr;
+    }
+  return proc_attr;
+}
+
+/* Returns a newly allocated (xstrdup'd) copy of the one-letter processor
+   code for PROC_NAME.  A NULL PROC_NAME gets the same code as "pentium_4".
+   A name that is not in the table below hits gcc_unreachable.  */
+
+char *
+ix86_cilkplus_find_proc_code (char *proc_name)
+{
+  static const struct
+  {
+    const char *name;
+    const char *code;
+  } proc_codes[] =
+  {
+    { "pentium_4", "B" },
+    { "pentium_4_sse3", "D" },
+    { "core2_duo_sse3", "E" },
+    { "core_2_duo_sse_4_1", "F" },
+    { "core_i7_sse4_2", "H" }
+  };
+  size_t idx;
+
+  if (proc_name == NULL)
+    return xstrdup ("B");
+
+  for (idx = 0; idx < sizeof (proc_codes) / sizeof (proc_codes[0]); idx++)
+    if (strcmp (proc_name, proc_codes[idx].name) == 0)
+      return xstrdup (proc_codes[idx].code);
+
+  gcc_unreachable ();
+
+  return NULL; /* We should never get here.  */
+}
+
+/* Returns the ISA string to use for PROC_NAME and ISA_NAME.  A non-NULL
+   ISA_NAME takes precedence and is returned as is (not copied).  Otherwise
+   a newly allocated (xstrdup'd) string chosen from PROC_NAME is returned;
+   a NULL PROC_NAME yields "xmm".  A PROC_NAME that is not in the table
+   below hits gcc_unreachable.  */
+
+char *
+ix86_cilkplus_find_isa_for_proc (char *proc_name, char *isa_name)
+{
+  static const struct
+  {
+    const char *name;
+    const char *isa;
+  } proc_isas[] =
+  {
+    { "pentium_4", "xmm" },
+    { "pentium_4_sse3", "xmm" },
+    { "core2_duo_sse3", "xmm" },
+    { "core_2_duo_sse_4_1", "xmm" },
+    { "core_i7_sse4_2", "xmm" },
+    { "core_2nd_gen_avx", "ymm1" },
+    { "core_3rd_gen_avx", "ymm1" },
+    { "core_4th_gen_avx", "ymm2" }
+  };
+  size_t idx;
+
+  if (isa_name != NULL)
+    return isa_name;
+
+  if (proc_name == NULL)
+    return xstrdup ("xmm");
+
+  for (idx = 0; idx < sizeof (proc_isas) / sizeof (proc_isas[0]); idx++)
+    if (strcmp (proc_name, proc_isas[idx].name) == 0)
+      return xstrdup (proc_isas[idx].isa);
+
+  gcc_unreachable ();
+
+  return NULL; /* We should never get here.  */
+}
+
+
+/* Returns the default vectorlength for the processor named PROC_NAME.
+   A NULL PROC_NAME yields 4.  A processor that is not in the table below
+   is not yet supported and yields 0.  */
+
+unsigned int
+ix86_builtin_find_vlength_for_proc (char *proc_name)
+{
+  static const struct
+  {
+    const char *name;
+    unsigned int vlength;
+  } proc_vlengths[] =
+  {
+    { "pentium_4", 4 },
+    { "pentium_4_sse3", 4 },
+    { "core2_duo_sse3", 4 },
+    { "core_2_duo_sse_4_1", 4 },
+    { "core_i7_sse4_2", 8 }
+  };
+  size_t idx;
+
+  if (proc_name == NULL)
+    return 4;
+
+  for (idx = 0; idx < sizeof (proc_vlengths) / sizeof (proc_vlengths[0]);
+       idx++)
+    if (strcmp (proc_name, proc_vlengths[idx].name) == 0)
+      return proc_vlengths[idx].vlength;
+
+  /* If we got here, then we have hit a processor that we do not yet
+     support.  */
+  return 0;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -42505,6 +42677,20 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
+#undef TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR
+#define TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR \
+ ix86_cilkplus_map_proc_to_attr
+
+#undef TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE
+#define TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE \
+ ix86_cilkplus_find_proc_code
+
+#undef TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE
+#define TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE ix86_cilkplus_find_isa_for_proc
+
+#undef TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC
+#define TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC \
+ ix86_builtin_find_vlength_for_proc
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index f98196434f8..1c8a8ed98b2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5706,6 +5706,30 @@ For vector memory operations the cost may depend on type (@var{vectype}) and
misalignment value (@var{misalign}).
@end deftypefn
+@deftypefn {Target Hook} tree TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR (char *@var{}, tree *@var{})
+This hook is called by a Cilk Plus routine that will be used to map the
+processor clause to the appropriate arch and tune attributes.
+@end deftypefn
+
+@deftypefn {Target Hook} {char *} TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE (char *@var{})
+This hook is called by a Cilk Plus routine that will request the
+ processor code for processor name given in the vector attribute for
+ the elemental functions.
+@end deftypefn
+
+@deftypefn {Target Hook} {char *} TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE (char *@var{}, char *@var{})
+This hook is called by a Cilk Plus routine that will request the
+ ISA type (based on the register-set where vector parameters are passed)
+ in elemental functions.
+@end deftypefn
+
+@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC (char *@var{})
+This hook is called by a Cilk Plus routine that will request the
+ default vectorlength for the processor specified in the processor clause
+ in the elemental functions.
+@end deftypefn
+
+
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed})
Return true if vector alignment is reachable (by peeling N iterations) for the given type.
@end deftypefn
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 7a93f21073c..a9286e67d51 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -5622,6 +5622,15 @@ For vector memory operations the cost may depend on type (@var{vectype}) and
misalignment value (@var{misalign}).
@end deftypefn
+@hook TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR
+
+@hook TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE
+
+@hook TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE
+
+@hook TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC
+
+
@hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
Return true if vector alignment is reachable (by peeling N iterations) for the given type.
@end deftypefn
diff --git a/gcc/elem-function-common.c b/gcc/elem-function-common.c
index 0207888795d..e570683a5ea 100644
--- a/gcc/elem-function-common.c
+++ b/gcc/elem-function-common.c
@@ -28,72 +28,48 @@
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tm.h"
#include "tree.h"
#include "langhooks.h"
-#include "cilk.h"
-#include "tm_p.h"
-#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
-#include "c-family/c-common.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
-#include "flags.h"
-#include "c/c-tree.h"
#include "tree-inline.h"
#include "cgraph.h"
#include "ipa-prop.h"
#include "opts.h"
#include "tree-iterator.h"
-#include "toplev.h"
-#include "options.h"
-#include "intl.h"
#include "vec.h"
#include "cilk.h"
-
-#define MAX_VARS 50
+#include "target.h"
enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree *);
bool is_elem_fn (tree);
tree find_elem_fn_name (tree old_fndecl, tree vectype_out, tree vectype_in);
elem_fn_info *extract_elem_fn_values (tree decl);
-/* This function will find the appropriate processor code in the function
- mangling vector function. */
+/* Uses the processor information stored in *PROC_NAME and returns the
+ appropriate processor-code string. */
char *
-find_processor_code (elem_fn_info *elem_fn_values)
+find_processor_code (char *proc_name)
{
- if (!elem_fn_values || !elem_fn_values->proc_type)
- return xstrdup ("B");
-
- if (!strcmp (elem_fn_values->proc_type, "pentium_4"))
- return xstrdup ("B");
- else if (!strcmp (elem_fn_values->proc_type, "pentium_4_sse3"))
- return xstrdup ("D");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse3"))
- return xstrdup ("E");
- else if (!strcmp (elem_fn_values->proc_type, "core_2_duo_sse_4_1"))
- return xstrdup ("F");
- else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2"))
- return xstrdup ("H");
- else
- gcc_unreachable ();
-
- return NULL; /* should never get here */
+ return targetm.cilkplus.builtin_find_processor_code (proc_name);
}
-/* This function will return vectorlength, if specified, in string format -OR-
- it will give the default vector length for the specified architecture. */
+/* Returns the vector length in string format based on the value in the field
+ called vectorlength of ELEM_FN_VALUES. If vectorlength is not given then
+ an appropriate value is computed based on the architecture information given
+ in proc_type field of ELEM_FN_VALUES. */
char *
find_vlength_code (elem_fn_info *elem_fn_values)
{
- char *vlength_code = (char *) xmalloc (sizeof (char) * 10);
+ int v_length = 0;
+ char *vlength_code = XNEWVEC (char, 10);
if (!elem_fn_values)
{
sprintf (vlength_code, "4");
@@ -103,29 +79,19 @@ find_vlength_code (elem_fn_info *elem_fn_values)
memset (vlength_code, 10, 0);
if (elem_fn_values->no_vlengths != 0)
- sprintf (vlength_code,"%d", elem_fn_values->vectorlength[0]);
+ sprintf (vlength_code,"%d", elem_fn_values->vectorlength);
else
{
- if (!elem_fn_values->proc_type)
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "pentium_4"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "pentium_4_sse3"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse3"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core_2_duo_sse_4_1"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2"))
- sprintf (vlength_code, "4");
- else
- gcc_unreachable ();
+ v_length = targetm.cilkplus.builtin_find_vlength_for_proc
+ (elem_fn_values->proc_type);
+ if (v_length > 0)
+ sprintf(vlength_code, "%d",v_length);
}
return vlength_code;
}
-/* This function will concatinate the suffix to the existing function decl. */
+/* This function will concatenate the SUFFIX to the function name in DECL. */
tree
rename_elem_fn (tree decl, const char *suffix)
@@ -141,7 +107,7 @@ rename_elem_fn (tree decl, const char *suffix)
new_decl = decl;
length = strlen (fn_name) + strlen (suffix) + 1;
- new_fn_name = (char *)xmalloc (length);
+ new_fn_name = XNEWVEC (char, length);
strcpy (new_fn_name, fn_name);
strcat (new_fn_name, suffix);
@@ -150,25 +116,33 @@ rename_elem_fn (tree decl, const char *suffix)
}
-/* This function will find the appropriate mangling suffix for the vector
- function. */
+/* Returns the appropriate mangling suffix for the vector function based on the
+ information in ELEM_FN_VALUES field. The user can specify whether they want
+ a masked or unmasked function by setting the MASKED field to true or false,
+ respectively. */
char *
find_suffix (elem_fn_info *elem_fn_values, bool masked)
{
- char *suffix = (char*)xmalloc (100);
+ char *suffix = XNEWVEC (char, 100);
char tmp_str[10];
int arg_number, ii_pvar, ii_uvar, ii_lvar;
- strcpy (suffix, "._simdsimd_");
- strcat (suffix, find_processor_code (elem_fn_values));
- strcat (suffix, find_vlength_code (elem_fn_values));
+ if (!elem_fn_values)
+ return NULL;
+
+ strcpy (suffix, "._simdsimd");
+ strcat (suffix,
+ targetm.cilkplus.builtin_find_isa_code (elem_fn_values->proc_type,
+ elem_fn_values->isa_type));
+ strcat (suffix, "_");
if (masked)
strcat (suffix, "m");
else
strcat (suffix, "n");
+ strcat (suffix, find_vlength_code (elem_fn_values));
- for (arg_number = 1; arg_number <= elem_fn_values->total_no_args;
+ for (arg_number = 0; arg_number <= elem_fn_values->total_no_args;
arg_number++)
{
for (ii_lvar = 0; ii_lvar < elem_fn_values->no_lvars; ii_lvar++)
@@ -180,23 +154,22 @@ find_suffix (elem_fn_info *elem_fn_values, bool masked)
strcat (suffix, tmp_str);
}
}
- for (ii_uvar = 0; ii_uvar < elem_fn_values->no_uvars; ii_uvar++)
- {
- if (elem_fn_values->uniform_location[ii_uvar] == arg_number)
- strcat (suffix, "_s1");
- }
- for (ii_pvar = 0; ii_pvar < elem_fn_values->no_pvars; ii_pvar++)
- {
- if (elem_fn_values->private_location[ii_pvar] == arg_number)
- strcat (suffix, "_v1");
- }
+ for (ii_uvar = 0; ii_uvar < elem_fn_values->no_uvars; ii_uvar++)
+ if (elem_fn_values->uniform_location[ii_uvar] == arg_number)
+ strcat (suffix, "_s1");
+ for (ii_pvar = 0; ii_pvar < elem_fn_values->no_pvars; ii_pvar++)
+ if (elem_fn_values->private_location[ii_pvar] == arg_number)
+ strcat (suffix, "_v1");
}
return suffix;
}
-/* This is an helper function for find_elem_fn_param_type. */
-
+/* This is a helper function for find_elem_fn_parm_type. It returns the
+ parm_type (whether TYPE_LINEAR or TYPE_UNIFORM) for the parameter number
+ (set by PARM_NO). If the return value is TYPE_LINEAR, then the *STEP_SIZE
+ is set with the appropriate step-size. */
+
static enum elem_fn_parm_type
find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size)
{
@@ -224,8 +197,9 @@ find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size)
}
-/* This function will return the type of a parameter in elemental function.
- The choices are UNIFORM or LINEAR. */
+/* Returns the parm_type (whether TYPE_LINEAR or TYPE_UNIFORM) for the
+ parameter (indicated by OP). If the return value is TYPE_LINEAR, then
+ the *STEP_SIZE is set with the appropriate step-size. */
enum elem_fn_parm_type
find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size)
@@ -254,7 +228,9 @@ find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size)
return return_type;
}
-/* This function will return the appropriate cloned named for the function. */
+/* This function will return the appropriate clone, for the vectorlength
+ set by VECTYPE_OUT, of the function whose scalar declaration is given
+ by OLD_FNDECL. */
tree
find_elem_fn_name (tree old_fndecl, tree vectype_out,
@@ -263,6 +239,7 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out,
elem_fn_info *elem_fn_values = NULL;
tree new_fndecl = NULL_TREE, arg_type = NULL_TREE;
char *suffix = NULL;
+ char warning_string[90];
elem_fn_values = extract_elem_fn_values (old_fndecl);
@@ -270,11 +247,20 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out,
{
if (elem_fn_values->no_vlengths > 0)
{
- if (elem_fn_values->vectorlength[0] ==
- (int)TYPE_VECTOR_SUBPARTS (vectype_out))
+ if (elem_fn_values->vectorlength ==
+ (int) TYPE_VECTOR_SUBPARTS (vectype_out))
suffix = find_suffix (elem_fn_values, false);
else
- return NULL_TREE;
+ {
+ /* Bound the write: the fixed text plus two "%d" expansions can exceed
+ the size of WARNING_STRING. snprintf always NUL-terminates, so no
+ separate clearing of the buffer is needed. */
+ snprintf (warning_string, sizeof (warning_string),
+ "Elemental function's vectorlength (%d) "
+ "does not match the loop's vectorlength (%d)",
+ elem_fn_values->vectorlength,
+ (int) TYPE_VECTOR_SUBPARTS (vectype_out));
+ /* Pass the pre-formatted text as an argument, never as the format. */
+ warning_at (EXPR_LOCATION (old_fndecl), 0, "%s", warning_string);
+ return NULL_TREE;
+ }
}
else
return NULL_TREE;
@@ -304,14 +290,14 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out,
return new_fndecl;
}
-/* This function will extract the elem. function values from a vector and store
- it in a data structure and return that. */
+/* Extracts all the elemental function's relevant information from the attribute
+ of DECL. The extracted information is returned in a structure of type
+ ELEM_FN_INFO. */
elem_fn_info *
extract_elem_fn_values (tree decl)
{
elem_fn_info *elem_fn_values = NULL;
- int x = 0; /* this is a dummy variable */
int arg_number = 0, ii = 0;
tree ii_tree, jj_tree, kk_tree;
tree decl_attr = DECL_ATTRIBUTES (decl);
@@ -319,7 +305,7 @@ extract_elem_fn_values (tree decl)
if (!decl_attr)
return NULL;
- elem_fn_values = (elem_fn_info *)xmalloc (sizeof (elem_fn_info));
+ elem_fn_values = XNEWVEC (elem_fn_info, 1);
gcc_assert (elem_fn_values);
decl_ret_type = TREE_TYPE (decl);
@@ -327,6 +313,7 @@ extract_elem_fn_values (tree decl)
decl_ret_type = TREE_TYPE (decl_ret_type);
elem_fn_values->proc_type = NULL;
+ elem_fn_values->isa_type = NULL;
elem_fn_values->mask = USE_BOTH;
elem_fn_values->no_vlengths = 0;
elem_fn_values->no_uvars = 0;
@@ -337,13 +324,13 @@ extract_elem_fn_values (tree decl)
switch (compare_tree_int (TYPE_SIZE (decl_ret_type), 64))
{
case 0: /* This means they are equal. */
- elem_fn_values->vectorlength[0] = 2;
+ elem_fn_values->vectorlength = 2;
break;
case -1: /* This means it is less than 64. */
- elem_fn_values->vectorlength[0] = 4;
+ elem_fn_values->vectorlength = 4;
break;
default:
- elem_fn_values->vectorlength[0] = 1;
+ elem_fn_values->vectorlength = 1;
}
@@ -393,8 +380,7 @@ extract_elem_fn_values (tree decl)
tree kk_value = TREE_VALUE (kk_tree);
if (TREE_CODE (kk_value) == INTEGER_CST)
{
- x = elem_fn_values->no_vlengths;
- elem_fn_values->vectorlength[x] =
+ elem_fn_values->vectorlength =
(int) TREE_INT_CST_LOW (kk_value);
elem_fn_values->no_vlengths++;
}
@@ -432,8 +418,7 @@ extract_elem_fn_values (tree decl)
}
}
- for (ii_tree = DECL_ARGUMENTS (decl); ii_tree;
- ii_tree = DECL_CHAIN (ii_tree))
+ for (ii_tree = DECL_ARGUMENTS (decl); ii_tree; ii_tree = DECL_CHAIN (ii_tree))
{
bool already_found = false;
for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
@@ -453,9 +438,12 @@ extract_elem_fn_values (tree decl)
elem_fn_values->linear_vars[ii]))
{
if (already_found)
- fatal_error
- ("variable %s defined in both uniform and linear clause",
- elem_fn_values->linear_vars[ii]);
+ {
+ error_at (EXPR_LOCATION (ii_tree),
+ "variable %s defined in both uniform and linear "
+ "clause", elem_fn_values->linear_vars[ii]);
+ return NULL;
+ }
else
{
already_found = true;
@@ -463,7 +451,7 @@ extract_elem_fn_values (tree decl)
}
}
}
- if (!already_found) /* this means this variable is a private */
+ if (!already_found) /* This means this variable is a private. */
elem_fn_values->private_location[elem_fn_values->no_pvars++] =
arg_number;
arg_number++;
@@ -476,8 +464,8 @@ extract_elem_fn_values (tree decl)
return elem_fn_values;
}
-/* This function will check to see if the node is part of an function that
- needs to be converted to its vector equivalent. */
+/* Returns true if function FNDECL has an attribute with "vector" in it, thus
+ marking it as an elemental function. */
bool
is_elem_fn (tree fndecl)
@@ -492,6 +480,6 @@ is_elem_fn (tree fndecl)
&& !strcmp (IDENTIFIER_POINTER (ii_value), "vector"))
return true;
}
- /* If we are here, then we didn't find a vector keyword, so it is false. */
+ /* If we are here then we didn't find a vector keyword, so it is false. */
return false;
}
diff --git a/gcc/target.def b/gcc/target.def
index 0f3164a41bd..b521c9478cd 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -941,6 +941,46 @@ hook_int_uint_mode_1)
HOOK_VECTOR_END (sched)
+/* Functions relating to Cilk Plus. */
+#undef HOOK_PREFIX
+#define HOOK_PREFIX "TARGET_CILKPLUS_"
+HOOK_VECTOR (TARGET_CILKPLUS, cilkplus)
+/* The following member value is a function that is used by Cilk Plus routines
+ to map the processor attribute to the appropriate arch and tune attributes.
+ By default, NULL_TREE is returned. */
+DEFHOOK
+(builtin_map_processor_to_attr,
+"This hook is called by a Cilk Plus routine that will be used to map the\n\
+processor clause to the appropriate arch and tune attributes.",
+tree, (char *, tree *),
+default_builtin_map_processor_to_attr)
+
+DEFHOOK
+(builtin_find_processor_code,
+ "This hook is called by a Cilk Plus routine that will request the\n\
+ processor code for processor name given in the vector attribute for\n\
+ the elemental functions.",
+char *, (char *),
+default_builtin_find_processor_code)
+
+DEFHOOK
+(builtin_find_vlength_for_proc,
+ "This hook is called by a Cilk Plus routine that will request the\n\
+ default vectorlength for the processor specified in the processor clause\n\
+ in the elemental functions.",
+unsigned int, (char *),
+default_builtin_find_vlength_for_proc)
+
+DEFHOOK
+(builtin_find_isa_code,
+ "This hook is called by a Cilk Plus routine that will request the \n\
+ ISA type (based on the register-set where vector parameters are passed).\n\
+ in elemental functions.",
+char *, (char *, char *),
+default_builtin_find_isa_code)
+
+HOOK_VECTOR_END (cilkplus)
+
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_VECTORIZE_"
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index be008fdcd5d..a3ee063cae1 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1540,4 +1540,37 @@ default_member_type_forces_blk (const_tree, enum machine_mode)
return false;
}
+/* Default version of the builtin_map_processor_to_attr hook. It performs no
+ mapping: *OPP_ATTR is set to NULL_TREE and NULL_TREE is returned. */
+
+tree
+default_builtin_map_processor_to_attr (char *, tree *opp_attr)
+{
+ *opp_attr = NULL_TREE;
+ return NULL_TREE;
+}
+
+/* Default version of the builtin_find_processor_code hook. It ignores its
+ argument and returns NULL. */
+
+char *
+default_builtin_find_processor_code (char *)
+{
+ return NULL;
+}
+
+/* Default version of the builtin_find_vlength_for_proc hook. It ignores its
+ argument and returns 0. The return type is unsigned int so that it
+ matches the type given for this hook in target.def. */
+
+unsigned int
+default_builtin_find_vlength_for_proc (char *)
+{
+ return 0;
+}
+
+/* Default version of the builtin_find_isa_code hook. It ignores both
+ arguments and returns NULL. */
+
+char *
+default_builtin_find_isa_code (char *, char *)
+{
+ return NULL;
+}
+
#include "gt-targhooks.h"
diff --git a/gcc/testsuite/ChangeLog.cilkplus b/gcc/testsuite/ChangeLog.cilkplus
index aacf16c907f..b38d6685681 100644
--- a/gcc/testsuite/ChangeLog.cilkplus
+++ b/gcc/testsuite/ChangeLog.cilkplus
@@ -1,3 +1,56 @@
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * gcc.dg/cilk-plus/elem_fn_tests/test1.c: Remove.
+ * gcc.dg/cilk-plus/elem_fn_tests/test2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test3.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test4.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test5.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test6.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test7.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test8.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test9.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test10.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test11.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test12.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test13.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test14.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test15.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test16.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test17.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test18.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c: New test.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c: Likewise.
+
2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com>
* gcc.dg/cilk-plus/array_notation_tests/execute/builtin_fn_mutating.c:
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c
new file mode 100644
index 00000000000..a33ea3bd6ee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c
@@ -0,0 +1,30 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will insert the clone for the function ef_add inside the function
+ * main (the non-masked version).
+ */
+
+#include <stdlib.h>
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (pentium_4), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y);
+
+My_Type vhx2[10];
+/* Test driver. When argc == 1 it fills vhx with argc, then calls ef_add on
+ every element of vhx and stores the results in vhx2. It aborts if any
+ result differs from argc + argc. Note that vhx is left uninitialized
+ when argc != 1. */
+int
+main (int argc, char **argv)
+{
+ My_Type vhx[10];
+ int ii = 9;
+
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ abort ();
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test10.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c
index 597333f6e12..477369e8fe5 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test10.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c
@@ -1,7 +1,8 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 2 clones of the function below,
* for the pentium4 with sse3 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), processor (pentium_4_sse3), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test11.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c
index 6fcc0612789..197064beae0 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test11.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c
@@ -1,7 +1,8 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clones of the function below, just mask
* for the pentium4 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), mask, processor (pentium_4_sse3), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test12.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c
index 1426f53adcb..1c78356498a 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test12.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c
@@ -1,7 +1,8 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clones of the function below, just no mask
* for the pentium4 with sse3 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4_sse3), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test7.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c
index adbe8738820..6720a8c6f36 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test7.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c
@@ -1,7 +1,8 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 1 clone of the function below, just one for mask
* for the pentium4 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), mask, processor (pentium_4), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test8.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c
index ff26046e6f0..7f402208750 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test8.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c
@@ -1,7 +1,8 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 1 clone of the function below, just one for nomask
* for the pentium4 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test9.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c
index 8a78f30c8a2..0a5caf88d69 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test9.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c
@@ -1,7 +1,8 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 2 clones of the function below,
* for the pentium4 processor.
*/
-#ifdef __x86__
#define My_Type float
__attribute__ ((vector(vectorlength(4), processor (pentium_4), linear(y), uniform (x))))
My_Type ef_add (My_Type x, My_Type y)
@@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y)
{
return x + y;
}
-#endif
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c
index 4e3a914fac3..8df372a000f 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c
@@ -1,3 +1,4 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 2 clones of the function below, one for mask and one
without the mask
*/
@@ -12,4 +13,3 @@ My_Type ef_add (My_Type x, My_Type y)
else
return (x-y);
}
-
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c
index 2b6cb4c293f..30e710ff9ab 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 2 clones of the function below, one for mask and one
without the mask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c
index 8b745a81251..0d9db6f7de3 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c
@@ -1,3 +1,4 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 2 clones of the function below, one for mask and one
without the mask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test1.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c
index 8b3649af210..576821aa386 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test1.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c
@@ -1,7 +1,10 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will insert the clone for the function ef_add inside the function
* main (the non-masked version).
*/
+#include <stdlib.h>
#define My_Type float
__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y);
@@ -12,9 +15,16 @@ main (int argc, char **argv)
My_Type vhx[10];
int ii = 9;
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
for (ii = 0; ii < 10; ii++)
vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test13.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c
index 887a4fc2436..18c884421c4 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test13.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c
@@ -1,3 +1,4 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 2 clones of the function below,
* for the pentium4 with sse3 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test14.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c
index 58fb99ff7a1..f154d49fdb3 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test14.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c
@@ -1,3 +1,5 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 1 clones of the function below, just the mask
* for the pentium4 with sse3 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test15.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c
index f9206c1a37d..8a458fcf2e8 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test15.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c
@@ -1,3 +1,5 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 1 clones of the function below, just the mask
* for the pentium4 with sse3 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test16.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c
index 1e83ad06ef7..fcfc5c00eb1 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test16.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 2 clones of the function below,
* for the pentium4 with sse3 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test17.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c
index eeeef73e434..847c21981e4 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test17.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 2 clones of the function below,
* for the core2_duo with sse 4.1 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test18.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c
index 07fdfbd85a4..daa15468ae4 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test18.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clones of the function below, just mask
* for the core2_duo with sse 4.1 processor.
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test2.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c
index b135c7e9af8..a515c534963 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test2.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c
@@ -1,3 +1,5 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 2 clones of the function below, one for mask and one
without the mask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test3.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c
index 3b4a6c3f827..d9e3fe1379b 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test3.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c
@@ -1,3 +1,5 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
/* This test will create 1 clone of the function below, just one for mask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test4.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c
index 41027e6c293..9aa36ccba49 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test4.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clone of the function below, just one for nomask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test5.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c
index 32788a59004..6688980c84a 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test5.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clone of the function below, just one for nomask and
do a linear for y variable
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test6.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c
index 329ec7d4164..4ec317fcacd 100644
--- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test6.c
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c
@@ -1,3 +1,5 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
/* This test will create 1 clone of the function below, just one for mask
*/
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp
new file mode 100644
index 00000000000..67cb0aa1968
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp
@@ -0,0 +1,53 @@
+# Copyright (C) 2012
+# Free Software Foundation, Inc.
+
+# Contributed by Balaji V. Iyer <balaji.v.iyer@intel.com>
+# Intel Corporation.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+
+# Exit immediately if this isn't a x86 target.
+if { ![istarget i?86*-*-*] && ![istarget x86_64-*-*] } then {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg' before any dg-runtest call; it pairs with the dg-finish
+# call made for the selected target below.
+dg-init
+
+# Collect the test sources of each sub-directory in sorted order.
+set tests_32bit [lsort [glob -nocomplain $srcdir/$subdir/32bit/*.\[cS\]]]
+set tests_64bit [lsort [glob -nocomplain $srcdir/$subdir/64bit/*.\[cS\]]]
+set test_errors [lsort [glob -nocomplain $srcdir/$subdir/errors/*.\[cS\]]]
+
+if { [istarget i?86*-*-*] } then {
+
+# Main loop.
+dg-runtest $tests_32bit " -O3 -ftree-vectorize -fcilkplus" " "
+# The error tests are disabled for this target.
+#dg-runtest $test_errors " -O3 -ftree-vectorize -fcilkplus" " "
+# All done.
+dg-finish
+}
+
+# For 64 bit architectures, we can run both 32 bit and 64 bit tests.
+if { [istarget x86_64-*-*] } then {
+
+# Main loop.
+dg-runtest $tests_32bit "-O3 -ftree-vectorize -fcilkplus -m32" " "
+dg-runtest $tests_64bit "-O3 -ftree-vectorize -fcilkplus" " "
+dg-runtest $test_errors "-O3 -ftree-vectorize -fcilkplus" " "
+
+# All done.
+dg-finish
+}
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c
new file mode 100644
index 00000000000..538a61badca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+__attribute__ ((vector(vectorlength(2), linear (x:2), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y);
+
+int vhx2[10];
+/* Driver body for the duplicate uniform/linear clause test. When argc == 1
+ it fills vhx with argc, then calls ef_add on every element and returns 1
+ if any result differs from argc + argc, otherwise 0. vhx is left
+ uninitialized when argc != 1. */
+int
+main (int argc, char **argv)
+{
+ int vhx[10];
+ int ii = 9;
+
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ return 1;
+ return 0;
+}
+/* { dg-error "variable x defined in both uniform and linear clause" "" { target *-*-*} 0 } */
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c
new file mode 100644
index 00000000000..37b2ec34cc1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+
+__attribute__ ((vector(vectorlength(2), linear (x:2.5), processor (pentium_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */
+
+__attribute__ ((vector(vectorlength(2), linear (x:), processor (pentium_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */
+
+__attribute__ ((vector(vectorlength(2), linear (y, x:2.5), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */
+
+__attribute__ ((vector(vectorlength(2), linear (y, x:), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */
+
+__attribute__ ((vector(vectorlength(2), linear (x:2.5,y), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */
+
+__attribute__ ((vector(vectorlength(2), linear (x:, y), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */
+
+int vhx2[10];
+/* Driver body for the linear clause error test. When argc == 1 it fills
+ vhx with argc, then calls ef_add on every element and returns 1 if any
+ result differs from argc + argc, otherwise 0. vhx is left uninitialized
+ when argc != 1. */
+int
+main (int argc, char **argv)
+{
+ int vhx[10];
+ int ii = 9;
+
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ return 1;
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c
new file mode 100644
index 00000000000..f7187a5a84b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+__attribute__ ((vector(vectorlength(2), processor (pentixhaum_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "processor pentixhaum_4 not" } */
+__attribute__ ((vector(vectorlength(2), processor (, uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected processor-name before" } */
+__attribute__ ((vector(vectorlength(2), processor (), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */
+__attribute__ ((vector(vectorlength(2), processor ), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before '\\)' token" } */
+__attribute__ ((vector(vectorlength(2), uniform (x), processor (pentixhaum_4)))) int ef_add (int x, int y); /* { dg-error "processor pentixhaum_4 not" } */
+__attribute__ ((vector(vectorlength(2), processor ( ))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */
+__attribute__ ((vector(vectorlength(2), processor () ))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */
+__attribute__ ((vector(vectorlength(2), processor )))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before '\\)' token" } */
+int vhx2[10];
+/* Driver body for the processor clause error test. When argc == 1 it fills
+ vhx with argc, then calls ef_add on every element and returns 1 if any
+ result differs from argc + argc, otherwise 0. vhx is left uninitialized
+ when argc != 1. */
+int
+main (int argc, char **argv)
+{
+ int vhx[10];
+ int ii = 9;
+
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ return 1;
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c
new file mode 100644
index 00000000000..083c4ab681c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+
+__attribute__ ((vector(vectorlength(2), processor (pentium_4), uniform (,y)))) int ef_add (int x, int y); /* { dg-error "expected variable-name before" } */
+
+__attribute__ ((vector(vectorlength(2), linear (x), processor (pentium_4), uniform (y,)))) int ef_add (int x, int y); /* { dg-error "expected identifier after" } */
+
+__attribute__ ((vector(vectorlength(2), uniform (,y), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected variable-name before" } */
+
+__attribute__ ((vector(vectorlength(2), linear (x), uniform (y,), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected identifier after" } */
+
+int vhx2[10];
+/* Driver body for the uniform clause error test. When argc == 1 it fills
+ vhx with argc, then calls ef_add on every element and returns 1 if any
+ result differs from argc + argc, otherwise 0. vhx is left uninitialized
+ when argc != 1. */
+int
+main (int argc, char **argv)
+{
+ int vhx[10];
+ int ii = 9;
+
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ return 1;
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c
new file mode 100644
index 00000000000..9e0b3998d9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+
+ __attribute__ ((vector(vectorlength(2, processor (pentium_4), uniform (x,y))))) int ef_add (int x, int y); /* { dg-error "after vectorlength before" } */
+
+__attribute__ ((vector(vectorlength 2, processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before numeric constant" } */
+
+__attribute__ ((vector(vectorlength(5), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */
+
+__attribute__ ((vector(vectorlength(6), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */
+
+__attribute__ ((vector(vectorlength(7), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */
+
+__attribute__ ((vector(uniform (x,y)))) int ef_add (int x, int y); /* This is OK: no error is expected here. */
+
+__attribute__ ((vector(vectorlength(2.0005)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be an integer." } */
+
+__attribute__ ((vector(vectorlength(5), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */
+
+__attribute__ ((vector(vectorlength(1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */
+
+__attribute__ ((vector(vectorlength(16), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */
+
+__attribute__ ((vector(vectorlength(32), processor (pentium_4), linear (x:1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */
+
+__attribute__ ((vector(vectorlength(32), processor (pentium_4), uniform (y), linear (x:1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */
+
+int vhx2[10];
+int
+main (int argc, char **argv)
+{
+ int vhx[10];
+ int ii = 9;
+ /* Fill vhx with argc; note vhx stays uninitialized when argc != 1. */
+ if (argc == 1)
+ for (ii = 0; ii < 10; ii++)
+ vhx[ii] = argc;
+ /* Call ef_add on each element paired with itself; store into vhx2. */
+ for (ii = 0; ii < 10; ii++)
+ vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+ /* Return 1 if any result differs from argc + argc, otherwise 0. */
+ for (ii = 0; ii < 10; ii++)
+ if (vhx2[ii] != (argc + argc))
+ return 1;
+ return 0;
+}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 4e8e9a0b951..84e4ab2861a 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -903,26 +903,6 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
return NULL_TREE;
}
-/* Remap DECL if it is defined. This is used in Cilk++. */
-static bool
-remap_var_for_cilk (tree *tp, copy_body_data *id)
-{
- tree decl = *tp;
- tree *n;
-
- if (!DECL_P (decl))
- return false;
-
- n = (tree *) pointer_map_contains (id->decl_map, decl);
-
-
- if (n == NULL)
- return false;
-
- *tp = (*n);
- return true;
-}
-
/* Called from copy_body_id via walk_tree. DATA is really a
`copy_body_data *'. */
@@ -987,8 +967,6 @@ copy_tree_body_r (tree *tp, int *walk_subtrees, void *data)
*tp = new_decl;
*walk_subtrees = 0;
}
- else if (id->remap_var_for_cilk && remap_var_for_cilk (tp, id))
- *walk_subtrees = 0;
else if (TREE_CODE (*tp) == STATEMENT_LIST)
copy_statement_list (tp);
else if (TREE_CODE (*tp) == SAVE_EXPR
@@ -4912,12 +4890,10 @@ copy_decl_no_change (tree decl, copy_body_data *id)
{
TREE_ADDRESSABLE (copy) = 0;
LABEL_DECL_UID (copy) = -1;
- if (TREE_CODE(decl) == LABEL_DECL)
- {
- PRAGMA_SIMD_INDEX (copy) = PRAGMA_SIMD_INDEX (decl);
- }
+ if (flag_enable_cilk && TREE_CODE (decl) == LABEL_DECL)
+ PRAGMA_SIMD_INDEX (copy) = PRAGMA_SIMD_INDEX (decl);
else
- PRAGMA_SIMD_INDEX (copy) = 0;
+ PRAGMA_SIMD_INDEX (copy) = 0;
}
return copy_decl_for_dup_finish (id, decl, copy);
@@ -4967,7 +4943,9 @@ copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
return new_parm;
}
-/* Return a copy of the function's argument tree. */
+/* Return a copy of the function's argument tree in which each argument is
+ vectorized as per VLENGTH. Also add a mask variable if MASKED is true. */
+
static tree
elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
bitmap args_to_skip, tree *vars,
@@ -4995,7 +4973,7 @@ elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
tree new_tree = remap_decl (arg, id);
if (TREE_CODE (new_tree) != PARM_DECL)
new_tree = id->copy_decl (arg, id);
- TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree));
+ TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree));
TREE_TYPE (new_tree) = build_vector_type (TREE_TYPE (new_tree),
vlength);
DECL_ARG_TYPE (new_tree) = build_vector_type (DECL_ARG_TYPE (new_tree),
@@ -5420,12 +5398,13 @@ tree_function_versioning (tree old_decl, tree new_decl,
return;
}
+/* This function initializes the cfun struct for elemental functions. */
+
static void
initialize_elem_fn_cfun (tree new_fndecl, tree callee_fndecl)
{
struct function *src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);
- gimple_register_cfg_hooks ();
/* Get clean struct function. */
push_struct_function (new_fndecl);
@@ -5463,9 +5442,11 @@ initialize_elem_fn_cfun (tree new_fndecl, tree callee_fndecl)
pop_cfun ();
}
+/* Elemental function's version of tree_function_versioning. */
+
void
tree_elem_fn_versioning (tree old_decl, tree new_decl,
- vec<ipa_replace_map_p, va_gc> * tree_map,
+ vec<ipa_replace_map_p, va_gc> *tree_map,
bool update_clones, bitmap args_to_skip,
bool skip_return,
bitmap blocks_to_copy ATTRIBUTE_UNUSED,
@@ -5476,9 +5457,9 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl,
tree p;
unsigned i;
struct ipa_replace_map *replace_info;
- vec<gimple> init_stmts;
- init_stmts.create(10);
+ vec<gimple> init_stmts;
+ init_stmts.create (10);
tree old_current_function_decl = current_function_decl;
tree vars = NULL_TREE;
@@ -5527,8 +5508,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl,
id.transform_new_cfg = true;
id.transform_return_to_modify = false;
id.transform_lang_insert_block = NULL;
-
- current_function_decl = new_decl;
initialize_elem_fn_cfun (new_decl, old_decl);
push_cfun (DECL_STRUCT_FUNCTION (new_decl));
@@ -5564,17 +5543,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl,
if (TREE_CODE (op) == VIEW_CONVERT_EXPR)
op = TREE_OPERAND (op, 0);
-
-#if 0
- if (TREE_CODE (op) == ADDR_EXPR)
- {
- op = TREE_OPERAND (op, 0);
- while (handled_component_p (op))
- op = TREE_OPERAND (op, 0);
- if (TREE_CODE (op) == VAR_DECL)
- add_referenced_var (op);
- }
-#endif
gcc_assert (TREE_CODE (replace_info->old_tree) == PARM_DECL);
init = setup_one_parameter (&id, replace_info->old_tree,
@@ -5663,8 +5631,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl,
if (id.debug_map)
pointer_map_destroy (id.debug_map);
- /* gcc_assert (!id.debug_stmts); */
- /* VEC_free (gimple, heap, init_stmts); */
pop_cfun ();
current_function_decl = old_current_function_decl;
gcc_assert (!current_function_decl
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 0a0a470d9f4..ba10cb004cf 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1561,7 +1561,8 @@ elem_fn_vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def,
{
parm_type = find_elem_fn_parm_type (stmt, op, &step_size);
if (parm_type == TYPE_UNIFORM || parm_type == TYPE_LINEAR)
- dt = vect_external_def;
+ /* If Linear or Uniform type, just return the scalar version. */
+ return op;
}
else
parm_type = TYPE_NONE;
@@ -2130,6 +2131,8 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
+ if (flag_enable_cilk && is_elem_fn (fndecl))
+ gimple_call_set_fntype (new_stmt, TREE_TYPE (fndecl));
vect_finish_stmt_generation (stmt, new_stmt, gsi);
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
@@ -2165,8 +2168,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
{
enum elem_fn_parm_type parm_type =
find_elem_fn_parm_type (stmt, op, &step_size);
- if (parm_type == TYPE_UNIFORM
- || parm_type == TYPE_LINEAR)
+ if (parm_type == TYPE_UNIFORM || parm_type == TYPE_LINEAR)
dt[i] = vect_constant_def;
}
vec_oprnd0
diff --git a/gcc/tree.c b/gcc/tree.c
index 0c1e1e63c69..75a902d3a68 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -11663,6 +11663,7 @@ build_call_list (tree return_type, tree fn, tree arglist)
/* Build a vector of type VECTYPE where all the elements are SCs. */
+
tree
build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size)
{
@@ -11684,7 +11685,6 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size)
{
tree *v = XALLOCAVEC (tree, nunits);
for (i = 0; i < nunits; ++i)
- // v[i] = sc;
v[i] = build2 (PLUS_EXPR, TREE_TYPE (sc), sc,
fold_build2 (MULT_EXPR, TREE_TYPE (step_size), step_size,
build_int_cst (integer_type_node, i)));
@@ -11696,7 +11696,6 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size)
vec_alloc (v, nunits);
for (i = 0; i < nunits; ++i)
{
- // CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, sc);
tree tmp = NULL_TREE;
tmp = build2 (PLUS_EXPR, TREE_TYPE (sc), sc,
fold_build2 (MULT_EXPR, TREE_TYPE (step_size),
@@ -11709,6 +11708,4 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size)
}
-
-
#include "gt-tree.h"
diff --git a/gcc/tree.h b/gcc/tree.h
index 0b99d05afdf..bd5a9a5d2ce 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3716,13 +3716,6 @@ struct GTY(()) tree_optimization_option {
};
-enum elem_fn_parm_type
-{
- TYPE_NONE = 0,
- TYPE_UNIFORM = 1,
- TYPE_LINEAR = 2
-};
-
#define TREE_OPTIMIZATION(NODE) \
(&OPTIMIZATION_NODE_CHECK (NODE)->optimization.opts)
@@ -4873,6 +4866,7 @@ extern tree build_vector_stat (tree, tree * MEM_STAT_DECL);
#define build_vector(t,v) build_vector_stat (t, v MEM_STAT_INFO)
extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *);
extern tree build_vector_from_val (tree, tree);
+extern tree build_elem_fn_linear_vector_from_val (tree, tree, tree);
extern tree build_constructor (tree, vec<constructor_elt, va_gc> *);
extern tree build_constructor_single (tree, tree, tree);
extern tree build_constructor_from_list (tree, tree);
@@ -6620,7 +6614,6 @@ extern HOST_WIDE_INT find_linear_step_size (int pragma_simd_index, tree var);
tree build_call_list (tree return_type, tree fn, tree arglist);
bool is_elem_fn (tree);
-enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree*);
void elem_fn_create_fn (tree) __attribute__((weak));
/* Functional interface to the builtin functions. */