diff options
author | Balaji V. Iyer <balaji.v.iyer@intel.com> | 2012-12-10 19:39:34 +0000 |
---|---|---|
committer | Balaji V. Iyer <balaji.v.iyer@intel.com> | 2012-12-10 19:39:34 +0000 |
commit | aa72dc62d3401a8128645bc2bc99ea39f37a9b21 (patch) | |
tree | 0b5ece678cf98ed2555cda14d9cd3dfe17371911 | |
parent | faa2abc0b28c7f98aec82c34ac1363820db8ab2a (diff) |
Added several Elemental function changes for C (from patch to branch).
gcc/c-family/ChangeLog.cilkplus
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * c-cpp-elem-function.c: New file.
+
gcc/testsuite/ChangeLog.cilkplus
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * gcc.dg/cilk-plus/elem_fn_tests/test1.c: Remove.
+ * gcc.dg/cilk-plus/elem_fn_tests/test2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test3.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test4.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test5.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test6.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test7.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test8.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test9.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test10.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test11.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test12.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test13.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test14.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test15.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test16.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test17.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/test18.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c: New test.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c: Likewise.
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c: Likewise.
+
gcc/ChangeLog.cilkplus
+2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * doc/tm.texi (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added
+ documentation for this hook.
+ (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_CODE): Likewise.
+ * doc/tm.texi.in (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added
+ hook.
+ (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise.
+ (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC): Likewise.
+ * targhooks.c (default_builtin_map_processor_to_attr): New function.
+ (default_builtin_find_processor_code): Likewise.
+ (default_builtin_find_vlength_code): Likewise.
+ (default_builtin_find_isa_code): Likewise.
+ * c/c-parser.c (c_parser_elem_fn_processor_clause): Fixed syntax issue
+ in error message. Added several checks and error reporting for invalid
+ values.
+ (c_parser_elem_fn_uniform_clause): Likewise.
+ (c_parser_elem_fn_linear_clause): Likewise.
+ (c_parser_elem_fn_vlength_clause): Likewise.
+ * c/c-decl.c (c_builtin_function_ext_scope): Added a check if external
+ scope is non-null.
+ * target.def (TARGET_CILKPLUS): Added this hook and several definitions
+ such as builtin_map_processor_to_attr, builtin_find_processor_code,
+ builtin_find_vlength_for_proc, builtin_find_isa_code.
+ * tree.h (enum elem_fn_parm_type): Moved this to c-common.
+ * cilk.h (elem_fn_info): Added some extra fields into struct.
+ * elem-function-common.c (find_processor_code): Called the target
+ dependent version. Also made several changes to satisfy the new
+ elemental function mangling format. Finally, fixed up header comments.
+ (find_suffix): Likewise.
+ (extract_elem_fn_values): Likewise.
+ * tree-vect-stmts.c (elem_fn_vect_get_vec_def_for_operand): Return
+ scalar version for the uniform or linear parameters.
+ (vectorizable_call): Set the function type correctly.
+ * tree-inline.c (remap_var_for_cilk): Remove.
+ (tree_elem_fn_versioning): Fixed up the function header comment and
+ reflected the changes from tree_function_versioning in this function.
+ (copy_tree_body_r): Removed a call for remap_var_for_cilk.
+ (elem_fn_copy_arguments_for_versioning): Fixed header comments.
+ * Makefile.in (C_COMMON_OBJS): Added c-family/c-cpp-elem-function.o.
+ * config/i386/i386.c (type_natural_mode): Added a check for cilk flag
+ before emitting a note.
+ (ix86_frame_pointer_required): Added a flag_enable_cilk check.
+ (ix86_cilkplus_map_proc_to_attr): New function.
+ (ix86_cilkplus_find_proc_code): Likewise.
+ (ix86_cilkplus_find_isa_code): Likewise.
+ (ix86_builtin_find_vlength_for_proc): Likewise.
+
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/cilkplus@194366 138bc75d-0d04-0410-961f-82ee72b054a4
46 files changed, 1703 insertions, 222 deletions
diff --git a/gcc/ChangeLog.cilkplus b/gcc/ChangeLog.cilkplus index ff6c2c9113f..5bbf9287353 100644 --- a/gcc/ChangeLog.cilkplus +++ b/gcc/ChangeLog.cilkplus @@ -1,3 +1,54 @@ +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * doc/tm.texi (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added + documentation for this hook. + (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_CODE): Likewise. + * doc/tm.texi.in (TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR): Added + hook. + (TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE): Likewise. + (TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC): Likewise. + * targhooks.c (default_builtin_map_processor_to_attr): New function. + (default_builtin_find_processor_code): Likewise. + (default_builtin_find_vlength_code): Likewise. + (default_builtin_find_isa_code): Likewise. + * c/c-parser.c (c_parser_elem_fn_processor_clause): Fixed syntax issue + in error message. Added several checks and error reporting for invalid + values. + (c_parser_elem_fn_uniform_clause): Likewise. + (c_parser_elem_fn_linear_clause): Likewise. + (c_parser_elem_fn_vlength_clause): Likewise. + * c/c-decl.c (c_builtin_function_ext_scope): Added a check if external + scope is non-null. + * target.def (TARGET_CILKPLUS): Added this hook and several definitions + such as builtin_map_processor_to_attr, builtin_find_processor_code, + builtin_find_vlength_for_proc, builtin_find_isa_code. + * tree.h (enum elem_fn_parm_type): Moved this to c-common. + * cilk.h (elem_fn_info): Added some extra fields into struct. + * elem-function-common.c (find_processor_code): Called the target + dependent version. Also made several changes to satisfy the new + elemental function mangling format. Finally, fixed up header comments. + (find_suffix): Likewise. + (extract_elem_fn_values): Likewise. 
+ * tree-vect-stmts.c (elem_fn_vect_get_vec_def_for_operand): Return + scalar version for the uniform or linear parameters. + (vectorizable_call): Set the function type correctly. + * tree-inline.c (remap_var_for_cilk): Remove. + (tree_elem_fn_versioning): Fix up header function and reflected changes + in tree_function_versioning into this function. + (copy_tree_body_r): Removed a call for remap_var_for_cilk. + (elem_fn_copy_arguments_for_versioning): Fixed header comments. + * Makefile.in (C_COMMON_OBJS): Added c-family/c-cpp-elem-function.o. + * config/i386/i386.c (type_natural_mode): Added a check for cilk flag + before emitting a note. + (ix86_frame_pointer_required): Added a flag_enable_cilk check. + (ix86_cilkplus_map_proc_to_attr): New function. + (ix86_cilkplus_find_proc_code): Likewise. + (ix86_cilkplus_find_isa_code): Likewise. + (ix86_builtin_find_vlength_for_proc): Likewise. + 2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com> * array-notation-common.c: Delete. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 0c434bf58ad..4bdfa7fdaa6 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1140,7 +1140,8 @@ C_COMMON_OBJS = c-family/c-common.o c-family/c-cppbuiltin.o c-family/c-dump.o \ c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \ c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \ c-family/c-semantics.o c-family/c-ada-spec.o tree-mudflap.o \ - c-family/array-notation-common.o c-family/c-cilk.o c-family/elem-function.o + c-family/array-notation-common.o c-family/c-cilk.o \ + c-family/c-cpp-elem-function.o # Language-independent object files. 
# We put the insn-*.o files first so that a parallel make will build @@ -1216,6 +1217,7 @@ OBJS = \ dwarf2asm.o \ dwarf2cfi.o \ dwarf2out.o \ + elem-function-common.o \ emit-rtl.o \ et-forest.o \ except.o \ @@ -1462,7 +1464,6 @@ OBJS = \ web.o \ xcoffout.o \ cilk.o \ - elem-function-common.o \ $(out_object_file) \ $(EXTRA_OBJS) \ $(host_hook_obj) diff --git a/gcc/c-family/ChangeLog.cilkplus b/gcc/c-family/ChangeLog.cilkplus index 939b54ffa28..82e4d39a70a 100644 --- a/gcc/c-family/ChangeLog.cilkplus +++ b/gcc/c-family/ChangeLog.cilkplus @@ -1,3 +1,7 @@ +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * c-cpp-elem-function.c: New file. + 2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com> * c-common.def (ARRAY_NOTATION_REF): New expression definiton. diff --git a/gcc/c-family/c-cpp-elem-function.c b/gcc/c-family/c-cpp-elem-function.c new file mode 100644 index 00000000000..117e5a1569b --- /dev/null +++ b/gcc/c-family/c-cpp-elem-function.c @@ -0,0 +1,814 @@ +/* This file is part of the Intel(R) Cilk(TM) Plus support + This file contains C/C++ specific functions for elemental + functions. + + Copyright (C) 2012 Free Software Foundation, Inc. + Written by Balaji V. Iyer <balaji.v.iyer@intel.com>, + Intel Corporation + + Many Thanks to Karthik Kumar for advice on the basic technique + about cloning functions. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "langhooks.h" +#include "cilk.h" +#include "basic-block.h" +#include "output.h" +#include "c-family/c-common.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "timevar.h" +#include "flags.h" +#include "c/c-tree.h" +#include "tree-inline.h" +#include "cgraph.h" +#include "ipa-prop.h" +#include "opts.h" +#include "tree-iterator.h" +#include "options.h" +#include "intl.h" +#include "vec.h" +#include "target.h" + +static tree create_optimize_attribute (int); +static tree create_processor_attribute (elem_fn_info *, tree *); +static tree elem_fn_build_array (tree base_var, tree index); + +enum elem_fn_parm_size { + SCALAR = 0, + VECTOR_SEGMENTED = 1, + VECTOR_NONSEGMENTED = 2 +}; + +typedef struct +{ + tree arg; + tree replacement; +} args_data; + +typedef struct +{ + tree var_name; + vec<tree, va_gc> *substitute_vars; +} var_expand_struct; + +/* Creates the appropriate __target__ attribute for the processor information + given in ELEM_FN_VALUES->proc_type. The function also returns the opposite + attribute through OPPOSITE_ATTR for the scalar function. */ + +static tree +create_processor_attribute (elem_fn_info *elem_fn_values, tree *opposite_attr) +{ + if (elem_fn_values) + return targetm.cilkplus.builtin_map_processor_to_attr + (elem_fn_values->proc_type, opposite_attr); + else + { + /* We should never get here. If we get here, something wrong has + happened, so we reset the whole proc. attribute. */ + *opposite_attr = NULL_TREE; + return NULL_TREE; + } +} + +/* Goes through all the uniform and linear variables in the ELEM_FN_VALUES and + if those variables are set to VECTOR_TYPE in FNDECL by the + tree_elem_fn_versioning function then we convert it back to scalar. 
*/ + +static void +scalarize_uniform_linear_params (tree fndecl, elem_fn_info *elem_fn_values) +{ + size_t ii = 0; + tree ii_tree; + if (!elem_fn_values || !fndecl) + return; + + for (ii_tree = DECL_ARGUMENTS (fndecl); ii_tree; + ii_tree = DECL_CHAIN (ii_tree)) + { + for (ii = 0; ii < (size_t) elem_fn_values->no_uvars; ii++) + if (!strcmp (elem_fn_values->uniform_vars[ii], + IDENTIFIER_POINTER (DECL_NAME (ii_tree)))) + { + tree type_t = TREE_TYPE (TREE_TYPE (ii_tree)); + TREE_TYPE (ii_tree) = type_t; + DECL_ARG_TYPE (ii_tree) = type_t; + } + for (ii = 0; ii < (size_t) elem_fn_values->no_lvars; ii++) + if (!strcmp (elem_fn_values->linear_vars[ii], + IDENTIFIER_POINTER (DECL_NAME (ii_tree)))) + { + tree type_t = TREE_TYPE (TREE_TYPE (ii_tree)); + TREE_TYPE (ii_tree) = type_t; + DECL_ARG_TYPE (ii_tree) = type_t; + } + } + return; +} + + +/* Returns an optimize attribute for the Optimization level given by OPTION. */ + +static tree +create_optimize_attribute (int option) +{ + tree opt_attr; + vec<tree, va_gc> *opt_vec; + char optimization[2]; + + optimization[0] = 'O'; + vec_alloc (opt_vec, 4); + + if (option == 3) + optimization[1] = '3'; + else if (option == 2) + optimization[1] = '2'; + else if (option == 1) + optimization[1] = '1'; + else if (option == 0) + optimization[1] = '0'; + + vec_safe_push (opt_vec, build_string (2, optimization)); + opt_attr = build_tree_list_vec (opt_vec); + vec_safe_truncate (opt_vec, 0); + opt_attr = build_tree_list (get_identifier ("optimize"), opt_attr); + return opt_attr; +} + +/* This function will replace parameter stored in DATA->arg with + DATA->replacement in *TP. If *WALK_SUBTREES is set to 0, then the subtrees + of *TP will not be stepped through. 
*/ + +static tree +replace_parm_decl (tree *tp, int *walk_subtrees, void *data) +{ + if (!tp) + return NULL_TREE; + + if (TREE_CODE (*tp) == PARM_DECL) + { + args_data *value = (args_data *) data; + if (DECL_NAME (*tp) == DECL_NAME (value->arg)) + *tp = value->replacement; + + *walk_subtrees = 0; + } + return NULL_TREE; +} + + +/* Stores the return expression to a temporary var in DATA (typecasted to tree) + in a set of *TP tree. If *WALK_SUBTREES is set to 1, then we walk through + the subtrees of *TP. */ + +static tree +replace_return_with_new_var (tree *tp, int *walk_subtrees, void *data) +{ + tree mod_expr = NULL_TREE, return_var = NULL_TREE, ret_expr = NULL_TREE; + + if (!*tp) + return NULL_TREE; + + if (TREE_CODE (*tp) == RETURN_EXPR) + { + return_var = (tree) data; + ret_expr = TREE_OPERAND (TREE_OPERAND (*tp, 0), 1); + mod_expr = build2 (MODIFY_EXPR, TREE_TYPE (return_var), return_var, + ret_expr); + *tp = mod_expr; + *walk_subtrees = 0; + } + return NULL_TREE; +} + +/* Creates an ARRAY_REF expression for BASE_VAR array with INDEX as the + appropriate index. */ + +static tree +elem_fn_build_array (tree base_var, tree index) +{ + return build_array_ref (EXPR_LOCATION (base_var), base_var, index); +} + +/* Replaces all the vector references in *TP with array references stored in + DATA (type casted to fn_vect_elements that stores this info). If + *WALK_SUBTREES is set to 1, then we recurse through all the subtrees of + *TP. 
*/

static tree
replace_array_ref_for_vec (tree *tp, int *walk_subtrees, void *data)
{
  fn_vect_elements *func_data = (fn_vect_elements *) data;
  tree parm;

  if (!*tp)
    return NULL_TREE;

  if (TREE_CODE (*tp) != VAR_DECL && TREE_CODE (*tp) != PARM_DECL)
    return NULL_TREE;

  gcc_assert (func_data->induction_var);

  for (parm = func_data->arguments; parm; parm = DECL_CHAIN (parm))
    {
      if (DECL_NAME (parm) != DECL_NAME (*tp))
        continue;

      /* If the TREE_CODE of the TREE_TYPE is not a vector, then it
         means that the variable is a UNIFORM or LINEAR and thus we do
         not need to break it up into array.  */
      if (TREE_CODE (TREE_TYPE (*tp)) == VECTOR_TYPE)
        *tp = elem_fn_build_array (*tp, func_data->induction_var);
      else if (DECL_NAME (*tp))
        {
          /* Fetch the name once, while *TP is still a declaration; the
             slot is overwritten with a PLUS_EXPR below.  */
          const char *var_name = IDENTIFIER_POINTER (DECL_NAME (*tp));
          size_t ii;

          /* Now we go through the linear variable list and if we have a
             hit, then we multiply the induction var with step-size and
             add it to the variable.  */
          for (ii = 0; ii < (size_t) func_data->no_lvars; ii++)
            if (!strcmp (var_name, func_data->linear_vars[ii]))
              {
                tree mult_expr, add_expr;

                /* NOTE(review): the step constant is built with
                   integer_type_node while the product uses the type of
                   the induction variable -- confirm this mix is
                   intended.  */
                mult_expr = fold_build2
                  (MULT_EXPR, TREE_TYPE (func_data->induction_var),
                   func_data->induction_var,
                   build_int_cst (integer_type_node,
                                  func_data->linear_steps[ii]));
                mult_expr = build_c_cast (EXPR_LOCATION (*tp),
                                          TREE_TYPE (*tp), mult_expr);
                add_expr = fold_build2 (PLUS_EXPR, TREE_TYPE (*tp),
                                        *tp, mult_expr);
                *tp = add_expr;

                /* Stop here: *TP is no longer a declaration, so it must
                   not be compared against the remaining names.  */
                break;
              }
        }

      *walk_subtrees = 0;
      return NULL_TREE;
    }

  /* Not a parameter: the only other variable turned into an array element
     is the temporary that collects the return value.  */
  if (func_data->return_var
      && DECL_NAME (*tp) == DECL_NAME (func_data->return_var))
    {
      *tp = elem_fn_build_array (*tp, func_data->induction_var);
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Moves the return values of function FNDECL toward the end of the function.
   The return is replaced with INDUCTION_VAR.
*/ + +static void +fix_elem_fn_return_value (tree fndecl, elem_fn_info *elem_fn_values, + tree induction_var) +{ + size_t ii = 0; + fn_vect_elements data; + tree old_fndecl; + tree new_var, new_var_init, new_body = NULL_TREE; + tree ret_expr, ret_stmt = NULL_TREE; + if (!fndecl || !DECL_SAVED_TREE (fndecl)) + return; + + if (TREE_TYPE (DECL_RESULT (fndecl)) == void_type_node) + return; + + old_fndecl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (fndecl)); + current_function_decl = fndecl; + + new_var = create_tmp_var (TREE_TYPE (DECL_RESULT (fndecl)), "elem_fn_ret"); + new_var_init = + build_vector_from_val + (TREE_TYPE (DECL_RESULT (fndecl)), + build_zero_cst (TREE_TYPE (TREE_TYPE (DECL_RESULT (fndecl))))); + DECL_INITIAL (new_var) = new_var_init; + walk_tree (&DECL_SAVED_TREE (fndecl), replace_return_with_new_var, + (void *) new_var, NULL); + data.return_var = new_var; + data.arguments = DECL_ARGUMENTS (fndecl); + data.induction_var = induction_var; + for (ii = 0; ii < (size_t) elem_fn_values->no_lvars; ii++) + { + data.linear_vars[ii] = xstrdup (elem_fn_values->linear_vars[ii]); + data.linear_steps[ii] = elem_fn_values->linear_steps[ii]; + } + data.no_lvars = elem_fn_values->no_lvars; + walk_tree (&DECL_SAVED_TREE (fndecl), replace_array_ref_for_vec, + (void *) &data, NULL); + ret_expr = build2 (MODIFY_EXPR, TREE_TYPE (new_var), + DECL_RESULT (fndecl), new_var); + + ret_stmt = build1 (RETURN_EXPR, TREE_TYPE (ret_expr), ret_expr); + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + { + if (!BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl))) + ; + else if (TREE_CODE (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl))) != + TREE_LIST) + { + append_to_statement_list_force + (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)), &new_body); + append_to_statement_list_force (ret_stmt, &new_body); + } + else + { + new_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)); + append_to_statement_list_force (ret_stmt, &new_body); + } + BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = 
new_body; + } + + pop_cfun (); + current_function_decl = old_fndecl; + return; +} + +/* Converts the vector value in FNDECL to a scalar one with a for-loop that + goes from 0->(VLENGTH-1). */ + +static tree +add_elem_fn_loop (tree fndecl, int vlength) +{ + tree exit_label = NULL_TREE, if_label = NULL_TREE, body_label = NULL_TREE; + tree fn_body, loop = NULL_TREE, loop_var, mod_var, incr_expr, cond_expr; + tree cmp_expr, old_fndecl; + + if (!fndecl) + return NULL_TREE; + + if (!DECL_SAVED_TREE (fndecl)) + return NULL_TREE; + + old_fndecl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (fndecl)); + current_function_decl = fndecl; + + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)); + else + fn_body = DECL_SAVED_TREE (fndecl); + + loop = alloc_stmt_list (); + + loop_var = create_tmp_var (size_type_node, "ii_elem_fn_vec_val"); + mod_var = build2 (MODIFY_EXPR, void_type_node, loop_var, + build_int_cst (size_type_node, 0)); + append_to_statement_list_force (mod_var, &loop); + + if_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL, + get_identifier ("if_lab"), void_type_node); + DECL_CONTEXT (if_label) = fndecl; + DECL_ARTIFICIAL (if_label) = 0; + DECL_IGNORED_P (if_label) = 1; + + exit_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL, + get_identifier ("exit_label"), void_type_node); + DECL_CONTEXT (exit_label) = fndecl; + DECL_ARTIFICIAL (exit_label) = 0; + DECL_IGNORED_P (exit_label) = 1; + + body_label = build_decl (EXPR_LOCATION (fndecl), LABEL_DECL, + get_identifier ("body_label"), void_type_node); + DECL_CONTEXT (body_label) = fndecl; + DECL_ARTIFICIAL (body_label) = 0; + DECL_IGNORED_P (body_label) = 1; + append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node, + if_label), &loop); + cmp_expr = build2 (LT_EXPR, boolean_type_node, loop_var, + build_int_cst (size_type_node, vlength)); + cond_expr = build3 (COND_EXPR, void_type_node, cmp_expr, + build1 (GOTO_EXPR, 
void_type_node, body_label), + build1 (GOTO_EXPR, void_type_node, exit_label)); + + append_to_statement_list_force (cond_expr, &loop); + append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node, + body_label), &loop); + append_to_statement_list_force (fn_body, &loop); + + incr_expr = build2 (MODIFY_EXPR, void_type_node, loop_var, + build2 (PLUS_EXPR, TREE_TYPE (loop_var), loop_var, + build_int_cst (size_type_node, 1))); + + append_to_statement_list_force (incr_expr, &loop); + append_to_statement_list_force (build1 (GOTO_EXPR, void_type_node, if_label), + &loop); + append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node, + exit_label), &loop); + + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = loop; + else + DECL_SAVED_TREE (fndecl) = loop; + + pop_cfun (); + current_function_decl = old_fndecl; + + return loop_var; +} + +/* Adds a mask if-statement for FNDECL function. */ + +static void +add_elem_fn_mask (tree fndecl) +{ + tree ii_arg; + tree cond_expr, cmp_expr, old_fndecl; + tree fn_body = NULL_TREE; + + old_fndecl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (fndecl)); + current_function_decl = fndecl; + + if (!DECL_SAVED_TREE (fndecl)) + return; + + for (ii_arg = DECL_ARGUMENTS (fndecl); DECL_CHAIN (ii_arg); + ii_arg = DECL_CHAIN (ii_arg)) + { + ; + } + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)); + else + fn_body = DECL_SAVED_TREE (fndecl); /* Not sure if we ever get here. 
*/ + + gcc_assert (DECL_NAME (ii_arg) == get_identifier ("__elem_fn_mask")); + + cmp_expr = fold_build2 (NE_EXPR, TREE_TYPE (ii_arg), ii_arg, + build_int_cst (TREE_TYPE (TREE_TYPE (ii_arg)), 0)); + cond_expr = fold_build3 (COND_EXPR, void_type_node, cmp_expr, fn_body, + build_empty_stmt (EXPR_LOCATION (fndecl))); + + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = cond_expr; + else + DECL_SAVED_TREE (fndecl) = cond_expr; + + pop_cfun (); + current_function_decl = old_fndecl; + + return; + +} + +/* Inserts the tree expression EXPR as the first statement for the function + FNDECL. */ + +static void +insert_as_first_stmt (tree expr, tree fndecl) +{ + tree body = NULL_TREE, new_body = NULL_TREE; + if (fndecl == NULL_TREE) + return; + if (expr == NULL_TREE) + return; + + body = DECL_SAVED_TREE (fndecl); + if (!body) + return; + + if (TREE_CODE (body) == BIND_EXPR) + body = BIND_EXPR_BODY (body); + + if (TREE_CODE (body) == STATEMENT_LIST) + { + tree_stmt_iterator tsi = tsi_start (body); + tsi_link_before (&tsi, expr, TSI_CONTINUE_LINKING); + } + else + { + new_body = alloc_stmt_list (); + append_to_statement_list_force (expr, &new_body); + append_to_statement_list_force (body, &new_body); + if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR) + BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = new_body; + } + return; +} + +/* Segments all the vector parameters of FNDECL into the sizes of largest + vector register possible. 
*/ + +static void +segment_params_for_reg_size (tree fndecl) +{ + size_t ii = 0, jj = 0, kk = 0, ll = 0, ii_narg = 0, nargs_reqd = 0; + enum elem_fn_parm_size *param_array; + unsigned HOST_WIDE_INT biggest_vec_reg_size = BIGGEST_ALIGNMENT; + tree p = NULL_TREE, new_expr = NULL_TREE; + tree *param_var_array = NULL, *parm_type_array = NULL, *new_parm_var = NULL; + unsigned HOST_WIDE_INT *param_index = NULL; + unsigned HOST_WIDE_INT param_no = 0; + int *param_length = NULL; + struct function *f = DECL_STRUCT_FUNCTION (fndecl); + + gcc_assert (f); + push_cfun (f); + + for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p)) + param_no++; + + param_length = XNEWVEC (int, param_no); + gcc_assert (param_length); + ii = 0; + for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p)) + { + unsigned HOST_WIDE_INT p_size = 0; + if (TREE_TYPE (p) && TREE_CODE (TREE_TYPE (p)) == VECTOR_TYPE) + p_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (p)), 1); + if (p_size > biggest_vec_reg_size) + { + nargs_reqd += p_size / biggest_vec_reg_size; + param_length[ii] = p_size / biggest_vec_reg_size; + } + else + { + nargs_reqd++; + param_length[ii] = 1; + } + ii++; + } + if (nargs_reqd == 0) + return; + + param_array = XNEWVEC (enum elem_fn_parm_size, nargs_reqd); + gcc_assert (param_array != NULL); + + param_var_array = XNEWVEC (tree, nargs_reqd); + gcc_assert (param_var_array != NULL); + + parm_type_array = XNEWVEC (tree, nargs_reqd); + gcc_assert (parm_type_array != NULL); + + param_index = XNEWVEC (unsigned HOST_WIDE_INT, nargs_reqd); + gcc_assert (param_index != NULL); + param_no = 0; + for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p)) + { + unsigned HOST_WIDE_INT p_size = 0; + if (TREE_TYPE (p) && TREE_CODE (TREE_TYPE (p)) == VECTOR_TYPE) + { + p_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (p)), 1); + if (p_size > biggest_vec_reg_size) + { + for (ii = 0; ii < (size_t) ((int) (p_size/biggest_vec_reg_size)); + ii++) + { + unsigned HOST_WIDE_INT var_size = 0; + param_array[ii + ii_narg] 
= VECTOR_SEGMENTED; + var_size = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (p)), + 1); + parm_type_array[ii + ii_narg] = + build_vector_type (TREE_TYPE (TREE_TYPE (p)), + (biggest_vec_reg_size / var_size)); + param_index[ii + ii_narg] = param_no; + } + ii_narg += (int) (p_size / biggest_vec_reg_size); + } + else + { + param_array[ii_narg] = VECTOR_NONSEGMENTED; + parm_type_array[ii_narg] = TREE_TYPE (p); + param_index[ii_narg] = param_no; + ii_narg++; + } + } + else + { + param_array[ii_narg] = SCALAR; + parm_type_array[ii_narg] = TREE_TYPE (p); + param_index[ii_narg] = param_no; + ii_narg++; + } + param_no++; + } + + new_parm_var = XNEWVEC (tree, param_no); + gcc_assert (new_parm_var != NULL); + + ii = 0; + for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), ii++) + new_parm_var[ii] = create_tmp_var (TREE_TYPE (p), + IDENTIFIER_POINTER (DECL_NAME (p))); + + + for (ii = 0; ii < (size_t) nargs_reqd; ii++) + { + param_var_array[ii] = build_decl (EXPR_LOCATION (fndecl), PARM_DECL, + NULL_TREE, parm_type_array[ii]); + DECL_ARG_TYPE (param_var_array[ii]) = parm_type_array[ii]; + DECL_CONTEXT (param_var_array[ii]) = fndecl; + DECL_ARTIFICIAL (param_var_array[ii]) = 1; + lang_hooks.dup_lang_specific_decl (param_var_array[ii]); + } + + for (ii = 1; ii < nargs_reqd; ii++) + TREE_CHAIN (param_var_array[ii-1]) = param_var_array[ii]; + + ii = 0; + for (p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p)) + { + args_data d; + d.arg = p; + d.replacement = new_parm_var[ii]; + walk_tree (&DECL_SAVED_TREE (fndecl), replace_parm_decl, (void *) &d, + NULL); + ii++; + } + + DECL_ARGUMENTS (fndecl) = param_var_array[0]; + + /* Now we have to do group the split up register value into one big variable. 
+ For Example, let's say we split vector(8) int x into: + vector(4) int D124 and vector(4) int D123 + We have to regroup them into the following: + + x (8, 7, 6, 5) = D124 + x (4, 3, 2, 1) = D123 + + So, the final thing will look something like this: + + data_type foo (vec(8) int x) + | + | + V + data_type foo (vec (4) int D124, vec (4) int D123) + { + x = {D124, D123} + < REST OF FUNCTION BODY > + } + */ + + ii = 0; + while (ii < nargs_reqd) + { + if (param_array[ii] == SCALAR || param_array[ii] == VECTOR_NONSEGMENTED) + { + new_expr = build2 (MODIFY_EXPR, parm_type_array[ii], + new_parm_var[jj], param_var_array[ii]); + insert_as_first_stmt (new_expr, fndecl); + ii++; + } + else + for (kk = 0; kk < (size_t) param_length[jj]; kk++) + { + for (ll = 0; ll < (size_t) param_length[jj]; ll++) + { + tree m_type = TREE_TYPE (TREE_TYPE (new_parm_var[jj])); + tree lhs_array = + elem_fn_build_array + (new_parm_var[jj], build_int_cst + (size_type_node, kk * param_length[jj] + ll)); + tree rhs_array = + elem_fn_build_array (param_var_array[ii], + build_int_cst (size_type_node, ll)); + new_expr = build2 (MODIFY_EXPR, m_type, lhs_array, rhs_array); + insert_as_first_stmt (new_expr, fndecl); + } + ii++; + } + jj++; + } + pop_cfun (); + return; +} + +/* Does all the call-graph hacks necessary to make FNDECL a recognized + function. */ + +static void +call_graph_add_fn (tree fndecl) +{ + const tree outer = current_function_decl; + struct function *f = DECL_STRUCT_FUNCTION (fndecl); + + if (cfun) + f->curr_properties = cfun->curr_properties; + push_cfun (f); + current_function_decl = fndecl; + + cgraph_add_new_function (fndecl, false); + cgraph_finalize_function (fndecl, true); + + pop_cfun (); + current_function_decl = outer; + + return; +} + +/* Clones the function FNDECL to elemental functions (masked and unmasked + versions, if applicable) since vector attribute is set. 
*/ + +void +elem_fn_create_fn (tree fndecl) +{ + tree new_masked_fn = NULL_TREE, new_unmasked_fn = NULL_TREE; + tree induction_var = NULL_TREE; + elem_fn_info *elem_fn_values = NULL; + char *masked_suffix = NULL, *unmasked_suffix = NULL; + tree proc_attr = NULL_TREE, opp_proc_attr = NULL_TREE, opt_attr = NULL_TREE; + + if (!fndecl) + return; + + elem_fn_values = extract_elem_fn_values (fndecl); + if (!elem_fn_values) + return; + + if (elem_fn_values->mask == USE_MASK) + masked_suffix = find_suffix (elem_fn_values, true); + else if (elem_fn_values->mask == USE_NOMASK) + unmasked_suffix = find_suffix (elem_fn_values, false); + else + { + masked_suffix = find_suffix (elem_fn_values, true); + unmasked_suffix = find_suffix (elem_fn_values, false); + } + if (masked_suffix) + { + new_masked_fn = copy_node (fndecl); + new_masked_fn = rename_elem_fn (new_masked_fn, masked_suffix); + SET_DECL_RTL (new_masked_fn, NULL); + TREE_SYMBOL_REFERENCED (DECL_NAME (new_masked_fn)) = 1; + tree_elem_fn_versioning (fndecl, new_masked_fn, NULL, false, NULL, false, + NULL, NULL, elem_fn_values->vectorlength, true); + scalarize_uniform_linear_params (new_masked_fn, elem_fn_values); + proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr); + if (proc_attr) + decl_attributes (&new_masked_fn, proc_attr, 0); + if (opp_proc_attr) + decl_attributes (&fndecl, opp_proc_attr, 0); + + opt_attr = create_optimize_attribute (3); /* Turn vectorizer on. 
*/ + if (opt_attr) + decl_attributes (&new_masked_fn, opt_attr, 0); + + DECL_ATTRIBUTES (new_masked_fn) = + remove_attribute ("vector", DECL_ATTRIBUTES (new_masked_fn)); + + add_elem_fn_mask (new_masked_fn); + induction_var = add_elem_fn_loop (new_masked_fn, + elem_fn_values->vectorlength); + fix_elem_fn_return_value (new_masked_fn, elem_fn_values, induction_var); + segment_params_for_reg_size (new_masked_fn); + call_graph_add_fn (new_masked_fn); + SET_DECL_ASSEMBLER_NAME (new_masked_fn, DECL_NAME (new_masked_fn)); + DECL_ELEM_FN_ALREADY_CLONED (new_masked_fn) = true; + if (DECL_STRUCT_FUNCTION (new_masked_fn)) + DECL_STRUCT_FUNCTION (new_masked_fn)->elem_fn_already_cloned = true; + } + if (unmasked_suffix) + { + new_unmasked_fn = copy_node (fndecl); + new_unmasked_fn = rename_elem_fn (new_unmasked_fn, unmasked_suffix); + SET_DECL_RTL (new_unmasked_fn, NULL); + TREE_SYMBOL_REFERENCED (DECL_NAME (new_unmasked_fn)) = 1; + tree_elem_fn_versioning (fndecl, new_unmasked_fn, NULL, false, NULL, + false, NULL, NULL, + elem_fn_values->vectorlength, false); + scalarize_uniform_linear_params (new_unmasked_fn, elem_fn_values); + proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr); + if (proc_attr) + decl_attributes (&new_unmasked_fn, proc_attr, 0); + if (opp_proc_attr) + decl_attributes (&fndecl, opp_proc_attr, 0); + + opt_attr = create_optimize_attribute (3); /* Turn vectorizer on. 
*/ + if (opt_attr) + decl_attributes (&new_unmasked_fn, opt_attr, 0); + + DECL_ATTRIBUTES (new_unmasked_fn) = + remove_attribute ("vector", DECL_ATTRIBUTES (new_unmasked_fn)); + induction_var = add_elem_fn_loop (new_unmasked_fn, + elem_fn_values->vectorlength); + fix_elem_fn_return_value (new_unmasked_fn, elem_fn_values, + induction_var); + segment_params_for_reg_size (new_unmasked_fn); + call_graph_add_fn (new_unmasked_fn); + SET_DECL_ASSEMBLER_NAME (new_unmasked_fn, DECL_NAME (new_unmasked_fn)); + DECL_ELEM_FN_ALREADY_CLONED (new_unmasked_fn) = true; + if (DECL_STRUCT_FUNCTION (new_unmasked_fn)) + DECL_STRUCT_FUNCTION (new_unmasked_fn)->elem_fn_already_cloned = true; + } + + XDELETEVEC (elem_fn_values); + return; +} diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 6b8ff154ccc..1240f940aee 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -3641,8 +3641,9 @@ c_builtin_function_ext_scope (tree decl) /* Should never be called on a symbol with a preexisting meaning. */ gcc_assert (!I_SYMBOL_BINDING (id)); - bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false, - UNKNOWN_LOCATION); + if (external_scope) + bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false, + UNKNOWN_LOCATION); /* Builtins in the implementation namespace are made visible without needing to be explicitly declared. See push_file_scope. 
*/ diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index af84f9cffc6..25ee5a381dc 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -12094,7 +12094,7 @@ c_parser_elem_fn_processor_clause (c_parser *parser) token = c_parser_peek_token (parser); if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN)) { - c_parser_error (parser, "expected %<)%>"); + c_parser_error (parser, "expected %<(%>"); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); return NULL_TREE; } @@ -12147,8 +12147,16 @@ c_parser_elem_fn_processor_clause (c_parser *parser) build_string (strlen ("core_i7_sse4_2"), "core_i7_sse4_2")); } + else if (!token->value || TREE_CODE (token->value) != IDENTIFIER_NODE) + { + c_parser_error (parser, "expected processor-name"); + } else - sorry ("Processor type not supported"); + { + c_parser_consume_token (parser); + error_at (input_location, "processor %s not supported", + IDENTIFIER_POINTER (token->value)); + } if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN)) c_parser_consume_token (parser); @@ -12165,7 +12173,7 @@ c_parser_elem_fn_processor_clause (c_parser *parser) return proc_tree_list; } -/* This function parses the uniform clause of Cilk Plus elemental functions. */ +/* This function parses "uniform" clause of Cilk Plus elemental functions. 
*/ static tree c_parser_elem_fn_uniform_clause (c_parser *parser) @@ -12173,7 +12181,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser) c_token *token; tree uniform_tree; tree str_token = NULL_TREE; - vec<tree,va_gc> *uniform_vec = NULL; + vec<tree, va_gc> *uniform_vec = NULL; if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN)) { @@ -12215,7 +12223,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser) } else { - c_parser_error (parser, "expected number or comma"); + c_parser_error (parser, "expected variable-name"); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); return NULL_TREE; } @@ -12227,6 +12235,7 @@ c_parser_elem_fn_uniform_clause (c_parser *parser) return uniform_tree; } + /* This function parses the linear clause of Cilk Plus Elemental functions. */ static tree @@ -12260,10 +12269,22 @@ c_parser_elem_fn_linear_clause (c_parser *parser) c_parser_consume_token (parser); token = c_parser_peek_token (parser); if (token->value && token->type == CPP_NUMBER) - step_size = token->value; + { + step_size = token->value; + if (TREE_TYPE (step_size) + && TREE_CODE (TREE_TYPE (step_size)) == REAL_TYPE) + { + error_at (input_location, "step-size must be an integer " + "constant expression"); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, + NULL); + return NULL_TREE; + } + } else { c_parser_error (parser, "expected step-size"); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); return NULL_TREE; } c_parser_consume_token (parser); @@ -12316,8 +12337,7 @@ c_parser_elem_fn_vlength_clause (c_parser *parser) if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN)) { - c_parser_error (parser, "expected %<)%>"); - c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); + c_parser_skip_until_found (parser, CPP_COMMA, "expected %<(%>"); return NULL_TREE; } else @@ -12329,29 +12349,39 @@ c_parser_elem_fn_vlength_clause (c_parser *parser) token = c_parser_peek_token (parser); if (token->value && token->type == CPP_NUMBER) { - vec_safe_push 
(vlength_vec, token->value); - c_parser_consume_token (parser); - if (c_parser_next_token_is (parser, CPP_COMMA)) + if (TREE_TYPE (token->value) + && TREE_CODE (TREE_TYPE (token->value)) == REAL_TYPE) { - c_parser_consume_token (parser); - if (c_parser_next_token_is_not (parser, CPP_NUMBER)) - { - c_parser_error (parser, "expected vectorlength after %<,%>"); - c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); - return NULL_TREE; - } + error_at (input_location, "vectorlength must be an integer."); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); + return NULL_TREE; } - else if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN)) + if (!integer_pow2p (token->value)) { - c_parser_error (parser, - "expected %<,%> or %<)%> after vectorlength"); + error_at (input_location, "vectorlength must be a power of 2."); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); + return NULL_TREE; + } + else if (compare_tree_int (token->value, 8) == 1 + || compare_tree_int (token->value, 2) == -1) + { + error_at (input_location, + "vectorlength must be between 2 and 8."); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); + return NULL_TREE; + } + vec_safe_push (vlength_vec, token->value); + c_parser_consume_token (parser); + if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN)) + { + c_parser_error (parser, "expected %<)%> after vectorlength"); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); return NULL_TREE; } } else { - c_parser_error (parser, "expected number or comma"); + c_parser_error (parser, "expected number"); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); return NULL_TREE; } diff --git a/gcc/cilk.h b/gcc/cilk.h index c32efab5e0a..c14b8e6910c 100644 --- a/gcc/cilk.h +++ b/gcc/cilk.h @@ -213,8 +213,9 @@ enum mask_options { typedef struct { char *proc_type; + char *isa_type; enum mask_options mask; - int vectorlength[MAX_VARS]; + int vectorlength; int no_vlengths; char *uniform_vars[MAX_VARS]; int no_uvars; @@ 
-230,13 +231,23 @@ typedef struct } elem_fn_info; /* This data structure will hold all the arguments in the function. */ -typedef struct +typedef struct { tree induction_var; tree arguments; tree return_var; + int no_lvars; + char *linear_vars[MAX_VARS]; + int linear_steps[MAX_VARS]; } fn_vect_elements; +enum elem_fn_parm_type +{ + TYPE_NONE = 0, + TYPE_UNIFORM = 1, + TYPE_LINEAR = 2 +}; + /* Offset of fields in the Cilk frame descriptor. Index is same as for cilk_trees. If the index does not correspond to a field of the Cilk frame diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a61a0352971..216f8e1799e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5780,20 +5780,16 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum) { static bool warnedavx; + /* In Cilk Plus you can create code for a processor that + is enabled with elemental functions. */ if (cum + && !flag_enable_cilk && !warnedavx && cum->warn_avx) { - /* For Cilk Plus with elemental functions, the user - can generate code for a hardware that is not the - target hardware. So, this warning is not valid for - us. */ - if (!flag_enable_cilk) - { - warnedavx = true; - warning (0, "AVX vector argument without AVX " - "enabled changes the ABI"); - } + warnedavx = true; + warning (0, "AVX vector argument without AVX " + "enabled changes the ABI"); } return TYPE_MODE (type); } @@ -5802,6 +5798,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum) static bool warnedsse; if (cum + && !flag_enable_cilk && !warnedsse && cum->warn_sse) { @@ -7182,18 +7179,12 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type) && !warned && align != ix86_compat_function_arg_boundary (mode, type, saved_align)) - { - /* For Cilk Plus with elemental functions, the user can generate - code for a hardware that is not the target hardware. So, this - warning is not valid for Cilk Plus. 
*/ - if (!flag_enable_cilk) - { - warned = true; - inform (input_location, - "The ABI for passing parameters with %d-byte" - " alignment has changed in GCC 4.6", - align / BITS_PER_UNIT); - } + { + warned = true; + inform (input_location, + "The ABI for passing parameters with %d-byte" + " alignment has changed in GCC 4.6", + align / BITS_PER_UNIT); } } @@ -8517,7 +8508,7 @@ static bool ix86_frame_pointer_required (void) { /* For all Cilk specific functions, a frame pointer is required. */ - if (cfun->is_cilk_function == 1) + if (flag_enable_cilk && cfun->is_cilk_function == 1) return true; /* If we accessed previous frames, then the generated code expects @@ -42139,6 +42130,187 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) return val; } +/* Return the specific arch attribute for the *PROC_NAME of Elemental + function in Cilk Plus. The *OPPOSITE_ATTR will return the opposite of the return + value (in terms of optimization) for the scalar function. */ + +static tree +ix86_cilkplus_map_proc_to_attr (char *proc_name, tree *opposite_attr) +{ + /* You will need the opposite attribute for the scalar code part. 
*/ + tree proc_attr, opp_proc_attr; + vec<tree, va_gc> *proc_vec_list, *opp_proc_vec_list; + + vec_alloc (proc_vec_list, 4); + vec_alloc (opp_proc_vec_list, 4); + + if (!proc_name) + return NULL_TREE; + + if (!strcmp (proc_name, "pentium_4")) + { + vec_safe_push (proc_vec_list, + build_string (strlen ("arch=pentium4"), "arch=pentium4")); + vec_safe_push (proc_vec_list, build_string (strlen ("mmx"), "mmx")); + if (opposite_attr) + { + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("no-mmx"), "no-mmx")); + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("arch=pentium4"), + "arch=pentium4")); + } + } + else if (!strcmp (proc_name, "pentium_4_sse3")) + { + vec_safe_push (proc_vec_list, + build_string (strlen ("arch=pentium4"), "arch=pentium4")); + vec_safe_push (proc_vec_list, build_string (strlen ("sse3"), "sse3")); + if (opposite_attr) + { + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("arch=pentium4"), + "arch=pentium4")); + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("no-sse3"), "no-sse3")); + } + } + else if (!strcmp (proc_name, "core2_duo_sse3")) + { + vec_safe_push (proc_vec_list, + build_string (strlen ("arch=core2"), "arch=core2")); + vec_safe_push (proc_vec_list, build_string (strlen ("sse3"), "sse3")); + if (opposite_attr) + { + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("arch=core2"), "arch=core2")); + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("no-sse3"), "no-sse3")); + } + } + else if (!strcmp (proc_name, "core_2_duo_sse_4_1")) + { + vec_safe_push (proc_vec_list, + build_string (strlen ("arch=core2"), "arch=core2")); + vec_safe_push (proc_vec_list, build_string (strlen ("sse4.1"), "sse4.1")); + if (opposite_attr) + { + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("arch=core2"), "arch=core2")); + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("no-sse4.1"), "no-sse4.1")); + } + } + else if (!strcmp (proc_name, "core_i7_sse4_2")) + { + vec_safe_push 
(proc_vec_list, + build_string (strlen ("arch=corei7"), "arch=corei7")); + vec_safe_push (proc_vec_list, + build_string (strlen ("sse4.2"), "sse4.2")); + vec_safe_push (proc_vec_list, build_string (strlen ("avx"), "avx")); + if (opposite_attr) + { + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("arch=corei7"), "arch=corei7")); + vec_safe_push (opp_proc_vec_list, + build_string (strlen ("no-sse4.2"), "no-sse4.2")); + } + } + else + sorry ("Processor type not supported."); + + proc_attr = build_tree_list_vec (proc_vec_list); + vec_safe_truncate (proc_vec_list, 0); + proc_attr = build_tree_list (get_identifier ("__target__"), proc_attr); + + if (opposite_attr) + { + opp_proc_attr = build_tree_list_vec (opp_proc_vec_list); + vec_safe_truncate (opp_proc_vec_list, 0); + opp_proc_attr = build_tree_list (get_identifier ("__target__"), + opp_proc_attr); + *opposite_attr = opp_proc_attr; + } + return proc_attr; +} + +char * +ix86_cilkplus_find_proc_code (char *proc_name) +{ + if (!proc_name) + return xstrdup ("B"); + + if (!strcmp (proc_name, "pentium_4")) + return xstrdup ("B"); + else if (!strcmp (proc_name, "pentium_4_sse3")) + return xstrdup ("D"); + else if (!strcmp (proc_name, "core2_duo_sse3")) + return xstrdup ("E"); + else if (!strcmp (proc_name, "core_2_duo_sse_4_1")) + return xstrdup ("F"); + else if (!strcmp (proc_name, "core_i7_sse4_2")) + return xstrdup ("H"); + else + gcc_unreachable (); + + return NULL; /* We should never get here. */ +} + +/* Returns appropriate ISA string based on PROC_NAME and ISA_NAME. 
*/ + +char * +ix86_cilkplus_find_isa_for_proc (char *proc_name, char *isa_name) +{ + if (isa_name) + return isa_name; + else if (!proc_name) + return xstrdup("xmm"); + else if (!strcmp (proc_name, "pentium_4")) + return xstrdup ("xmm"); + else if (!strcmp (proc_name, "pentium_4_sse3")) + return xstrdup ("xmm"); + else if (!strcmp (proc_name, "core2_duo_sse3")) + return xstrdup ("xmm"); + else if (!strcmp (proc_name, "core_2_duo_sse_4_1")) + return xstrdup ("xmm"); + else if (!strcmp (proc_name, "core_i7_sse4_2")) + return xstrdup ("xmm"); + else if (!strcmp (proc_name, "core_2nd_gen_avx")) + return xstrdup ("ymm1"); + else if (!strcmp (proc_name, "core_3rd_gen_avx")) + return xstrdup ("ymm1"); + else if (!strcmp (proc_name, "core_4th_gen_avx")) + return xstrdup ("ymm2"); + else + gcc_unreachable (); + + return NULL; /* We should never get here. */ +} + + +/* Returns the appropriate vectorlength based on PROC_NAME. */ + +unsigned int +ix86_builtin_find_vlength_for_proc (char *proc_name) +{ + if (!proc_name) + return 4; + else if (!strcmp (proc_name, "pentium_4")) + return 4; + else if (!strcmp (proc_name, "pentium_4_sse3")) + return 4; + else if (!strcmp (proc_name, "core2_duo_sse3")) + return 4; + else if (!strcmp (proc_name, "core_2_duo_sse_4_1")) + return 4; + else if (!strcmp (proc_name, "core_i7_sse4_2")) + return 8; + else + /* If we got here, then we have hit a processor that we do not yet + support. */ + return 0; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -42505,6 +42677,20 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_SPILL_CLASS #define TARGET_SPILL_CLASS ix86_spill_class +#undef TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR +#define TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR \ + ix86_cilkplus_map_proc_to_attr + +#undef TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE +#define TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE \ + ix86_cilkplus_find_proc_code + +#undef TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE +#define TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE ix86_cilkplus_find_isa_for_proc + +#undef TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC +#define TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC \ + ix86_builtin_find_vlength_for_proc struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index f98196434f8..1c8a8ed98b2 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5706,6 +5706,30 @@ For vector memory operations the cost may depend on type (@var{vectype}) and misalignment value (@var{misalign}). @end deftypefn +@deftypefn {Target Hook} tree TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR (char *@var{}, tree *@var{}) +This hook is called by a Cilk Plus routine that will be used to map the +processor clause to the appropriate arch and tune attributes. +@end deftypefn + +@deftypefn {Target Hook} {char *} TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE (char *@var{}) +This hook is called by a Cilk Plus routine that will request the + processor code for the processor name given in the vector attribute for + the elemental functions. +@end deftypefn + +@deftypefn {Target Hook} {char *} TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE (char *@var{}, char *@var{}) +This hook is called by a Cilk Plus routine that will request the + ISA type (based on the register-set where vector parameters are passed) + in elemental functions. 
+@end deftypefn + +@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC (char *@var{}) +This hook is called by a Cilk Plus routine that will request the + default vectorlength for the processor specified in the processor clause + in the elemental functions. +@end deftypefn + + @deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed}) Return true if vector alignment is reachable (by peeling N iterations) for the given type. @end deftypefn diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 7a93f21073c..a9286e67d51 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -5622,6 +5622,15 @@ For vector memory operations the cost may depend on type (@var{vectype}) and misalignment value (@var{misalign}). @end deftypefn +@hook TARGET_CILKPLUS_BUILTIN_MAP_PROCESSOR_TO_ATTR + +@hook TARGET_CILKPLUS_BUILTIN_FIND_PROCESSOR_CODE + +@hook TARGET_CILKPLUS_BUILTIN_FIND_ISA_CODE + +@hook TARGET_CILKPLUS_BUILTIN_FIND_VLENGTH_FOR_PROC + + @hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE Return true if vector alignment is reachable (by peeling N iterations) for the given type. 
@end deftypefn diff --git a/gcc/elem-function-common.c b/gcc/elem-function-common.c index 0207888795d..e570683a5ea 100644 --- a/gcc/elem-function-common.c +++ b/gcc/elem-function-common.c @@ -28,72 +28,48 @@ #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" #include "tree.h" #include "langhooks.h" -#include "cilk.h" -#include "tm_p.h" -#include "hard-reg-set.h" #include "basic-block.h" #include "output.h" -#include "c-family/c-common.h" #include "diagnostic.h" #include "tree-flow.h" #include "tree-dump.h" #include "tree-pass.h" #include "timevar.h" -#include "flags.h" -#include "c/c-tree.h" #include "tree-inline.h" #include "cgraph.h" #include "ipa-prop.h" #include "opts.h" #include "tree-iterator.h" -#include "toplev.h" -#include "options.h" -#include "intl.h" #include "vec.h" #include "cilk.h" - -#define MAX_VARS 50 +#include "target.h" enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree *); bool is_elem_fn (tree); tree find_elem_fn_name (tree old_fndecl, tree vectype_out, tree vectype_in); elem_fn_info *extract_elem_fn_values (tree decl); -/* This function will find the appropriate processor code in the function - mangling vector function. */ +/* Uses the processor information stored in *PROC_NAME and returns an + appropriate string. 
*/ char * -find_processor_code (elem_fn_info *elem_fn_values) +find_processor_code (char *proc_name) { - if (!elem_fn_values || !elem_fn_values->proc_type) - return xstrdup ("B"); - - if (!strcmp (elem_fn_values->proc_type, "pentium_4")) - return xstrdup ("B"); - else if (!strcmp (elem_fn_values->proc_type, "pentium_4_sse3")) - return xstrdup ("D"); - else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse3")) - return xstrdup ("E"); - else if (!strcmp (elem_fn_values->proc_type, "core_2_duo_sse_4_1")) - return xstrdup ("F"); - else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2")) - return xstrdup ("H"); - else - gcc_unreachable (); - - return NULL; /* should never get here */ + return targetm.cilkplus.builtin_find_processor_code (proc_name); } -/* This function will return vectorlength, if specified, in string format -OR- - it will give the default vector length for the specified architecture. */ +/* Returns the vector length in string format based on the value in the field + called vectorlength of ELEM_FN_VALUES. If vectorlength is not given then + an appropriate value is computed based on the architecture information given + in proc_type field of ELEM_FN_VALUES. 
*/ char * find_vlength_code (elem_fn_info *elem_fn_values) { - char *vlength_code = (char *) xmalloc (sizeof (char) * 10); + int v_length = 0; + char *vlength_code = XNEWVEC (char, 10); if (!elem_fn_values) { sprintf (vlength_code, "4"); @@ -103,29 +79,19 @@ find_vlength_code (elem_fn_info *elem_fn_values) memset (vlength_code, 10, 0); if (elem_fn_values->no_vlengths != 0) - sprintf (vlength_code,"%d", elem_fn_values->vectorlength[0]); + sprintf (vlength_code,"%d", elem_fn_values->vectorlength); else { - if (!elem_fn_values->proc_type) - sprintf (vlength_code, "4"); - else if (!strcmp (elem_fn_values->proc_type, "pentium_4")) - sprintf (vlength_code, "4"); - else if (!strcmp (elem_fn_values->proc_type, "pentium_4_sse3")) - sprintf (vlength_code, "4"); - else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse3")) - sprintf (vlength_code, "4"); - else if (!strcmp (elem_fn_values->proc_type, "core_2_duo_sse_4_1")) - sprintf (vlength_code, "4"); - else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2")) - sprintf (vlength_code, "4"); - else - gcc_unreachable (); + v_length = targetm.cilkplus.builtin_find_vlength_for_proc + (elem_fn_values->proc_type); + if (v_length > 0) + sprintf(vlength_code, "%d",v_length); } return vlength_code; } -/* This function will concatinate the suffix to the existing function decl. */ +/* This function will concatenate the SUFFIX to the function name in DECL. */ tree rename_elem_fn (tree decl, const char *suffix) @@ -141,7 +107,7 @@ rename_elem_fn (tree decl, const char *suffix) new_decl = decl; length = strlen (fn_name) + strlen (suffix) + 1; - new_fn_name = (char *)xmalloc (length); + new_fn_name = XNEWVEC (char, length); strcpy (new_fn_name, fn_name); strcat (new_fn_name, suffix); @@ -150,25 +116,33 @@ rename_elem_fn (tree decl, const char *suffix) } -/* This function will find the appropriate mangling suffix for the vector - function. 
*/ +/* Returns the appropriate mangling suffix for the vector function based on the + information in ELEM_FN_VALUES field. The user can specify whether they want + a masked or unmasked function by setting the MASKED field to true or false, + respectively. */ char * find_suffix (elem_fn_info *elem_fn_values, bool masked) { - char *suffix = (char*)xmalloc (100); + char *suffix = XNEWVEC (char, 100); char tmp_str[10]; int arg_number, ii_pvar, ii_uvar, ii_lvar; - strcpy (suffix, "._simdsimd_"); - strcat (suffix, find_processor_code (elem_fn_values)); - strcat (suffix, find_vlength_code (elem_fn_values)); + if (!elem_fn_values) + return NULL; + + strcpy (suffix, "._simdsimd"); + strcat (suffix, + targetm.cilkplus.builtin_find_isa_code (elem_fn_values->proc_type, + elem_fn_values->isa_type)); + strcat (suffix, "_"); if (masked) strcat (suffix, "m"); else strcat (suffix, "n"); + strcat (suffix, find_vlength_code (elem_fn_values)); - for (arg_number = 1; arg_number <= elem_fn_values->total_no_args; + for (arg_number = 0; arg_number <= elem_fn_values->total_no_args; arg_number++) { for (ii_lvar = 0; ii_lvar < elem_fn_values->no_lvars; ii_lvar++) @@ -180,23 +154,22 @@ find_suffix (elem_fn_info *elem_fn_values, bool masked) strcat (suffix, tmp_str); } } - for (ii_uvar = 0; ii_uvar < elem_fn_values->no_uvars; ii_uvar++) - { - if (elem_fn_values->uniform_location[ii_uvar] == arg_number) - strcat (suffix, "_s1"); - } - for (ii_pvar = 0; ii_pvar < elem_fn_values->no_pvars; ii_pvar++) - { - if (elem_fn_values->private_location[ii_pvar] == arg_number) - strcat (suffix, "_v1"); - } + for (ii_uvar = 0; ii_uvar < elem_fn_values->no_uvars; ii_uvar++) + if (elem_fn_values->uniform_location[ii_uvar] == arg_number) + strcat (suffix, "_s1"); + for (ii_pvar = 0; ii_pvar < elem_fn_values->no_pvars; ii_pvar++) + if (elem_fn_values->private_location[ii_pvar] == arg_number) + strcat (suffix, "_v1"); } return suffix; } -/* This is an helper function for find_elem_fn_param_type. 
*/ - +/* This is a helper function for find_elem_fn_parm_type. It returns the + parm_type (whether TYPE_LINEAR or TYPE_UNIFORM) for the parameter number + (set by PARM_NO). If the return value is TYPE_LINEAR, then the *STEP_SIZE + is set with the appropriate step-size. */ + static enum elem_fn_parm_type find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size) { @@ -224,8 +197,9 @@ find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size) } -/* This function will return the type of a parameter in elemental function. - The choices are UNIFORM or LINEAR. */ +/* Returns the parm_type (whether TYPE_LINEAR or TYPE_UNIFORM) for the + parameter (indicated by OP). If the return value is TYPE_LINEAR, then + the *STEP_SIZE is set with the appropriate step-size. */ enum elem_fn_parm_type find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size) @@ -254,7 +228,9 @@ find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size) return return_type; } -/* This function will return the appropriate cloned named for the function. */ +/* This function will return the appropriately named clone, for the vectorlength + (set by VECTYPE_OUT), of the function whose scalar name is indicated + by OLD_FNDECL. 
*/ tree find_elem_fn_name (tree old_fndecl, tree vectype_out, @@ -263,6 +239,7 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out, elem_fn_info *elem_fn_values = NULL; tree new_fndecl = NULL_TREE, arg_type = NULL_TREE; char *suffix = NULL; + char warning_string[90]; elem_fn_values = extract_elem_fn_values (old_fndecl); @@ -270,11 +247,20 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out, { if (elem_fn_values->no_vlengths > 0) { - if (elem_fn_values->vectorlength[0] == - (int)TYPE_VECTOR_SUBPARTS (vectype_out)) + if (elem_fn_values->vectorlength == + (int) TYPE_VECTOR_SUBPARTS (vectype_out)) suffix = find_suffix (elem_fn_values, false); else - return NULL_TREE; + { + memset (warning_string, 90, 0); + sprintf (warning_string, "Elemental function's vectorlength (%d) " + "does not match the loop's vectorlength (%d)", + elem_fn_values->vectorlength, + (int) TYPE_VECTOR_SUBPARTS (vectype_out)); + warning_at (EXPR_LOCATION (old_fndecl), 0, + (const char *)warning_string); + return NULL_TREE; + } } else return NULL_TREE; @@ -304,14 +290,14 @@ find_elem_fn_name (tree old_fndecl, tree vectype_out, return new_fndecl; } -/* This function will extract the elem. function values from a vector and store - it in a data structure and return that. */ +/* Extracts all the elemental function's relevant information from the attribute + of DECL. The extracted information are returned in a structure of type + ELEM_FN_INFO. 
*/ elem_fn_info * extract_elem_fn_values (tree decl) { elem_fn_info *elem_fn_values = NULL; - int x = 0; /* this is a dummy variable */ int arg_number = 0, ii = 0; tree ii_tree, jj_tree, kk_tree; tree decl_attr = DECL_ATTRIBUTES (decl); @@ -319,7 +305,7 @@ extract_elem_fn_values (tree decl) if (!decl_attr) return NULL; - elem_fn_values = (elem_fn_info *)xmalloc (sizeof (elem_fn_info)); + elem_fn_values = XNEWVEC (elem_fn_info, 1); gcc_assert (elem_fn_values); decl_ret_type = TREE_TYPE (decl); @@ -327,6 +313,7 @@ extract_elem_fn_values (tree decl) decl_ret_type = TREE_TYPE (decl_ret_type); elem_fn_values->proc_type = NULL; + elem_fn_values->isa_type = NULL; elem_fn_values->mask = USE_BOTH; elem_fn_values->no_vlengths = 0; elem_fn_values->no_uvars = 0; @@ -337,13 +324,13 @@ extract_elem_fn_values (tree decl) switch (compare_tree_int (TYPE_SIZE (decl_ret_type), 64)) { case 0: /* This means they are equal. */ - elem_fn_values->vectorlength[0] = 2; + elem_fn_values->vectorlength = 2; break; case -1: /* This means it is less than 64. 
*/ - elem_fn_values->vectorlength[0] = 4; + elem_fn_values->vectorlength = 4; break; default: - elem_fn_values->vectorlength[0] = 1; + elem_fn_values->vectorlength = 1; } @@ -393,8 +380,7 @@ extract_elem_fn_values (tree decl) tree kk_value = TREE_VALUE (kk_tree); if (TREE_CODE (kk_value) == INTEGER_CST) { - x = elem_fn_values->no_vlengths; - elem_fn_values->vectorlength[x] = + elem_fn_values->vectorlength = (int) TREE_INT_CST_LOW (kk_value); elem_fn_values->no_vlengths++; } @@ -432,8 +418,7 @@ extract_elem_fn_values (tree decl) } } - for (ii_tree = DECL_ARGUMENTS (decl); ii_tree; - ii_tree = DECL_CHAIN (ii_tree)) + for (ii_tree = DECL_ARGUMENTS (decl); ii_tree; ii_tree = DECL_CHAIN (ii_tree)) { bool already_found = false; for (ii = 0; ii < elem_fn_values->no_uvars; ii++) @@ -453,9 +438,12 @@ extract_elem_fn_values (tree decl) elem_fn_values->linear_vars[ii])) { if (already_found) - fatal_error - ("variable %s defined in both uniform and linear clause", - elem_fn_values->linear_vars[ii]); + { + error_at (EXPR_LOCATION (ii_tree), + "variable %s defined in both uniform and linear " + "clause", elem_fn_values->linear_vars[ii]); + return NULL; + } else { already_found = true; @@ -463,7 +451,7 @@ extract_elem_fn_values (tree decl) } } } - if (!already_found) /* this means this variable is a private */ + if (!already_found) /* This means this variable is a private. */ elem_fn_values->private_location[elem_fn_values->no_pvars++] = arg_number; arg_number++; @@ -476,8 +464,8 @@ extract_elem_fn_values (tree decl) return elem_fn_values; } -/* This function will check to see if the node is part of an function that - needs to be converted to its vector equivalent. */ +/* Returns true if function FNDECL has an attribute with "vector" in it, thus + indicating it as an elemental function. 
*/ bool is_elem_fn (tree fndecl) @@ -492,6 +480,6 @@ is_elem_fn (tree fndecl) && !strcmp (IDENTIFIER_POINTER (ii_value), "vector")) return true; } - /* If we are here, then we didn't find a vector keyword, so it is false. */ + /* If we are here then we didn't find a vector keyword, so it is false. */ return false; } diff --git a/gcc/target.def b/gcc/target.def index 0f3164a41bd..b521c9478cd 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -941,6 +941,46 @@ hook_int_uint_mode_1) HOOK_VECTOR_END (sched) +/* Functions relating to Cilk Plus. */ +#undef HOOK_PREFIX +#define HOOK_PREFIX "TARGET_CILKPLUS_" +HOOK_VECTOR (TARGET_CILKPLUS, cilkplus) +/* The following member value is a function that is used by Cilk Plus routines + to map the processor attribute to the appropriate arch and tune attributes. + By default, a NULL string is returned. */ +DEFHOOK +(builtin_map_processor_to_attr, +"This hook is called by a Cilk Plus routine that will be used to map the\n\ +processor clause to the appropriate arch and tune attributes.", +tree, (char *, tree *), +default_builtin_map_processor_to_attr) + +DEFHOOK +(builtin_find_processor_code, + "This hook is called by a Cilk Plus routine that will request the\n\ + processor code for processor name given in the vector attribute for\n\ + the elemental functions.", +char *, (char *), +default_builtin_find_processor_code) + +DEFHOOK +(builtin_find_vlength_for_proc, + "This hook is called by a Cilk Plus routine that will request the\n\ + default vectorlength for the processor specified in the processor clause\n\ + in the elemental functions.", +unsigned int, (char *), +default_builtin_find_vlength_for_proc) + +DEFHOOK +(builtin_find_isa_code, + "This hook is called by a Cilk Plus routine that will request the \n\ + ISA type (based on the register-set where vector parameters are passed).\n\ + in elemental functions.", +char *, (char *, char *), +default_builtin_find_isa_code) + +HOOK_VECTOR_END (cilkplus) + /* Functions relating to 
vectorization. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_VECTORIZE_" diff --git a/gcc/targhooks.c b/gcc/targhooks.c index be008fdcd5d..a3ee063cae1 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1540,4 +1540,37 @@ default_member_type_forces_blk (const_tree, enum machine_mode) return false; } +/* Default version of default_builtin_processor_to_arch. */ + +tree +default_builtin_map_processor_to_attr (char *, tree *opp_attr) +{ + *opp_attr = NULL_TREE; + return NULL_TREE; +} + +/* Default version of default_builtin_find_processor_code. */ + +char * +default_builtin_find_processor_code (char *) +{ + return NULL; +} + +/* Default version of default_builtin_find_vlength_for_proc. */ + +int +default_builtin_find_vlength_for_proc (char *) +{ + return 0; +} + +/* Default version of default_builtin_find_isa_code. */ + +char * +default_builtin_find_isa_code (char *, char *) +{ + return NULL; +} + #include "gt-targhooks.h" diff --git a/gcc/testsuite/ChangeLog.cilkplus b/gcc/testsuite/ChangeLog.cilkplus index aacf16c907f..b38d6685681 100644 --- a/gcc/testsuite/ChangeLog.cilkplus +++ b/gcc/testsuite/ChangeLog.cilkplus @@ -1,3 +1,56 @@ +2012-12-10 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * gcc.dg/cilk-plus/elem_fn_tests/test1.c: Remove. + * gcc.dg/cilk-plus/elem_fn_tests/test2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test3.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test4.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test5.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test6.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test7.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test8.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test9.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test10.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test11.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test12.c: Likewise. 
+ * gcc.dg/cilk-plus/elem_fn_tests/test13.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test14.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test15.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test16.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test17.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/test18.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c: New test. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c: Likewise. 
+ * gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c: Likewise. + * gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c: Likewise. + 2012-12-06 Balaji V. Iyer <balaji.v.iyer@intel.com> * gcc.dg/cilk-plus/array_notation_tests/execute/builtin_fn_mutating.c: diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c new file mode 100644 index 00000000000..a33ea3bd6ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test1.c @@ -0,0 +1,30 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + +/* This test will insert the clone for the function ef_add inside the function + * main (the non-masked version). + */ + +#include <stdlib.h> +#define My_Type float +__attribute__ ((vector(vectorlength(4), processor (pentium_4), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y); + +My_Type vhx2[10]; +int +main (int argc, char **argv) +{ + My_Type vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + abort (); + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test10.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c index 597333f6e12..477369e8fe5 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test10.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test10.c @@ -1,7 +1,8 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 2 clones of the function below, * for the pentium4 with sse3 processor. 
*/ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), processor (pentium_4_sse3), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test11.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c index 6fcc0612789..197064beae0 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test11.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test11.c @@ -1,7 +1,8 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clones of the function below, just mask * for the pentium4 processor. */ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), mask, processor (pentium_4_sse3), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test12.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c index 1426f53adcb..1c78356498a 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test12.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test12.c @@ -1,7 +1,8 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clones of the function below, just no mask * for the pentium4 with sse3 processor. 
*/ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4_sse3), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test7.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c index adbe8738820..6720a8c6f36 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test7.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test7.c @@ -1,7 +1,8 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 1 clone of the function below, just one for mask * for the pentium4 processor. */ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), mask, processor (pentium_4), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test8.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c index ff26046e6f0..7f402208750 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test8.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test8.c @@ -1,7 +1,8 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 1 clone of the function below, just one for nomask * for the pentium4 processor. 
*/ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test9.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c index 8a78f30c8a2..0a5caf88d69 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test9.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/32bit/test9.c @@ -1,7 +1,8 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 2 clones of the function below, * for the pentium4 processor. */ -#ifdef __x86__ #define My_Type float __attribute__ ((vector(vectorlength(4), processor (pentium_4), linear(y), uniform (x)))) My_Type ef_add (My_Type x, My_Type y) @@ -9,4 +10,3 @@ My_Type ef_add (My_Type x, My_Type y) { return x + y; } -#endif diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c index 4e3a914fac3..8df372a000f 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow.c @@ -1,3 +1,4 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 2 clones of the function below, one for mask and one without the mask */ @@ -12,4 +13,3 @@ My_Type ef_add (My_Type x, My_Type y) else return (x-y); } - diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c index 2b6cb4c293f..30e710ff9ab 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/ctrl_flow2.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/ctrl_flow2.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 2 clones of the function below, one for mask and one without the mask */ diff --git 
a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c index 8b745a81251..0d9db6f7de3 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/switch_stmt.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/switch_stmt.c @@ -1,3 +1,4 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 2 clones of the function below, one for mask and one without the mask */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test1.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c index 8b3649af210..576821aa386 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test1.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test1.c @@ -1,7 +1,10 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will insert the clone for the function ef_add inside the function * main (the non-masked version). */ +#include <stdlib.h> #define My_Type float __attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y); @@ -12,9 +15,16 @@ main (int argc, char **argv) My_Type vhx[10]; int ii = 9; + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + for (ii = 0; ii < 10; ii++) vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + abort (); return 0; } diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test13.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c index 887a4fc2436..18c884421c4 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test13.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test13.c @@ -1,3 +1,4 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 2 clones of the function below, * for the pentium4 with sse3 processor. 
*/ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test14.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c index 58fb99ff7a1..f154d49fdb3 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test14.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test14.c @@ -1,3 +1,5 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 1 clones of the function below, just the mask * for the pentium4 with sse3 processor. */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test15.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c index f9206c1a37d..8a458fcf2e8 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test15.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test15.c @@ -1,3 +1,5 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 1 clones of the function below, just the mask * for the pentium4 with sse3 processor. */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test16.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c index 1e83ad06ef7..fcfc5c00eb1 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test16.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test16.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 2 clones of the function below, * for the pentium4 with sse3 processor. */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test17.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c index eeeef73e434..847c21981e4 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test17.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test17.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 2 clones of the function below, * for the core2_duo with sse 4.1 processor. 
*/ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test18.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c index 07fdfbd85a4..daa15468ae4 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test18.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test18.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clones of the function below, just mask * for the core2_duo with sse 4.1 processor. */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test2.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c index b135c7e9af8..a515c534963 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test2.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test2.c @@ -1,3 +1,5 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 2 clones of the function below, one for mask and one without the mask */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test3.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c index 3b4a6c3f827..d9e3fe1379b 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test3.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test3.c @@ -1,3 +1,5 @@ + +/* { dg-final { scan-assembler "simdsimd" } } */ /* This test will create 1 clone of the function below, just one for mask */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test4.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c index 41027e6c293..9aa36ccba49 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test4.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test4.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clone of the function below, just one for nomask */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test5.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c index 32788a59004..6688980c84a 100644 --- 
a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test5.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test5.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clone of the function below, just one for nomask and do a linear for y variable */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test6.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c index 329ec7d4164..4ec317fcacd 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/test6.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/64bit/test6.c @@ -1,3 +1,5 @@ +/* { dg-final { scan-assembler "simdsimd" } } */ + /* This test will create 1 clone of the function below, just one for mask */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp new file mode 100644 index 00000000000..67cb0aa1968 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/elem_fn.exp @@ -0,0 +1,53 @@ +# Copyright (C) 2012 +# Free Software Foundation, Inc. + +# Contributed by Balaji V. Iyer <balaji.v.iyer@intel.com> +# Intel Corporation. +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + + +# Exit immediately if this isn't a x86 target. +if { ![istarget i?86*-*-*] && ![istarget x86_64-*-*] } then { + return +} + +# Load support procs. 
+load_lib gcc-dg.exp +set tests_32bit [lsort [glob -nocomplain $srcdir/$subdir/32bit/*.\[cS\]]] +set tests_64bit [lsort [glob -nocomplain $srcdir/$subdir/64bit/*.\[cS\]]] +set test_errors [lsort [glob -nocomplain $srcdir/$subdir/errors/*.\[cS\]]] + +if { [istarget i?86*-*-*] } then { + +# Main loop. +dg-runtest $tests_32bit " -O3 -ftree-vectorize -fcilkplus" " " +# All done. +dg-finish +#dg-runtest $test_errors " -O3 -ftree-vectorize -fcilkplus" " " +# All done. +#dg-finish +} + +# For 64 bit architectures, we can run both 32 bit and 64 bit tests. +if { [istarget x86_64-*-*] } then { + +# Main loop. +dg-runtest $tests_32bit "-O3 -ftree-vectorize -fcilkplus -m32" " " +dg-runtest $tests_64bit "-O3 -ftree-vectorize -fcilkplus" " " +dg-runtest $test_errors "-O3 -ftree-vectorize -fcilkplus" " " + +# All done. +dg-finish +} diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c new file mode 100644 index 00000000000..538a61badca --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/duplicate_decls.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ + +__attribute__ ((vector(vectorlength(2), linear (x:2), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); + +int vhx2[10]; +int +main (int argc, char **argv) +{ + int vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + return 1; + return 0; +} +/* { dg-error "variable x defined in both uniform and linear clause" "" { target *-*-*} 0 } */ diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c new file mode 100644 index 00000000000..37b2ec34cc1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/linear_errors.c @@ 
-0,0 +1,33 @@ +/* { dg-do compile } */ + +__attribute__ ((vector(vectorlength(2), linear (x:2.5), processor (pentium_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */ + +__attribute__ ((vector(vectorlength(2), linear (x:), processor (pentium_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */ + +__attribute__ ((vector(vectorlength(2), linear (y, x:2.5), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */ + +__attribute__ ((vector(vectorlength(2), linear (y, x:), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */ + +__attribute__ ((vector(vectorlength(2), linear (x:2.5,y), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "step-size must be an integer constant expression" } */ + +__attribute__ ((vector(vectorlength(2), linear (x:, y), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected step-size before" } */ + +int vhx2[10]; +int +main (int argc, char **argv) +{ + int vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + return 1; + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c new file mode 100644 index 00000000000..f7187a5a84b --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/processor_errors.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ + +__attribute__ ((vector(vectorlength(2), processor (pentixhaum_4), uniform (y)))) int ef_add (int x, int y); /* { dg-error "processor pentixhaum_4 not" } */ +__attribute__ ((vector(vectorlength(2), processor (, uniform (y)))) int ef_add (int x, int y); /* { 
dg-error "expected processor-name before" } */ +__attribute__ ((vector(vectorlength(2), processor (), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */ +__attribute__ ((vector(vectorlength(2), processor ), uniform (y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before '\\)' token" } */ +__attribute__ ((vector(vectorlength(2), uniform (x), processor (pentixhaum_4)))) int ef_add (int x, int y); /* { dg-error "processor pentixhaum_4 not" } */ +__attribute__ ((vector(vectorlength(2), processor ( ))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */ +__attribute__ ((vector(vectorlength(2), processor () ))) int ef_add (int x, int y); /* { dg-error "expected '\\(' and CPUID before '\\)' token" } */ +__attribute__ ((vector(vectorlength(2), processor )))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before '\\)' token" } */ +int vhx2[10]; +int +main (int argc, char **argv) +{ + int vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + return 1; + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c new file mode 100644 index 00000000000..083c4ab681c --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/uniform_errors.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ + + +__attribute__ ((vector(vectorlength(2), processor (pentium_4), uniform (,y)))) int ef_add (int x, int y); /* { dg-error "expected variable-name before" } */ + +__attribute__ ((vector(vectorlength(2), linear (x), processor (pentium_4), uniform (y,)))) int ef_add (int x, int y); /* { dg-error "expected identifier after" } */ + +__attribute__ ((vector(vectorlength(2), uniform (,y), processor 
(pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected variable-name before" } */ + +__attribute__ ((vector(vectorlength(2), linear (x), uniform (y,), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "expected identifier after" } */ + +int vhx2[10]; +int +main (int argc, char **argv) +{ + int vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + return 1; + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c new file mode 100644 index 00000000000..9e0b3998d9a --- /dev/null +++ b/gcc/testsuite/gcc.dg/cilk-plus/elem_fn_tests/errors/vlength_errors.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ + + __attribute__ ((vector(vectorlength(2, processor (pentium_4), uniform (x,y))))) int ef_add (int x, int y); /* { dg-error "after vectorlength before" } */ + +__attribute__ ((vector(vectorlength 2, processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "expected '\\(' before numeric constant" } */ + +__attribute__ ((vector(vectorlength(5), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */ + +__attribute__ ((vector(vectorlength(6), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */ + +__attribute__ ((vector(vectorlength(7), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */ + +__attribute__ ((vector(uniform (x,y)))) int ef_add (int x, int y); /* { "This is OK!" } */ + +__attribute__ ((vector(vectorlength(2.0005)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be an integer." 
} */ + +__attribute__ ((vector(vectorlength(5), processor (pentium_4), uniform (x,y)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be a power of 2" } */ + +__attribute__ ((vector(vectorlength(1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */ + +__attribute__ ((vector(vectorlength(16), processor (pentium_4)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */ + +__attribute__ ((vector(vectorlength(32), processor (pentium_4), linear (x:1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */ + +__attribute__ ((vector(vectorlength(32), processor (pentium_4), uniform (y), linear (x:1)))) int ef_add (int x, int y); /* { dg-error "vectorlength must be between 2 and 8." } */ + +int vhx2[10]; +int +main (int argc, char **argv) +{ + int vhx[10]; + int ii = 9; + + if (argc == 1) + for (ii = 0; ii < 10; ii++) + vhx[ii] = argc; + + for (ii = 0; ii < 10; ii++) + vhx2[ii] = ef_add(vhx[ii], vhx[ii]); + + for (ii = 0; ii < 10; ii++) + if (vhx2[ii] != (argc + argc)) + return 1; + return 0; +} diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 4e8e9a0b951..84e4ab2861a 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -903,26 +903,6 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data) return NULL_TREE; } -/* Remap DECL if it is defined. This is used in Cilk++. */ -static bool -remap_var_for_cilk (tree *tp, copy_body_data *id) -{ - tree decl = *tp; - tree *n; - - if (!DECL_P (decl)) - return false; - - n = (tree *) pointer_map_contains (id->decl_map, decl); - - - if (n == NULL) - return false; - - *tp = (*n); - return true; -} - /* Called from copy_body_id via walk_tree. DATA is really a `copy_body_data *'. 
*/ @@ -987,8 +967,6 @@ copy_tree_body_r (tree *tp, int *walk_subtrees, void *data) *tp = new_decl; *walk_subtrees = 0; } - else if (id->remap_var_for_cilk && remap_var_for_cilk (tp, id)) - *walk_subtrees = 0; else if (TREE_CODE (*tp) == STATEMENT_LIST) copy_statement_list (tp); else if (TREE_CODE (*tp) == SAVE_EXPR @@ -4912,12 +4890,10 @@ copy_decl_no_change (tree decl, copy_body_data *id) { TREE_ADDRESSABLE (copy) = 0; LABEL_DECL_UID (copy) = -1; - if (TREE_CODE(decl) == LABEL_DECL) - { - PRAGMA_SIMD_INDEX (copy) = PRAGMA_SIMD_INDEX (decl); - } + if (flag_enable_cilk && TREE_CODE (decl) == LABEL_DECL) + PRAGMA_SIMD_INDEX (copy) = PRAGMA_SIMD_INDEX (decl); else - PRAGMA_SIMD_INDEX (copy) = 0; + PRAGMA_SIMD_INDEX (copy) = 0; } return copy_decl_for_dup_finish (id, decl, copy); @@ -4967,7 +4943,9 @@ copy_arguments_for_versioning (tree orig_parm, copy_body_data * id, return new_parm; } -/* Return a copy of the function's argument tree. */ +/* Return a copy of the function's argument tree, with the arguments vectorized + as per the VLENGTH value. Also add a mask variable if MASKED is set to true. */ + static tree elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id, bitmap args_to_skip, tree *vars, @@ -4995,7 +4973,7 @@ elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id, tree new_tree = remap_decl (arg, id); if (TREE_CODE (new_tree) != PARM_DECL) new_tree = id->copy_decl (arg, id); - TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree)); + TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree)); TREE_TYPE (new_tree) = build_vector_type (TREE_TYPE (new_tree), vlength); DECL_ARG_TYPE (new_tree) = build_vector_type (DECL_ARG_TYPE (new_tree), @@ -5420,12 +5398,13 @@ tree_function_versioning (tree old_decl, tree new_decl, return; } +/* This function initializes the cfun struct for elemental functions. 
*/ + static void initialize_elem_fn_cfun (tree new_fndecl, tree callee_fndecl) { struct function *src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); - gimple_register_cfg_hooks (); /* Get clean struct function. */ push_struct_function (new_fndecl); @@ -5463,9 +5442,11 @@ initialize_elem_fn_cfun (tree new_fndecl, tree callee_fndecl) pop_cfun (); } +/* Elemental function's version of tree_function_versioning. */ + void tree_elem_fn_versioning (tree old_decl, tree new_decl, - vec<ipa_replace_map_p, va_gc> * tree_map, + vec<ipa_replace_map_p, va_gc> *tree_map, bool update_clones, bitmap args_to_skip, bool skip_return, bitmap blocks_to_copy ATTRIBUTE_UNUSED, @@ -5476,9 +5457,9 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl, tree p; unsigned i; struct ipa_replace_map *replace_info; - vec<gimple> init_stmts; - init_stmts.create(10); + vec<gimple> init_stmts; + init_stmts.create (10); tree old_current_function_decl = current_function_decl; tree vars = NULL_TREE; @@ -5527,8 +5508,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl, id.transform_new_cfg = true; id.transform_return_to_modify = false; id.transform_lang_insert_block = NULL; - - current_function_decl = new_decl; initialize_elem_fn_cfun (new_decl, old_decl); push_cfun (DECL_STRUCT_FUNCTION (new_decl)); @@ -5564,17 +5543,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl, if (TREE_CODE (op) == VIEW_CONVERT_EXPR) op = TREE_OPERAND (op, 0); - -#if 0 - if (TREE_CODE (op) == ADDR_EXPR) - { - op = TREE_OPERAND (op, 0); - while (handled_component_p (op)) - op = TREE_OPERAND (op, 0); - if (TREE_CODE (op) == VAR_DECL) - add_referenced_var (op); - } -#endif gcc_assert (TREE_CODE (replace_info->old_tree) == PARM_DECL); init = setup_one_parameter (&id, replace_info->old_tree, @@ -5663,8 +5631,6 @@ tree_elem_fn_versioning (tree old_decl, tree new_decl, if (id.debug_map) pointer_map_destroy (id.debug_map); - /* gcc_assert (!id.debug_stmts); */ - /* VEC_free (gimple, heap, init_stmts); */ pop_cfun (); 
current_function_decl = old_current_function_decl; gcc_assert (!current_function_decl diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 0a0a470d9f4..ba10cb004cf 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1561,7 +1561,8 @@ elem_fn_vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def, { parm_type = find_elem_fn_parm_type (stmt, op, &step_size); if (parm_type == TYPE_UNIFORM || parm_type == TYPE_LINEAR) - dt = vect_external_def; + /* If Linear or Uniform type, just return the scalar version. */ + return op; } else parm_type = TYPE_NONE; @@ -2130,6 +2131,8 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, new_stmt = gimple_build_call_vec (fndecl, vargs); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_call_set_lhs (new_stmt, new_temp); + if (flag_enable_cilk && is_elem_fn (fndecl)) + gimple_call_set_fntype (new_stmt, TREE_TYPE (fndecl)); vect_finish_stmt_generation (stmt, new_stmt, gsi); SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } @@ -2165,8 +2168,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, { enum elem_fn_parm_type parm_type = find_elem_fn_parm_type (stmt, op, &step_size); - if (parm_type == TYPE_UNIFORM - || parm_type == TYPE_LINEAR) + if (parm_type == TYPE_UNIFORM || parm_type == TYPE_LINEAR) dt[i] = vect_constant_def; } vec_oprnd0 diff --git a/gcc/tree.c b/gcc/tree.c index 0c1e1e63c69..75a902d3a68 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -11663,6 +11663,7 @@ build_call_list (tree return_type, tree fn, tree arglist) /* Build a vector of type VECTYPE where all the elements are SCs. 
*/ + tree build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size) { @@ -11684,7 +11685,6 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size) { tree *v = XALLOCAVEC (tree, nunits); for (i = 0; i < nunits; ++i) - // v[i] = sc; v[i] = build2 (PLUS_EXPR, TREE_TYPE (sc), sc, fold_build2 (MULT_EXPR, TREE_TYPE (step_size), step_size, build_int_cst (integer_type_node, i))); @@ -11696,7 +11696,6 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size) vec_alloc (v, nunits); for (i = 0; i < nunits; ++i) { - // CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, sc); tree tmp = NULL_TREE; tmp = build2 (PLUS_EXPR, TREE_TYPE (sc), sc, fold_build2 (MULT_EXPR, TREE_TYPE (step_size), @@ -11709,6 +11708,4 @@ build_elem_fn_linear_vector_from_val (tree vectype, tree sc, tree step_size) } - - #include "gt-tree.h" diff --git a/gcc/tree.h b/gcc/tree.h index 0b99d05afdf..bd5a9a5d2ce 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3716,13 +3716,6 @@ struct GTY(()) tree_optimization_option { }; -enum elem_fn_parm_type -{ - TYPE_NONE = 0, - TYPE_UNIFORM = 1, - TYPE_LINEAR = 2 -}; - #define TREE_OPTIMIZATION(NODE) \ (&OPTIMIZATION_NODE_CHECK (NODE)->optimization.opts) @@ -4873,6 +4866,7 @@ extern tree build_vector_stat (tree, tree * MEM_STAT_DECL); #define build_vector(t,v) build_vector_stat (t, v MEM_STAT_INFO) extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *); extern tree build_vector_from_val (tree, tree); +extern tree elem_fn_linear_vector_from_val (tree, tree, tree); extern tree build_constructor (tree, vec<constructor_elt, va_gc> *); extern tree build_constructor_single (tree, tree, tree); extern tree build_constructor_from_list (tree, tree); @@ -6620,7 +6614,6 @@ extern HOST_WIDE_INT find_linear_step_size (int pragma_simd_index, tree var); tree build_call_list (tree return_type, tree fn, tree arglist); bool is_elem_fn (tree); -enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree*); void 
elem_fn_create_fn (tree) __attribute__((weak)); /* Functional interface to the builtin functions. */ |