diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-09-10 16:48:02 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-11-04 14:50:40 +0000 |
commit | 8197ab94b47c814632d758dd36a121ad4114ff70 (patch) | |
tree | d6552d642af27bab058d947e171edd257b3aee5b | |
parent | fbe58ba97aff3270877d7fd5600c17687b85964c (diff) |
aarch64: Move Neon vector-tuple type declaration into the compiler
Declare the Neon vector-tuple types inside the compiler instead of in
the arm_neon.h header. This is a necessary first step before adding
corresponding machine modes to the AArch64 backend.
The vector-tuple types are implemented using a #pragma. This means
initialization of builtin functions that have vector-tuple types as
arguments or return values has to be delayed until the #pragma is
handled.
gcc/ChangeLog:
2021-09-10 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_init_simd_builtins):
Factor out main loop to...
(aarch64_init_simd_builtin_functions): This new function.
(register_tuple_type): Define.
(aarch64_scalar_builtin_type_p): Define.
(handle_arm_neon_h): Define.
* config/aarch64/aarch64-c.c (aarch64_pragma_aarch64): Handle
pragma for arm_neon.h.
* config/aarch64/aarch64-protos.h (aarch64_advsimd_struct_mode_p):
Declare.
(handle_arm_neon_h): Likewise.
* config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p):
Remove static modifier.
* config/aarch64/arm_neon.h (target): Remove Neon vector
structure type definitions.
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.c | 125 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-c.c | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 452 |
5 files changed, 107 insertions, 476 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index a815e4cfbcc..eff4cdc6a7b 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1046,32 +1046,22 @@ aarch64_init_fcmla_laneq_builtins (void) } void -aarch64_init_simd_builtins (void) +aarch64_init_simd_builtin_functions (bool called_from_pragma) { unsigned int i, fcode = AARCH64_SIMD_PATTERN_START; - if (aarch64_simd_builtins_initialized_p) - return; - - aarch64_simd_builtins_initialized_p = true; - - aarch64_init_simd_builtin_types (); - - /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics. - Therefore we need to preserve the old __builtin scalar types. It can be - removed once all the intrinsics become strongly typed using the qualifier - system. */ - aarch64_init_simd_builtin_scalar_types (); - - tree lane_check_fpr = build_function_type_list (void_type_node, - size_type_node, - size_type_node, - intSI_type_node, - NULL); - aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] - = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", - lane_check_fpr, - AARCH64_SIMD_BUILTIN_LANE_CHECK); + if (!called_from_pragma) + { + tree lane_check_fpr = build_function_type_list (void_type_node, + size_type_node, + size_type_node, + intSI_type_node, + NULL); + aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] + = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", + lane_check_fpr, + AARCH64_SIMD_BUILTIN_LANE_CHECK); + } for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) { @@ -1101,6 +1091,18 @@ aarch64_init_simd_builtins (void) tree return_type = void_type_node, args = void_list_node; tree eltype; + int struct_mode_args = 0; + for (int j = op_num; j >= 0; j--) + { + machine_mode op_mode = insn_data[d->code].operand[j].mode; + if (aarch64_advsimd_struct_mode_p (op_mode)) + struct_mode_args++; + } + + if ((called_from_pragma && struct_mode_args == 0) + || (!called_from_pragma && struct_mode_args > 0)) + continue; + /* Build a function type directly from the insn_data for this builtin. The build_function_type () function takes care of removing duplicates for us. */ @@ -1174,9 +1176,82 @@ aarch64_init_simd_builtins (void) fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs); aarch64_builtin_decls[fcode] = fndecl; } +} + +/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type + indexed by TYPE_INDEX. */ +static void +register_tuple_type (unsigned int num_vectors, unsigned int type_index) +{ + aarch64_simd_type_info *type = &aarch64_simd_types[type_index]; + + /* Synthesize the name of the user-visible vector tuple type. */ + const char *vector_type_name = type->name; + char tuple_type_name[sizeof ("bfloat16x4x2_t")]; + snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t", + (int) strlen (vector_type_name) - 4, vector_type_name + 2, + num_vectors); + tuple_type_name[0] = TOLOWER (tuple_type_name[0]); + + tree vector_type = type->itype; + tree array_type = build_array_type_nelts (vector_type, num_vectors); + unsigned int alignment + = (known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64); + gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type) + && TYPE_ALIGN (array_type) == alignment); + + tree field = build_decl (input_location, FIELD_DECL, + get_identifier ("val"), array_type); + + tree t = lang_hooks.types.simulate_record_decl (input_location, + tuple_type_name, + make_array_slice (&field, + 1)); + gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t) + && TYPE_ALIGN (t) == alignment); +} + +static bool +aarch64_scalar_builtin_type_p (aarch64_simd_type t) +{ + return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t); +} + +/* Implement #pragma GCC aarch64 "arm_neon.h". */ +void +handle_arm_neon_h (void) +{ + /* Register the AdvSIMD vector tuple types. */ + for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++) + for (unsigned int count = 2; count <= 4; ++count) + if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type)) + register_tuple_type (count, i); + + aarch64_init_simd_builtin_functions (true); +} + +void +aarch64_init_simd_builtins (void) +{ + if (aarch64_simd_builtins_initialized_p) + return; + + aarch64_simd_builtins_initialized_p = true; + + aarch64_init_simd_builtin_types (); + + /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics. + Therefore we need to preserve the old __builtin scalar types. It can be + removed once all the intrinsics become strongly typed using the qualifier + system. */ + aarch64_init_simd_builtin_scalar_types (); + + aarch64_init_simd_builtin_functions (false); + if (in_lto_p) + handle_arm_neon_h (); - /* Initialize the remaining fcmla_laneq intrinsics. */ - aarch64_init_fcmla_laneq_builtins (); + /* Initialize the remaining fcmla_laneq intrinsics. */ + aarch64_init_fcmla_laneq_builtins (); } static void diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index f9ddffa0078..d6653e474de 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -296,6 +296,8 @@ aarch64_pragma_aarch64 (cpp_reader *) const char *name = TREE_STRING_POINTER (x); if (strcmp (name, "arm_sve.h") == 0) aarch64_sve::handle_arm_sve_h (); + else if (strcmp (name, "arm_neon.h") == 0) + handle_arm_neon_h (); else error ("unknown %<#pragma GCC aarch64%> option %qs", name); } diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 768e8fae136..f7887d06139 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -743,6 +743,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); int aarch64_branch_cost (bool, bool); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); +bool aarch64_advsimd_struct_mode_p (machine_mode mode); opt_machine_mode aarch64_vq_mode (scalar_mode); opt_machine_mode aarch64_full_sve_mode (scalar_mode); bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); @@ -968,6 +969,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); tree aarch64_general_builtin_decl (unsigned, bool); tree aarch64_general_builtin_rsqrt (unsigned int); tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); +void handle_arm_neon_h (void); namespace aarch64_sve { void init_builtins (); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index cc65b58a48f..1780751d849 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2864,7 +2864,7 @@ aarch64_estimated_sve_vq () } /* Return true if MODE is any of the Advanced SIMD structure modes. */ -static bool +bool aarch64_advsimd_struct_mode_p (machine_mode mode) { return (TARGET_SIMD diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 99fe293ef0e..ed0dfa952b9 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -30,6 +30,8 @@ #pragma GCC push_options #pragma GCC target ("+nothing+simd") +#pragma GCC aarch64 "arm_neon.h" + #include <stdint.h> #define __AARCH64_UINT64_C(__C) ((uint64_t) __C) @@ -76,456 +78,6 @@ typedef double float64_t; typedef __Bfloat16x4_t bfloat16x4_t; typedef __Bfloat16x8_t bfloat16x8_t; -typedef struct bfloat16x4x2_t -{ - bfloat16x4_t val[2]; -} bfloat16x4x2_t; - -typedef struct bfloat16x8x2_t -{ - bfloat16x8_t val[2]; -} bfloat16x8x2_t; - -typedef struct bfloat16x4x3_t -{ - bfloat16x4_t val[3]; -} bfloat16x4x3_t; - -typedef struct bfloat16x8x3_t -{ - bfloat16x8_t val[3]; -} bfloat16x8x3_t; - -typedef struct bfloat16x4x4_t -{ - bfloat16x4_t val[4]; -} bfloat16x4x4_t; - -typedef struct bfloat16x8x4_t -{ - bfloat16x8_t val[4]; -} bfloat16x8x4_t; - -typedef struct int8x8x2_t -{ - int8x8_t val[2]; -} int8x8x2_t; - -typedef struct int8x16x2_t -{ - int8x16_t val[2]; -} int8x16x2_t; - -typedef struct int16x4x2_t -{ - int16x4_t val[2]; -} int16x4x2_t; - -typedef struct int16x8x2_t -{ - int16x8_t val[2]; -} int16x8x2_t; - -typedef struct int32x2x2_t -{ - int32x2_t val[2]; -} int32x2x2_t; - -typedef struct int32x4x2_t -{ - int32x4_t val[2]; -} int32x4x2_t; - -typedef struct int64x1x2_t -{ - int64x1_t val[2]; -} int64x1x2_t; - -typedef struct int64x2x2_t -{ - int64x2_t val[2]; -} int64x2x2_t; - -typedef struct uint8x8x2_t -{ - uint8x8_t val[2]; -} uint8x8x2_t; - -typedef struct uint8x16x2_t -{ - uint8x16_t val[2]; -} uint8x16x2_t; - -typedef struct uint16x4x2_t -{ - uint16x4_t val[2]; -} uint16x4x2_t; - -typedef struct uint16x8x2_t -{ - uint16x8_t val[2]; -} uint16x8x2_t; - -typedef struct uint32x2x2_t -{ - uint32x2_t val[2]; -} uint32x2x2_t; - -typedef struct uint32x4x2_t -{ - uint32x4_t val[2]; -} uint32x4x2_t; - -typedef struct uint64x1x2_t -{ - uint64x1_t val[2]; -} uint64x1x2_t; - -typedef struct uint64x2x2_t -{ - uint64x2_t val[2]; -} uint64x2x2_t; - -typedef struct float16x4x2_t -{ - float16x4_t val[2]; -} float16x4x2_t; - -typedef struct float16x8x2_t -{ - float16x8_t val[2]; -} float16x8x2_t; - -typedef struct float32x2x2_t -{ - float32x2_t val[2]; -} float32x2x2_t; - -typedef struct float32x4x2_t -{ - float32x4_t val[2]; -} float32x4x2_t; - -typedef struct float64x2x2_t -{ - float64x2_t val[2]; -} float64x2x2_t; - -typedef struct float64x1x2_t -{ - float64x1_t val[2]; -} float64x1x2_t; - -typedef struct poly8x8x2_t -{ - poly8x8_t val[2]; -} poly8x8x2_t; - -typedef struct poly8x16x2_t -{ - poly8x16_t val[2]; -} poly8x16x2_t; - -typedef struct poly16x4x2_t -{ - poly16x4_t val[2]; -} poly16x4x2_t; - -typedef struct poly16x8x2_t -{ - poly16x8_t val[2]; -} poly16x8x2_t; - -typedef struct poly64x1x2_t -{ - poly64x1_t val[2]; -} poly64x1x2_t; - -typedef struct poly64x1x3_t -{ - poly64x1_t val[3]; -} poly64x1x3_t; - -typedef struct poly64x1x4_t -{ - poly64x1_t val[4]; -} poly64x1x4_t; - -typedef struct poly64x2x2_t -{ - poly64x2_t val[2]; -} poly64x2x2_t; - -typedef struct poly64x2x3_t -{ - poly64x2_t val[3]; -} poly64x2x3_t; - -typedef struct poly64x2x4_t -{ - poly64x2_t val[4]; -} poly64x2x4_t; - -typedef struct int8x8x3_t -{ - int8x8_t val[3]; -} int8x8x3_t; - -typedef struct int8x16x3_t -{ - int8x16_t val[3]; -} int8x16x3_t; - -typedef struct int16x4x3_t -{ - int16x4_t val[3]; -} int16x4x3_t; - -typedef struct int16x8x3_t -{ - int16x8_t val[3]; -} int16x8x3_t; - -typedef struct int32x2x3_t -{ - int32x2_t val[3]; -} int32x2x3_t; - -typedef struct int32x4x3_t -{ - int32x4_t val[3]; -} int32x4x3_t; - -typedef struct int64x1x3_t -{ - int64x1_t val[3]; -} int64x1x3_t; - -typedef struct int64x2x3_t -{ - int64x2_t val[3]; -} int64x2x3_t; - -typedef struct uint8x8x3_t -{ - uint8x8_t val[3]; -} uint8x8x3_t; - -typedef struct uint8x16x3_t -{ - uint8x16_t val[3]; -} uint8x16x3_t; - -typedef struct uint16x4x3_t -{ - uint16x4_t val[3]; -} uint16x4x3_t; - -typedef struct uint16x8x3_t -{ - uint16x8_t val[3]; -} uint16x8x3_t; - -typedef struct uint32x2x3_t -{ - uint32x2_t val[3]; -} uint32x2x3_t; - -typedef struct uint32x4x3_t -{ - uint32x4_t val[3]; -} uint32x4x3_t; - -typedef struct uint64x1x3_t -{ - uint64x1_t val[3]; -} uint64x1x3_t; - -typedef struct uint64x2x3_t -{ - uint64x2_t val[3]; -} uint64x2x3_t; - -typedef struct float16x4x3_t -{ - float16x4_t val[3]; -} float16x4x3_t; - -typedef struct float16x8x3_t -{ - float16x8_t val[3]; -} float16x8x3_t; - -typedef struct float32x2x3_t -{ - float32x2_t val[3]; -} float32x2x3_t; - -typedef struct float32x4x3_t -{ - float32x4_t val[3]; -} float32x4x3_t; - -typedef struct float64x2x3_t -{ - float64x2_t val[3]; -} float64x2x3_t; - -typedef struct float64x1x3_t -{ - float64x1_t val[3]; -} float64x1x3_t; - -typedef struct poly8x8x3_t -{ - poly8x8_t val[3]; -} poly8x8x3_t; - -typedef struct poly8x16x3_t -{ - poly8x16_t val[3]; -} poly8x16x3_t; - -typedef struct poly16x4x3_t -{ - poly16x4_t val[3]; -} poly16x4x3_t; - -typedef struct poly16x8x3_t -{ - poly16x8_t val[3]; -} poly16x8x3_t; - -typedef struct int8x8x4_t -{ - int8x8_t val[4]; -} int8x8x4_t; - -typedef struct int8x16x4_t -{ - int8x16_t val[4]; -} int8x16x4_t; - -typedef struct int16x4x4_t -{ - int16x4_t val[4]; -} int16x4x4_t; - -typedef struct int16x8x4_t -{ - int16x8_t val[4]; -} int16x8x4_t; - -typedef struct int32x2x4_t -{ - int32x2_t val[4]; -} int32x2x4_t; - -typedef struct int32x4x4_t -{ - int32x4_t val[4]; -} int32x4x4_t; - -typedef struct int64x1x4_t -{ - int64x1_t val[4]; -} int64x1x4_t; - -typedef struct int64x2x4_t -{ - int64x2_t val[4]; -} int64x2x4_t; - -typedef struct uint8x8x4_t -{ - uint8x8_t val[4]; -} uint8x8x4_t; - -typedef struct uint8x16x4_t -{ - uint8x16_t val[4]; -} uint8x16x4_t; - -typedef struct uint16x4x4_t -{ - uint16x4_t val[4]; -} uint16x4x4_t; - -typedef struct uint16x8x4_t -{ - uint16x8_t val[4]; -} uint16x8x4_t; - -typedef struct uint32x2x4_t -{ - uint32x2_t val[4]; -} uint32x2x4_t; - -typedef struct uint32x4x4_t -{ - uint32x4_t val[4]; -} uint32x4x4_t; - -typedef struct uint64x1x4_t -{ - uint64x1_t val[4]; -} uint64x1x4_t; - -typedef struct uint64x2x4_t -{ - uint64x2_t val[4]; -} uint64x2x4_t; - -typedef struct float16x4x4_t -{ - float16x4_t val[4]; -} float16x4x4_t; - -typedef struct float16x8x4_t -{ - float16x8_t val[4]; -} float16x8x4_t; - -typedef struct float32x2x4_t -{ - float32x2_t val[4]; -} float32x2x4_t; - -typedef struct float32x4x4_t -{ - float32x4_t val[4]; -} float32x4x4_t; - -typedef struct float64x2x4_t -{ - float64x2_t val[4]; -} float64x2x4_t; - -typedef struct float64x1x4_t -{ - float64x1_t val[4]; -} float64x1x4_t; - -typedef struct poly8x8x4_t -{ - poly8x8_t val[4]; -} poly8x8x4_t; - -typedef struct poly8x16x4_t -{ - poly8x16_t val[4]; -} poly8x16x4_t; - -typedef struct poly16x4x4_t -{ - poly16x4_t val[4]; -} poly16x4x4_t; - -typedef struct poly16x8x4_t -{ - poly16x8_t val[4]; -} poly16x8x4_t; - /* __aarch64_vdup_lane internal macros. */ #define __aarch64_vdup_lane_any(__size, __q, __a, __b) \ vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b)) |