aarch64: Move Neon vector-tuple type declaration into the compiler

Declare the Neon vector-tuple types inside the compiler instead of in the arm_neon.h header. This is a necessary first step before adding corresponding machine modes to the AArch64 backend. The vector-tuple types are implemented using a #pragma. This means initialization of builtin functions that have vector-tuple types as arguments or return values has to be delayed until the #pragma is handled. gcc/ChangeLog: 2021-09-10 Jonathan Wright <jonathan.wright@arm.com> * config/aarch64/aarch64-builtins.c (aarch64_init_simd_builtins): Factor out main loop to... (aarch64_init_simd_builtin_functions): This new function. (register_tuple_type): Define. (aarch64_scalar_builtin_type_p): Define. (handle_arm_neon_h): Define. * config/aarch64/aarch64-c.c (aarch64_pragma_aarch64): Handle pragma for arm_neon.h. * config/aarch64/aarch64-protos.h (aarch64_advsimd_struct_mode_p): Declare. (handle_arm_neon_h): Likewise. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Remove static modifier. * config/aarch64/arm_neon.h (target): Remove Neon vector structure type definitions.
author: Jonathan Wright <jonathan.wright@arm.com> 2021-09-10 16:48:02 +0100
committer: Jonathan Wright <jonathan.wright@arm.com> 2021-11-04 14:50:40 +0000
commit: 8197ab94b47c814632d758dd36a121ad4114ff70 (patch)
tree: d6552d642af27bab058d947e171edd257b3aee5b
parent: fbe58ba97aff3270877d7fd5600c17687b85964c (diff)
5 files changed, 107 insertions, 476 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index a815e4cfbcc..eff4cdc6a7b 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1046,32 +1046,22 @@ aarch64_init_fcmla_laneq_builtins (void)
 }
 
 void
-aarch64_init_simd_builtins (void)
+aarch64_init_simd_builtin_functions (bool called_from_pragma)
 {
   unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
 
-  if (aarch64_simd_builtins_initialized_p)
-    return;
-
-  aarch64_simd_builtins_initialized_p = true;
-
-  aarch64_init_simd_builtin_types ();
-
-  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
-     Therefore we need to preserve the old __builtin scalar types.  It can be
-     removed once all the intrinsics become strongly typed using the qualifier
-     system.  */
-  aarch64_init_simd_builtin_scalar_types ();
- 
-  tree lane_check_fpr = build_function_type_list (void_type_node,
-						  size_type_node,
-						  size_type_node,
-						  intSI_type_node,
-						  NULL);
-  aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
-    = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
-				   lane_check_fpr,
-				   AARCH64_SIMD_BUILTIN_LANE_CHECK);
+  if (!called_from_pragma)
+    {
+      tree lane_check_fpr = build_function_type_list (void_type_node,
+						      size_type_node,
+						      size_type_node,
+						      intSI_type_node,
+						      NULL);
+      aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
+	= aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
+				       lane_check_fpr,
+				       AARCH64_SIMD_BUILTIN_LANE_CHECK);
+    }
 
   for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
     {
@@ -1101,6 +1091,18 @@ aarch64_init_simd_builtins (void)
       tree return_type = void_type_node, args = void_list_node;
       tree eltype;
 
+      int struct_mode_args = 0;
+      for (int j = op_num; j >= 0; j--)
+	{
+	  machine_mode op_mode = insn_data[d->code].operand[j].mode;
+	  if (aarch64_advsimd_struct_mode_p (op_mode))
+	    struct_mode_args++;
+	}
+
+      if ((called_from_pragma && struct_mode_args == 0)
+	  || (!called_from_pragma && struct_mode_args > 0))
+	continue;
+
       /* Build a function type directly from the insn_data for this
 	 builtin.  The build_function_type () function takes care of
 	 removing duplicates for us.  */
@@ -1174,9 +1176,82 @@ aarch64_init_simd_builtins (void)
       fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
       aarch64_builtin_decls[fcode] = fndecl;
     }
+}
+
+/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
+   indexed by TYPE_INDEX.  */
+static void
+register_tuple_type (unsigned int num_vectors, unsigned int type_index)
+{
+  aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
+
+  /* Synthesize the name of the user-visible vector tuple type.  */
+  const char *vector_type_name = type->name;
+  char tuple_type_name[sizeof ("bfloat16x4x2_t")];
+  snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
+	    (int) strlen (vector_type_name) - 4, vector_type_name + 2,
+	    num_vectors);
+  tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
+
+  tree vector_type = type->itype;
+  tree array_type = build_array_type_nelts (vector_type, num_vectors);
+  unsigned int alignment
+	= (known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64);
+  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
+	      && TYPE_ALIGN (array_type) == alignment);
+
+  tree field = build_decl (input_location, FIELD_DECL,
+			   get_identifier ("val"), array_type);
+
+  tree t = lang_hooks.types.simulate_record_decl (input_location,
+						  tuple_type_name,
+						  make_array_slice (&field,
+								    1));
+  gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
+	      && TYPE_ALIGN (t) == alignment);
+}
+
+static bool
+aarch64_scalar_builtin_type_p (aarch64_simd_type t)
+{
+  return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
+}
+
+/* Implement #pragma GCC aarch64 "arm_neon.h".  */
+void
+handle_arm_neon_h (void)
+{
+  /* Register the AdvSIMD vector tuple types.  */
+  for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
+    for (unsigned int count = 2; count <= 4; ++count)
+      if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
+	register_tuple_type (count, i);
+
+  aarch64_init_simd_builtin_functions (true);
+}
+
+void
+aarch64_init_simd_builtins (void)
+{
+  if (aarch64_simd_builtins_initialized_p)
+    return;
+
+  aarch64_simd_builtins_initialized_p = true;
+
+  aarch64_init_simd_builtin_types ();
+
+  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
+     Therefore we need to preserve the old __builtin scalar types.  It can be
+     removed once all the intrinsics become strongly typed using the qualifier
+     system.  */
+  aarch64_init_simd_builtin_scalar_types ();
+
+  aarch64_init_simd_builtin_functions (false);
+  if (in_lto_p)
+    handle_arm_neon_h ();
 
-   /* Initialize the remaining fcmla_laneq intrinsics.  */
-   aarch64_init_fcmla_laneq_builtins ();
+  /* Initialize the remaining fcmla_laneq intrinsics.  */
+  aarch64_init_fcmla_laneq_builtins ();
 }
 
 static void
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index f9ddffa0078..d6653e474de 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -296,6 +296,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
   const char *name = TREE_STRING_POINTER (x);
   if (strcmp (name, "arm_sve.h") == 0)
     aarch64_sve::handle_arm_sve_h ();
+  else if (strcmp (name, "arm_neon.h") == 0)
+    handle_arm_neon_h ();
   else
     error ("unknown %<#pragma GCC aarch64%> option %qs", name);
 }
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 768e8fae136..f7887d06139 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -743,6 +743,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
 bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
 int aarch64_branch_cost (bool, bool);
 enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
+bool aarch64_advsimd_struct_mode_p (machine_mode mode);
 opt_machine_mode aarch64_vq_mode (scalar_mode);
 opt_machine_mode aarch64_full_sve_mode (scalar_mode);
 bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
@@ -968,6 +969,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
 tree aarch64_general_builtin_decl (unsigned, bool);
 tree aarch64_general_builtin_rsqrt (unsigned int);
 tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
+void handle_arm_neon_h (void);
 
 namespace aarch64_sve {
   void init_builtins ();
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cc65b58a48f..1780751d849 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2864,7 +2864,7 @@ aarch64_estimated_sve_vq ()
 }
 
 /* Return true if MODE is any of the Advanced SIMD structure modes.  */
-static bool
+bool
 aarch64_advsimd_struct_mode_p (machine_mode mode)
 {
   return (TARGET_SIMD
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 99fe293ef0e..ed0dfa952b9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -30,6 +30,8 @@
 #pragma GCC push_options
 #pragma GCC target ("+nothing+simd")
 
+#pragma GCC aarch64 "arm_neon.h"
+
 #include <stdint.h>
 
 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
@@ -76,456 +78,6 @@ typedef double float64_t;
 typedef __Bfloat16x4_t bfloat16x4_t;
 typedef __Bfloat16x8_t bfloat16x8_t;
 
-typedef struct bfloat16x4x2_t
-{
-  bfloat16x4_t val[2];
-} bfloat16x4x2_t;
-
-typedef struct bfloat16x8x2_t
-{
-  bfloat16x8_t val[2];
-} bfloat16x8x2_t;
-
-typedef struct bfloat16x4x3_t
-{
-  bfloat16x4_t val[3];
-} bfloat16x4x3_t;
-
-typedef struct bfloat16x8x3_t
-{
-  bfloat16x8_t val[3];
-} bfloat16x8x3_t;
-
-typedef struct bfloat16x4x4_t
-{
-  bfloat16x4_t val[4];
-} bfloat16x4x4_t;
-
-typedef struct bfloat16x8x4_t
-{
-  bfloat16x8_t val[4];
-} bfloat16x8x4_t;
-
-typedef struct int8x8x2_t
-{
-  int8x8_t val[2];
-} int8x8x2_t;
-
-typedef struct int8x16x2_t
-{
-  int8x16_t val[2];
-} int8x16x2_t;
-
-typedef struct int16x4x2_t
-{
-  int16x4_t val[2];
-} int16x4x2_t;
-
-typedef struct int16x8x2_t
-{
-  int16x8_t val[2];
-} int16x8x2_t;
-
-typedef struct int32x2x2_t
-{
-  int32x2_t val[2];
-} int32x2x2_t;
-
-typedef struct int32x4x2_t
-{
-  int32x4_t val[2];
-} int32x4x2_t;
-
-typedef struct int64x1x2_t
-{
-  int64x1_t val[2];
-} int64x1x2_t;
-
-typedef struct int64x2x2_t
-{
-  int64x2_t val[2];
-} int64x2x2_t;
-
-typedef struct uint8x8x2_t
-{
-  uint8x8_t val[2];
-} uint8x8x2_t;
-
-typedef struct uint8x16x2_t
-{
-  uint8x16_t val[2];
-} uint8x16x2_t;
-
-typedef struct uint16x4x2_t
-{
-  uint16x4_t val[2];
-} uint16x4x2_t;
-
-typedef struct uint16x8x2_t
-{
-  uint16x8_t val[2];
-} uint16x8x2_t;
-
-typedef struct uint32x2x2_t
-{
-  uint32x2_t val[2];
-} uint32x2x2_t;
-
-typedef struct uint32x4x2_t
-{
-  uint32x4_t val[2];
-} uint32x4x2_t;
-
-typedef struct uint64x1x2_t
-{
-  uint64x1_t val[2];
-} uint64x1x2_t;
-
-typedef struct uint64x2x2_t
-{
-  uint64x2_t val[2];
-} uint64x2x2_t;
-
-typedef struct float16x4x2_t
-{
-  float16x4_t val[2];
-} float16x4x2_t;
-
-typedef struct float16x8x2_t
-{
-  float16x8_t val[2];
-} float16x8x2_t;
-
-typedef struct float32x2x2_t
-{
-  float32x2_t val[2];
-} float32x2x2_t;
-
-typedef struct float32x4x2_t
-{
-  float32x4_t val[2];
-} float32x4x2_t;
-
-typedef struct float64x2x2_t
-{
-  float64x2_t val[2];
-} float64x2x2_t;
-
-typedef struct float64x1x2_t
-{
-  float64x1_t val[2];
-} float64x1x2_t;
-
-typedef struct poly8x8x2_t
-{
-  poly8x8_t val[2];
-} poly8x8x2_t;
-
-typedef struct poly8x16x2_t
-{
-  poly8x16_t val[2];
-} poly8x16x2_t;
-
-typedef struct poly16x4x2_t
-{
-  poly16x4_t val[2];
-} poly16x4x2_t;
-
-typedef struct poly16x8x2_t
-{
-  poly16x8_t val[2];
-} poly16x8x2_t;
-
-typedef struct poly64x1x2_t
-{
-  poly64x1_t val[2];
-} poly64x1x2_t;
-
-typedef struct poly64x1x3_t
-{
-  poly64x1_t val[3];
-} poly64x1x3_t;
-
-typedef struct poly64x1x4_t
-{
-  poly64x1_t val[4];
-} poly64x1x4_t;
-
-typedef struct poly64x2x2_t
-{
-  poly64x2_t val[2];
-} poly64x2x2_t;
-
-typedef struct poly64x2x3_t
-{
-  poly64x2_t val[3];
-} poly64x2x3_t;
-
-typedef struct poly64x2x4_t
-{
-  poly64x2_t val[4];
-} poly64x2x4_t;
-
-typedef struct int8x8x3_t
-{
-  int8x8_t val[3];
-} int8x8x3_t;
-
-typedef struct int8x16x3_t
-{
-  int8x16_t val[3];
-} int8x16x3_t;
-
-typedef struct int16x4x3_t
-{
-  int16x4_t val[3];
-} int16x4x3_t;
-
-typedef struct int16x8x3_t
-{
-  int16x8_t val[3];
-} int16x8x3_t;
-
-typedef struct int32x2x3_t
-{
-  int32x2_t val[3];
-} int32x2x3_t;
-
-typedef struct int32x4x3_t
-{
-  int32x4_t val[3];
-} int32x4x3_t;
-
-typedef struct int64x1x3_t
-{
-  int64x1_t val[3];
-} int64x1x3_t;
-
-typedef struct int64x2x3_t
-{
-  int64x2_t val[3];
-} int64x2x3_t;
-
-typedef struct uint8x8x3_t
-{
-  uint8x8_t val[3];
-} uint8x8x3_t;
-
-typedef struct uint8x16x3_t
-{
-  uint8x16_t val[3];
-} uint8x16x3_t;
-
-typedef struct uint16x4x3_t
-{
-  uint16x4_t val[3];
-} uint16x4x3_t;
-
-typedef struct uint16x8x3_t
-{
-  uint16x8_t val[3];
-} uint16x8x3_t;
-
-typedef struct uint32x2x3_t
-{
-  uint32x2_t val[3];
-} uint32x2x3_t;
-
-typedef struct uint32x4x3_t
-{
-  uint32x4_t val[3];
-} uint32x4x3_t;
-
-typedef struct uint64x1x3_t
-{
-  uint64x1_t val[3];
-} uint64x1x3_t;
-
-typedef struct uint64x2x3_t
-{
-  uint64x2_t val[3];
-} uint64x2x3_t;
-
-typedef struct float16x4x3_t
-{
-  float16x4_t val[3];
-} float16x4x3_t;
-
-typedef struct float16x8x3_t
-{
-  float16x8_t val[3];
-} float16x8x3_t;
-
-typedef struct float32x2x3_t
-{
-  float32x2_t val[3];
-} float32x2x3_t;
-
-typedef struct float32x4x3_t
-{
-  float32x4_t val[3];
-} float32x4x3_t;
-
-typedef struct float64x2x3_t
-{
-  float64x2_t val[3];
-} float64x2x3_t;
-
-typedef struct float64x1x3_t
-{
-  float64x1_t val[3];
-} float64x1x3_t;
-
-typedef struct poly8x8x3_t
-{
-  poly8x8_t val[3];
-} poly8x8x3_t;
-
-typedef struct poly8x16x3_t
-{
-  poly8x16_t val[3];
-} poly8x16x3_t;
-
-typedef struct poly16x4x3_t
-{
-  poly16x4_t val[3];
-} poly16x4x3_t;
-
-typedef struct poly16x8x3_t
-{
-  poly16x8_t val[3];
-} poly16x8x3_t;
-
-typedef struct int8x8x4_t
-{
-  int8x8_t val[4];
-} int8x8x4_t;
-
-typedef struct int8x16x4_t
-{
-  int8x16_t val[4];
-} int8x16x4_t;
-
-typedef struct int16x4x4_t
-{
-  int16x4_t val[4];
-} int16x4x4_t;
-
-typedef struct int16x8x4_t
-{
-  int16x8_t val[4];
-} int16x8x4_t;
-
-typedef struct int32x2x4_t
-{
-  int32x2_t val[4];
-} int32x2x4_t;
-
-typedef struct int32x4x4_t
-{
-  int32x4_t val[4];
-} int32x4x4_t;
-
-typedef struct int64x1x4_t
-{
-  int64x1_t val[4];
-} int64x1x4_t;
-
-typedef struct int64x2x4_t
-{
-  int64x2_t val[4];
-} int64x2x4_t;
-
-typedef struct uint8x8x4_t
-{
-  uint8x8_t val[4];
-} uint8x8x4_t;
-
-typedef struct uint8x16x4_t
-{
-  uint8x16_t val[4];
-} uint8x16x4_t;
-
-typedef struct uint16x4x4_t
-{
-  uint16x4_t val[4];
-} uint16x4x4_t;
-
-typedef struct uint16x8x4_t
-{
-  uint16x8_t val[4];
-} uint16x8x4_t;
-
-typedef struct uint32x2x4_t
-{
-  uint32x2_t val[4];
-} uint32x2x4_t;
-
-typedef struct uint32x4x4_t
-{
-  uint32x4_t val[4];
-} uint32x4x4_t;
-
-typedef struct uint64x1x4_t
-{
-  uint64x1_t val[4];
-} uint64x1x4_t;
-
-typedef struct uint64x2x4_t
-{
-  uint64x2_t val[4];
-} uint64x2x4_t;
-
-typedef struct float16x4x4_t
-{
-  float16x4_t val[4];
-} float16x4x4_t;
-
-typedef struct float16x8x4_t
-{
-  float16x8_t val[4];
-} float16x8x4_t;
-
-typedef struct float32x2x4_t
-{
-  float32x2_t val[4];
-} float32x2x4_t;
-
-typedef struct float32x4x4_t
-{
-  float32x4_t val[4];
-} float32x4x4_t;
-
-typedef struct float64x2x4_t
-{
-  float64x2_t val[4];
-} float64x2x4_t;
-
-typedef struct float64x1x4_t
-{
-  float64x1_t val[4];
-} float64x1x4_t;
-
-typedef struct poly8x8x4_t
-{
-  poly8x8_t val[4];
-} poly8x8x4_t;
-
-typedef struct poly8x16x4_t
-{
-  poly8x16_t val[4];
-} poly8x16x4_t;
-
-typedef struct poly16x4x4_t
-{
-  poly16x4_t val[4];
-} poly16x4x4_t;
-
-typedef struct poly16x8x4_t
-{
-  poly16x8_t val[4];
-} poly16x8x4_t;
-
 /* __aarch64_vdup_lane internal macros.  */
 #define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
   vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
author	Jonathan Wright <jonathan.wright@arm.com>	2021-09-10 16:48:02 +0100
committer	Jonathan Wright <jonathan.wright@arm.com>	2021-11-04 14:50:40 +0000
commit	8197ab94b47c814632d758dd36a121ad4114ff70 (patch)
tree	d6552d642af27bab058d947e171edd257b3aee5b
parent	fbe58ba97aff3270877d7fd5600c17687b85964c (diff)