author    Yvan Roux <yvan.roux@linaro.org>  2015-10-26 13:36:18 +0100
committer Linaro Code Review <review@review.linaro.org>  2015-10-27 19:49:00 +0000
commit    d2c0c9b163372623969620283187c5848e479c4e
tree      036a9b66b3b9c86ed5b893a4bc8fb7797b551783
parent    16d999dc7dfa8ddde9598eb0564f881fa9e0a8aa
gcc/
	Backport from trunk r226346.
	2015-07-29  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64-builtins.c (aarch64_fp16_type_node): New.
	(aarch64_init_builtins): Make aarch64_fp16_type_node, use for __fp16.
	* config/aarch64/aarch64-modes.def: Add HFmode.
	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP16_FORMAT_IEEE and __ARM_FP16_ARGS.  Set bit 1 of __ARM_FP.
	* config/aarch64/aarch64.c (aarch64_init_libfuncs, aarch64_promoted_type): New.
	(aarch64_float_const_representable_p): Disable HFmode.
	(aarch64_mangle_type): Mangle half-precision floats to "Dh".
	(TARGET_PROMOTED_TYPE): Define to aarch64_promoted_type.
	(TARGET_INIT_LIBFUNCS): Define to aarch64_init_libfuncs.
	* config/aarch64/aarch64.md (mov<mode>): Include HFmode using GPF_F16.
	(movhf_aarch64, extendhfsf2, extendhfdf2, truncsfhf2, truncdfhf2): New.
	* config/aarch64/iterators.md (GPF_F16): New.

gcc/testsuite/
	Backport from trunk r226346.
	2015-07-29  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/f16_movs_1.c: New test.

gcc/testsuite/
	Backport from trunk r226350.
	2015-07-29  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/fp16/fp16.exp: New.
	* gcc.target/aarch64/fp16/f16_convs_1.c: New.
	* gcc.target/aarch64/fp16/f16_convs_2.c: New.

gcc/
	Backport from trunk r227033.
	2015-08-20  Alan Lawrence  <alan.lawrence@arm.com>

	* config/arm/arm-builtins.c (arm_init_simd_builtin_types): Move initialization of HFmode scalar type (float16_t) to...
	(arm_init_fp16_builtins): ... Here.  Combine with __fp16 initialization code.
	(arm_init_builtins): Call arm_init_fp16_builtins earlier and always.
	* config/arm/arm_neon.h (vcvt_f16_f32, vcvt_f32_f16): Condition on having an -mfp16-format.

gcc/testsuite/
	Backport from trunk r227033.
	2015-08-20  Alan Lawrence  <alan.lawrence@arm.com>

	* lib/target-supports.exp (check_effective_target_arm_neon_fp16_ok_nocache): Add flag variants with -mfp16-format=ieee.

gcc/
	Backport from trunk r227535, r227558.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/arm/arm_neon.h (float16_t, vget_lane_f16, vset_lane_f16, vcreate_f16, vld1_lane_f16, vld1_dup_f16, vreinterpret_p8_f16, vreinterpret_p16_f16, vreinterpret_f16_p8, vreinterpret_f16_p16, vreinterpret_f16_f32, vreinterpret_f16_p64, vreinterpret_f16_s64, vreinterpret_f16_u64, vreinterpret_f16_s8, vreinterpret_f16_s16, vreinterpret_f16_s32, vreinterpret_f16_u8, vreinterpret_f16_u16, vreinterpret_f16_u32, vreinterpret_f32_f16, vreinterpret_p64_f16, vreinterpret_s64_f16, vreinterpret_u64_f16, vreinterpret_s8_f16, vreinterpret_s16_f16, vreinterpret_s32_f16, vreinterpret_u8_f16, vreinterpret_u16_f16, vreinterpret_u32_f16): New.

gcc/
	Backport from trunk r227536, r227558.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/arm/arm.h (VALID_NEON_QREG_MODE): Add V8HFmode.
	* config/arm/arm.c (arm_vector_mode_supported_p): Support V8HFmode.
	* config/arm/arm-builtins.c (v8hf_UP): New.
	(arm_init_simd_builtin_types): Initialise Float16x8_t.
	* config/arm/arm-simd-builtin-types.def (Float16x8_t): New.
	* config/arm/arm_neon.h (float16x8_t): New typedef.

gcc/
	Backport from trunk r227538, r227558.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/arm/arm_neon.h (vgetq_lane_f16, vsetq_lane_f16, vld1q_lane_f16, vld1q_dup_f16, vreinterpretq_p8_f16, vreinterpretq_p16_f16, vreinterpretq_f16_p8, vreinterpretq_f16_p16, vreinterpretq_f16_f32, vreinterpretq_f16_p64, vreinterpretq_f16_p128, vreinterpretq_f16_s64, vreinterpretq_f16_u64, vreinterpretq_f16_s8, vreinterpretq_f16_s16, vreinterpretq_f16_s32, vreinterpretq_f16_u8, vreinterpretq_f16_u16, vreinterpretq_f16_u32, vreinterpretq_f32_f16, vreinterpretq_p64_f16, vreinterpretq_p128_f16, vreinterpretq_s64_f16, vreinterpretq_u64_f16, vreinterpretq_s8_f16, vreinterpretq_s16_f16, vreinterpretq_s32_f16, vreinterpretq_u8_f16, vreinterpretq_u16_f16, vreinterpretq_u32_f16): New.

gcc/
	Backport from trunk r227541.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/arm/arm-builtins.c (VAR11, VAR12): New.
	* config/arm/arm_neon_builtins.def (vcombine, vld2_dup, vld3_dup, vld4_dup): Add v4hf variant.
	(vget_high, vget_low): Add v8hf variant.
	(vld1, vst1, vst1_lane, vld2, vld2_lane, vst2, vst2_lane, vld3, vld3_lane, vst3, vst3_lane, vld4, vld4_lane, vst4, vst4_lane): Add v4hf and v8hf variants.
	* config/arm/iterators.md (VD_LANE, VD_RE, VQ2, VQ_HS): New.
	(VDX): Add V4HF.
	(V_DOUBLE): Add case for V4HF.
	(VQX): Add V8HF.
	(V_HALF): Add case for V8HF.
	(VDQX): Add V4HF, V8HF.
	(V_elem, V_two_elem, V_three_elem, V_four_elem, V_cmp_result, V_uf_sclr, V_sz_elem, V_mode_nunits, q): Add cases for V4HF & V8HF.
	* config/arm/neon.md (vec_set<mode>internal, vec_extract<mode>, neon_vget_lane<mode>_sext_internal, neon_vget_lane<mode>_zext_internal, vec_load_lanesoi<mode>, neon_vld2<mode>, vec_store_lanesoi<mode>, neon_vst2<mode>, vec_load_lanesci<mode>, neon_vld3<mode>, neon_vld3qa<mode>, neon_vld3qb<mode>, vec_store_lanesci<mode>, neon_vst3<mode>, neon_vst3qa<mode>, neon_vst3qb<mode>, vec_load_lanesxi<mode>, neon_vld4<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>, vec_store_lanesxi<mode>, neon_vst4<mode>, neon_vst4qa<mode>, neon_vst4qb<mode>): Change VQ iterator to VQ2.
	(neon_vcreate, neon_vreinterpretv8qi<mode>, neon_vreinterpretv4hi<mode>, neon_vreinterpretv2si<mode>, neon_vreinterpretv2sf<mode>, neon_vreinterpretdi<mode>): Change VDX to VD_RE.
	(neon_vld2_lane<mode>, neon_vst2_lane<mode>, neon_vld3_lane<mode>, neon_vst3_lane<mode>, neon_vld4_lane<mode>, neon_vst4_lane<mode>): Change VD iterator to VD_LANE, and VMQ iterator to VQ_HS.
	* config/arm/arm_neon.h (float16x4x2_t, float16x8x2_t, float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t, vcombine_f16, vget_high_f16, vget_low_f16, vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16, vst1q_lane_f16, vld2_f16, vld2q_f16, vld2_lane_f16, vld2q_lane_f16, vld2_dup_f16, vst2_f16, vst2q_f16, vst2_lane_f16, vst2q_lane_f16, vld3_f16, vld3q_f16, vld3_lane_f16, vld3q_lane_f16, vld3_dup_f16, vst3_f16, vst3q_f16, vst3_lane_f16, vst3q_lane_f16, vld4_f16, vld4q_f16, vld4_lane_f16, vld4q_lane_f16, vld4_dup_f16, vst4_f16, vst4q_f16, vst4_lane_f16, vst4q_lane_f16): New.

gcc/
	Backport from trunk r227542.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support V4HFmode and V8HFmode.
	(aarch64_split_simd_move): Add case for V8HFmode.
	* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
	(aarch64_simd_builtin_std_type): Handle HFmode.
	(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
	* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>, aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode>): Use VALL_F16 iterator.
	(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
	* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t, Float16x8_t.
	* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
	* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t): New typedefs.
	(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16, vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16, vst1q_lane_f16): New.
	* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
	(VALLDI_F16, VALL_F16): New.
	(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q): Add cases for V4HF and V8HF.
	(VDBL, VRL2, VRL3, VRL4): Add V4HF case.

gcc/testsuite/
	Backport from trunk r227542.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and float16x8_t.
	* gcc.target/aarch64/vset_lane_1.c: Likewise.
	* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
	* gcc.target/aarch64/vld1_lane.c: Likewise.

gcc/
	Backport from trunk r227543.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
	* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
	(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types): Add __builtin_aarch64_simd_hf.
	* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t, float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t, vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16, vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16, vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16, vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16, vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16, vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16, vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
	* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype, V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
	(VDC, Vdbl): Add V4HF.

gcc/testsuite/
	Backport from trunk r227543.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
	* gcc.target/aarch64/vldN_dup_1.c: Likewise.
	* gcc.target/aarch64/vldN_lane_1.c: Likewise.
	(main): update orig_data to avoid float16 NaN on bigendian.

gcc/
	Backport from trunk r227545.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_float_truncate_lo_v2sf): Reparameterize to...
	(aarch64_float_truncate_lo_<mode>): ...this, for both V2SF and V4HF.
	(aarch64_float_truncate_hi_v4sf): Reparameterize to...
	(aarch64_float_truncate_hi_<Vdbl>): ...this, for both V4SF and V8HF.
	* config/aarch64/aarch64-simd-builtins.def (float_truncate_hi_): Add v8hf variant.
	(float_truncate_lo_): Use BUILTIN_VDF iterator.
	* config/aarch64/arm_neon.h (vcvt_f16_f32, vcvt_high_f16_f32): New.
	* config/aarch64/iterators.md (VDF, Vdtype): New.
	(VWIDE, Vmwtype): Add cases for V4HF and V2SF.

gcc/
	Backport from trunk r227546.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/arm_neon.h (vreinterpret_p8_f16, vreinterpret_p16_f16, vreinterpret_f16_f64, vreinterpret_f16_s8, vreinterpret_f16_s16, vreinterpret_f16_s32, vreinterpret_f16_s64, vreinterpret_f16_f32, vreinterpret_f16_u8, vreinterpret_f16_u16, vreinterpret_f16_u32, vreinterpret_f16_u64, vreinterpret_f16_p8, vreinterpret_f16_p16, vreinterpretq_f16_f64, vreinterpretq_f16_s8, vreinterpretq_f16_s16, vreinterpretq_f16_s32, vreinterpretq_f16_s64, vreinterpretq_f16_f32, vreinterpretq_f16_u8, vreinterpretq_f16_u16, vreinterpretq_f16_u32, vreinterpretq_f16_u64, vreinterpretq_f16_p8, vreinterpretq_f16_p16, vreinterpret_f32_f16, vreinterpret_f64_f16, vreinterpret_s64_f16, vreinterpret_u64_f16, vreinterpretq_u64_f16, vreinterpret_s8_f16, vreinterpret_s16_f16, vreinterpret_s32_f16, vreinterpret_u8_f16, vreinterpret_u16_f16, vreinterpret_u32_f16, vreinterpretq_p8_f16, vreinterpretq_p16_f16, vreinterpretq_f32_f16, vreinterpretq_f64_f16, vreinterpretq_s64_f16, vreinterpretq_s8_f16, vreinterpretq_s16_f16, vreinterpretq_s32_f16, vreinterpretq_u8_f16, vreinterpretq_u16_f16, vreinterpretq_u32_f16, vget_low_f16, vget_high_f16, vld1_dup_f16, vld1q_dup_f16): New.

gcc/testsuite/
	Backport from trunk r227546.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/vget_high_1.c: Add float16x8->float16x4 case.
	* gcc.target/aarch64/vget_low_1.c: Likewise.

gcc/
	Backport from trunk r227550.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, aarch64_dup_lane<mode>, aarch64_dup_lane_<vswap_width_name><mode>, aarch64_simd_vec_set<mode>, vec_set<mode>, vec_perm_const<mode>, vec_init<mode>, *aarch64_simd_ld1r<mode>, vec_extract<mode>): Add V4HF and V8HF variants to iterator.
	* config/aarch64/aarch64.c (aarch64_evpc_dup): Add V4HF and V8HF cases.
	* config/aarch64/iterators.md (VDQF_F16): New.
	(VSWAP_WIDTH, vswap_width_name): Add V4HF and V8HF cases.

gcc/
	Backport from trunk r227551.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_simd_vec_unpacks_lo_<mode>, aarch64_simd_vec_unpacks_hi_<mode>): New insn.
	(vec_unpacks_lo_v4sf, vec_unpacks_hi_v4sf): Delete insn.
	(vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>): New expand.
	(aarch64_float_extend_lo_v2df): Rename to...
	(aarch64_float_extend_lo_<Vwide>): this, using VDF and so adding V4SF.
	* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi): Add v8hf.
	(float_extend_lo): Add v4sf.
	* config/aarch64/arm_neon.h (vcvt_f32_f16, vcvt_high_f32_f16): New.
	* config/aarch64/iterators.md (VQ_HSF): New iterator.
	(VWIDE, Vwtype, Vhalftype): Add V8HF, V4SF.
	(Vwide): New mode_attr.

gcc/
	Backport from trunk r227552.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* fold-const.c (native_interpret_real): Fix HFmode for bigendian where UNITS_PER_WORD >= 4.

gcc/testsuite/
	Backport from trunk r227554.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp: Set additional_flags for neon-fp16 if supported, else fallback to neon.
	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (hfloat16_t): New.
	(result, expected, clean_results, DECL_VARIABLE_64BITS_VARIANTS, DECL_VARIABLE_128BITS_VARIANTS): Add float16x4_t and float16x8_t cases if supported.
	(CHECK_RESULTS): Redefine using CHECK_RESULTS_NAMED.
	(CHECK_RESULTS_NAMED): Move body to CHECK_RESULTS_NAMED_NO_FP16; redefine in terms of CHECK_RESULTS_NAMED_NO_FP16 with float16 variants when those are supported.
	(CHECK_RESULTS_NAMED_NO_FP16, CHECK_RESULTS_NO_FP16): New.
	(vdup_n_f16): New.
	* gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h (buffer, buffer_pad, buffer_dup, buffer_dup_pad): Add float16x4_t and float16x8_t cases if supported.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c (exec_vbsl): Use CHECK_RESULTS_NO_FP16 in place of CHECK_RESULTS.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c (exec_vdup_vmov): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c (exec_vdup_lane): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c (exec_vext): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vcombine.c (expected): Add float16x8_t case.
	(main, exec_vcombine): Test float16x4_t -> float16x8_t, if supported.
	* gcc.target/aarch64/advsimd-intrinsics/vcreate.c (expected, main, exec_vcreate): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vget_high.c (expected, exec_vget_high): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vget_low.c (expected, exec_vget_low): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1.c (expected, exec_vld1): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c (expected, exec_vld1_dup): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c (expected, exec_vld1_lane): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX.c (expected, exec_vldX): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c (expected, exec_vldX_dup): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c (expected, exec_vldX_lane): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vset_lane.c (expected, exec_vset_lane): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c (expected, exec_vst1_lane): Likewise.

gcc/testsuite/
	Backport from trunk r227555.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c: New.
	* lib/target-supports.exp (check_effective_target_arm_neon_fp16_hw): New.

gcc/
	Backport from trunk r227556.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	* doc/sourcebuild.texi (arm_neon_fp16): Correct cross-reference.
	(arm_neon_fp16_ok): Document adding of -mfp16-format=ieee flag.
	(arm_neon_fp16_hw): New.

gcc/testsuite/
	Backport from trunk r227557.
	2015-09-08  Alan Lawrence  <alan.lawrence@arm.com>

	PR target/63870
	* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c: New.

Change-Id: I20b8ee88a73373649cac10a0ffecc58e7e11acff
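Taken together, these backports make __fp16 a usable scalar type on AArch64: stored in IEEE binary16 format, promoted to float for arithmetic, and mangled as "Dh" in C++. A minimal sketch of code this enables (illustrative, not part of the patch):

    /* __fp16 arithmetic is performed in single precision and truncated
       back on return (see TARGET_PROMOTED_TYPE below).  */
    __fp16
    scale (__fp16 a, __fp16 b)
    {
      return a * b;
    }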
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 24
-rw-r--r--  gcc/config/aarch64/aarch64-modes.def | 4
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtin-types.def | 2
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 14
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 157
-rw-r--r--  gcc/config/aarch64/aarch64.c | 56
-rw-r--r--  gcc/config/aarch64/aarch64.h | 7
-rw-r--r--  gcc/config/aarch64/aarch64.md | 56
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 705
-rw-r--r--  gcc/config/aarch64/iterators.md | 90
-rw-r--r--  gcc/config/arm/arm-builtins.c | 34
-rw-r--r--  gcc/config/arm/arm-simd-builtin-types.def | 2
-rw-r--r--  gcc/config/arm/arm.c | 3
-rw-r--r--  gcc/config/arm/arm.h | 2
-rw-r--r--  gcc/config/arm/arm_neon.h | 900
-rw-r--r--  gcc/config/arm/arm_neon_builtins.def | 72
-rw-r--r--  gcc/config/arm/iterators.md | 44
-rw-r--r--  gcc/config/arm/neon.md | 90
-rw-r--r--  gcc/doc/sourcebuild.texi | 9
-rw-r--r--  gcc/fold-const.c | 22
-rw-r--r--  gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C | 4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp | 8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h | 93
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c | 14
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c | 98
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c | 12
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c | 7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c | 10
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c | 7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c | 102
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c | 82
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c | 93
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/f16_movs_1.c | 26
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_1.c | 34
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_2.c | 33
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16/fp16.exp | 43
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vget_high_1.c | 3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vget_low_1.c | 3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vld1_lane.c | 3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vldN_1.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c | 3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c | 5
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/vset_lane_1.c | 5
-rw-r--r--  gcc/testsuite/lib/target-supports.exp | 21
66 files changed, 2990 insertions, 290 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 025d6c12a1e..619c2765753 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -80,6 +80,7 @@
#define v8qi_UP V8QImode
#define v4hi_UP V4HImode
+#define v4hf_UP V4HFmode
#define v2si_UP V2SImode
#define v2sf_UP V2SFmode
#define v1df_UP V1DFmode
@@ -87,6 +88,7 @@
#define df_UP DFmode
#define v16qi_UP V16QImode
#define v8hi_UP V8HImode
+#define v8hf_UP V8HFmode
#define v4si_UP V4SImode
#define v4sf_UP V4SFmode
#define v2di_UP V2DImode
@@ -314,6 +316,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
VAR1 (T, N, MAP, L)
+#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+ VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
+ VAR1 (T, N, MAP, M)
+#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
+ VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+ VAR1 (T, X, MAP, N)
#include "aarch64-builtin-iterators.h"
@@ -391,6 +399,7 @@ const char *aarch64_scalar_builtin_types[] = {
"__builtin_aarch64_simd_qi",
"__builtin_aarch64_simd_hi",
"__builtin_aarch64_simd_si",
+ "__builtin_aarch64_simd_hf",
"__builtin_aarch64_simd_sf",
"__builtin_aarch64_simd_di",
"__builtin_aarch64_simd_df",
@@ -455,6 +464,9 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = {
};
#undef ENTRY
+/* This type is not SIMD-specific; it is the user-visible __fp16. */
+static tree aarch64_fp16_type_node = NULL_TREE;
+
static tree aarch64_simd_intOI_type_node = NULL_TREE;
static tree aarch64_simd_intEI_type_node = NULL_TREE;
static tree aarch64_simd_intCI_type_node = NULL_TREE;
@@ -536,6 +548,8 @@ aarch64_simd_builtin_std_type (enum machine_mode mode,
return aarch64_simd_intCI_type_node;
case XImode:
return aarch64_simd_intXI_type_node;
+ case HFmode:
+ return aarch64_fp16_type_node;
case SFmode:
return float_type_node;
case DFmode:
@@ -620,6 +634,8 @@ aarch64_init_simd_builtin_types (void)
aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
/* Continue with standard types. */
+ aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
+ aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
aarch64_simd_types[Float32x2_t].eltype = float_type_node;
aarch64_simd_types[Float32x4_t].eltype = float_type_node;
aarch64_simd_types[Float64x1_t].eltype = double_type_node;
@@ -671,6 +687,8 @@ aarch64_init_simd_builtin_scalar_types (void)
"__builtin_aarch64_simd_qi");
(*lang_hooks.types.register_builtin_type) (intHI_type_node,
"__builtin_aarch64_simd_hi");
+ (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
+ "__builtin_aarch64_simd_hf");
(*lang_hooks.types.register_builtin_type) (intSI_type_node,
"__builtin_aarch64_simd_si");
(*lang_hooks.types.register_builtin_type) (float_type_node,
@@ -865,6 +883,12 @@ aarch64_init_builtins (void)
= add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+ aarch64_fp16_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (aarch64_fp16_type_node) = 16;
+ layout_type (aarch64_fp16_type_node);
+
+ (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
+
if (TARGET_SIMD)
aarch64_init_simd_builtins ();
if (TARGET_CRC32)
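The new aarch64_fp16_type_node is registered both as the user-visible __fp16 and as __builtin_aarch64_simd_hf, the element type behind Float16x4_t and Float16x8_t. A usage sketch (float16_t and vget_lane_f16 are the arm_neon.h names added elsewhere in this patch):

    #include <arm_neon.h>

    /* float16_t is a typedef of __fp16; lanes of float16x4_t have that type.  */
    float16_t
    first_lane (float16x4_t v)
    {
      return vget_lane_f16 (v, 0);
    }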
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index b17b90d9060..3160bef1105 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -36,6 +36,10 @@ CC_MODE (CC_DLTU);
CC_MODE (CC_DGEU);
CC_MODE (CC_DGTU);
+/* Half-precision floating point for __fp16. */
+FLOAT_MODE (HF, 2, 0);
+ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
+
/* Vector modes. */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */
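FLOAT_MODE (HF, 2, 0) declares a two-byte float mode, and ADJUST_FLOAT_FORMAT binds it to ieee_half_format: 1 sign bit, 5 exponent bits (bias 15), 10 fraction bits. Two encodings under that format, for reference (illustrative constants):

    /* IEEE binary16 encodings assumed by ieee_half_format.  */
    unsigned short h_one  = 0x3C00;  /* 1.0: sign 0, exponent 01111, fraction 0 */
    unsigned short h_half = 0x3800;  /* 0.5: sign 0, exponent 01110, fraction 0 */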
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index bb54e56ce63..ea219b72ff9 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -44,6 +44,8 @@
ENTRY (Poly16x8_t, V8HI, poly, 12)
ENTRY (Poly64x1_t, DI, poly, 12)
ENTRY (Poly64x2_t, V2DI, poly, 12)
+ ENTRY (Float16x4_t, V4HF, none, 13)
+ ENTRY (Float16x8_t, V8HF, none, 13)
ENTRY (Float32x2_t, V2SF, none, 13)
ENTRY (Float32x4_t, V4SF, none, 13)
ENTRY (Float64x1_t, V1DF, none, 13)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d0f298a1f07..2c13cfb0823 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -361,17 +361,19 @@
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VDQF (UNOP, abs, 2)
- VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
+ BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
+ VAR1 (BINOP, float_truncate_hi_, 0, v8hf)
VAR1 (UNOP, float_extend_lo_, 0, v2df)
- VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
+ VAR1 (UNOP, float_extend_lo_, 0, v4sf)
+ BUILTIN_VDF (UNOP, float_truncate_lo_, 0)
- /* Implemented by aarch64_ld1<VALL:mode>. */
- BUILTIN_VALL (LOAD1, ld1, 0)
+ /* Implemented by aarch64_ld1<VALL_F16:mode>. */
+ BUILTIN_VALL_F16 (LOAD1, ld1, 0)
- /* Implemented by aarch64_st1<VALL:mode>. */
- BUILTIN_VALL (STORE1, st1, 0)
+ /* Implemented by aarch64_st1<VALL_F16:mode>. */
+ BUILTIN_VALL_F16 (STORE1, st1, 0)
/* Implemented by fma<mode>4. */
BUILTIN_VDQF (TERNOP, fma, 4)
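The float_truncate builtins gain a v8hf variant and a VDF-parameterized low half; they back the vcvt_f16_f32/vcvt_high_f16_f32 intrinsics this patch adds to arm_neon.h. A sketch of their intended pairing (assuming the signatures declared in this patch):

    #include <arm_neon.h>

    /* Narrow eight floats to one float16x8_t: fcvtn, then fcvtn2.  */
    float16x8_t
    narrow8 (float32x4_t lo, float32x4_t hi)
    {
      float16x4_t l = vcvt_f16_f32 (lo);
      return vcvt_high_f16_f32 (l, hi);
    }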
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 40afcedcd57..5ea80c6b9c2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>"
- [(set (match_operand:VALL 0 "nonimmediate_operand" "")
- (match_operand:VALL 1 "general_operand" ""))]
+ [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
+ (match_operand:VALL_F16 1 "general_operand" ""))]
"TARGET_SIMD"
"
if (GET_CODE (operands[0]) == MEM)
@@ -53,18 +53,19 @@
)
(define_insn "aarch64_simd_dup<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
+ [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
+ (vec_duplicate:VDQF_F16
+ (match_operand:<VEL> 1 "register_operand" "w")))]
"TARGET_SIMD"
"dup\\t%0.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon_dup<q>")]
)
(define_insn "aarch64_dup_lane<mode>"
- [(set (match_operand:VALL 0 "register_operand" "=w")
- (vec_duplicate:VALL
+ [(set (match_operand:VALL_F16 0 "register_operand" "=w")
+ (vec_duplicate:VALL_F16
(vec_select:<VEL>
- (match_operand:VALL 1 "register_operand" "w")
+ (match_operand:VALL_F16 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD"
@@ -76,8 +77,8 @@
)
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
- [(set (match_operand:VALL 0 "register_operand" "=w")
- (vec_duplicate:VALL
+ [(set (match_operand:VALL_F16 0 "register_operand" "=w")
+ (vec_duplicate:VALL_F16
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
@@ -834,11 +835,11 @@
)
(define_insn "aarch64_simd_vec_set<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (vec_merge:VDQF
- (vec_duplicate:VDQF
+ [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
+ (vec_merge:VDQF_F16
+ (vec_duplicate:VDQF_F16
(match_operand:<VEL> 1 "register_operand" "w"))
- (match_operand:VDQF 3 "register_operand" "0")
+ (match_operand:VDQF_F16 3 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
@@ -851,7 +852,7 @@
)
(define_expand "vec_set<mode>"
- [(match_operand:VDQF 0 "register_operand" "+w")
+ [(match_operand:VDQF_F16 0 "register_operand" "+w")
(match_operand:<VEL> 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_SIMD"
@@ -1691,58 +1692,79 @@
;; Float widening operations.
-(define_insn "vec_unpacks_lo_v4sf"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (float_extend:V2DF
- (vec_select:V2SF
- (match_operand:V4SF 1 "register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])
- )))]
+(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (float_extend:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQ_HSF 1 "register_operand" "w")
+ (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
+ )))]
"TARGET_SIMD"
- "fcvtl\\t%0.2d, %1.2s"
+ "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
-(define_insn "aarch64_float_extend_lo_v2df"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (float_extend:V2DF
- (match_operand:V2SF 1 "register_operand" "w")))]
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand" "")
+ (match_operand:VQ_HSF 1 "register_operand" "")]
"TARGET_SIMD"
- "fcvtl\\t%0.2d, %1.2s"
+ {
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
+ operands[1], p));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (float_extend:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQ_HSF 1 "register_operand" "w")
+ (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
+ )))]
+ "TARGET_SIMD"
+ "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
-(define_insn "vec_unpacks_hi_v4sf"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (float_extend:V2DF
- (vec_select:V2SF
- (match_operand:V4SF 1 "register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])
- )))]
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand" "")
+ (match_operand:VQ_HSF 1 "register_operand" "")]
"TARGET_SIMD"
- "fcvtl2\\t%0.2d, %1.4s"
+ {
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
+ operands[1], p));
+ DONE;
+ }
+)
+(define_insn "aarch64_float_extend_lo_<Vwide>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (float_extend:<VWIDE>
+ (match_operand:VDF 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Float narrowing operations.
-(define_insn "aarch64_float_truncate_lo_v2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=w")
- (float_truncate:V2SF
- (match_operand:V2DF 1 "register_operand" "w")))]
+(define_insn "aarch64_float_truncate_lo_<mode>"
+ [(set (match_operand:VDF 0 "register_operand" "=w")
+ (float_truncate:VDF
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
- "fcvtn\\t%0.2s, %1.2d"
+ "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
-(define_insn "aarch64_float_truncate_hi_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
- (vec_concat:V4SF
- (match_operand:V2SF 1 "register_operand" "0")
- (float_truncate:V2SF
- (match_operand:V2DF 2 "register_operand" "w"))))]
+(define_insn "aarch64_float_truncate_hi_<Vdbl>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+ (vec_concat:<VDBL>
+ (match_operand:VDF 1 "register_operand" "0")
+ (float_truncate:VDF
+ (match_operand:<VWIDE> 2 "register_operand" "w"))))]
"TARGET_SIMD"
- "fcvtn2\\t%0.4s, %2.2d"
+ "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
@@ -2450,7 +2472,7 @@
(define_insn "aarch64_get_lane<mode>"
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
(vec_select:<VEL>
- (match_operand:VALL 1 "register_operand" "w, w, w")
+ (match_operand:VALL_F16 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
"TARGET_SIMD"
{
@@ -4243,8 +4265,9 @@
)
(define_insn "aarch64_be_ld1<mode>"
- [(set (match_operand:VALLDI 0 "register_operand" "=w")
- (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
+ [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
+ (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
+ "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%0<Vmtype>}, %1"
@@ -4252,8 +4275,8 @@
)
(define_insn "aarch64_be_st1<mode>"
- [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
+ [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%1<Vmtype>}, %0"
@@ -4542,16 +4565,16 @@
DONE;
})
-(define_expand "aarch64_ld1<VALL:mode>"
- [(match_operand:VALL 0 "register_operand")
+(define_expand "aarch64_ld1<VALL_F16:mode>"
+ [(match_operand:VALL_F16 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
- machine_mode mode = <VALL:MODE>mode;
+ machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
+ emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
else
emit_move_insn (operands[0], mem);
DONE;
@@ -4669,9 +4692,9 @@
;; vec_perm support
(define_expand "vec_perm_const<mode>"
- [(match_operand:VALL 0 "register_operand")
- (match_operand:VALL 1 "register_operand")
- (match_operand:VALL 2 "register_operand")
+ [(match_operand:VALL_F16 0 "register_operand")
+ (match_operand:VALL_F16 1 "register_operand")
+ (match_operand:VALL_F16 2 "register_operand")
(match_operand:<V_cmp_result> 3)]
"TARGET_SIMD"
{
@@ -4895,16 +4918,16 @@
DONE;
})
-(define_expand "aarch64_st1<VALL:mode>"
+(define_expand "aarch64_st1<VALL_F16:mode>"
[(match_operand:DI 0 "register_operand")
- (match_operand:VALL 1 "register_operand")]
+ (match_operand:VALL_F16 1 "register_operand")]
"TARGET_SIMD"
{
- machine_mode mode = <VALL:MODE>mode;
+ machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[0]);
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
+ emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
else
emit_move_insn (mem, operands[1]);
DONE;
@@ -4935,7 +4958,7 @@
;; Standard pattern name vec_init<mode>.
(define_expand "vec_init<mode>"
- [(match_operand:VALL 0 "register_operand" "")
+ [(match_operand:VALL_F16 0 "register_operand" "")
(match_operand 1 "" "")]
"TARGET_SIMD"
{
@@ -4944,8 +4967,8 @@
})
(define_insn "*aarch64_simd_ld1r<mode>"
- [(set (match_operand:VALL 0 "register_operand" "=w")
- (vec_duplicate:VALL
+ [(set (match_operand:VALL_F16 0 "register_operand" "=w")
+ (vec_duplicate:VALL_F16
(match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
"TARGET_SIMD"
"ld1r\\t{%0.<Vtype>}, %1"
@@ -4992,7 +5015,7 @@
(define_expand "vec_extract<mode>"
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
- (match_operand:VALL 1 "register_operand" "")
+ (match_operand:VALL_F16 1 "register_operand" "")
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_SIMD"
{
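The rewritten unpack patterns emit fcvtl/fcvtl2 for both V4SF->V2DF and V4HF/V8HF->V4SF, which is what the new widening intrinsics rely on. A short sketch (signatures as declared by this patch):

    #include <arm_neon.h>

    float32x4_t widen_lo (float16x4_t v) { return vcvt_f32_f16 (v); }       /* fcvtl  */
    float32x4_t widen_hi (float16x8_t v) { return vcvt_high_f32_f16 (v); }  /* fcvtl2 */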
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f74f641f89c..31b836ccecc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1154,6 +1154,9 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
case V2SImode:
gen = gen_aarch64_simd_combinev2si;
break;
+ case V4HFmode:
+ gen = gen_aarch64_simd_combinev4hf;
+ break;
case V2SFmode:
gen = gen_aarch64_simd_combinev2sf;
break;
@@ -1202,6 +1205,9 @@ aarch64_split_simd_move (rtx dst, rtx src)
case V2DImode:
gen = gen_aarch64_split_simd_movv2di;
break;
+ case V8HFmode:
+ gen = gen_aarch64_split_simd_movv8hf;
+ break;
case V4SFmode:
gen = gen_aarch64_split_simd_movv4sf;
break;
@@ -8545,6 +8551,7 @@ aarch64_vector_mode_supported_p (machine_mode mode)
|| mode == V2SImode || mode == V4HImode
|| mode == V8QImode || mode == V2SFmode
|| mode == V4SFmode || mode == V2DFmode
+ || mode == V4HFmode || mode == V8HFmode
|| mode == V1DFmode))
return true;
@@ -8620,6 +8627,10 @@ aarch64_mangle_type (const_tree type)
if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
return "St9__va_list";
+ /* Half-precision float. */
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ return "Dh";
+
/* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
builtin types. */
if (TYPE_NAME (type) != NULL)
@@ -10188,6 +10199,33 @@ aarch64_start_file (void)
default_file_start();
}
+static void
+aarch64_init_libfuncs (void)
+{
+ /* Half-precision float operations. The compiler handles all operations
+ with NULL libfuncs by converting to SFmode. */
+
+ /* Conversions. */
+ set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
+ set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
+
+ /* Arithmetic. */
+ set_optab_libfunc (add_optab, HFmode, NULL);
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
+ set_optab_libfunc (smul_optab, HFmode, NULL);
+ set_optab_libfunc (neg_optab, HFmode, NULL);
+ set_optab_libfunc (sub_optab, HFmode, NULL);
+
+ /* Comparisons. */
+ set_optab_libfunc (eq_optab, HFmode, NULL);
+ set_optab_libfunc (ne_optab, HFmode, NULL);
+ set_optab_libfunc (lt_optab, HFmode, NULL);
+ set_optab_libfunc (le_optab, HFmode, NULL);
+ set_optab_libfunc (ge_optab, HFmode, NULL);
+ set_optab_libfunc (gt_optab, HFmode, NULL);
+ set_optab_libfunc (unord_optab, HFmode, NULL);
+}
+
/* Target hook for c_mode_for_suffix. */
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
@@ -10226,7 +10264,8 @@ aarch64_float_const_representable_p (rtx x)
if (!CONST_DOUBLE_P (x))
return false;
- if (GET_MODE (x) == VOIDmode)
+ /* We don't support HFmode constants yet. */
+ if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
return false;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
@@ -10929,6 +10968,8 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d)
case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
+ case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
+ case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
@@ -12162,6 +12203,14 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
return true;
}
+/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
+static tree
+aarch64_promoted_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return float_type_node;
+ return NULL_TREE;
+}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -12316,6 +12365,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE aarch64_promoted_type
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
@@ -12408,6 +12460,8 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
aarch64_vectorize_vec_perm_const_ok
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
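Registering NULL libfuncs for HFmode arithmetic means the midend performs +, -, *, / and comparisons by widening to SFmode; only the HF/SF conversions have named routines. A hedged sketch of the effective lowering:

    /* What `a + b` on __fp16 amounts to when hardware fcvt is unavailable:
       __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));
       with TARGET_FLOAT it is plain fcvt/fadd/fcvt.  In C++, f below
       mangles as _Z1fDhDh per aarch64_mangle_type.  */
    __fp16 f (__fp16 a, __fp16 b) { return a + b; }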
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2a7bf1aeba1..7154ed95de7 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -61,7 +61,9 @@
if (TARGET_FLOAT) \
{ \
builtin_define ("__ARM_FEATURE_FMA"); \
- builtin_define_with_int_value ("__ARM_FP", 0x0C); \
+ builtin_define_with_int_value ("__ARM_FP", 0x0E); \
+ builtin_define ("__ARM_FP16_FORMAT_IEEE"); \
+ builtin_define ("__ARM_FP16_ARGS"); \
} \
if (TARGET_SIMD) \
{ \
@@ -922,7 +924,8 @@ extern enum aarch64_code_model aarch64_cmodel;
/* Modes valid for AdvSIMD Q registers. */
#define AARCH64_VALID_SIMD_QREG_MODE(MODE) \
((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
- || (MODE) == V4SFmode || (MODE) == V2DImode || mode == V2DFmode)
+ || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
+ || (MODE) == V2DFmode)
#define ENDIAN_LANE_N(mode, n) \
(BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
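Raising __ARM_FP from 0x0C to 0x0E turns on bit 1 (0x02), the ACLE flag for half precision, alongside the existing single (0x04) and double (0x08) bits. Code can now gate on it in the usual ACLE way:

    #if defined (__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 0x02)
    typedef __fp16 f16;   /* IEEE half precision available, usable in arguments */
    #endif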
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bd5f40846cd..9921d70deeb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -977,8 +977,8 @@
})
(define_expand "mov<mode>"
- [(set (match_operand:GPF 0 "nonimmediate_operand" "")
- (match_operand:GPF 1 "general_operand" ""))]
+ [(set (match_operand:GPF_F16 0 "nonimmediate_operand" "")
+ (match_operand:GPF_F16 1 "general_operand" ""))]
""
{
if (!TARGET_FLOAT)
@@ -994,6 +994,26 @@
}
)
+(define_insn "*movhf_aarch64"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+ || register_operand (operands[1], HFmode))"
+ "@
+ mov\\t%0.h[0], %w1
+ umov\\t%w0, %1.h[0]
+ mov\\t%0.h[0], %1.h[0]
+ ldr\\t%h0, %1
+ str\\t%h1, %0
+ ldrh\\t%w0, %1
+ strh\\t%w1, %0
+ mov\\t%w0, %w1"
+ [(set_attr "type" "neon_from_gp,neon_to_gp,fmov,\
+ f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
+ (set_attr "fp" "*,*,*,yes,yes,*,*,*")]
+)
+
(define_insn "*movsf_aarch64"
[(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
(match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
@@ -4092,6 +4112,22 @@
[(set_attr "type" "f_cvt")]
)
+(define_insn "extendhfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_extend:SF (match_operand:HF 1 "register_operand" "w")))]
+ "TARGET_FLOAT"
+ "fcvt\\t%s0, %h1"
+ [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "extendhfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=w")
+ (float_extend:DF (match_operand:HF 1 "register_operand" "w")))]
+ "TARGET_FLOAT"
+ "fcvt\\t%d0, %h1"
+ [(set_attr "type" "f_cvt")]
+)
+
(define_insn "truncdfsf2"
[(set (match_operand:SF 0 "register_operand" "=w")
(float_truncate:SF (match_operand:DF 1 "register_operand" "w")))]
@@ -4100,6 +4136,22 @@
[(set_attr "type" "f_cvt")]
)
+(define_insn "truncsfhf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))]
+ "TARGET_FLOAT"
+ "fcvt\\t%h0, %s1"
+ [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "truncdfhf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (float_truncate:HF (match_operand:DF 1 "register_operand" "w")))]
+ "TARGET_FLOAT"
+ "fcvt\\t%h0, %d1"
+ [(set_attr "type" "f_cvt")]
+)
+
(define_insn "fix_trunc<GPF:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
(fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
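The new scalar patterns give a direct fcvt instruction for every HF conversion pair. Two one-liners that exercise them (illustrative):

    __fp16 from_double (double d) { return (__fp16) d; }  /* truncdfhf2: fcvt %h0, %d1 */
    float  to_float (__fp16 h)    { return h; }           /* extendhfsf2: fcvt %s0, %h1 */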
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index fce557779c2..ed0bcccb574 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -40,6 +40,7 @@ typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
+typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
@@ -52,6 +53,7 @@ typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
+typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
@@ -67,6 +69,7 @@ typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;
+typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
@@ -150,6 +153,16 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
+typedef struct float16x4x2_t
+{
+ float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t
+{
+ float16x8_t val[2];
+} float16x8x2_t;
+
typedef struct float32x2x2_t
{
float32x2_t val[2];
@@ -270,6 +283,16 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
+typedef struct float16x4x3_t
+{
+ float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t
+{
+ float16x8_t val[3];
+} float16x8x3_t;
+
typedef struct float32x2x3_t
{
float32x2_t val[3];
@@ -390,6 +413,16 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
+typedef struct float16x4x4_t
+{
+ float16x4_t val[4];
+} float16x4x4_t;
+
+typedef struct float16x8x4_t
+{
+ float16x8_t val[4];
+} float16x8x4_t;
+
typedef struct float32x2x4_t
{
float32x2_t val[4];
@@ -2641,6 +2674,12 @@ vcreate_s64 (uint64_t __a)
return (int64x1_t) {__a};
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcreate_f16 (uint64_t __a)
+{
+ return (float16x4_t) __a;
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
@@ -2691,6 +2730,12 @@ vcreate_p16 (uint64_t __a)
/* vget_lane */
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vget_lane_f16 (float16x4_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
@@ -2765,6 +2810,12 @@ vget_lane_u64 (uint64x1_t __a, const int __b)
/* vgetq_lane */
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vgetq_lane_f16 (float16x8_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
@@ -2840,6 +2891,12 @@ vgetq_lane_u64 (uint64x2_t __a, const int __b)
/* vreinterpret */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_f16 (float16x4_t __a)
+{
+ return (poly8x8_t) __a;
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f64 (float64x1_t __a)
{
return (poly8x8_t) __a;
@@ -2936,6 +2993,12 @@ vreinterpretq_p8_s64 (int64x2_t __a)
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_f16 (float16x8_t __a)
+{
+ return (poly8x16_t) __a;
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
return (poly8x16_t) __a;
@@ -2972,6 +3035,12 @@ vreinterpretq_p8_p16 (poly16x8_t __a)
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_f16 (float16x4_t __a)
+{
+ return (poly16x4_t) __a;
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f64 (float64x1_t __a)
{
return (poly16x4_t) __a;
@@ -3068,6 +3137,12 @@ vreinterpretq_p16_s64 (int64x2_t __a)
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_f16 (float16x8_t __a)
+{
+ return (poly16x8_t) __a;
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
return (poly16x8_t) __a;
@@ -3103,6 +3178,156 @@ vreinterpretq_p16_p8 (poly8x16_t __a)
return (poly16x8_t) __a;
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_f64 (float64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s8 (int8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s16 (int16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s32 (int32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s64 (int64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_f32 (float32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u8 (uint8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u16 (uint16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u32 (uint32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u64 (uint64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p8 (poly8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p16 (poly16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_f64 (float64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s8 (int8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s16 (int16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s32 (int32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s64 (int64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_f32 (float32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u8 (uint8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u16 (uint16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u32 (uint32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u64 (uint64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p8 (poly8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p16 (poly16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_f16 (float16x4_t __a)
+{
+ return (float32x2_t) __a;
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_f64 (float64x1_t __a)
{
@@ -3170,6 +3395,12 @@ vreinterpret_f32_p16 (poly16x4_t __a)
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_f16 (float16x8_t __a)
+{
+ return (float32x4_t) __a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_f64 (float64x2_t __a)
{
return (float32x4_t) __a;
@@ -3236,6 +3467,12 @@ vreinterpretq_f32_p16 (poly16x8_t __a)
}
__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_f16 (float16x4_t __a)
+{
+ return (float64x1_t) __a;
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_f32 (float32x2_t __a)
{
return (float64x1_t) __a;
@@ -3302,6 +3539,12 @@ vreinterpret_f64_u64 (uint64x1_t __a)
}
__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_f16 (float16x8_t __a)
+{
+ return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_f32 (float32x4_t __a)
{
return (float64x2_t) __a;
@@ -3368,6 +3611,12 @@ vreinterpretq_f64_u64 (uint64x2_t __a)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_f16 (float16x4_t __a)
+{
+ return (int64x1_t) __a;
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f64 (float64x1_t __a)
{
return (int64x1_t) __a;
@@ -3458,6 +3707,12 @@ vreinterpretq_s64_s32 (int32x4_t __a)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_f16 (float16x8_t __a)
+{
+ return (int64x2_t) __a;
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
return (int64x2_t) __a;
@@ -3500,6 +3755,12 @@ vreinterpretq_s64_p16 (poly16x8_t __a)
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_f16 (float16x4_t __a)
+{
+ return (uint64x1_t) __a;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f64 (float64x1_t __a)
{
return (uint64x1_t) __a;
@@ -3596,6 +3857,12 @@ vreinterpretq_u64_s64 (int64x2_t __a)
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f16 (float16x8_t __a)
+{
+ return (uint64x2_t) __a;
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
return (uint64x2_t) __a;
@@ -3632,6 +3899,12 @@ vreinterpretq_u64_p16 (poly16x8_t __a)
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_f16 (float16x4_t __a)
+{
+ return (int8x8_t) __a;
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f64 (float64x1_t __a)
{
return (int8x8_t) __a;
@@ -3722,6 +3995,12 @@ vreinterpretq_s8_s64 (int64x2_t __a)
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_f16 (float16x8_t __a)
+{
+ return (int8x16_t) __a;
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
return (int8x16_t) __a;
@@ -3764,6 +4043,12 @@ vreinterpretq_s8_p16 (poly16x8_t __a)
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_f16 (float16x4_t __a)
+{
+ return (int16x4_t) __a;
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f64 (float64x1_t __a)
{
return (int16x4_t) __a;
@@ -3854,6 +4139,12 @@ vreinterpretq_s16_s64 (int64x2_t __a)
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_f16 (float16x8_t __a)
+{
+ return (int16x8_t) __a;
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
return (int16x8_t) __a;
@@ -3896,6 +4187,12 @@ vreinterpretq_s16_p16 (poly16x8_t __a)
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_f16 (float16x4_t __a)
+{
+ return (int32x2_t) __a;
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f64 (float64x1_t __a)
{
return (int32x2_t) __a;
@@ -3986,6 +4283,12 @@ vreinterpretq_s32_s64 (int64x2_t __a)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_f16 (float16x8_t __a)
+{
+ return (int32x4_t) __a;
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
return (int32x4_t) __a;
@@ -4028,6 +4331,12 @@ vreinterpretq_s32_p16 (poly16x8_t __a)
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_f16 (float16x4_t __a)
+{
+ return (uint8x8_t) __a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f64 (float64x1_t __a)
{
return (uint8x8_t) __a;
@@ -4124,6 +4433,12 @@ vreinterpretq_u8_s64 (int64x2_t __a)
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_f16 (float16x8_t __a)
+{
+ return (uint8x16_t) __a;
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
return (uint8x16_t) __a;
@@ -4160,6 +4475,12 @@ vreinterpretq_u8_p16 (poly16x8_t __a)
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_f16 (float16x4_t __a)
+{
+ return (uint16x4_t) __a;
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f64 (float64x1_t __a)
{
return (uint16x4_t) __a;
@@ -4256,6 +4577,12 @@ vreinterpretq_u16_s64 (int64x2_t __a)
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_f16 (float16x8_t __a)
+{
+ return (uint16x8_t) __a;
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
return (uint16x8_t) __a;
@@ -4292,6 +4619,12 @@ vreinterpretq_u16_p16 (poly16x8_t __a)
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_f16 (float16x4_t __a)
+{
+ return (uint32x2_t) __a;
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f64 (float64x1_t __a)
{
return (uint32x2_t) __a;
@@ -4388,6 +4721,12 @@ vreinterpretq_u32_s64 (int64x2_t __a)
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_f16 (float16x8_t __a)
+{
+ return (uint32x4_t) __a;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
return (uint32x4_t) __a;
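
For orientation, a minimal caller sketch (not part of the patch; the function name is hypothetical). The new f16 vreinterpret intrinsics are pure bit-pattern casts, so no conversion instruction is involved:

#include <arm_neon.h>

/* View four half-precision lanes as four signed 16-bit lanes; the
   register contents are unchanged.  */
int16x4_t
halves_as_s16 (float16x4_t v)
{
  return vreinterpret_s16_f16 (v);
}
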
@@ -4425,6 +4764,12 @@ vreinterpretq_u32_p16 (poly16x8_t __a)
/* vset_lane */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
{
@@ -4499,6 +4844,12 @@ vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
/* vsetq_lane */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
{
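
A usage sketch for the new lane-set intrinsics (hypothetical caller; as with the other NEON lane intrinsics, the index must be an integer constant expression):

#include <arm_neon.h>

/* Overwrite lane 1 of a 4 x f16 vector with a scalar.  */
float16x4_t
set_lane1 (float16x4_t v, float16_t x)
{
  return vset_lane_f16 (x, v, 1);
}
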
@@ -4576,6 +4927,12 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \
return vreinterpret_##__TYPE##_u64 (lo);
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_low_f16 (float16x8_t __a)
+{
+ __GET_LOW (f16);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
@@ -4655,6 +5012,12 @@ vget_low_u64 (uint64x2_t __a)
uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
return vreinterpret_##__TYPE##_u64 (hi);
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_high_f16 (float16x8_t __a)
+{
+ __GET_HIGH (f16);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
@@ -4753,6 +5116,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return __builtin_aarch64_combinedi (__a[0], __b[0]);
}
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_combinev4hf (__a, __b);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
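
Sketch of how the new split and join helpers compose (hypothetical caller; the round trip returns the input unchanged):

#include <arm_neon.h>

float16x8_t
split_rejoin (float16x8_t v)
{
  return vcombine_f16 (vget_low_f16 (v), vget_high_f16 (v));
}
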
@@ -5657,14 +6026,6 @@ vaddlvq_u32 (uint32x4_t a)
result; \
})
-/* vcvt_f16_f32 not supported */
-
-/* vcvt_f32_f16 not supported */
-
-/* vcvt_high_f16_f32 not supported */
-
-/* vcvt_high_f32_f16 not supported */
-
#define vcvt_n_f32_s32(a, b) \
__extension__ \
({ \
@@ -9881,7 +10242,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | N | N |
+------+----+----+----+----+
- |float | - | - | N | N |
+ |float | - | Y | N | N |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@@ -9895,7 +10256,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | Y | Y |
+------+----+----+----+----+
- |float | - | - | Y | Y |
+ |float | - | Y | Y | Y |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@@ -9909,7 +10270,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | N | N | Y |
+------+----+----+----+----+
- |float | - | - | N | Y |
+ |float | - | N | N | Y |
+------+----+----+----+----+
|poly | Y | N | - | - |
+------+----+----+----+----+
@@ -9925,6 +10286,7 @@ __STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
+__STRUCTN (float, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs. */
@@ -9936,6 +10298,7 @@ __STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
+__STRUCTN (float, 16, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
@@ -9973,6 +10336,8 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
+ float16x8_t)
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
@@ -10011,6 +10376,7 @@ vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
@@ -10052,6 +10418,8 @@ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
+ float16x8_t)
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
@@ -10090,6 +10458,7 @@ vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
@@ -10136,6 +10505,8 @@ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
+ float16x8_t)
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
@@ -10174,6 +10545,7 @@ vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
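
A usage sketch for one of the newly instantiated lane stores (hypothetical caller):

#include <arm_neon.h>

/* Store lane 2 of each vector in the pair: writes pair.val[0][2] to
   p[0] and pair.val[1][2] to p[1].  */
void
store_lane2 (float16_t *p, float16x4x2_t pair)
{
  vst2_lane_f16 (p, pair, 2);
}
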
@@ -13036,6 +13408,18 @@ vcntq_u8 (uint8x16_t __a)
/* vcvt (double -> float). */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_f32 (float32x4_t __a)
+{
+ return __builtin_aarch64_float_truncate_lo_v4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
+{
+ return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
@@ -13050,6 +13434,12 @@ vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
/* vcvt (float -> double). */
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvt_f32_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_float_extend_lo_v4sf (__a);
+}
+
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{
@@ -13057,6 +13447,12 @@ vcvt_f64_f32 (float32x2_t __a)
return __builtin_aarch64_float_extend_lo_v2df (__a);
}
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvt_high_f32_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
+}
+
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
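
Usage sketch for the conversions that replace the "not supported" stubs removed earlier (hypothetical caller; narrowing to half precision rounds, so the round trip is lossy in general):

#include <arm_neon.h>

float32x4_t
through_f16 (float32x4_t v)
{
  float16x4_t h = vcvt_f16_f32 (v);   /* narrow: 4 x f32 -> 4 x f16 */
  return vcvt_f32_f16 (h);            /* widen:  4 x f16 -> 4 x f32 */
}
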
@@ -14630,6 +15026,12 @@ vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
/* vld1 */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_f16 (const float16_t *__a)
+{
+ return __builtin_aarch64_ld1v4hf (__a);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
@@ -14709,6 +15111,12 @@ vld1_u64 (const uint64_t *a)
/* vld1q */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_f16 (const float16_t *__a)
+{
+ return __builtin_aarch64_ld1v8hf (__a);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
@@ -14789,6 +15197,13 @@ vld1q_u64 (const uint64_t *a)
/* vld1_dup */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_dup_f16 (const float16_t* __a)
+{
+ float16_t __f = *__a;
+ return (float16x4_t) { __f, __f, __f, __f };
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t* __a)
{
@@ -14863,6 +15278,13 @@ vld1_dup_u64 (const uint64_t* __a)
/* vld1q_dup */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_dup_f16 (const float16_t* __a)
+{
+ float16_t __f = *__a;
+ return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f };
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t* __a)
{
@@ -14937,6 +15359,12 @@ vld1q_dup_u64 (const uint64_t* __a)
/* vld1_lane */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
{
@@ -15011,6 +15439,12 @@ vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
/* vld1q_lane */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
{
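
Sketch combining the new contiguous and lane loads (hypothetical caller):

#include <arm_neon.h>

/* Load eight contiguous f16 values, then patch lane 0 from *q.  */
float16x8_t
load_and_patch (const float16_t *p, const float16_t *q)
{
  float16x8_t v = vld1q_f16 (p);
  return vld1q_lane_f16 (q, v, 0);
}
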
@@ -15206,6 +15640,17 @@ vld2_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_f16 (const float16_t * __a)
+{
+ float16x4x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@@ -15327,6 +15772,17 @@ vld2q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_f16 (const float16_t * __a)
+{
+ float16x8x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@@ -15481,6 +15937,18 @@ vld3_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_f16 (const float16_t * __a)
+{
+ float16x4x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@@ -15613,6 +16081,18 @@ vld3q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_f16 (const float16_t * __a)
+{
+ float16x8x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@@ -15780,6 +16260,19 @@ vld4_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_f16 (const float16_t * __a)
+{
+ float16x4x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
+ ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@@ -15923,6 +16416,19 @@ vld4q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_f16 (const float16_t * __a)
+{
+ float16x8x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
+ ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
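
Sketch of the new structured loads (hypothetical caller): vld4q_f16 de-interleaves 4-element structures, so c.val[j][i] comes from p[4*i + j]:

#include <arm_neon.h>

/* De-interleave 32 contiguous f16 values into four 8-lane planes and
   return the first plane.  */
float16x8_t
first_plane (const float16_t *p)
{
  float16x8x4_t c = vld4q_f16 (p);
  return c.val[0];
}
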
@@ -15984,6 +16490,17 @@ vld2_dup_s32 (const int32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_f16 (const float16_t * __a)
+{
+ float16x4x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@@ -16193,6 +16710,17 @@ vld2q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_dup_f16 (const float16_t * __a)
+{
+ float16x8x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_f32 (const float32_t * __a)
{
@@ -16347,6 +16875,18 @@ vld3_dup_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_f16 (const float16_t * __a)
+{
+ float16x4x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@@ -16479,6 +17019,18 @@ vld3q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_dup_f16 (const float16_t * __a)
+{
+ float16x8x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_f32 (const float32_t * __a)
{
@@ -16646,6 +17198,19 @@ vld4_dup_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_f16 (const float16_t * __a)
+{
+ float16x4x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
+ ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@@ -16789,6 +17354,19 @@ vld4q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_dup_f16 (const float16_t * __a)
+{
+ float16x8x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
+ ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_f32 (const float32_t * __a)
{
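
Sketch of the replicating variants (hypothetical caller): vld2_dup_f16 loads a single {p[0], p[1]} pair and broadcasts it across all lanes:

#include <arm_neon.h>

/* val[0] holds four copies of p[0], val[1] four copies of p[1].  */
float16x4x2_t
broadcast_pair (const float16_t *p)
{
  return vld2_dup_f16 (p);
}
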
@@ -16841,6 +17419,8 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
+__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
+ v8hf, hf, f16, float16x8_t)
__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
@@ -16885,6 +17465,7 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@@ -16932,6 +17513,8 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
+__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
+ v8hf, hf, f16, float16x8_t)
__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
@@ -16978,6 +17561,7 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@@ -17033,6 +17617,8 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
/* vld4q_lane */
+__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
+ v8hf, hf, f16, float16x8_t)
__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
@@ -17081,6 +17667,7 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
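
A usage sketch for the lane variants instantiated above (hypothetical caller):

#include <arm_neon.h>

/* Reload lane 5 of both vectors in the pair from p[0] and p[1].  */
float16x8x2_t
reload_lane5 (const float16_t *p, float16x8x2_t pair)
{
  return vld2q_lane_f16 (p, pair, 5);
}
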
@@ -21978,6 +22565,12 @@ vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
/* vst1 */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t *__a, float16x4_t __b)
+{
+ __builtin_aarch64_st1v4hf (__a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t *a, float32x2_t b)
{
__builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
@@ -22057,6 +22650,12 @@ vst1_u64 (uint64_t *a, uint64x1_t b)
/* vst1q */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t *__a, float16x8_t __b)
+{
+ __builtin_aarch64_st1v8hf (__a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t *a, float32x4_t b)
{
__builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
@@ -22137,6 +22736,12 @@ vst1q_u64 (uint64_t *a, uint64x2_t b)
/* vst1_lane */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
@@ -22211,6 +22816,12 @@ vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
/* vst1q_lane */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
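
Sketch of the new f16 stores (hypothetical caller):

#include <arm_neon.h>

/* Store all eight lanes to dst, then lane 7 alone to one.  */
void
spill (float16_t *dst, float16_t *one, float16x8_t v)
{
  vst1q_f16 (dst, v);
  vst1q_lane_f16 (one, v, 7);
}
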
@@ -22417,6 +23028,18 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ float16x8x2_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
+ __builtin_aarch64_st2v4hf (__a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t val)
{
__builtin_aarch64_simd_oi __o;
@@ -22519,6 +23142,15 @@ vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
+ __builtin_aarch64_st2v8hf (__a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
__builtin_aarch64_simd_oi __o;
@@ -22691,6 +23323,20 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ float16x8x3_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
+ __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
__builtin_aarch64_simd_ci __o;
@@ -22805,6 +23451,16 @@ vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
+ __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
__builtin_aarch64_simd_ci __o;
@@ -23001,6 +23657,22 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ float16x8x4_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
+ __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
__builtin_aarch64_simd_xi __o;
@@ -23127,6 +23799,17 @@ vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
+ __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
__builtin_aarch64_simd_xi __o;
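
A note on the 64-bit vst2/vst3/vst4 bodies above: each half-width vector is widened into a zero-padded Q register only to satisfy the builtin's register-tuple type; the store itself writes just the low halves. A hypothetical caller:

#include <arm_neon.h>

/* Interleaved store: p[0]=a[0], p[1]=b[0], p[2]=a[1], ...  */
void
interleave2 (float16_t *p, float16x4_t a, float16x4_t b)
{
  float16x4x2_t pair = { { a, b } };
  vst2_f16 (p, pair);
}
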
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 31c47eb75bd..f1479213ac7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -38,6 +38,12 @@
;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
(define_mode_iterator GPF [SF DF])
+;; Iterator for General Purpose Float registers, inc __fp16.
+(define_mode_iterator GPF_F16 [HF SF DF])
+
+;; Double vector modes.
+(define_mode_iterator VDF [V2SF V4HF])
+
;; Integer vector modes.
(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
@@ -49,7 +55,7 @@
(define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI])
;; Double vector modes.
-(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
;; vector, 64-bit container, all integer modes
(define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
@@ -58,10 +64,10 @@
(define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI])
;; Quad vector modes.
-(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
;; VQ without 2 element modes.
-(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V4SF])
+(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF])
;; Quad vector with only 2 element modes.
(define_mode_iterator VQ_2E [V2DI V2DF])
@@ -76,7 +82,10 @@
;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
-;; Vector Float modes.
+;; Vector Float modes suitable for moving, loading and storing.
+(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
+
+;; Vector Float modes, barring HF modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
;; Vector Float modes, and DF.
@@ -85,6 +94,9 @@
;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
+;; Quad vector Float modes with half/single elements.
+(define_mode_iterator VQ_HSF [V8HF V4SF])
+
;; Modes suitable to use as the return type of a vcond expression.
(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
@@ -94,15 +106,23 @@
;; Vector Float modes with 2 elements.
(define_mode_iterator V2F [V2SF V2DF])
-;; All modes.
+;; All vector modes on which we support any arithmetic operations.
(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
-;; All vector modes and DI.
+;; All vector modes suitable for moving, loading, and storing.
+(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V2SF V4SF V2DF])
+
+;; All vector modes barring HF modes, plus DI.
(define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI])
-;; All vector modes and DI and DF.
+;; All vector modes and DI.
+(define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V2SF V4SF V2DF DI])
+
+;; All vector modes, plus DI and DF.
(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI
- V2DI V2SF V4SF V2DF DI DF])
+ V2DI V4HF V8HF V2SF V4SF V2DF DI DF])
;; Vector modes for Integer reduction across lanes.
(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI])
@@ -123,7 +143,7 @@
(define_mode_iterator VQW [V16QI V8HI V4SI])
;; Double vector modes for combines.
-(define_mode_iterator VDC [V8QI V4HI V2SI V2SF DI DF])
+(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF])
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
@@ -350,7 +370,8 @@
(V2SI "2s") (V4SI "4s")
(DI "1d") (DF "1d")
(V2DI "2d") (V2SF "2s")
- (V4SF "4s") (V2DF "2d")])
+ (V4SF "4s") (V2DF "2d")
+ (V4HF "4h") (V8HF "8h")])
(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
(V4SI "32") (V2DI "64")])
@@ -358,7 +379,8 @@
(define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
(V4HI ".4h") (V8HI ".8h")
(V2SI ".2s") (V4SI ".4s")
- (V2DI ".2d") (V2SF ".2s")
+ (V2DI ".2d") (V4HF ".4h")
+ (V8HF ".8h") (V2SF ".2s")
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
@@ -375,7 +397,8 @@
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(V4HI "h") (V8HI "h")
(V2SI "s") (V4SI "s")
- (V2DI "d") (V2SF "s")
+ (V2DI "d") (V4HF "h")
+ (V8HF "h") (V2SF "s")
(V4SF "s") (V2DF "d")
(SF "s") (DF "d")
(QI "b") (HI "h")
@@ -385,7 +408,8 @@
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b")
(V2SI "8b") (V4SI "16b")
- (V2DI "16b") (V2SF "8b")
+ (V2DI "16b") (V4HF "8b")
+ (V8HF "16b") (V2SF "8b")
(V4SF "16b") (V2DF "16b")
(DI "8b") (DF "8b")
(SI "8b")])
@@ -395,6 +419,7 @@
(V4HI "HI") (V8HI "HI")
(V2SI "SI") (V4SI "SI")
(DI "DI") (V2DI "DI")
+ (V4HF "HF") (V8HF "HF")
(V2SF "SF") (V4SF "SF")
(V2DF "DF") (DF "DF")
(SI "SI") (HI "HI")
@@ -413,6 +438,7 @@
(V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI")
+ (V4HF "V8HF") (V8HF "V8HF")
(V2SF "V2SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")])
@@ -422,16 +448,22 @@
(V4HI "V2HI") (V8HI "V4HI")
(V2SI "SI") (V4SI "V2SI")
(V2DI "DI") (V2SF "SF")
- (V4SF "V2SF") (V2DF "DF")])
+ (V4SF "V2SF") (V4HF "V2HF")
+ (V8HF "V4HF") (V2DF "DF")])
;; Double modes of vector modes.
(define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
+ (V4HF "V8HF")
(V2SI "V4SI") (V2SF "V4SF")
(SI "V2SI") (DI "V2DI")
(DF "V2DF")])
+;; Register suffix for double-length mode.
+(define_mode_attr Vdtype [(V4HF "8h") (V2SF "4s")])
+
;; Double modes of vector modes (lower case).
(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
+ (V4HF "v8hf")
(V2SI "v4si") (V2SF "v4sf")
(SI "v2si") (DI "v2di")
(DF "v2df")])
@@ -462,24 +494,31 @@
(define_mode_attr VWIDE [(V8QI "V8HI") (V4HI "V4SI")
(V2SI "V2DI") (V16QI "V8HI")
(V8HI "V4SI") (V4SI "V2DI")
- (HI "SI") (SI "DI")]
-
+ (HI "SI") (SI "DI")
+ (V8HF "V4SF") (V4SF "V2DF")
+ (V4HF "V4SF") (V2SF "V2DF")]
)
-;; Widened mode register suffixes for VD_BHSI/VQW.
+;; Widened modes of vector modes, lowercase
+(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf")])
+
+;; Widened mode register suffixes for VD_BHSI/VQW/VQ_HSF.
(define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s")
(V2SI "2d") (V16QI "8h")
- (V8HI "4s") (V4SI "2d")])
+ (V8HI "4s") (V4SI "2d")
+ (V8HF "4s") (V4SF "2d")])
;; Widened mode register suffixes for VDW/VQW.
(define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s")
(V2SI ".2d") (V16QI ".8h")
(V8HI ".4s") (V4SI ".2d")
+ (V4HF ".4s") (V2SF ".2d")
(SI "") (HI "")])
-;; Lower part register suffixes for VQW.
+;; Lower part register suffixes for VQW/VQ_HSF.
(define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h")
- (V4SI "2s")])
+ (V4SI "2s") (V8HF "4h")
+ (V4SF "2s")])
;; Define corresponding core/FP element mode for each vector mode.
(define_mode_attr vw [(V8QI "w") (V16QI "w")
@@ -506,6 +545,7 @@
(V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
(DI "DI") (V2DI "V2DI")
+ (V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI") (DF "DI")
(SF "SI")])
@@ -515,6 +555,7 @@
(V4HI "v4hi") (V8HI "v8hi")
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
+ (V4HF "v4hi") (V8HF "v8hi")
(V2SF "v2si") (V4SF "v4si")
(V2DF "v2di") (DF "di")
(SF "si")])
@@ -536,14 +577,17 @@
(define_mode_attr nregs [(OI "2") (CI "3") (XI "4")])
(define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI")
+ (V4HF "V16HF")
(V2SI "V8SI") (V2SF "V8SF")
(DI "V4DI") (DF "V4DF")])
(define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI")
+ (V4HF "V24HF")
(V2SI "V12SI") (V2SF "V12SF")
(DI "V6DI") (DF "V6DF")])
(define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI")
+ (V4HF "V32HF")
(V2SI "V16SI") (V2SF "V16SF")
(DI "V8DI") (DF "V8DF")])
@@ -556,6 +600,7 @@
(V2SI "V2SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "V2DI")
(V2SF "V2SF") (V4SF "V2SF")
+ (V4HF "SF") (V8HF "SF")
(DF "V2DI") (V2DF "V2DI")])
;; Similar, for three elements.
@@ -564,6 +609,7 @@
(V2SI "BLK") (V4SI "BLK")
(DI "EI") (V2DI "EI")
(V2SF "BLK") (V4SF "BLK")
+ (V4HF "BLK") (V8HF "BLK")
(DF "EI") (V2DF "EI")])
;; Similar, for four elements.
@@ -572,6 +618,7 @@
(V2SI "V4SI") (V4SI "V4SI")
(DI "OI") (V2DI "OI")
(V2SF "V4SF") (V4SF "V4SF")
+ (V4HF "V4HF") (V8HF "V4HF")
(DF "OI") (V2DF "OI")])
@@ -591,12 +638,14 @@
(V2SI "V4SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "DI")
(V2SF "V4SF") (V4SF "V2SF")
+ (V4HF "V8HF") (V8HF "V4HF")
(DF "V2DF") (V2DF "DF")])
(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
(V4HI "to_128") (V8HI "to_64")
(V2SI "to_128") (V4SI "to_64")
(DI "to_128") (V2DI "to_64")
+ (V4HF "to_128") (V8HF "to_64")
(V2SF "to_128") (V4SF "to_64")
(DF "to_128") (V2DF "to_64")])
@@ -630,6 +679,7 @@
(V4HI "") (V8HI "_q")
(V2SI "") (V4SI "_q")
(DI "") (V2DI "_q")
+ (V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(V2DF "_q")
(QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 45b49f2ea2a..8b26cb6d35c 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -204,6 +204,7 @@ arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define di_UP DImode
#define v16qi_UP V16QImode
#define v8hi_UP V8HImode
+#define v8hf_UP V8HFmode
#define v4si_UP V4SImode
#define v4sf_UP V4SFmode
#define v2di_UP V2DImode
@@ -252,6 +253,12 @@ typedef struct {
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
VAR1 (T, N, J)
+#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
+ VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
+ VAR1 (T, N, K)
+#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
+ VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
+ VAR1 (T, N, L)
/* The NEON builtin data can be found in arm_neon_builtins.def.
The mode entries in the following table correspond to the "key" type of the
@@ -783,13 +790,6 @@ arm_init_simd_builtin_types (void)
int nelts = sizeof (arm_simd_types) / sizeof (arm_simd_types[0]);
tree tdecl;
- /* Initialize the HFmode scalar type. */
- arm_simd_floatHF_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
- layout_type (arm_simd_floatHF_type_node);
- (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
- "__builtin_neon_hf");
-
/* Poly types are a world of their own. In order to maintain legacy
ABI, they get initialized using the old interface, and don't get
an entry in our mangling table, consequently, they get default
@@ -837,7 +837,10 @@ arm_init_simd_builtin_types (void)
mangling. */
/* Continue with standard types. */
+ /* The __builtin_simd{64,128}_float16 types are kept private unless
+ we have a scalar __fp16 type. */
arm_simd_types[Float16x4_t].eltype = arm_simd_floatHF_type_node;
+ arm_simd_types[Float16x8_t].eltype = arm_simd_floatHF_type_node;
arm_simd_types[Float32x2_t].eltype = float_type_node;
arm_simd_types[Float32x4_t].eltype = float_type_node;
@@ -1723,10 +1726,12 @@ arm_init_iwmmxt_builtins (void)
static void
arm_init_fp16_builtins (void)
{
- tree fp16_type = make_node (REAL_TYPE);
- TYPE_PRECISION (fp16_type) = 16;
- layout_type (fp16_type);
- (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+ arm_simd_floatHF_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+ layout_type (arm_simd_floatHF_type_node);
+ if (arm_fp16_format)
+ (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
+ "__fp16");
}
static void
@@ -1771,12 +1776,13 @@ arm_init_builtins (void)
if (TARGET_REALLY_IWMMXT)
arm_init_iwmmxt_builtins ();
+ /* This creates the arm_simd_floatHF_type_node so must come before
+ arm_init_neon_builtins which uses it. */
+ arm_init_fp16_builtins ();
+
if (TARGET_NEON)
arm_init_neon_builtins ();
- if (arm_fp16_format)
- arm_init_fp16_builtins ();
-
if (TARGET_CRC32)
arm_init_crc32_builtins ();
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index bcbd20be057..b178ae6c05f 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -44,5 +44,7 @@
ENTRY (Float16x4_t, V4HF, none, 64, float16, 18)
ENTRY (Float32x2_t, V2SF, none, 64, float32, 18)
+
+ ENTRY (Float16x8_t, V8HF, none, 128, float16, 19)
ENTRY (Float32x4_t, V4SF, none, 128, float32, 19)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index efbaac56487..5dae0842604 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -26377,7 +26377,8 @@ arm_vector_mode_supported_p (machine_mode mode)
{
/* Neon also supports V2SImode, etc. listed in the clause below. */
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
- || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
+ || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
+ || mode == V2DImode || mode == V8HFmode))
return true;
if ((TARGET_NEON || TARGET_IWMMXT)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 1e15cdf5e9e..b1ad977dc36 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1001,7 +1001,7 @@ extern int arm_arch_crc;
/* Modes valid for Neon Q registers. */
#define VALID_NEON_QREG_MODE(MODE) \
((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
- || (MODE) == V4SFmode || (MODE) == V2DImode)
+ || (MODE) == V8HFmode || (MODE) == V4SFmode || (MODE) == V2DImode)
/* Structure modes valid for Neon registers. */
#define VALID_NEON_STRUCT_MODE(MODE) \
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index c923e294cda..66622dfcfe2 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -41,7 +41,10 @@ typedef __simd64_int8_t int8x8_t;
typedef __simd64_int16_t int16x4_t;
typedef __simd64_int32_t int32x2_t;
typedef __builtin_neon_di int64x1_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef __fp16 float16_t;
typedef __simd64_float16_t float16x4_t;
+#endif
typedef __simd64_float32_t float32x2_t;
typedef __simd64_poly8_t poly8x8_t;
typedef __simd64_poly16_t poly16x4_t;
@@ -57,6 +60,9 @@ typedef __simd128_int8_t int8x16_t;
typedef __simd128_int16_t int16x8_t;
typedef __simd128_int32_t int32x4_t;
typedef __simd128_int64_t int64x2_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef __simd128_float16_t float16x8_t;
+#endif
typedef __simd128_float32_t float32x4_t;
typedef __simd128_poly8_t poly8x16_t;
typedef __simd128_poly16_t poly16x8_t;
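
Since the float16 types on 32-bit ARM are now gated on having a scalar __fp16 format, portable callers guard their uses the same way; a sketch (not part of the patch):

#include <arm_neon.h>

#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
float16x4_t
first_half (float16x8_t v)
{
  return vget_low_f16 (v);
}
#endif
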
@@ -160,6 +166,20 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x2_t
+{
+ float16x4_t val[2];
+} float16x4x2_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x2_t
+{
+ float16x8_t val[2];
+} float16x8x2_t;
+#endif
+
typedef struct float32x2x2_t
{
float32x2_t val[2];
@@ -286,6 +306,20 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x3_t
+{
+ float16x4_t val[3];
+} float16x4x3_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x3_t
+{
+ float16x8_t val[3];
+} float16x8x3_t;
+#endif
+
typedef struct float32x2x3_t
{
float32x2_t val[3];
@@ -412,6 +446,20 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x4_t
+{
+ float16x4_t val[4];
+} float16x4x4_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x4_t
+{
+ float16x8_t val[4];
+} float16x8x4_t;
+#endif
+
typedef struct float32x2x4_t
{
float32x2_t val[4];
@@ -5201,6 +5249,21 @@ vget_lane_s32 (int32x2_t __a, const int __b)
return (int32_t)__builtin_neon_vget_lanev2si (__a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+/* Functions cannot accept or return __FP16 types. Even if the function
+ were marked always-inline so there were no call sites, the declaration
+ would nonetheless raise an error. Hence, we must use a macro instead. */
+
+#define vget_lane_f16(__v, __idx) \
+ __extension__ \
+ ({ \
+ float16x4_t __vec = (__v); \
+ __builtin_arm_lane_check (4, __idx); \
+ float16_t __res = __vec[__idx]; \
+ __res; \
+ })
+#endif
+
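
Because the accessor is a macro rather than a function, __builtin_arm_lane_check validates the lane index when the macro expands; usage is otherwise function-like. A sketch (assuming an -mfp16-format is in effect):

#include <arm_neon.h>

float16_t
lane0 (float16x4_t v)
{
  return vget_lane_f16 (v, 0);
}
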
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
@@ -5267,6 +5330,17 @@ vgetq_lane_s32 (int32x4_t __a, const int __b)
return (int32_t)__builtin_neon_vget_lanev4si (__a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define vgetq_lane_f16(__v, __idx) \
+ __extension__ \
+ ({ \
+ float16x8_t __vec = (__v); \
+ __builtin_arm_lane_check (8, __idx); \
+ float16_t __res = __vec[__idx]; \
+ __res; \
+ })
+#endif
+
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
@@ -5333,6 +5407,18 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define vset_lane_f16(__e, __v, __idx) \
+ __extension__ \
+ ({ \
+ float16_t __elem = (__e); \
+ float16x4_t __vec = (__v); \
+ __builtin_arm_lane_check (4, __idx); \
+ __vec[__idx] = __elem; \
+ __vec; \
+ })
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
{
@@ -5399,6 +5485,18 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define vsetq_lane_f16(__e, __v, __idx) \
+ __extension__ \
+ ({ \
+ float16_t __elem = (__e); \
+ float16x8_t __vec = (__v); \
+ __builtin_arm_lane_check (8, __idx); \
+ __vec[__idx] = __elem; \
+ __vec; \
+ })
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
{
@@ -5479,6 +5577,14 @@ vcreate_s64 (uint64_t __a)
return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcreate_f16 (uint64_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
@@ -5981,6 +6087,14 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_neon_vcombinev4hf (__a, __b);
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -6055,6 +6169,14 @@ vget_high_s64 (int64x2_t __a)
return (int64x1_t)__builtin_neon_vget_highv2di (__a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_high_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_highv8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
@@ -6115,6 +6237,14 @@ vget_low_s32 (int32x4_t __a)
return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_low_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_lowv8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
@@ -6220,21 +6350,25 @@ vcvtq_u32_f32 (float32x4_t __a)
}
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
-
#endif
+#endif
+
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
-
#endif
+#endif
+
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
{
@@ -8662,6 +8796,14 @@ vld1_s64 (const int64_t * __a)
return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v4hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * __a)
{
@@ -8736,6 +8878,14 @@ vld1q_s64 (const int64_t * __a)
return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * __a)
{
@@ -8796,6 +8946,14 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c)
return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_lane_f16 (const float16_t * __a, float16x4_t __b, const int __c)
+{
+ return vset_lane_f16 (*__a, __b, __c);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
{
@@ -8870,6 +9028,14 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c)
return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_f16 (const float16_t * __a, float16x8_t __b, const int __c)
+{
+ return vsetq_lane_f16 (*__a, __b, __c);
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
{
@@ -8944,6 +9110,15 @@ vld1_dup_s32 (const int32_t * __a)
return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_dup_f16 (const float16_t * __a)
+{
+ float16_t __f = *__a;
+ return (float16x4_t) { __f, __f, __f, __f };
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * __a)
{
@@ -9018,6 +9193,15 @@ vld1q_dup_s32 (const int32_t * __a)
return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_dup_f16 (const float16_t * __a)
+{
+ float16_t __f = *__a;
+ return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f };
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * __a)
{
@@ -9106,6 +9290,14 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
__builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t * __a, float16x4_t __b)
+{
+ __builtin_neon_vst1v4hf (__a, __b);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t * __a, float32x2_t __b)
{
@@ -9180,6 +9372,14 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
__builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t * __a, float16x8_t __b)
+{
+ __builtin_neon_vst1v8hf (__a, __b);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * __a, float32x4_t __b)
{
@@ -9240,6 +9440,14 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
__builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hf (__a, __b, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
{
@@ -9314,6 +9522,14 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
__builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hf (__a, __b, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
{
@@ -9394,6 +9610,16 @@ vld2_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@@ -9492,6 +9718,16 @@ vld2q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_f16 (const float16_t * __a)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@@ -9567,6 +9803,17 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
{
@@ -9639,6 +9886,17 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
{
@@ -9699,6 +9957,16 @@ vld2_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@@ -9794,6 +10062,15 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b)
__builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t __b)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t __b)
{
@@ -9880,6 +10157,15 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b)
__builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t __b)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
{
@@ -9943,6 +10229,15 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
__builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
{
@@ -9999,6 +10294,15 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
__builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
{
@@ -10051,6 +10355,16 @@ vld3_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@@ -10149,6 +10463,16 @@ vld3q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_f16 (const float16_t * __a)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@@ -10224,6 +10548,17 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
{
@@ -10296,6 +10631,17 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
{
@@ -10356,6 +10702,16 @@ vld3_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@@ -10451,6 +10807,15 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b)
__builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t __b)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t __b)
{
@@ -10537,6 +10902,15 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b)
__builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t __b)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
{
@@ -10600,6 +10974,15 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
__builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
{
@@ -10656,6 +11039,15 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
__builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
{
@@ -10708,6 +11100,16 @@ vld4_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@@ -10806,6 +11208,16 @@ vld4q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_f16 (const float16_t * __a)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
@@ -10881,6 +11293,18 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hf (__a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
{
@@ -10953,6 +11377,18 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hf (__a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
{
@@ -11013,6 +11449,16 @@ vld4_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@@ -11108,6 +11554,15 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b)
__builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t __b)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t __b)
{
@@ -11194,6 +11649,15 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b)
__builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t __b)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
{
@@ -11257,6 +11721,15 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c)
__builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
{
@@ -11313,6 +11786,15 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c)
__builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
{
@@ -11827,6 +12309,14 @@ vreinterpret_p8_p16 (poly16x4_t __a)
return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_f16 (float16x4_t __a)
+{
+ return (poly8x8_t) __a;
+}
+#endif
+
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f32 (float32x2_t __a)
{
@@ -11895,6 +12385,14 @@ vreinterpret_p16_p8 (poly8x8_t __a)
return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_f16 (float16x4_t __a)
+{
+ return (poly16x4_t) __a;
+}
+#endif
+
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f32 (float32x2_t __a)
{
@@ -11957,6 +12455,104 @@ vreinterpret_p16_u32 (uint32x2_t __a)
return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p8 (poly8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p16 (poly16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_f32 (float32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#ifdef __ARM_FEATURE_CRYPTO
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p64 (poly64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s64 (int64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u64 (uint64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s8 (int8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s16 (int16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_s32 (int32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u8 (uint8x8_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u16 (uint16x4_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_u32 (uint32x2_t __a)
+{
+ return (float16x4_t) __a;
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p8 (poly8x8_t __a)
{
@@ -11969,6 +12565,14 @@ vreinterpret_f32_p16 (poly16x4_t __a)
return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_f16 (float16x4_t __a)
+{
+ return (float32x2_t) __a;
+}
+#endif
+
#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p64 (poly64x1_t __a)
@@ -12041,6 +12645,17 @@ vreinterpret_p64_p16 (poly16x4_t __a)
}
#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#ifdef __ARM_FEATURE_CRYPTO
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_f16 (float16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+#endif
+#endif
+
#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_f32 (float32x2_t __a)
@@ -12125,6 +12740,14 @@ vreinterpret_s64_p16 (poly16x4_t __a)
return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_f16 (float16x4_t __a)
+{
+ return (int64x1_t) __a;
+}
+#endif
+
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f32 (float32x2_t __a)
{
@@ -12193,6 +12816,14 @@ vreinterpret_u64_p16 (poly16x4_t __a)
return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_f16 (float16x4_t __a)
+{
+ return (uint64x1_t) __a;
+}
+#endif
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
@@ -12261,6 +12892,14 @@ vreinterpret_s8_p16 (poly16x4_t __a)
return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_f16 (float16x4_t __a)
+{
+ return (int8x8_t) __a;
+}
+#endif
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32 (float32x2_t __a)
{
@@ -12329,6 +12968,14 @@ vreinterpret_s16_p16 (poly16x4_t __a)
return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_f16 (float16x4_t __a)
+{
+ return (int16x4_t) __a;
+}
+#endif
+
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32 (float32x2_t __a)
{
@@ -12397,6 +13044,14 @@ vreinterpret_s32_p16 (poly16x4_t __a)
return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_f16 (float16x4_t __a)
+{
+ return (int32x2_t) __a;
+}
+#endif
+
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32 (float32x2_t __a)
{
@@ -12465,6 +13120,14 @@ vreinterpret_u8_p16 (poly16x4_t __a)
return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_f16 (float16x4_t __a)
+{
+ return (uint8x8_t) __a;
+}
+#endif
+
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32 (float32x2_t __a)
{
@@ -12533,6 +13196,14 @@ vreinterpret_u16_p16 (poly16x4_t __a)
return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_f16 (float16x4_t __a)
+{
+ return (uint16x4_t) __a;
+}
+#endif
+
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32 (float32x2_t __a)
{
@@ -12601,6 +13272,14 @@ vreinterpret_u32_p16 (poly16x4_t __a)
return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_f16 (float16x4_t __a)
+{
+ return (uint32x2_t) __a;
+}
+#endif
+
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32 (float32x2_t __a)
{
@@ -12663,6 +13342,14 @@ vreinterpretq_p8_p16 (poly16x8_t __a)
return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_f16 (float16x8_t __a)
+{
+ return (poly8x16_t) __a;
+}
+#endif
+
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
@@ -12739,6 +13426,14 @@ vreinterpretq_p16_p8 (poly8x16_t __a)
return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_f16 (float16x8_t __a)
+{
+ return (poly16x8_t) __a;
+}
+#endif
+
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
@@ -12809,6 +13504,114 @@ vreinterpretq_p16_u32 (uint32x4_t __a)
return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p8 (poly8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p16 (poly16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_f32 (float32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#ifdef __ARM_FEATURE_CRYPTO
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p64 (poly64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#ifdef __ARM_FEATURE_CRYPTO
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p128 (poly128_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s64 (int64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u64 (uint64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s8 (int8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s16 (int16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_s32 (int32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u8 (uint8x16_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u16 (uint16x8_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_u32 (uint32x4_t __a)
+{
+ return (float16x8_t) __a;
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p8 (poly8x16_t __a)
{
@@ -12821,6 +13624,14 @@ vreinterpretq_f32_p16 (poly16x8_t __a)
return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_f16 (float16x8_t __a)
+{
+ return (float32x4_t) __a;
+}
+#endif
+
#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p64 (poly64x2_t __a)
@@ -12901,6 +13712,17 @@ vreinterpretq_p64_p16 (poly16x8_t __a)
}
#endif
+
+#ifdef __ARM_FEATURE_CRYPTO
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_f16 (float16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+#endif
+#endif
+
#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_f32 (float32x4_t __a)
@@ -12995,8 +13817,18 @@ vreinterpretq_p128_p16 (poly16x8_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a);
}
+#endif
+#ifdef __ARM_FEATURE_CRYPTO
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
+vreinterpretq_p128_f16 (float16x8_t __a)
+{
+ return (poly128_t) __a;
+}
#endif
+#endif
+
#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_f32 (float32x4_t __a)
@@ -13089,6 +13921,14 @@ vreinterpretq_s64_p16 (poly16x8_t __a)
return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_f16 (float16x8_t __a)
+{
+ return (int64x2_t) __a;
+}
+#endif
+
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
@@ -13165,6 +14005,14 @@ vreinterpretq_u64_p16 (poly16x8_t __a)
return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f16 (float16x8_t __a)
+{
+ return (uint64x2_t) __a;
+}
+#endif
+
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
@@ -13241,6 +14089,14 @@ vreinterpretq_s8_p16 (poly16x8_t __a)
return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_f16 (float16x8_t __a)
+{
+ return (int8x16_t) __a;
+}
+#endif
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
@@ -13317,6 +14173,14 @@ vreinterpretq_s16_p16 (poly16x8_t __a)
return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_f16 (float16x8_t __a)
+{
+ return (int16x8_t) __a;
+}
+#endif
+
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
@@ -13393,6 +14257,14 @@ vreinterpretq_s32_p16 (poly16x8_t __a)
return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_f16 (float16x8_t __a)
+{
+ return (int32x4_t) __a;
+}
+#endif
+
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
@@ -13469,6 +14341,14 @@ vreinterpretq_u8_p16 (poly16x8_t __a)
return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_f16 (float16x8_t __a)
+{
+ return (uint8x16_t) __a;
+}
+#endif
+
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
@@ -13545,6 +14425,14 @@ vreinterpretq_u16_p16 (poly16x8_t __a)
return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_f16 (float16x8_t __a)
+{
+ return (uint16x8_t) __a;
+}
+#endif
+
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
@@ -13621,6 +14509,14 @@ vreinterpretq_u32_p16 (poly16x8_t __a)
return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_f16 (float16x8_t __a)
+{
+ return (uint32x4_t) __a;
+}
+#endif
+
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
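
[Editor's note: not part of the patch. Taken together, the arm_neon.h additions above give the usual load/convert/store workflow for half precision. A minimal sketch, assuming a compiler built from this branch and flags such as -mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee; the function names scale4_f16 and raw_bits_f16 are illustrative only.]

#include <arm_neon.h>

/* Scale four half-precision values: widen to f32, multiply, narrow back.
   vld1_f16/vst1_f16 and the f16 reinterprets are new in this patch;
   vcvt_f32_f16/vcvt_f16_f32 require -mfpu=neon-fp16.  */
void
scale4_f16 (float16_t *dst, const float16_t *src, float32_t k)
{
  float16x4_t h = vld1_f16 (src);
  float32x4_t w = vcvt_f32_f16 (h);
  w = vmulq_n_f32 (w, k);
  vst1_f16 (dst, vcvt_f16_f32 (w));
}

/* Bit-level access without conversion, via the new vreinterpret_*_f16.  */
uint16x4_t
raw_bits_f16 (float16x4_t v)
{
  return vreinterpret_u16_f16 (v);
}
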
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f150b98b809..0b719df7607 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
-VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di)
+VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR3 (UNOP, vmovn, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovns, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di)
@@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti)
-VAR10 (LOAD1, vld1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR12 (LOAD1, vld1,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR10 (LOAD1LANE, vld1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (LOAD1, vld1_dup,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1, vst1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1LANE, vst1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR9 (LOAD1, vld2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR12 (STORE1, vst1,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR12 (STORE1LANE, vst1_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR11 (LOAD1, vld2,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld2_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst2,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst2_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld3,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld3_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst3,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst3_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld4,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld4_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst4,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst4_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
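
[Editor's note: each VARn entry in this file registers one builtin per listed mode, named __builtin_neon_<name><mode>; the v4hf/v8hf additions above are what the new arm_neon.h bodies call. An illustrative sketch of the correspondence; store_f16 is a hypothetical wrapper, and user code would normally go through the intrinsic rather than the builtin.]

#include <arm_neon.h>

/* VAR12 (STORE1, vst1, ..., v4hf, ..., v8hf, ...) registers, among
   others, __builtin_neon_vst1v4hf and __builtin_neon_vst1v8hf, which
   back vst1_f16 and vst1q_f16 respectively.  */
void
store_f16 (float16_t *p, float16x8_t v)
{
  __builtin_neon_vst1v8hf (p, v);   /* what vst1q_f16 (p, v) expands to */
}
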
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 1e7f3f17a8a..47cc1eebecd 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -65,20 +65,32 @@
;; Integer modes supported by Neon and IWMMXT, except V2DI
(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
-;; Double-width vector modes.
+;; Double-width vector modes, on which we support arithmetic (no HF!)
(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+;; Double-width vector modes plus 64-bit elements for vreinterpret + vcreate.
+(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI])
+
;; Double-width vector modes plus 64-bit elements.
-(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
+(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI])
+
+;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
+(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF])
;; Double-width vector modes without floating-point elements.
(define_mode_iterator VDI [V8QI V4HI V2SI])
-;; Quad-width vector modes.
+;; Quad-width vector modes supporting arithmetic (no HF!).
(define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
+;; Quad-width vector modes, including V8HF.
+(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF])
+
+;; Quad-width vector modes with 16- or 32-bit elements
+(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
+
;; Quad-width vector modes plus 64-bit elements.
-(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
+(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI])
;; Quad-width vector modes without floating-point elements.
(define_mode_iterator VQI [V16QI V8HI V4SI])
@@ -111,7 +123,8 @@
(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
;; Vector modes, including 64-bit integer elements.
-(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI])
+(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI
+ V4HF V8HF V2SF V4SF DI V2DI])
;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
@@ -366,7 +379,8 @@
;; Define element mode for each vector mode.
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
- (V4HI "HI") (V8HI "HI")
+ (V4HI "HI") (V8HI "HI")
+ (V4HF "HF") (V8HF "HF")
(V2SI "SI") (V4SI "SI")
(V2SF "SF") (V4SF "SF")
(DI "DI") (V2DI "DI")])
@@ -383,6 +397,7 @@
;; size for structure lane/dup loads and stores.
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
(V4HI "SI") (V8HI "SI")
+ (V4HF "SF") (V8HF "SF")
(V2SI "V2SI") (V4SI "V2SI")
(V2SF "V2SF") (V4SF "V2SF")
(DI "V2DI") (V2DI "V2DI")])
@@ -390,6 +405,7 @@
;; Similar, for three elements.
(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
(V4HI "BLK") (V8HI "BLK")
+ (V4HF "BLK") (V8HF "BLK")
(V2SI "BLK") (V4SI "BLK")
(V2SF "BLK") (V4SF "BLK")
(DI "EI") (V2DI "EI")])
@@ -397,6 +413,7 @@
;; Similar, for four elements.
(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
(V4HI "V4HI") (V8HI "V4HI")
+ (V4HF "V4HF") (V8HF "V4HF")
(V2SI "V4SI") (V4SI "V4SI")
(V2SF "V4SF") (V4SF "V4SF")
(DI "OI") (V2DI "OI")])
@@ -421,7 +438,8 @@
;; Modes with half the number of equal-sized elements.
(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
- (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF")
+ (V8HF "V4HF") (V4SI "V2SI")
+ (V4SF "V2SF") (V2DF "DF")
(V2DI "DI")])
;; Same, but lower-case.
@@ -431,8 +449,9 @@
;; Modes with twice the number of equal-sized elements.
(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
- (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
- (DI "V2DI")])
+ (V2SI "V4SI") (V4HF "V8HF")
+ (V2SF "V4SF") (DF "V2DF")
+ (DI "V2DI")])
;; Same, but lower-case.
(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
@@ -454,8 +473,9 @@
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
- (V4HI "V4HI") (V8HI "V8HI")
+ (V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
+ (V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(DI "DI") (V2DI "V2DI")])
@@ -492,12 +512,14 @@
(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8")
(V4HI "u16") (V8HI "u16")
(V2SI "32") (V4SI "32")
+ (V4HF "u16") (V8HF "u16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
(V4HI "16") (V8HI "16")
(V2SI "32") (V4SI "32")
(DI "64") (V2DI "64")
+ (V4HF "16") (V8HF "16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b")
@@ -564,6 +586,7 @@
(DI "true") (V2DI "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+ (V4HF "4") (V8HF "8")
(V4HI "4") (V8HI "8")
(V2SI "2") (V4SI "4")
(V2SF "2") (V4SF "4")
@@ -607,6 +630,7 @@
(define_mode_attr q [(V8QI "") (V16QI "_q")
(V4HI "") (V8HI "_q")
(V2SI "") (V4SI "_q")
+ (V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(DI "") (V2DI "_q")
(DF "") (V2DF "_q")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index b1bf26a9451..0545231e314 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -320,11 +320,11 @@
[(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
(define_insn "vec_set<mode>_internal"
- [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
- (vec_merge:VQ
- (vec_duplicate:VQ
+ [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
+ (vec_merge:VQ2
+ (vec_duplicate:VQ2
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
- (match_operand:VQ 3 "s_register_operand" "0,0")
+ (match_operand:VQ2 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON"
{
@@ -407,7 +407,7 @@
(define_insn "vec_extract<mode>"
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w,w")
+ (match_operand:VQ2 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON"
{
@@ -2607,7 +2607,7 @@
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extend:SI
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w")
+ (match_operand:VQ2 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
@@ -2634,7 +2634,7 @@
[(set (match_operand:SI 0 "s_register_operand" "=r")
(zero_extend:SI
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w")
+ (match_operand:VQ2 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
@@ -2789,7 +2789,7 @@ if (BYTES_BIG_ENDIAN)
})
(define_expand "neon_vcreate<mode>"
- [(match_operand:VDX 0 "s_register_operand" "")
+ [(match_operand:VD_RE 0 "s_register_operand" "")
(match_operand:DI 1 "general_operand" "")]
"TARGET_NEON"
{
@@ -4140,7 +4140,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vreinterpretv8qi<mode>"
[(match_operand:V8QI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4149,7 +4149,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vreinterpretv4hi<mode>"
[(match_operand:V4HI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4158,7 +4158,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vreinterpretv2si<mode>"
[(match_operand:V2SI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4167,7 +4167,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vreinterpretv2sf<mode>"
[(match_operand:V2SF 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4176,7 +4176,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vreinterpretdi<mode>"
[(match_operand:DI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4435,14 +4435,14 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_load_lanesoi<mode>"
[(set (match_operand:OI 0 "s_register_operand")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON")
(define_insn "neon_vld2<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
"vld2.<V_sz_elem>\t%h0, %A1"
@@ -4453,7 +4453,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:TI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
@@ -4478,7 +4478,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
@@ -4549,14 +4549,14 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_store_lanesoi<mode>"
[(set (match_operand:OI 0 "neon_struct_operand")
(unspec:OI [(match_operand:OI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON")
(define_insn "neon_vst2<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON"
"vst2.<V_sz_elem>\t%h1, %A0"
@@ -4568,7 +4568,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_two_elem>
[(match_operand:TI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
@@ -4593,7 +4593,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_two_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
@@ -4646,7 +4646,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_load_lanesci<mode>"
[(match_operand:CI 0 "s_register_operand")
(match_operand:CI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
@@ -4656,7 +4656,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vld3<mode>"
[(match_operand:CI 0 "s_register_operand")
(match_operand:CI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4671,7 +4671,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld3qa<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3A))]
"TARGET_NEON"
{
@@ -4691,7 +4691,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3B))]
"TARGET_NEON"
{
@@ -4712,7 +4712,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:EI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
@@ -4739,7 +4739,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
@@ -4819,7 +4819,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_store_lanesci<mode>"
[(match_operand:CI 0 "neon_struct_operand")
(match_operand:CI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
@@ -4829,7 +4829,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vst3<mode>"
[(match_operand:CI 0 "neon_struct_operand")
(match_operand:CI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4844,7 +4844,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst3qa<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3A))]
"TARGET_NEON"
{
@@ -4863,7 +4863,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst3qb<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3B))]
"TARGET_NEON"
{
@@ -4884,7 +4884,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_three_elem>
[(match_operand:EI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
@@ -4911,7 +4911,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_three_elem>
[(match_operand:CI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
@@ -4966,7 +4966,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_load_lanesxi<mode>"
[(match_operand:XI 0 "s_register_operand")
(match_operand:XI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
@@ -4976,7 +4976,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vld4<mode>"
[(match_operand:XI 0 "s_register_operand")
(match_operand:XI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4991,7 +4991,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld4qa<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4A))]
"TARGET_NEON"
{
@@ -5012,7 +5012,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4B))]
"TARGET_NEON"
{
@@ -5034,7 +5034,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
@@ -5062,7 +5062,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
@@ -5147,7 +5147,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "vec_store_lanesxi<mode>"
[(match_operand:XI 0 "neon_struct_operand")
(match_operand:XI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
@@ -5157,7 +5157,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vst4<mode>"
[(match_operand:XI 0 "neon_struct_operand")
(match_operand:XI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -5172,7 +5172,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst4qa<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4A))]
"TARGET_NEON"
{
@@ -5192,7 +5192,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst4qb<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4B))]
"TARGET_NEON"
{
@@ -5214,7 +5214,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_four_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
@@ -5242,7 +5242,7 @@ if (BYTES_BIG_ENDIAN)
(unspec:<V_four_elem>
[(match_operand:XI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
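
[Editor's note: the VD_LANE and VQ_HS instances above are exercised by the lane-wise structured loads and stores added to arm_neon.h earlier in this patch. A minimal sketch, assumptions as before; refresh_lane1 is an illustrative name.]

#include <arm_neon.h>

/* Reload lane 1 of a two-register deinterleaved state from memory and
   write it back; maps onto vld2.16/vst2.16 with a lane index.  */
float16x4x2_t
refresh_lane1 (float16_t *buf, float16x4x2_t state)
{
  state = vld2_lane_f16 (buf, state, 1);
  vst2_lane_f16 (buf, state, 1);
  return state;
}
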
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index abe07799f33..2887e7cb8d6 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1549,7 +1549,12 @@ options. Some multilibs may be incompatible with these options.
@item arm_neon_fp16_ok
@anchor{arm_neon_fp16_ok}
ARM Target supports @code{-mfpu=neon-fp16 -mfloat-abi=softfp} or compatible
-options. Some multilibs may be incompatible with these options.
+options, including @code{-mfp16-format=ieee} if necessary to obtain the
+@code{__fp16} type. Some multilibs may be incompatible with these options.
+
+@item arm_neon_fp16_hw
+Test system supports executing Neon half-precision float instructions.
+(Implies @code{arm_neon_fp16_ok}.)
@item arm_thumb1_ok
ARM target generates Thumb-1 code for @code{-mthumb}.
@@ -2016,7 +2021,7 @@ keyword}.
@item arm_neon_fp16
NEON and half-precision floating point support. Only ARM targets
support this feature, and only then in certain modes; see
-the @ref{arm_neon_ok,,arm_neon_fp16_ok effective target keyword}.
+the @ref{arm_neon_fp16_ok,,arm_neon_fp16_ok effective target keyword}.
@item arm_vfp3
arm vfp3 floating point support; see
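
The two new keywords above differ in strength: arm_neon_fp16_ok only
promises that a test can be compiled with Neon fp16 options, while
arm_neon_fp16_hw additionally requires that the test system can execute
the resulting instructions. A minimal runtime test guarded this way might
look as follows (a sketch, not part of this patch; the convert round-trip
is only illustrative):

/* { dg-do run } */
/* { dg-require-effective-target arm_neon_fp16_hw } */
/* { dg-add-options arm_neon_fp16 } */

#include <arm_neon.h>

int
main (void)
{
  float32x4_t f32 = vdupq_n_f32 (2.0f);
  /* Executes a half-precision convert instruction, hence the _hw guard.  */
  float16x4_t f16 = vcvt_f16_f32 (f32);
  return vget_lane_f16 (f16, 0) == 2.0 ? 0 : 1;
}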
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 775020f6eb0..7d5b1ab70a8 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -7603,7 +7603,6 @@ native_interpret_real (tree type, const unsigned char *ptr, int len)
{
machine_mode mode = TYPE_MODE (type);
int total_bytes = GET_MODE_SIZE (mode);
- int byte, offset, word, words, bitpos;
unsigned char value;
/* There are always 32 bits in each long, no matter the size of
the host's long. We handle floating point representations with
@@ -7614,16 +7613,18 @@ native_interpret_real (tree type, const unsigned char *ptr, int len)
total_bytes = GET_MODE_SIZE (TYPE_MODE (type));
if (total_bytes > len || total_bytes > 24)
return NULL_TREE;
- words = (32 / BITS_PER_UNIT) / UNITS_PER_WORD;
+ int words = (32 / BITS_PER_UNIT) / UNITS_PER_WORD;
memset (tmp, 0, sizeof (tmp));
- for (bitpos = 0; bitpos < total_bytes * BITS_PER_UNIT;
+ for (int bitpos = 0; bitpos < total_bytes * BITS_PER_UNIT;
bitpos += BITS_PER_UNIT)
{
- byte = (bitpos / BITS_PER_UNIT) & 3;
+ /* Both OFFSET and BYTE index within a long;
+ bitpos indexes the whole float. */
+ int offset, byte = (bitpos / BITS_PER_UNIT) & 3;
if (UNITS_PER_WORD < 4)
{
- word = byte / UNITS_PER_WORD;
+ int word = byte / UNITS_PER_WORD;
if (WORDS_BIG_ENDIAN)
word = (words - 1) - word;
offset = word * UNITS_PER_WORD;
@@ -7633,7 +7634,16 @@ native_interpret_real (tree type, const unsigned char *ptr, int len)
offset += byte % UNITS_PER_WORD;
}
else
- offset = BYTES_BIG_ENDIAN ? 3 - byte : byte;
+ {
+ offset = byte;
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* Reverse bytes within each long, or within the entire float
+ if it's smaller than a long (for HFmode). */
+ offset = MIN (3, total_bytes - 1) - offset;
+ gcc_assert (offset >= 0);
+ }
+ }
value = ptr[offset + ((bitpos / BITS_PER_UNIT) & ~3)];
tmp[bitpos / 32] |= (unsigned long)value << (bitpos & 31);
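
The fold-const.c change matters for HFmode constants: bytes are gathered
into 32-bit chunks, and on a big-endian host the old mapping always
reversed within a 4-byte group (3 - byte), which indexes past the end of
a 2-byte half float. The new code clamps the reversal to the float's own
size. The same indexing logic in isolation (a standalone sketch; the
names are not GCC's):

#include <assert.h>

/* BYTE indexes within a 32-bit chunk (0..3); TOTAL_BYTES is the size of
   the float: 2 for HFmode, 4 for SFmode, 8 for DFmode.  */
static int
big_endian_offset (int byte, int total_bytes)
{
  int offset = (total_bytes < 4 ? total_bytes - 1 : 3) - byte;
  assert (offset >= 0);
  return offset;
}

/* HFmode: bytes 0 and 1 swap.  SFmode and wider: bytes 0..3 reverse
   within each chunk, exactly as before the patch.  */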
diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
index 09a20dc985e..5740c0281b2 100644
--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
@@ -13,6 +13,7 @@ void f3 (uint8x8_t a) {}
void f4 (uint16x4_t a) {}
void f5 (uint32x2_t a) {}
void f23 (uint64x1_t a) {}
+void f61 (float16x4_t a) {}
void f6 (float32x2_t a) {}
void f7 (poly8x8_t a) {}
void f8 (poly16x4_t a) {}
@@ -25,6 +26,7 @@ void f13 (uint8x16_t a) {}
void f14 (uint16x8_t a) {}
void f15 (uint32x4_t a) {}
void f16 (uint64x2_t a) {}
+void f171 (float16x8_t a) {}
void f17 (float32x4_t a) {}
void f18 (float64x2_t a) {}
void f19 (poly8x16_t a) {}
@@ -42,6 +44,7 @@ void g1 (int8x16_t, int8x16_t) {}
// { dg-final { scan-assembler "_Z2f412__Uint16x4_t:" } }
// { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } }
// { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } }
+// { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } }
// { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } }
// { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } }
// { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } }
@@ -53,6 +56,7 @@ void g1 (int8x16_t, int8x16_t) {}
// { dg-final { scan-assembler "_Z3f1412__Uint16x8_t:" } }
// { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } }
+// { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } }
// { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
// { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
index ceada839d98..462696315e0 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
@@ -52,8 +52,12 @@ if {[istarget arm*-*-*]} then {
torture-init
set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
-# Make sure Neon flags are provided, if necessary.
-set additional_flags [add_options_for_arm_neon ""]
+# Make sure Neon flags are provided, if necessary. Use fp16 if we can.
+if {[check_effective_target_arm_neon_fp16_ok]} then {
+ set additional_flags [add_options_for_arm_neon_fp16 ""]
+} else {
+ set additional_flags [add_options_for_arm_neon ""]
+}
# Main loop.
gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 4e728d5572c..49fbd843e50 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -7,6 +7,7 @@
#include <inttypes.h>
/* helper type, to help write floating point results in integer form. */
+typedef uint16_t hfloat16_t;
typedef uint32_t hfloat32_t;
typedef uint64_t hfloat64_t;
@@ -132,6 +133,9 @@ static ARRAY(result, uint, 32, 2);
static ARRAY(result, uint, 64, 1);
static ARRAY(result, poly, 8, 8);
static ARRAY(result, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+static ARRAY(result, float, 16, 4);
+#endif
static ARRAY(result, float, 32, 2);
static ARRAY(result, int, 8, 16);
static ARRAY(result, int, 16, 8);
@@ -143,6 +147,9 @@ static ARRAY(result, uint, 32, 4);
static ARRAY(result, uint, 64, 2);
static ARRAY(result, poly, 8, 16);
static ARRAY(result, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+static ARRAY(result, float, 16, 8);
+#endif
static ARRAY(result, float, 32, 4);
#ifdef __aarch64__
static ARRAY(result, float, 64, 2);
@@ -160,6 +167,7 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, int, 8, 16);
extern ARRAY(expected, int, 16, 8);
@@ -171,38 +179,11 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
-/* Check results. Operates on all possible vector types. */
-#define CHECK_RESULTS(test_name,comment) \
- { \
- CHECK(test_name, int, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, int, 16, 4, PRIx16, expected, comment); \
- CHECK(test_name, int, 32, 2, PRIx32, expected, comment); \
- CHECK(test_name, int, 64, 1, PRIx64, expected, comment); \
- CHECK(test_name, uint, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, uint, 16, 4, PRIx16, expected, comment); \
- CHECK(test_name, uint, 32, 2, PRIx32, expected, comment); \
- CHECK(test_name, uint, 64, 1, PRIx64, expected, comment); \
- CHECK(test_name, poly, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, poly, 16, 4, PRIx16, expected, comment); \
- CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
- \
- CHECK(test_name, int, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, int, 16, 8, PRIx16, expected, comment); \
- CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \
- CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \
- CHECK(test_name, uint, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, uint, 16, 8, PRIx16, expected, comment); \
- CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \
- CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \
- CHECK(test_name, poly, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, poly, 16, 8, PRIx16, expected, comment); \
- CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
- } \
-
-#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
{ \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
@@ -229,6 +210,24 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
+/* Check results against EXPECTED. Operates on all possible vector types. */
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_NAMED_NO_FP16(test_name, EXPECTED, comment) \
+ CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
+ }
+#else
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+ CHECK_RESULTS_NAMED_NO_FP16(test_name, EXPECTED, comment)
+#endif
+
+#define CHECK_RESULTS_NO_FP16(test_name,comment) \
+ CHECK_RESULTS_NAMED_NO_FP16(test_name, expected, comment)
+
+#define CHECK_RESULTS(test_name,comment) \
+ CHECK_RESULTS_NAMED(test_name, expected, comment)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -380,6 +379,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ CLEAN(result, float, 16, 4);
+#endif
CLEAN(result, float, 32, 2);
CLEAN(result, int, 8, 16);
@@ -392,6 +394,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ CLEAN(result, float, 16, 8);
+#endif
CLEAN(result, float, 32, 4);
#if defined(__aarch64__)
@@ -443,21 +448,40 @@ static void clean_results (void)
DECL_VARIABLE(VAR, uint, 64, 2)
/* Declare all 64 bits variants. */
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
+ DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE(VAR, poly, 8, 8); \
+ DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE(VAR, float, 16, 4); \
+ DECL_VARIABLE(VAR, float, 32, 2)
+#else
#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
+#endif
/* Declare all 128 bits variants. */
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
-
+#else
+#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
+ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE(VAR, poly, 8, 16); \
+ DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE(VAR, float, 32, 4)
+#endif
/* Declare all variants. */
#define DECL_VARIABLE_ALL_VARIANTS(VAR) \
DECL_VARIABLE_64BITS_VARIANTS(VAR); \
@@ -476,6 +500,15 @@ static void clean_results (void)
/* Helpers to initialize vectors. */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+/* Work around the lack of a vdup_n_f16 intrinsic. */
+#define vdup_n_f16(VAL) \
+ __extension__ \
+ ({ \
+ float16_t f = VAL; \
+ vld1_dup_f16(&f); \
+ })
+#endif
#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \
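
The vdup_n_f16 shim above goes through memory on purpose: this series
adds no duplicate intrinsic for a float16 scalar, so the value is stored
to a stack temporary and reloaded with vld1_dup_f16, which does exist.
With the macro in scope (it is test-harness glue, not part of
arm_neon.h), usage matches the real vdup_n_* intrinsics (sketch):

#include <arm_neon.h>

float16x4_t
splat_2_25 (void)
{
  /* Expands to ({ float16_t f = 2.25; vld1_dup_f16 (&f); }),
     so all four lanes hold 2.25 (0x4080 in IEEE binary16).  */
  return vdup_n_f16 (2.25);
}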
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
index 26203cc0a69..c8d43367bef 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
@@ -118,6 +118,10 @@ VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
PAD(buffer_pad, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
PAD(buffer_pad, uint, 64, 1);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer, float, 16, 4);
+PAD(buffer_pad, float, 16, 4);
+#endif
VECT_VAR_DECL_INIT(buffer, float, 32, 2);
PAD(buffer_pad, float, 32, 2);
VECT_VAR_DECL_INIT(buffer, int, 8, 16);
@@ -140,6 +144,10 @@ VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
PAD(buffer_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
PAD(buffer_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer, float, 16, 8);
+PAD(buffer_pad, float, 16, 8);
+#endif
VECT_VAR_DECL_INIT(buffer, float, 32, 4);
PAD(buffer_pad, float, 32, 4);
#ifdef __aarch64__
@@ -170,6 +178,10 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 16, 4);
+#endif
VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
@@ -193,5 +205,9 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, float, 16, 8);
+#endif
VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
index bb17f0a9649..c4fdbb45102 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
@@ -114,7 +114,7 @@ void exec_vbsl (void)
TEST_VBSL(uint, , float, f, 32, 2);
TEST_VBSL(uint, q, float, f, 32, 4);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
index 295768a0348..5100375e5fe 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
@@ -27,6 +27,8 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0x66, 0x66, 0x66, 0x66 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x40533333, 0x40533333 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0x4080, 0x4080, 0x4080, 0x4080 };
#define TEST_MSG "VCOMBINE"
void exec_vcombine (void)
@@ -44,6 +46,9 @@ void exec_vcombine (void)
/* Initialize input "vector64_a" from "buffer". */
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VLOAD(vector64_a, buffer, , float, f, 16, 4);
+#endif
VLOAD(vector64_a, buffer, , float, f, 32, 2);
/* Choose init value arbitrarily. */
@@ -57,6 +62,9 @@ void exec_vcombine (void)
VDUP(vector64_b, , uint, u, 64, 1, 0x88);
VDUP(vector64_b, , poly, p, 8, 8, 0x55);
VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VDUP(vector64_b, , float, f, 16, 4, 2.25);
+#endif
VDUP(vector64_b, , float, f, 32, 2, 3.3f);
clean_results ();
@@ -72,6 +80,9 @@ void exec_vcombine (void)
TEST_VCOMBINE(uint, u, 64, 1, 2);
TEST_VCOMBINE(poly, p, 8, 8, 16);
TEST_VCOMBINE(poly, p, 16, 4, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VCOMBINE(float, f, 16, 4, 8);
+#endif
TEST_VCOMBINE(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
@@ -84,6 +95,9 @@ void exec_vcombine (void)
CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
+#endif
CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
}
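
The new hfloat,16,N expected arrays hold raw IEEE binary16 bit patterns:
1 sign bit, a 5-bit exponent biased by 15, and 10 fraction bits. Here
0xcc00 is -16.0 (the first buffer element) and 0x4080 is 2.25 (the VDUP
value), so the combined vector is checked half by half. A small decoder
to sanity-check such entries (an illustrative helper, normal numbers
only):

#include <math.h>

static double
f16_bits_to_double (unsigned short bits)
{
  int sign = (bits >> 15) & 1;
  int bexp = (bits >> 10) & 0x1f;   /* biased exponent, bias 15 */
  int frac = bits & 0x3ff;          /* 10 fraction bits */
  /* (1.frac) * 2^(bexp - 15), computed as an integer scaled by 2^-10.  */
  double v = ldexp (1024 + frac, bexp - 15 - 10);
  return sign ? -v : v;
}

/* f16_bits_to_double (0xcc00) == -16.0
   f16_bits_to_double (0x4080) ==   2.25  */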
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
index b2289d3a628..b8b338ef3c0 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
0x78, 0x56, 0x34, 0x12 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
#define INSN_NAME vcreate
@@ -38,6 +39,9 @@ FNNAME (INSN_NAME)
DECL_VAL(val, int, 16, 4);
DECL_VAL(val, int, 32, 2);
DECL_VAL(val, int, 64, 1);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ DECL_VAL(val, float, 16, 4);
+#endif
DECL_VAL(val, float, 32, 2);
DECL_VAL(val, uint, 8, 8);
DECL_VAL(val, uint, 16, 4);
@@ -50,6 +54,9 @@ FNNAME (INSN_NAME)
DECL_VARIABLE(vector_res, int, 16, 4);
DECL_VARIABLE(vector_res, int, 32, 2);
DECL_VARIABLE(vector_res, int, 64, 1);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ DECL_VARIABLE(vector_res, float, 16, 4);
+#endif
DECL_VARIABLE(vector_res, float, 32, 2);
DECL_VARIABLE(vector_res, uint, 8, 8);
DECL_VARIABLE(vector_res, uint, 16, 4);
@@ -65,6 +72,9 @@ FNNAME (INSN_NAME)
VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL;
VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL;
VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_VAR(val, float, 16, 4) = 0x123456789abcdef0LL;
+#endif
VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL;
VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL;
VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL;
@@ -76,6 +86,9 @@ FNNAME (INSN_NAME)
TEST_VCREATE(int, s, 8, 8);
TEST_VCREATE(int, s, 16, 4);
TEST_VCREATE(int, s, 32, 2);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VCREATE(float, f, 16, 4);
+#endif
TEST_VCREATE(float, f, 32, 2);
TEST_VCREATE(int, s, 64, 1);
TEST_VCREATE(uint, u, 8, 8);
@@ -95,6 +108,9 @@ FNNAME (INSN_NAME)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+#endif
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
new file mode 100644
index 00000000000..48e50e18263
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
@@ -0,0 +1,98 @@
+/* { dg-require-effective-target arm_neon_fp16_hw { target { arm*-*-* } } } */
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results for vcvt. */
+VECT_VAR_DECL (expected,hfloat,32,4) [] = { 0x41800000, 0x41700000,
+ 0x41600000, 0x41500000 };
+VECT_VAR_DECL (expected,hfloat,16,4) [] = { 0x3e00, 0x4100, 0x4300, 0x4480 };
+
+/* Expected results for vcvt_high_f32_f16. */
+VECT_VAR_DECL (expected_high,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+ 0xc1200000, 0xc1100000 };
+/* Expected results for vcvt_high_f16_f32. */
+VECT_VAR_DECL (expected_high,hfloat,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000,
+ 0xcc00, 0xcb80, 0xcb00, 0xca80 };
+
+void
+exec_vcvt (void)
+{
+ clean_results ();
+
+#define TEST_MSG vcvt_f32_f16
+ {
+ VECT_VAR_DECL (buffer_src, float, 16, 4) [] = { 16.0, 15.0, 14.0, 13.0 };
+
+ DECL_VARIABLE (vector_src, float, 16, 4);
+
+ VLOAD (vector_src, buffer_src, , float, f, 16, 4);
+ DECL_VARIABLE (vector_res, float, 32, 4) =
+ vcvt_f32_f16 (VECT_VAR (vector_src, float, 16, 4));
+ vst1q_f32 (VECT_VAR (result, float, 32, 4),
+ VECT_VAR (vector_res, float, 32, 4));
+
+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, "");
+ }
+#undef TEST_MSG
+
+ clean_results ();
+
+#define TEST_MSG vcvt_f16_f32
+ {
+ VECT_VAR_DECL (buffer_src, float, 32, 4) [] = { 1.5, 2.5, 3.5, 4.5 };
+ DECL_VARIABLE (vector_src, float, 32, 4);
+
+ VLOAD (vector_src, buffer_src, q, float, f, 32, 4);
+ DECL_VARIABLE (vector_res, float, 16, 4) =
+ vcvt_f16_f32 (VECT_VAR (vector_src, float, 32, 4));
+ vst1_f16 (VECT_VAR (result, float, 16, 4),
+ VECT_VAR (vector_res, float, 16, 4));
+
+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected, "");
+ }
+#undef TEST_MSG
+
+#if defined (__aarch64__)
+ clean_results ();
+
+#define TEST_MSG "vcvt_high_f32_f16"
+ {
+ DECL_VARIABLE (vector_src, float, 16, 8);
+ VLOAD (vector_src, buffer, q, float, f, 16, 8);
+ DECL_VARIABLE (vector_res, float, 32, 4);
+ VECT_VAR (vector_res, float, 32, 4) =
+ vcvt_high_f32_f16 (VECT_VAR (vector_src, float, 16, 8));
+ vst1q_f32 (VECT_VAR (result, float, 32, 4),
+ VECT_VAR (vector_res, float, 32, 4));
+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected_high, "");
+ }
+#undef TEST_MSG
+ clean_results ();
+
+#define TEST_MSG "vcvt_high_f16_f32"
+ {
+ DECL_VARIABLE (vector_low, float, 16, 4);
+ VDUP (vector_low, , float, f, 16, 4, 2.0);
+
+ DECL_VARIABLE (vector_src, float, 32, 4);
+ VLOAD (vector_src, buffer, q, float, f, 32, 4);
+
+ DECL_VARIABLE (vector_res, float, 16, 8) =
+ vcvt_high_f16_f32 (VECT_VAR (vector_low, float, 16, 4),
+ VECT_VAR (vector_src, float, 32, 4));
+ vst1q_f16 (VECT_VAR (result, float, 16, 8),
+ VECT_VAR (vector_res, float, 16, 8));
+
+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_high, "");
+ }
+#endif
+}
+
+int
+main (void)
+{
+ exec_vcvt ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
index b5132f41ac4..22d45d56c8e 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
@@ -187,13 +187,13 @@ void exec_vdup_vmov (void)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
@@ -232,13 +232,13 @@ void exec_vdup_vmov (void)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
index c1ff6dd3007..ef708dcba17 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
@@ -90,7 +90,7 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
index 0b014ebda87..98f88a69898 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
@@ -113,7 +113,7 @@ void exec_vext (void)
TEST_VEXT(q, poly, p, 16, 8, 6);
TEST_VEXT(q, float, f, 32, 4, 3);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
index d7581125edd..9f0a1687f18 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
#define TEST_MSG "VGET_HIGH"
@@ -31,6 +32,9 @@ void exec_vget_high (void)
DECL_VARIABLE_128BITS_VARIANTS(vector128);
TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VLOAD(vector128, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector128, buffer, q, float, f, 32, 4);
clean_results ();
@@ -46,6 +50,9 @@ void exec_vget_high (void)
TEST_VGET_HIGH(uint, u, 64, 1, 2);
TEST_VGET_HIGH(poly, p, 8, 8, 16);
TEST_VGET_HIGH(poly, p, 16, 4, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VGET_HIGH(float, f, 16, 4, 8);
+#endif
TEST_VGET_HIGH(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
index 12ecfc21ba0..2b875b9b7b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
#define TEST_MSG "VGET_LOW"
@@ -31,6 +32,9 @@ void exec_vget_low (void)
DECL_VARIABLE_128BITS_VARIANTS(vector128);
TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VLOAD(vector128, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector128, buffer, q, float, f, 32, 4);
clean_results ();
@@ -46,6 +50,9 @@ void exec_vget_low (void)
TEST_VGET_LOW(uint, u, 64, 1, 2);
TEST_VGET_LOW(poly, p, 8, 8, 16);
TEST_VGET_LOW(poly, p, 16, 4, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VGET_LOW(float, f, 16, 4, 8);
+#endif
TEST_VGET_LOW(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
@@ -58,6 +65,9 @@ void exec_vget_low (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+#endif
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
index ced9d736d6d..4ed0e464f9c 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -44,6 +45,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -62,6 +65,10 @@ void exec_vld1 (void)
TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VLD1(vector, buffer, , float, f, 16, 4);
+ TEST_VLD1(vector, buffer, q, float, f, 16, 8);
+#endif
TEST_VLD1(vector, buffer, , float, f, 32, 2);
TEST_VLD1(vector, buffer, q, float, f, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
index 0e052743926..34be214e912 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
@@ -17,6 +17,7 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@@ -44,6 +45,8 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
0xc1800000, 0xc1800000 };
@@ -61,6 +64,7 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@@ -88,6 +92,8 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@@ -105,6 +111,7 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@@ -132,6 +139,8 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
@@ -154,6 +163,10 @@ void exec_vld1_dup (void)
TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VLD1_DUP(vector, buffer_dup, , float, f, 16, 4);
+ TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 16, 8);
+#endif
TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2);
TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
index d5c5d22a8ce..1f39006498d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xf0 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa,
@@ -43,6 +44,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xfff0, 0xaaaa };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xcc00, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xc1800000, 0xaaaaaaaa };
@@ -72,6 +75,9 @@ void exec_vld1_lane (void)
ARRAY(buffer_src, uint, 64, 1);
ARRAY(buffer_src, poly, 8, 8);
ARRAY(buffer_src, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ ARRAY(buffer_src, float, 16, 4);
+#endif
ARRAY(buffer_src, float, 32, 2);
ARRAY(buffer_src, int, 8, 16);
@@ -84,6 +90,9 @@ void exec_vld1_lane (void)
ARRAY(buffer_src, uint, 64, 2);
ARRAY(buffer_src, poly, 8, 16);
ARRAY(buffer_src, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ ARRAY(buffer_src, float, 16, 8);
+#endif
ARRAY(buffer_src, float, 32, 4);
clean_results ();
@@ -99,6 +108,9 @@ void exec_vld1_lane (void)
TEST_VLD1_LANE(, uint, u, 64, 1, 0);
TEST_VLD1_LANE(, poly, p, 8, 8, 7);
TEST_VLD1_LANE(, poly, p, 16, 4, 3);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VLD1_LANE(, float, f, 16, 4, 2);
+#endif
TEST_VLD1_LANE(, float, f, 32, 2, 1);
TEST_VLD1_LANE(q, int, s, 8, 16, 15);
@@ -111,6 +123,9 @@ void exec_vld1_lane (void)
TEST_VLD1_LANE(q, uint, u, 64, 2, 0);
TEST_VLD1_LANE(q, poly, p, 8, 16, 12);
TEST_VLD1_LANE(q, poly, p, 16, 8, 6);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VLD1_LANE(q, float, f, 16, 8, 5);
+#endif
TEST_VLD1_LANE(q, float, f, 32, 4, 2);
CHECK_RESULTS (TEST_MSG, "");
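
The single-lane loads leave every other element untouched, which is what
the expected arrays encode: the vectors are pre-filled with the 0xaaaa
byte pattern, and only the chosen lane receives the loaded value, so
0xcc00 (-16.0, the first buffer element) lands in lane 2 of the 64-bit
float16 vector and lane 5 of the 128-bit one. In isolation (sketch):

#include <arm_neon.h>

float16x4_t
load_into_lane2 (const float16_t *p, float16x4_t v)
{
  /* Replaces only lane 2 of V with *P; lanes 0, 1 and 3 pass through.  */
  return vld1_lane_f16 (p, v, 2);
}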
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c
new file mode 100644
index 00000000000..2174d6eaa8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x4x2_t
+f_vld2_lane_f16 (float16_t * p, float16x4x2_t v)
+{
+ float16x4x2_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld2_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld2_lane_f16 (p, v, -1);
+ return res;
+}
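
These _indices_ tests are compile-only checks of the lane-bounds
diagnostics: the lane argument must be a constant in 0..N-1, the two
calls deliberately sit just outside that range, and the trailing 0 in
each dg-error directive means the diagnostic may be reported on any
line. The files that follow repeat the same pattern for
vld2q/vld3/vld3q/vld4/vld4q with the range scaled to the vector length.
An in-range call, for contrast, is well-formed (sketch):

#include <arm_neon.h>

float16x4x2_t
load_pair_lane3 (float16_t *p, float16x4x2_t v)
{
  /* 3 is the last valid lane of a 4-element float16 vector.  */
  return vld2_lane_f16 (p, v, 3);
}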
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..83ae82c8242
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x8x2_t
+f_vld2q_lane_f16 (float16_t * p, float16x8x2_t v)
+{
+ float16x8x2_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld2q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld2q_lane_f16 (p, v, -1);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c
new file mode 100644
index 00000000000..21b7861ba75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x4x3_t
+f_vld3_lane_f16 (float16_t * p, float16x4x3_t v)
+{
+ float16x4x3_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld3_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld3_lane_f16 (p, v, -1);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..95ec3913eef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x8x3_t
+f_vld3q_lane_f16 (float16_t * p, float16x8x3_t v)
+{
+ float16x8x3_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld3q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld3q_lane_f16 (p, v, -1);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c
new file mode 100644
index 00000000000..bd7ecf06690
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x4x4_t
+f_vld4_lane_f16 (float16_t * p, float16x4x4_t v)
+{
+ float16x4x4_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld4_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ res = vld4_lane_f16 (p, v, -1);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..c27559f4ee8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x8x4_t
+f_vld4q_lane_f16 (float16_t * p, float16x8x4_t v)
+{
+ float16x8x4_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld4q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ res = vld4q_lane_f16 (p, v, -1);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
index f20aa03f51b..1e02dc3fa10 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -41,6 +42,8 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -58,6 +61,7 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -81,6 +85,8 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -98,6 +104,7 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -121,6 +128,8 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -138,6 +147,7 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -161,6 +171,8 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -181,6 +193,7 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
@@ -204,6 +217,8 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x1c, 0x1d, 0x1e, 0x1f };
VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
+ 0x4400, 0x4500, 0x4600, 0x4700 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
0xc0c00000, 0xc0a00000 };
@@ -223,6 +238,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -246,6 +262,8 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -263,6 +281,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -286,6 +305,8 @@ VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -303,6 +324,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
@@ -326,6 +348,8 @@ VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x1c, 0x1d, 0x1e, 0x1f };
VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
+ 0x4400, 0x4500, 0x4600, 0x4700 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
0xc0c00000, 0xc0a00000 };
@@ -343,6 +367,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27,
@@ -366,6 +391,8 @@ VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
0x2c, 0x2d, 0x2e, 0x2f };
VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980,
+ 0x4a00, 0x4a80, 0x4b00, 0x4b80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
0xc0000000, 0xbf800000 };
@@ -398,7 +425,7 @@ void exec_vldX (void)
sizeof(VECT_VAR(result, T1, W, N)));
/* We need all variants in 64 bits, but there is no 64x2 variant. */
-#define DECL_ALL_VLDX(X) \
+#define DECL_ALL_VLDX_NO_FP16(X) \
DECL_VLDX(int, 8, 8, X); \
DECL_VLDX(int, 16, 4, X); \
DECL_VLDX(int, 32, 2, X); \
@@ -420,7 +447,16 @@ void exec_vldX (void)
DECL_VLDX(poly, 16, 8, X); \
DECL_VLDX(float, 32, 4, X)
-#define TEST_ALL_VLDX(X) \
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define DECL_ALL_VLDX(X) \
+ DECL_ALL_VLDX_NO_FP16(X); \
+ DECL_VLDX(float, 16, 4, X); \
+ DECL_VLDX(float, 16, 8, X)
+#else
+#define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
+#endif
+
+#define TEST_ALL_VLDX_NO_FP16(X) \
TEST_VLDX(, int, s, 8, 8, X); \
TEST_VLDX(, int, s, 16, 4, X); \
TEST_VLDX(, int, s, 32, 2, X); \
@@ -442,7 +478,16 @@ void exec_vldX (void)
TEST_VLDX(q, poly, p, 16, 8, X); \
TEST_VLDX(q, float, f, 32, 4, X)
-#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_VLDX(X) \
+ TEST_ALL_VLDX_NO_FP16(X); \
+ TEST_VLDX(, float, f, 16, 4, X); \
+ TEST_VLDX(q, float, f, 16, 8, X)
+#else
+#define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
+#endif
+
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
@@ -464,9 +509,17 @@ void exec_vldX (void)
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
+ TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y);
+#else
+#define TEST_ALL_EXTRA_CHUNKS(X, Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
+#endif
+
/* vldX supports all vector types except [u]int64x2. */
-#define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment) \
- { \
+#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment) \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
@@ -487,8 +540,19 @@ void exec_vldX (void)
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
- } \
+ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
+ }
+#else
+#define CHECK_RESULTS_VLDX(test_name, EXPECTED, comment) \
+ { CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment); }
+#endif
DECL_ALL_VLDX(2);
DECL_ALL_VLDX(3);
@@ -516,6 +580,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
+ PAD(buffer_vld2_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
PAD(buffer_vld2_pad, float, 32, 2);
@@ -539,6 +607,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
+ PAD(buffer_vld2_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
PAD(buffer_vld2_pad, float, 32, 4);
@@ -563,6 +635,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
+ PAD(buffer_vld3_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
PAD(buffer_vld3_pad, float, 32, 2);
@@ -586,6 +662,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
+ PAD(buffer_vld3_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
PAD(buffer_vld3_pad, float, 32, 4);
@@ -610,6 +690,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
+ PAD(buffer_vld4_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
PAD(buffer_vld4_pad, float, 32, 2);
@@ -633,6 +717,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
+ PAD(buffer_vld4_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
PAD(buffer_vld4_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
index c66dade8e45..e4cde46725f 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld2_dup/chunk 1. */
@@ -35,6 +36,7 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld3_dup/chunk 0. */
@@ -54,6 +56,7 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
0xf1, 0xf2, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld3_dup/chunk 1. */
@@ -73,6 +76,7 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
0xf0, 0xf1, 0xf2, 0xf0 };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
/* vld3_dup/chunk 2. */
@@ -92,6 +96,7 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
0xf2, 0xf0, 0xf1, 0xf2 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
/* vld4_dup/chunk 0. */
@@ -109,6 +114,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld4_dup/chunk 1. */
@@ -125,6 +131,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
/* vld4_dup/chunk 2. */
@@ -141,6 +148,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld4_dup/chunk 3. */
@@ -157,6 +165,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
void exec_vldX_dup (void)
@@ -188,7 +197,7 @@ void exec_vldX_dup (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
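+/* The macros are split into *_NO_FP16 and full variants below so the f16
+   cases can be appended to the pre-existing list only when the target has
+   an FP16 format.  */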
-#define DECL_ALL_VLDX_DUP(X) \
+#define DECL_ALL_VLDX_DUP_NO_FP16(X) \
DECL_VLDX_DUP(int, 8, 8, X); \
DECL_VLDX_DUP(int, 16, 4, X); \
DECL_VLDX_DUP(int, 32, 2, X); \
@@ -201,7 +210,15 @@ void exec_vldX_dup (void)
DECL_VLDX_DUP(poly, 16, 4, X); \
DECL_VLDX_DUP(float, 32, 2, X)
-#define TEST_ALL_VLDX_DUP(X) \
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define DECL_ALL_VLDX_DUP(X) \
+ DECL_ALL_VLDX_DUP_NO_FP16(X); \
+ DECL_VLDX_DUP(float, 16, 4, X)
+#else
+#define DECL_ALL_VLDX_DUP(X) DECL_ALL_VLDX_DUP_NO_FP16(X)
+#endif
+
+#define TEST_ALL_VLDX_DUP_NO_FP16(X) \
TEST_VLDX_DUP(, int, s, 8, 8, X); \
TEST_VLDX_DUP(, int, s, 16, 4, X); \
TEST_VLDX_DUP(, int, s, 32, 2, X); \
@@ -214,7 +231,15 @@ void exec_vldX_dup (void)
TEST_VLDX_DUP(, poly, p, 16, 4, X); \
TEST_VLDX_DUP(, float, f, 32, 2, X)
-#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_VLDX_DUP(X) \
+ TEST_ALL_VLDX_DUP_NO_FP16(X); \
+ TEST_VLDX_DUP(, float, f, 16, 4, X)
+#else
+#define TEST_ALL_VLDX_DUP(X) TEST_ALL_VLDX_DUP_NO_FP16(X)
+#endif
+
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
@@ -227,9 +252,16 @@ void exec_vldX_dup (void)
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
+ TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y)
+#else
+#define TEST_ALL_EXTRA_CHUNKS(X, Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
+#endif
+
/* vldX_dup supports only 64-bit inputs. */
-#define CHECK_RESULTS_VLDX_DUP(test_name,EXPECTED,comment) \
- { \
+#define CHECK_RESULTS_VLDX_DUP_NO_FP16(test_name,EXPECTED,comment) \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
@@ -240,8 +272,20 @@ void exec_vldX_dup (void)
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
- } \
+ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment)
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define CHECK_RESULTS_VLDX_DUP(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_VLDX_DUP_NO_FP16(test_name,EXPECTED,comment); \
+ CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ }
+#else
+#define CHECK_RESULTS_VLDX_DUP(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_VLDX_DUP_NO_FP16(test_name,EXPECTED,comment); \
+ }
+#endif
DECL_ALL_VLDX_DUP(2);
DECL_ALL_VLDX_DUP(3);
@@ -269,6 +313,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
+ PAD(buffer_vld2_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
PAD(buffer_vld2_pad, float, 32, 2);
@@ -292,6 +340,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
+ PAD(buffer_vld2_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
PAD(buffer_vld2_pad, float, 32, 4);
@@ -316,6 +368,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
+ PAD(buffer_vld3_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
PAD(buffer_vld3_pad, float, 32, 2);
@@ -339,6 +395,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
+ PAD(buffer_vld3_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
PAD(buffer_vld3_pad, float, 32, 4);
@@ -363,6 +423,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
+ PAD(buffer_vld4_pad, float, 16, 4);
+#endif
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
PAD(buffer_vld4_pad, float, 32, 2);
@@ -386,6 +450,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
+ PAD(buffer_vld4_pad, float, 16, 8);
+#endif
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
PAD(buffer_vld4_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
index 2f2e62f0e3e..33b0eafbadb 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa };
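+/* 0xaaaa marks lanes the lane-wise load leaves untouched: each source
+   vector is memset to 0xAA before the vldX_lane call.  */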
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -29,6 +30,8 @@ VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -44,6 +47,7 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
@@ -55,6 +59,8 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -70,6 +76,7 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -81,6 +88,8 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -96,6 +105,7 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xf1, 0xf2, 0xaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -107,6 +117,8 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xc1800000, 0xc1700000 };
@@ -122,6 +134,7 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -133,6 +146,8 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80,
+ 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -148,6 +163,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -159,6 +175,8 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -174,6 +192,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -185,6 +204,8 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -200,6 +221,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -211,6 +233,8 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -226,6 +250,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -237,6 +262,8 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -252,6 +279,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
+#endif
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
/* Input buffers for vld3_lane */
@@ -265,6 +295,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
+#endif
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
/* Input buffers for vld4_lane */
@@ -278,6 +311,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
+#endif
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
void exec_vldX_lane (void)
@@ -321,7 +357,7 @@ void exec_vldX_lane (void)
sizeof(VECT_VAR(result, T1, W, N)));
/* We need all variants in 64 bits, but there is no 64x2 variant. */
-#define DECL_ALL_VLDX_LANE(X) \
+#define DECL_ALL_VLDX_LANE_NO_FP16(X) \
DECL_VLDX_LANE(int, 8, 8, X); \
DECL_VLDX_LANE(int, 16, 4, X); \
DECL_VLDX_LANE(int, 32, 2, X); \
@@ -338,6 +374,15 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(float, 32, 2, X); \
DECL_VLDX_LANE(float, 32, 4, X)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define DECL_ALL_VLDX_LANE(X) \
+ DECL_ALL_VLDX_LANE_NO_FP16(X); \
+ DECL_VLDX_LANE(float, 16, 4, X); \
+ DECL_VLDX_LANE(float, 16, 8, X)
+#else
+#define DECL_ALL_VLDX_LANE(X) DECL_ALL_VLDX_LANE_NO_FP16(X)
+#endif
+
/* Add some padding to try to catch out of bound accesses. */
#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
#define DUMMY_ARRAY(V, T, W, N, L) \
@@ -346,7 +391,7 @@ void exec_vldX_lane (void)
/* Use the same lanes regardless of the size of the array (X), for
simplicity. */
-#define TEST_ALL_VLDX_LANE(X) \
+#define TEST_ALL_VLDX_LANE_NO_FP16(X) \
TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \
TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \
TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \
@@ -363,7 +408,16 @@ void exec_vldX_lane (void)
TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
-#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_VLDX_LANE(X) \
+ TEST_ALL_VLDX_LANE_NO_FP16(X); \
+ TEST_VLDX_LANE(, float, f, 16, 4, X, 2); \
+ TEST_VLDX_LANE(q, float, f, 16, 8, X, 6)
+#else
+#define TEST_ALL_VLDX_LANE(X) TEST_ALL_VLDX_LANE_NO_FP16(X)
+#endif
+
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X,Y) \
TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
@@ -380,9 +434,17 @@ void exec_vldX_lane (void)
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \
+ TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y)
+#else
+#define TEST_ALL_EXTRA_CHUNKS(X,Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
+#endif
+
/* vldX_lane supports only a subset of all variants. */
-#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \
- { \
+#define CHECK_RESULTS_VLDX_LANE_NO_FP16(test_name,EXPECTED,comment) \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
@@ -397,8 +459,21 @@ void exec_vldX_lane (void)
CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
- } \
+ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_VLDX_LANE_NO_FP16(test_name,EXPECTED,comment); \
+ CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
+ }
+#else
+#define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_VLDX_LANE_NO_FP16(test_name,EXPECTED,comment); \
+ }
+#endif
/* Declare the temporary buffers / variables. */
DECL_ALL_VLDX_LANE(2);
@@ -419,6 +494,10 @@ void exec_vldX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
+ DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
+#endif
DUMMY_ARRAY(buffer_src, float, 32, 2, 4);
DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
index 51594068364..e0499df5170 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0x55, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 };
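+/* Binary16 buffer values -16.0..-13.0, with lane 2 replaced by 8.5 (0x4840).  */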
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x4840, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -41,6 +42,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xdd, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xee, 0xfff7 };
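+/* Binary16 buffer values -16.0..-9.0, with lane 5 replaced by 4.5 (0x4480).  */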
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0x4480, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0x41333333 };
@@ -61,6 +64,10 @@ void exec_vset_lane (void)
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ VLOAD(vector, buffer, , float, f, 16, 4);
+ VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
@@ -75,6 +82,9 @@ void exec_vset_lane (void)
TEST_VSET_LANE(, uint, u, 64, 1, 0x88, 0);
TEST_VSET_LANE(, poly, p, 8, 8, 0x55, 6);
TEST_VSET_LANE(, poly, p, 16, 4, 0x66, 2);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VSET_LANE(, float, f, 16, 4, 8.5f, 2);
+#endif
TEST_VSET_LANE(, float, f, 32, 2, 33.2f, 1);
TEST_VSET_LANE(q, int, s, 8, 16, 0x99, 15);
@@ -87,6 +97,9 @@ void exec_vset_lane (void)
TEST_VSET_LANE(q, uint, u, 64, 2, 0x11, 1);
TEST_VSET_LANE(q, poly, p, 8, 16, 0xDD, 14);
TEST_VSET_LANE(q, poly, p, 16, 8, 0xEE, 6);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VSET_LANE(q, float, f, 16, 8, 4.5f, 5);
+#endif
TEST_VSET_LANE(q, float, f, 32, 4, 11.2f, 3);
CHECK_RESULTS(TEST_MSG, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
index 08583b88cf3..825d07dbf77 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
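+/* Lane 1 of the f16 input is -15.0 (0xcb80); the remaining slots keep the
+   0x33 fill pattern.  In the q variant below, the stored lane 6 is -10.0
+   (0xc900).  */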
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
@@ -42,6 +43,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
+ 0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
0x33333333, 0x33333333 };
@@ -69,6 +72,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(, uint, u, 64, 1, 0);
TEST_VST1_LANE(, poly, p, 8, 8, 6);
TEST_VST1_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VST1_LANE(, float, f, 16, 4, 1);
+#endif
TEST_VST1_LANE(, float, f, 32, 2, 1);
TEST_VST1_LANE(q, int, s, 8, 16, 15);
@@ -81,6 +87,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(q, uint, u, 64, 2, 0);
TEST_VST1_LANE(q, poly, p, 8, 16, 10);
TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+ TEST_VST1_LANE(q, float, f, 16, 8, 6);
+#endif
TEST_VST1_LANE(q, float, f, 32, 4, 1);
CHECK_RESULTS(TEST_MSG, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c
new file mode 100644
index 00000000000..dbf5241b591
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
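+/* The exact lane-bounds diagnostics below are only emitted on AArch64; they
+   are xfailed on ARM, where these cases are reported differently.  */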
+
+void
+f_vst2_lane_f16 (float16_t * p, float16x4x2_t v)
+{
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst2_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst2_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..e3c0296534b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+void
+f_vst2q_lane_f16 (float16_t * p, float16x8x2_t v)
+{
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst2q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst2q_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c
new file mode 100644
index 00000000000..406dfd410a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+void
+f_vst3_lane_f16 (float16_t * p, float16x4x3_t v)
+{
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst3_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst3_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..4e8b24cff8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+void
+f_vst3q_lane_f16 (float16_t * p, float16x8x3_t v)
+{
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst3q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst3q_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c
new file mode 100644
index 00000000000..0fe65116712
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+void
+f_vst4_lane_f16 (float16_t * p, float16x4x4_t v)
+{
+ /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst4_lane_f16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+ vst4_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c
new file mode 100644
index 00000000000..9a5f09aa5fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c
@@ -0,0 +1,15 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+void
+f_vst4q_lane_f16 (float16_t * p, float16x8x4_t v)
+{
+ /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst4q_lane_f16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+ vst4q_lane_f16 (p, v, -1);
+ return;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c
new file mode 100644
index 00000000000..6cb80866790
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+
+#include <arm_neon.h>
+
+__fp16
+func2 (__fp16 a, __fp16 b)
+{
+ return b;
+}
+
+int
+main (int argc, char **argv)
+{
+ __fp16 array[16];
+ int i;
+
+ for (i = 0; i < sizeof (array) / sizeof (array[0]); i++)
+ array[i] = i;
+
+ array[0] = func2 (array[1], array[2]);
+
+ __builtin_printf ("%f\n", array[0]); /* { dg-output "2.0" } */
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_1.c b/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_1.c
new file mode 100644
index 00000000000..a1c95fd28d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_1.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-mfp16-format=ieee" {target "arm*-*-*"} } */
+
+extern void abort (void);
+
+#define EPSILON 0.0001
+
+int
+main (int argc, char **argv)
+{
+ float f1 = 3.14159f;
+ float f2 = 2.718f;
+ /* This 'assembler' statement should be portable between ARM and AArch64. */
+ asm volatile ("" : : : "memory");
+ __fp16 in1 = f1;
+ __fp16 in2 = f2;
+
+ /* Do the addition on __fp16's (implicitly converts both operands to
+ float32, adds, converts back to f16, then we convert back to f32). */
+ __fp16 res1 = in1 + in2;
+ asm volatile ("" : : : "memory");
+ float f_res_1 = res1;
+
+ /* Do the addition on float32's (we convert both operands to f32, and add,
+ as above, but skip the final conversion f32 -> f16 -> f32). */
+ float f1a = in1;
+ float f2a = in2;
+ float f_res_2 = f1a + f2a;
+
+ if (__builtin_fabs (f_res_2 - f_res_1) > EPSILON)
+ abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_2.c b/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_2.c
new file mode 100644
index 00000000000..6aa3e59c15e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16/f16_convs_2.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-mfp16-format=ieee" {target "arm*-*-*"} } */
+
+extern void abort (void);
+
+#define EPSILON 0.0001
+
+int
+main (int argc, char **argv)
+{
+ int i1 = 3;
+ int i2 = 2;
+ /* This 'assembler' should be portable across ARM and AArch64. */
+ asm volatile ("" : : : "memory");
+
+ __fp16 in1 = i1;
+ __fp16 in2 = i2;
+
+ /* Do the addition on __fp16's (implicitly converts both operands to
+ float32, adds, converts back to f16, then we convert to int). */
+ __fp16 res1 = in1 + in2;
+ asm volatile ("" : : : "memory");
+ int result1 = res1;
+
+ /* Do the addition on int's (we convert both operands directly to int, add,
+ and we're done). */
+ int result2 = ((int) in1) + ((int) in2);
+
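+ /* result1 and result2 are ints, so any nonzero difference exceeds
+ EPSILON; this amounts to an equality check. */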
+ if (__builtin_abs (result2 - result1) > EPSILON)
+ abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16/fp16.exp b/gcc/testsuite/gcc.target/aarch64/fp16/fp16.exp
new file mode 100644
index 00000000000..7dc8d654a34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16/fp16.exp
@@ -0,0 +1,43 @@
+# Tests of 16-bit floating point (__fp16), for both ARM and AArch64.
+# Copyright (C) 2015 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM or AArch64 target.
+if {![istarget arm*-*-*]
+ && ![istarget aarch64*-*-*]} then {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cC\]]] \
+ "" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_1.c b/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
index 4cb872da2cd..b6b57e0c546 100644
--- a/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
@@ -14,6 +14,7 @@ VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \
VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \
VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \
VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \
+VARIANT (float16_t, 4, float16x4_t, float16x8_t, f16) \
VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \
VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
@@ -51,6 +52,8 @@ main (int argc, char **argv)
int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+ float16_t float16_t_data[8] = { 1.25, 4.5, 7.875, 2.3125, 5.675, 8.875,
+ 3.6875, 6.75 };
float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
diff --git a/gcc/testsuite/gcc.target/aarch64/vget_low_1.c b/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
index f8016ef7312..2223676521c 100644
--- a/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
@@ -14,6 +14,7 @@ VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \
VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \
VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \
VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \
+VARIANT (float16_t, 4, float16x4_t, float16x8_t, f16) \
VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \
VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
@@ -51,6 +52,8 @@ main (int argc, char **argv)
int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+ float16_t float16_t_data[8] = { 1.25, 4.5, 7.875, 2.3125, 5.675, 8.875,
+ 3.6875, 6.75 };
float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
index f8c6edb3bcf..fa9ef0f4e43 100644
--- a/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
@@ -31,6 +31,7 @@ THING (int8x8_t, 8, int8_t, _s8) \
THING (uint8x8_t, 8, uint8_t, _u8) \
THING (int16x4_t, 4, int16_t, _s16) \
THING (uint16x4_t, 4, uint16_t, _u16) \
+THING (float16x4_t, 4, float16_t, _f16) \
THING (int32x2_t, 2, int32_t, _s32) \
THING (uint32x2_t, 2, uint32_t, _u32) \
THING (float32x2_t, 2, float32_t, _f32) \
@@ -38,6 +39,7 @@ THING (int8x16_t, 16, int8_t, q_s8) \
THING (uint8x16_t, 16, uint8_t, q_u8) \
THING (int16x8_t, 8, int16_t, q_s16) \
THING (uint16x8_t, 8, uint16_t, q_u16) \
+THING (float16x8_t, 8, float16_t, q_f16)\
THING (int32x4_t, 4, int32_t, q_s32) \
THING (uint32x4_t, 4, uint32_t, q_u32) \
THING (float32x4_t, 4, float32_t, q_f32)\
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/vld1_lane.c
index 463c88c0a5f..c70df7135c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/vld1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/vld1_lane.c
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, 0) \
VARIANT (int64, , 1, _s64, 0) \
VARIANT (poly8, , 8, _p8, 7) \
VARIANT (poly16, , 4, _p16, 2) \
+VARIANT (float16, , 4, _f16, 3) \
VARIANT (float32, , 2, _f32, 1) \
VARIANT (float64, , 1, _f64, 0) \
VARIANT (uint8, q, 16, _u8, 13) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, 1) \
VARIANT (int64, q, 2, _s64, 1) \
VARIANT (poly8, q, 16, _p8, 7) \
VARIANT (poly16, q, 8, _p16, 4) \
+VARIANT (float16, q, 8, _f16, 3)\
VARIANT (float32, q, 4, _f32, 2)\
VARIANT (float64, q, 2, _f64, 1)
@@ -76,6 +78,7 @@ main (int argc, char **argv)
int64_t int64_data = 0x1234567890abcdefLL;
poly8_t poly8_data = 13;
poly16_t poly16_data = 11111;
+ float16_t float16_data = 8.75;
float32_t float32_data = 3.14159;
float64_t float64_data = 1.010010001;
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_1.c
index b64de16a165..caac94f86ce 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_1.c
@@ -39,6 +39,7 @@ VARIANT (int32, 2, STRUCT, _s32) \
VARIANT (int64, 1, STRUCT, _s64) \
VARIANT (poly8, 8, STRUCT, _p8) \
VARIANT (poly16, 4, STRUCT, _p16) \
+VARIANT (float16, 4, STRUCT, _f16) \
VARIANT (float32, 2, STRUCT, _f32) \
VARIANT (float64, 1, STRUCT, _f64) \
VARIANT (uint8, 16, STRUCT, q_u8) \
@@ -51,6 +52,7 @@ VARIANT (int32, 4, STRUCT, q_s32) \
VARIANT (int64, 2, STRUCT, q_s64) \
VARIANT (poly8, 16, STRUCT, q_p8) \
VARIANT (poly16, 8, STRUCT, q_p16) \
+VARIANT (float16, 8, STRUCT, q_f16) \
VARIANT (float32, 4, STRUCT, q_f32) \
VARIANT (float64, 2, STRUCT, q_f64)
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
index 9af0565d617..68c3fc34f5a 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, STRUCT) \
VARIANT (int64, , 1, _s64, STRUCT) \
VARIANT (poly8, , 8, _p8, STRUCT) \
VARIANT (poly16, , 4, _p16, STRUCT) \
+VARIANT (float16, , 4, _f16, STRUCT) \
VARIANT (float32, , 2, _f32, STRUCT) \
VARIANT (float64, , 1, _f64, STRUCT) \
VARIANT (uint8, q, 16, _u8, STRUCT) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, STRUCT) \
VARIANT (int64, q, 2, _s64, STRUCT) \
VARIANT (poly8, q, 16, _p8, STRUCT) \
VARIANT (poly16, q, 8, _p16, STRUCT) \
+VARIANT (float16, q, 8, _f16, STRUCT) \
VARIANT (float32, q, 4, _f32, STRUCT) \
VARIANT (float64, q, 2, _f64, STRUCT)
@@ -74,6 +76,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+ float16_t float16_data[4] = { 1.0625, 3.125, 0.03125, 7.75 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
index 13ab45459f4..6837a116117 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, 0, STRUCT) \
VARIANT (int64, , 1, _s64, 0, STRUCT) \
VARIANT (poly8, , 8, _p8, 7, STRUCT) \
VARIANT (poly16, , 4, _p16, 1, STRUCT) \
+VARIANT (float16, , 4, _f16, 3, STRUCT) \
VARIANT (float32, , 2, _f32, 1, STRUCT) \
VARIANT (float64, , 1, _f64, 0, STRUCT) \
VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, 2, STRUCT) \
VARIANT (int64, q, 2, _s64, 1, STRUCT) \
VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
+VARIANT (float16, q, 8, _f16, 7, STRUCT)\
VARIANT (float32, q, 4, _f32, 1, STRUCT)\
VARIANT (float64, q, 2, _f64, 0, STRUCT)
@@ -71,7 +73,7 @@ main (int argc, char **argv)
{
/* Original data for all vector formats. */
uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
- 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
+ 0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
@@ -87,6 +89,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+ float16_t float16_data[4] = { 0.8125, 7.5, 19, 0.046875 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
diff --git a/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
index 5fb11399f20..bc0132c20a7 100644
--- a/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
@@ -16,6 +16,7 @@ VARIANT (int32_t, , 2, int32x2_t, _s32, 0) \
VARIANT (int64_t, , 1, int64x1_t, _s64, 0) \
VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6) \
VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2) \
+VARIANT (float16_t, , 4, float16x4_t, _f16, 3) \
VARIANT (float32_t, , 2, float32x2_t, _f32, 1) \
VARIANT (float64_t, , 1, float64x1_t, _f64, 0) \
VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11) \
@@ -28,6 +29,7 @@ VARIANT (int32_t, q, 4, int32x4_t, _s32, 3) \
VARIANT (int64_t, q, 2, int64x2_t, _s64, 0) \
VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14) \
VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6) \
+VARIANT (float16_t, q, 8, float16x8_t, _f16, 6) \
VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \
VARIANT (float64_t, q, 2, float64x2_t, _f64, 1)
@@ -76,6 +78,9 @@ main (int argc, char **argv)
poly8_t poly8_t_data[16] =
{ 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 };
poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 };
+ float16_t float16_t_data[8] = { 1.25, 4.5, 7.875, 2.3125, 5.675, 8.875,
+ 3.6875, 6.75 };
+
float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 70da85bb8a1..8b6c80b9c96 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2687,7 +2687,11 @@ proc check_effective_target_arm_neon_fp16_ok_nocache { } {
set et_arm_neon_fp16_flags ""
if { [check_effective_target_arm32] } {
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
- "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
+ "-mfpu=neon-fp16 -mfloat-abi=softfp"
+ "-mfp16-format=ieee"
+ "-mfloat-abi=softfp -mfp16-format=ieee"
+ "-mfpu=neon-fp16 -mfp16-format=ieee"
+ "-mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee"} {
if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
#include "arm_neon.h"
float16x4_t
@@ -2710,6 +2714,21 @@ proc check_effective_target_arm_neon_fp16_ok { } {
check_effective_target_arm_neon_fp16_ok_nocache]
}
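+# Return 1 if the target hardware can execute NEON half-precision float
+# instructions (tried below via "vcvt.f32.f16").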
+proc check_effective_target_arm_neon_fp16_hw { } {
+ if {! [check_effective_target_arm_neon_fp16_ok] } {
+ return 0
+ }
+ global et_arm_neon_fp16_flags
+ check_runtime_nocache arm_neon_fp16_hw {
+ int
+ main (int argc, char **argv)
+ {
+ asm ("vcvt.f32.f16 q1, d0");
+ return 0;
+ }
+ } $et_arm_neon_fp16_flags
+}
+
proc add_options_for_arm_neon_fp16 { flags } {
if { ! [check_effective_target_arm_neon_fp16_ok] } {
return "$flags"