From ca1a40771fa8c1b5b577ca4aa05381f31f4881e6 Mon Sep 17 00:00:00 2001 From: rguenth Date: Fri, 15 Apr 2016 07:28:44 +0000 Subject: 2016-04-15 Richard Biener Alan Modra PR tree-optimization/70130 * tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect when alignment does not stay the same and do not use the realign scheme then. * gcc.dg/vect/O3-pr70130.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@235006 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 8 +++ gcc/testsuite/ChangeLog | 6 +++ gcc/testsuite/gcc.dg/vect/O3-pr70130.c | 94 ++++++++++++++++++++++++++++++++++ gcc/tree-vect-data-refs.c | 17 ++++-- 4 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/O3-pr70130.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f6fdeffcc53..7d35837be7f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-04-15 Richard Biener + Alan Modra + + PR tree-optimization/70130 + * tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect + when alignment stays not the same and no not use the realign + scheme then. + 2016-04-14 Michael Meissner PR target/70669 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4131b9fcfe4..69ea1854678 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-04-15 Richard Biener + Alan Modra + + PR tree-optimization/70130 + * gcc.dg/vect/O3-pr70130.c: New testcase. 
+ 2016-04-14 Michael Meissner PR target/70669 diff --git a/gcc/testsuite/gcc.dg/vect/O3-pr70130.c b/gcc/testsuite/gcc.dg/vect/O3-pr70130.c new file mode 100644 index 00000000000..e9938c21f41 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/O3-pr70130.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */ +/* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */ + +struct foo +{ + short a[3][16][16]; + short pad; +} images[8]; + +void __attribute__ ((noinline, noclone)) +Loop_err (struct foo *img, const int s[16][2], int s0) +{ + int i, j; + + for (j = 0; j < 16; j++) + { + for (i=0; i < 16; i++) + { + img->a[0][j][i] = s[i][0]; + img->a[1][j][i] = s[j][1]; + img->a[2][j][i] = s0; + } + } +} + +const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 }, + { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 }, + { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 }, + { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } }; +const struct foo expected += { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } }, + { { 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }, + { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }, + { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 }, + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 }, + { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 }, + { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, + { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 }, + { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 }, + { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, + { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 }, + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, + { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, + { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } }, + { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } }, + 0 }; + +int +main (void) +{ + int i; + + for (i = 0; i < 8; i++) + Loop_err (images + i, s, -1); + + for (i = 0; i < 8; i++) + if (__builtin_memcmp (&expected, images + i, sizeof (expected))) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index f8695b13d6d..7652e216eb6 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -5983,10 +5983,19 @@ vect_supportable_dr_alignment (struct data_reference *dr, || targetm.vectorize.builtin_mask_for_load ())) { tree vectype = STMT_VINFO_VECTYPE (stmt_info); - if ((nested_in_vect_loop - && (TREE_INT_CST_LOW (DR_STEP (dr)) - != GET_MODE_SIZE (TYPE_MODE (vectype)))) - || !loop_vinfo) + + /* If we are doing SLP then the accesses need not have the + same alignment, instead it depends on the SLP group size. */ + if (loop_vinfo + && STMT_SLP_TYPE (stmt_info) + && (LOOP_VINFO_VECT_FACTOR (loop_vinfo) + * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))) + % TYPE_VECTOR_SUBPARTS (vectype) != 0)) + ; + else if (!loop_vinfo + || (nested_in_vect_loop + && (TREE_INT_CST_LOW (DR_STEP (dr)) + != GET_MODE_SIZE (TYPE_MODE (vectype))))) return dr_explicit_realign; else return dr_explicit_realign_optimized; -- cgit v1.2.3