aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-loop-distribution.c
diff options
context:
space:
mode:
authorBin Cheng <bin.cheng@arm.com>2017-10-18 16:04:16 +0000
committerBin Cheng <amker@gcc.gnu.org>2017-10-18 16:04:16 +0000
commit957f0d8faf41d64fa4ba548411e6d4dd95a083bf (patch)
tree131565bc95410d1201c4c1be486f24340fcb7924 /gcc/tree-loop-distribution.c
parent85aa9ed64bf871b2e620211609855804e1775864 (diff)
tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
* tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file. (tree-ssa-loop-ivopts.h): New header file. (struct builtin_info): New fields. (classify_builtin_1): Compute and record base and offset parts for memset builtin partition by calling strip_offset. (offset_cmp, fuse_memset_builtins): New functions. (finalize_partitions): Fuse adjacent memset partitions by calling above function. * tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration. Expose the interface. * tree-ssa-loop-ivopts.h (strip_offset): New declaration. * gcc.dg/tree-ssa/ldist-17.c: Adjust test string. * gcc.dg/tree-ssa/ldist-32.c: New test. * gcc.dg/tree-ssa/ldist-35.c: New test. * gcc.dg/tree-ssa/ldist-36.c: New test. From-SVN: r253859
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r--gcc/tree-loop-distribution.c123
1 files changed, 122 insertions, 1 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 6abb7e43421..52db3c94bd9 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
data reuse. */
#include "config.h"
+#define INCLUDE_ALGORITHM /* stable_sort */
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
@@ -604,6 +606,10 @@ struct builtin_info
tree dst_base;
tree src_base;
tree size;
+ /* Base and offset part of dst_base after stripping constant offset. This
+ is only used in memset builtin distribution for now. */
+ tree dst_base_base;
+ unsigned HOST_WIDE_INT dst_base_offset;
};
/* Partition for loop distribution. */
@@ -1504,7 +1510,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
if (!compute_access_range (loop, dr, &base, &size))
return;
- partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ struct builtin_info *builtin;
+ builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ builtin->dst_base_base = strip_offset (builtin->dst_base,
+ &builtin->dst_base_offset);
+ partition->builtin = builtin;
partition->kind = PKIND_MEMSET;
}
@@ -2480,6 +2490,113 @@ version_for_distribution_p (vec<struct partition *> *partitions,
return (alias_ddrs->length () > 0);
}
+/* Compare base offset of builtin mem* partitions P1 and P2. */
+
+static bool
+offset_cmp (struct partition *p1, struct partition *p2)
+{
+ gcc_assert (p1 != NULL && p1->builtin != NULL);
+ gcc_assert (p2 != NULL && p2->builtin != NULL);
+ return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
+}
+
+/* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
+ case optimization transforming below code:
+
+ __builtin_memset (&obj, 0, 100);
+ _1 = &obj + 100;
+ __builtin_memset (_1, 0, 200);
+ _2 = &obj + 300;
+ __builtin_memset (_2, 0, 100);
+
+ into:
+
+ __builtin_memset (&obj, 0, 400);
+
+ Note we don't have dependence information between different partitions
+ at this point, as a result, we can't handle nonadjacent memset builtin
+ partitions since dependence might be broken. */
+
+static void
+fuse_memset_builtins (vec<struct partition *> *partitions)
+{
+ unsigned i, j;
+ struct partition *part1, *part2;
+
+ for (i = 0; partitions->iterate (i, &part1);)
+ {
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ /* Find sub-array of memset builtins of the same base. Index range
+ of the sub-array is [i, j) with "j > i". */
+ for (j = i + 1; partitions->iterate (j, &part2); ++j)
+ {
+ if (part2->kind != PKIND_MEMSET
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ break;
+ }
+
+ /* Stable sort is required in order to avoid breaking dependence. */
+ std::stable_sort (&(*partitions)[i],
+ &(*partitions)[i] + j - i, offset_cmp);
+ /* Continue with next partition. */
+ i = j;
+ }
+
+ /* Merge all consecutive memset builtin partitions. */
+ for (i = 0; i < partitions->length () - 1;)
+ {
+ part1 = (*partitions)[i];
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ part2 = (*partitions)[i + 1];
+ /* Only merge memset partitions of the same base and with constant
+ access sizes. */
+ if (part2->kind != PKIND_MEMSET
+ || TREE_CODE (part1->builtin->size) != INTEGER_CST
+ || TREE_CODE (part2->builtin->size) != INTEGER_CST
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ {
+ i++;
+ continue;
+ }
+ tree rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
+ tree rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
+ int bytev1 = const_with_all_bytes_same (rhs1);
+ int bytev2 = const_with_all_bytes_same (rhs2);
+ /* Only merge memset partitions of the same value. */
+ if (bytev1 != bytev2 || bytev1 == -1)
+ {
+ i++;
+ continue;
+ }
+ wide_int end1 = wi::add (part1->builtin->dst_base_offset,
+ wi::to_wide (part1->builtin->size));
+ /* Only merge adjacent memset partitions. */
+ if (wi::ne_p (end1, part2->builtin->dst_base_offset))
+ {
+ i++;
+ continue;
+ }
+ /* Merge partitions[i] and partitions[i+1]. */
+ part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
+ part1->builtin->size,
+ part2->builtin->size);
+ partition_free (part2);
+ partitions->ordered_remove (i + 1);
+ }
+}
+
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
@@ -2523,6 +2640,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
}
partitions->truncate (1);
}
+
+ /* Fuse memset builtins if possible. */
+ if (partitions->length () > 1)
+ fuse_memset_builtins (partitions);
}
/* Distributes the code from LOOP in such a way that producer statements