diff options
Diffstat (limited to 'gcc/tree-ssa-math-opts.c')
-rw-r--r-- | gcc/tree-ssa-math-opts.c | 81 |
1 file changed, 52 insertions, 29 deletions
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 735b7c67c31..6413bd6d1ae 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1925,25 +1925,32 @@ make_pass_cse_sincos (gcc::context *ctxt) return new pass_cse_sincos (ctxt); } -/* A symbolic number is used to detect byte permutation and selection - patterns. Therefore the field N contains an artificial number - consisting of octet sized markers: +/* A symbolic number structure is used to detect byte permutation and selection + patterns of a source. To achieve that, its field N contains an artificial + number consisting of BITS_PER_MARKER sized markers tracking where each + byte comes from in the source: - 0 - target byte has the value 0 - FF - target byte has an unknown value (eg. due to sign extension) - 1..size - marker value is the target byte index minus one. + 0 - target byte has the value 0 + FF - target byte has an unknown value (eg. due to sign extension) + 1..size - marker value is the byte index in the source (0 for lsb). To detect permutations on memory sources (arrays and structures), a symbolic - number is also associated a base address (the array or structure the load is - made from), an offset from the base address and a range which gives the - difference between the highest and lowest accessed memory location to make - such a symbolic number. The range is thus different from size which reflects - the size of the type of current expression. Note that for non memory source, - range holds the same value as size. + number is also associated with: + - a base address BASE_ADDR and an OFFSET giving the address of the source; + - a range which gives the difference between the highest and lowest accessed + memory location to make such a symbolic number; + - the address SRC of the source element of lowest address as a convenience + to easily get BASE_ADDR + offset + lowest bytepos. 
- For instance, for an array char a[], (short) a[0] | (short) a[3] would have - a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would - still have a size of 2 but this time a range of 1. */ + Note 1: the range is different from size as size reflects the size of the + type of the current expression. For instance, for an array char a[], + (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while + (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this + time a range of 1. + + Note 2: for non-memory sources, range holds the same value as size. + + Note 3: SRC points to the SSA_NAME in case of non-memory source. */ struct symbolic_number { uint64_t n; @@ -1951,6 +1958,7 @@ struct symbolic_number { tree base_addr; tree offset; HOST_WIDE_INT bytepos; + tree src; tree alias_set; tree vuse; unsigned HOST_WIDE_INT range; @@ -2052,6 +2060,7 @@ init_symbolic_number (struct symbolic_number *n, tree src) int size; n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; + n->src = src; /* Set up the symbolic number N by setting each byte to a value between 1 and the byte size of rhs1. 
The highest order byte is set to n->size and the @@ -2167,6 +2176,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, uint64_t inc; HOST_WIDE_INT start_sub, end_sub, end1, end2, end; struct symbolic_number *toinc_n_ptr, *n_end; + basic_block bb1, bb2; if (!n1->base_addr || !n2->base_addr || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) @@ -2180,15 +2190,20 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, { n_start = n1; start_sub = n2->bytepos - n1->bytepos; - source_stmt = source_stmt1; } else { n_start = n2; start_sub = n1->bytepos - n2->bytepos; - source_stmt = source_stmt2; } + bb1 = gimple_bb (source_stmt1); + bb2 = gimple_bb (source_stmt2); + if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) + source_stmt = source_stmt1; + else + source_stmt = source_stmt2; + /* Find the highest address at which a load is performed and compute related info. */ end1 = n1->bytepos + (n1->range - 1); @@ -2245,6 +2260,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, n->vuse = n_start->vuse; n->base_addr = n_start->base_addr; n->offset = n_start->offset; + n->src = n_start->src; n->bytepos = n_start->bytepos; n->type = n_start->type; size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; @@ -2455,7 +2471,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) uint64_t cmpxchg = CMPXCHG; uint64_t cmpnop = CMPNOP; - gimple *source_stmt; + gimple *ins_stmt; int limit; /* The last parameter determines the depth search limit. It usually @@ -2465,9 +2481,9 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) in libgcc, and for initial shift/and operation of the src operand. 
*/ limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); - source_stmt = find_bswap_or_nop_1 (stmt, n, limit); + ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); - if (!source_stmt) + if (!ins_stmt) return NULL; /* Find real size of result (highest non-zero byte). */ @@ -2509,7 +2525,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) return NULL; n->range *= BITS_PER_UNIT; - return source_stmt; + return ins_stmt; } namespace { @@ -2558,7 +2574,7 @@ public: changing of basic block. */ static bool -bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl, +bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, tree bswap_type, tree load_type, struct symbolic_number *n, bool bswap) { @@ -2567,18 +2583,24 @@ bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl, gimple *bswap_stmt; gsi = gsi_for_stmt (cur_stmt); - src = gimple_assign_rhs1 (src_stmt); + src = n->src; tgt = gimple_assign_lhs (cur_stmt); /* Need to load the value from memory first. */ if (n->base_addr) { - gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt); + gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); tree addr_expr, addr_tmp, val_expr, val_tmp; tree load_offset_ptr, aligned_load_type; gimple *addr_stmt, *load_stmt; unsigned align; HOST_WIDE_INT load_offset = 0; + basic_block ins_bb, cur_bb; + + ins_bb = gimple_bb (ins_stmt); + cur_bb = gimple_bb (cur_stmt); + if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) + return false; align = get_object_alignment (src); /* If the new access is smaller than the original one, we need @@ -2610,7 +2632,7 @@ bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl, /* Move cur_stmt just before one of the load of the original to ensure it has the same VUSE. See PR61517 for what could go wrong. 
*/ - if (gimple_bb (cur_stmt) != gimple_bb (src_stmt)) + if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); gsi_move_before (&gsi, &gsi_ins); gsi = gsi_for_stmt (cur_stmt); @@ -2783,6 +2805,7 @@ pass_optimize_bswap::execute (function *fun) memset (&nop_stats, 0, sizeof (nop_stats)); memset (&bswap_stats, 0, sizeof (bswap_stats)); + calculate_dominance_info (CDI_DOMINATORS); FOR_EACH_BB_FN (bb, fun) { @@ -2794,7 +2817,7 @@ pass_optimize_bswap::execute (function *fun) variant wouldn't be detected. */ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) { - gimple *src_stmt, *cur_stmt = gsi_stmt (gsi); + gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi); tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; enum tree_code code; struct symbolic_number n; @@ -2827,9 +2850,9 @@ pass_optimize_bswap::execute (function *fun) continue; } - src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); + ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); - if (!src_stmt) + if (!ins_stmt) continue; switch (n.range) @@ -2863,7 +2886,7 @@ pass_optimize_bswap::execute (function *fun) if (bswap && !fndecl && n.range != 16) continue; - if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type, + if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type, &n, bswap)) changed = true; } |