From bd49d3628fa279a9dd9ad1973c84a6c8f2135a6f Mon Sep 17 00:00:00 2001
From: law
Date: Tue, 31 Dec 1996 00:14:56 +0000
Subject: * pa/pa.c (fmpy_operands): Remove.  No longer needed.
        (combinable_add, combinable_copy, combinable_fmpy): Likewise.
        (combinable_fadd, combinable_fsub): Likewise.
        (pa_reorg): Call pa_combine_instructions.
        (pa_combine_instructions): Combine instructions to make things
        like fmpyadd and fmpysub.
        (pa_can_combine_p): Helper function for pa_combine_instructions.
        * pa/pa.md (pa_combine_type): New attribute.  Set it appropriately
        for various insns.
        (define_delays): Use a separate define_delay for unconditional
        branches.
        (fmpyadd, fmpysub peepholes): Remove, no longer needed.
        (fmpyadd, fmpysub insns): Add variant with fadd/fsub first, then
        the fmpy.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@13346 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/pa/pa.c  | 527 +++++++++++++++++++++++++++++-----------------------
 gcc/config/pa/pa.md | 103 ++++------
 2 files changed, 331 insertions(+), 299 deletions(-)

diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index c8bff3b44ea..bab4a12fb7f 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -62,14 +62,6 @@ static int out_of_line_prologue_epilogue;
 
 static rtx find_addr_reg ();
 
-/* Kludgery.  We hold the operands to a fmpy insn here so we can
-   compare them with the operands for an fadd/fsub to determine if
-   they can be combined into a fmpyadd/fmpysub insn.
-
-   This _WILL_ disappear as the code to combine independent insns
-   matures.  */
-static rtx fmpy_operands[3];
-
 /* Keep track of the number of bytes we have output in the CODE subspaces
    during this compilation so we'll know when to emit inline long-calls.  */
 
@@ -1347,7 +1339,7 @@ emit_move_sequence (operands, mode, scratch_reg)
 }
 
 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
-   it will need a link/runtime reloc.  */
+   it will need a link/runtime reloc).  */
 
 int
 reloc_needed (exp)
@@ -5484,70 +5476,6 @@ output_parallel_addb (operands, length)
     }
 }
 
-/* Return nonzero if INSN represents an integer add which might be
-   combinable with an unconditional branch.  */
-
-combinable_add (insn)
-     rtx insn;
-{
-  rtx src, dest, prev, pattern = PATTERN (insn);
-
-  /* Must be a (set (reg) (plus (reg) (reg/5_bit_int)))  */
-  if (GET_CODE (pattern) != SET
-      || GET_CODE (SET_SRC (pattern)) != PLUS
-      || GET_CODE (SET_DEST (pattern)) != REG)
-    return 0;
-
-  src = SET_SRC (pattern);
-  dest = SET_DEST (pattern);
-
-  /* Must be an integer add.  */
-  if (GET_MODE (src) != SImode
-      || GET_MODE (dest) != SImode)
-    return 0;
-
-  /* Each operand must be an integer register and/or 5 bit immediate.  */
-  if (!ireg_or_int5_operand (dest, VOIDmode)
-      || !ireg_or_int5_operand (XEXP (src, 0), VOIDmode)
-      || !ireg_or_int5_operand (XEXP (src, 1), VOIDmode))
-    return 0;
-
-  /* The destination must also be one of the sources.  */
-  return (dest == XEXP (src, 0) || dest == XEXP (src, 1));
-}
-
-/* Return nonzero if INSN represents an integer load/copy which might be
-   combinable with an unconditional branch.  */
-
-combinable_copy (insn)
-     rtx insn;
-{
-  rtx src, dest, pattern = PATTERN (insn);
-  enum machine_mode mode;
-
-  /* Must be a (set (reg) (reg/5_bit_int)).  */
-  if (GET_CODE (pattern) != SET)
-    return 0;
-
-  src = SET_SRC (pattern);
-  dest = SET_DEST (pattern);
-
-  /* Must be a mode that corresponds to a single integer register.  */
-  mode = GET_MODE (dest);
-  if (mode != SImode
-      && mode != SFmode
-      && mode != HImode
-      && mode != QImode)
-    return 0;
-
-  /* Each operand must be a register or 5 bit integer.  */
-  if (!ireg_or_int5_operand (dest, VOIDmode)
-      || !ireg_or_int5_operand (src, VOIDmode))
-    return 0;
-
-  return 1;
-}
-
 /* Return nonzero if INSN (a jump insn) immediately follows a call.  This
    is used to discourage creating parallel movb/addb insns since a jump
    which immediately follows a call can execute in the delay slot of the
@@ -5574,170 +5502,6 @@ following_call (insn)
   return 0;
 }
 
-/* Return nonzero if this is a floating point multiply (fmpy) which
-   could be combined with a suitable floating point add or sub insn.  */
-
-combinable_fmpy (insn)
-     rtx insn;
-{
-  rtx src, dest, pattern = PATTERN (insn);
-  enum machine_mode mode;
-
-  /* Only on 1.1 and later cpus.  */
-  if (!TARGET_SNAKE)
-    return 0;
-
-  /* Must be a (set (reg) (mult (reg) (reg))).  */
-  if (GET_CODE (pattern) != SET
-      || GET_CODE (SET_SRC (pattern)) != MULT
-      || GET_CODE (SET_DEST (pattern)) != REG)
-    return 0;
-
-  src = SET_SRC (pattern);
-  dest = SET_DEST (pattern);
-
-  /* Must be registers.  */
-  if (GET_CODE (XEXP (src, 0)) != REG
-      || GET_CODE (XEXP (src, 1)) != REG)
-    return 0;
-
-  /* Must be a floating point mode.  Must match the mode of the fmul.  */
-  mode = GET_MODE (dest);
-  if (mode != DFmode && mode != SFmode)
-    return 0;
-
-  /* SFmode limits the registers which can be used to the upper
-     32 32bit FP registers.  */
-  if (mode == SFmode
-      && (REGNO (dest) < 57
-          || REGNO (XEXP (src, 0)) < 57
-          || REGNO (XEXP (src, 1)) < 57))
-    return 0;
-
-  /* Save our operands, we'll need to verify they don't conflict with
-     those in the fadd or fsub.  XXX This needs to disasppear soon.  */
-  fmpy_operands[0] = dest;
-  fmpy_operands[1] = XEXP (src, 0);
-  fmpy_operands[2] = XEXP (src, 1);
-
-  return 1;
-}
-
-/* Return nonzero if INSN is a floating point add suitable for combining
-   with the most recently examined floating point multiply.  */
-
-combinable_fadd (insn)
-     rtx insn;
-{
-  rtx src, dest, pattern = PATTERN (insn);
-  enum machine_mode mode;
-
-  /* Must be a (set (reg) (plus (reg) (reg))).  */
-  if (GET_CODE (pattern) != SET
-      || GET_CODE (SET_SRC (pattern)) != PLUS
-      || GET_CODE (SET_DEST (pattern)) != REG)
-    return 0;
-
-  src = SET_SRC (pattern);
-  dest = SET_DEST (pattern);
-
-  /* Must be registers.  */
-  if (GET_CODE (XEXP (src, 0)) != REG
-      || GET_CODE (XEXP (src, 1)) != REG)
-    return 0;
-
-  /* Must be a floating point mode.  Must match the mode of the fmul.  */
-  mode = GET_MODE (dest);
-  if (mode != DFmode && mode != SFmode)
-    return 0;
-
-  if (mode != GET_MODE (fmpy_operands[0]))
-    return 0;
-
-  /* SFmode limits the registers which can be used to the upper
-     32 32bit FP registers.  */
-  if (mode == SFmode
-      && (REGNO (dest) < 57
-          || REGNO (XEXP (src, 0)) < 57
-          || REGNO (XEXP (src, 1)) < 57))
-    return 0;
-
-  /* Only 2 real operands to the addition.  One of the input operands
-     must be the same as the output operand.  */
-  if (! rtx_equal_p (dest, XEXP (src, 0))
-      && ! rtx_equal_p (dest, XEXP (src, 1)))
-    return 0;
-
-  /* Inout operand of the add can not conflict with any operands from the
-     multiply.  */
-  if (rtx_equal_p (dest, fmpy_operands[0])
-      || rtx_equal_p (dest, fmpy_operands[1])
-      || rtx_equal_p (dest, fmpy_operands[2]))
-    return 0;
-
-  /* The multiply can not feed into the addition.  */
-  if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
-      || rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
-    return 0;
-
-  return 1;
-}
-
-/* Return nonzero if INSN is a floating point sub suitable for combining
-   with the most recently examined floating point multiply.  */
-
-combinable_fsub (insn)
-     rtx insn;
-{
-  rtx src, dest, pattern = PATTERN (insn);
-  enum machine_mode mode;
-
-  /* Must be (set (reg) (minus (reg) (reg))).  */
-  if (GET_CODE (pattern) != SET
-      || GET_CODE (SET_SRC (pattern)) != MINUS
-      || GET_CODE (SET_DEST (pattern)) != REG)
-    return 0;
-
-  src = SET_SRC (pattern);
-  dest = SET_DEST (pattern);
-
-  if (GET_CODE (XEXP (src, 0)) != REG
-      || GET_CODE (XEXP (src, 1)) != REG)
-    return 0;
-
-  /* Must be a floating point mode.  Must match the mode of the fmul.  */
-  mode = GET_MODE (dest);
-  if (mode != DFmode && mode != SFmode)
-    return 0;
-
-  if (mode != GET_MODE (fmpy_operands[0]))
-    return 0;
-
-  /* SFmode limits the registers which can be used to the upper
-     32 32bit FP registers.  */
-  if (mode == SFmode && (REGNO (dest) < 57 || REGNO (XEXP (src, 1)) < 57))
-    return 0;
-
-  /* Only 2 real operands to the subtraction.  Output must be the
-     same as the first operand of the MINUS.  */
-  if (! rtx_equal_p (dest, XEXP (src, 0)))
-    return 0;
-
-  /* Inout operand of the sub can not conflict with any operands from the
-     multiply.  */
-  if (rtx_equal_p (dest, fmpy_operands[0])
-      || rtx_equal_p (dest, fmpy_operands[1])
-      || rtx_equal_p (dest, fmpy_operands[2]))
-    return 0;
-
-  /* The multiply can not feed into the subtraction.  */
-  if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
-      || rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
-    return 0;
-
-  return 1;
-}
-
 /* We use this hook to perform a PA specific optimization which
    is difficult to do in earlier passes.
 
@@ -5771,6 +5535,8 @@ pa_reorg (insns)
 
   remove_useless_addtr_insns (insns, 1);
 
+  pa_combine_instructions (get_insns ());
+
   /* This is fairly cheap, so always run it if optimizing.  */
   if (optimize > 0)
     {
@@ -5840,3 +5606,290 @@ pa_reorg (insns)
         }
     }
 }
+
+/* The PA has a number of odd instructions which can perform multiple
+   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
+   it may be profitable to combine two instructions into one instruction
+   with two outputs.  It's not profitable on PA2.0 machines because the
+   two outputs would take two slots in the reorder buffers.
+
+   This routine finds instructions which can be combined and combines
+   them.  We only support some of the potential combinations, and we
+   only try common ways to find suitable instructions.
+
+      * addb can add two registers or a register and a small integer
+        and jump to a nearby (+-8k) location.  Normally the jump to the
+        nearby location is conditional on the result of the add, but by
+        using the "true" condition we can make the jump unconditional.
+        Thus addb can perform two independent operations in one insn.
+
+      * movb is similar to addb in that it can perform a reg->reg
+        or small immediate->reg copy and jump to a nearby (+-8k) location.
+
+      * fmpyadd and fmpysub can perform an FP multiply and either an
+        FP add or FP sub if the operands of the multiply and add/sub are
+        independent (there are other minor restrictions).  Note both
+        the fmpy and fadd/fsub can in theory move to better spots according
+        to data dependencies, but for now we require the fmpy stay at a
+        fixed location.
+
+      * Many of the memory operations can perform pre & post updates
+        of index registers.
+        GCC's pre/post increment/decrement addressing is far too
+        simple to take advantage of all the possibilities.  This pass
+        may not be suitable since those insns may not be independent.
+
+      * comclr can compare two ints or an int and a register, nullify
+        the following instruction and zero some other register.  This
+        is more difficult to use as it's harder to find an insn which
+        will generate a comclr than finding something like an unconditional
+        branch.  (conditional moves & long branches create comclr insns).
+
+      * Most arithmetic operations can conditionally skip the next
+        instruction.  They can be viewed as "perform this operation
+        and conditionally jump to this nearby location" (where nearby
+        is an insn away).  These are difficult to use due to the
+        branch length restrictions.  */
+
+pa_combine_instructions (insns)
+     rtx insns;
+{
+  rtx anchor, new;
+
+  /* This can get expensive since the basic algorithm is on the
+     order of O(n^2) (or worse).  Only do it for -O2 or higher
+     levels of optimization.  */
+  if (optimize < 2)
+    return;
+
+  /* Walk down the list of insns looking for "anchor" insns which
+     may be combined with "floating" insns.  As the name implies,
+     "anchor" instructions don't move, while "floating" insns may
+     move around.  */
+  new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
+  new = make_insn_raw (new);
+
+  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
+    {
+      enum attr_pa_combine_type anchor_attr;
+      enum attr_pa_combine_type floater_attr;
+
+      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
+         Also ignore any special USE insns.  */
+      if (GET_CODE (anchor) != INSN
+          && GET_CODE (anchor) != JUMP_INSN
+          && GET_CODE (anchor) != CALL_INSN
+          || GET_CODE (PATTERN (anchor)) == USE
+          || GET_CODE (PATTERN (anchor)) == CLOBBER
+          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
+          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
+        continue;
+
+      anchor_attr = get_attr_pa_combine_type (anchor);
+      /* See if anchor is an insn suitable for combination.  */
+      if (anchor_attr == PA_COMBINE_TYPE_FMPY
+          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
+          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+              && ! forward_branch_p (anchor)))
+        {
+          rtx floater;
+
+          for (floater = PREV_INSN (anchor);
+               floater;
+               floater = PREV_INSN (floater))
+            {
+              if (GET_CODE (floater) == NOTE
+                  || (GET_CODE (floater) == INSN
+                      && (GET_CODE (PATTERN (floater)) == USE
+                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
+                continue;
+
+              /* Anything except a regular INSN will stop our search.  */
+              if (GET_CODE (floater) != INSN
+                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
+                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+                {
+                  floater = NULL_RTX;
+                  break;
+                }
+
+              /* See if FLOATER is suitable for combination with the
+                 anchor.  */
+              floater_attr = get_attr_pa_combine_type (floater);
+              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                      && floater_attr == PA_COMBINE_TYPE_FMPY))
+                {
+                  /* If ANCHOR and FLOATER can be combined, then we're
+                     done with this pass.  */
+                  if (pa_can_combine_p (new, anchor, floater, 0,
+                                        SET_DEST (PATTERN (floater)),
+                                        XEXP (SET_SRC (PATTERN (floater)), 0),
+                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
+                    break;
+                }
+
+              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
+                {
+                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
+                    {
+                      if (pa_can_combine_p (new, anchor, floater, 0,
+                                            SET_DEST (PATTERN (floater)),
+                                            XEXP (SET_SRC (PATTERN (floater)), 0),
+                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
+                        break;
+                    }
+                  else
+                    {
+                      if (pa_can_combine_p (new, anchor, floater, 0,
+                                            SET_DEST (PATTERN (floater)),
+                                            SET_SRC (PATTERN (floater)),
+                                            SET_SRC (PATTERN (floater))))
+                        break;
+                    }
+                }
+            }
+
+          /* If we didn't find anything on the backwards scan, try forwards.  */
+          if (!floater
+              && (anchor_attr == PA_COMBINE_TYPE_FMPY
+                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
+            {
+              for (floater = anchor; floater; floater = NEXT_INSN (floater))
+                {
+                  if (GET_CODE (floater) == NOTE
+                      || (GET_CODE (floater) == INSN
+                          && (GET_CODE (PATTERN (floater)) == USE
+                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
+                    continue;
+
+                  /* Anything except a regular INSN will stop our search.  */
+                  if (GET_CODE (floater) != INSN
+                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
+                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+                    {
+                      floater = NULL_RTX;
+                      break;
+                    }
+
+                  /* See if FLOATER is suitable for combination with the
+                     anchor.  */
+                  floater_attr = get_attr_pa_combine_type (floater);
+                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                          && floater_attr == PA_COMBINE_TYPE_FMPY))
+                    {
+                      /* If ANCHOR and FLOATER can be combined, then we're
+                         done with this pass.  */
+                      if (pa_can_combine_p (new, anchor, floater, 1,
+                                            SET_DEST (PATTERN (floater)),
+                                            XEXP (SET_SRC (PATTERN (floater)), 0),
+                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
+                        break;
+                    }
+                }
+            }
+
+          /* FLOATER will be nonzero if we found a suitable floating
+             insn for combination with ANCHOR.  */
+          if (floater
+              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
+            {
+              /* Emit the new instruction and delete the old anchor.  */
+              emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
+                                         gen_rtvec (2, PATTERN (anchor),
+                                                    PATTERN (floater))),
+                                anchor);
+              PUT_CODE (anchor, NOTE);
+              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+              NOTE_SOURCE_FILE (anchor) = 0;
+
+              /* Emit a special USE insn for FLOATER, then delete
+                 the floating insn.  */
+              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
+              delete_insn (floater);
+
+              continue;
+            }
+          else if (floater
+                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
+            {
+              rtx temp;
+              /* Emit the new_jump instruction and delete the old anchor.  */
+              temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
+                                                     gen_rtvec (2, PATTERN (anchor),
+                                                                PATTERN (floater))),
+                                            anchor);
+              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
+              PUT_CODE (anchor, NOTE);
+              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+              NOTE_SOURCE_FILE (anchor) = 0;
+
+              /* Emit a special USE insn for FLOATER, then delete
+                 the floating insn.  */
+              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
+              delete_insn (floater);
+              continue;
+            }
+        }
+    }
+}
+
+int
+pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
+     rtx new, anchor, floater;
+     int reversed;
+     rtx dest, src1, src2;
+{
+  int insn_code_number;
+  rtx start, end;
+
+  /* Create a PARALLEL with the patterns of ANCHOR and
+     FLOATER, try to recognize it, then test constraints
+     for the resulting pattern.
+
+     If the pattern doesn't match or the constraints
+     aren't met, keep searching for a suitable floater
+     insn.  */
+  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
+  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
+  INSN_CODE (new) = -1;
+  insn_code_number = recog_memoized (new);
+  if (insn_code_number < 0
+      || !constrain_operands (insn_code_number, 1))
+    return 0;
+
+  if (reversed)
+    {
+      start = anchor;
+      end = floater;
+    }
+  else
+    {
+      start = floater;
+      end = anchor;
+    }
+
+  /* There are up to three operands to consider: one
+     output and two inputs.
+
+     The output must not be used between FLOATER & ANCHOR
+     exclusive.  The inputs must not be set between
+     FLOATER and ANCHOR exclusive.  */
+
+  if (reg_used_between_p (dest, start, end))
+    return 0;
+
+  if (reg_set_between_p (src1, start, end))
+    return 0;
+
+  if (reg_set_between_p (src2, start, end))
+    return 0;
+
+  /* If we get here, then everything is good.  */
+  return 1;
+}
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index d881956490c..ed7b654030c 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -34,6 +34,10 @@
   "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
   (const_string "binary"))
 
+(define_attr "pa_combine_type"
+  "fmpy,faddsub,uncond_branch,addmove,none"
+  (const_string "none"))
+
 ;; Processor type (for scheduling, not code generation) -- this attribute
 ;; must exactly match the processor_type enumeration in pa.h.
 ;;
@@ -97,7 +101,7 @@
 
 ;; Call delay slot description.
-(define_delay (eq_attr "type" "uncond_branch,call")
+(define_delay (eq_attr "type" "call")
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
 ;; millicode call delay slot description.  Note it disallows delay slot
@@ -129,6 +133,11 @@
    (and (eq_attr "in_nullified_branch_delay" "true")
         (attr_flag "backward"))])
 
+(define_delay (and (eq_attr "type" "uncond_branch")
+                   (eq (symbol_ref "following_call (insn)")
+                       (const_int 0)))
+  [(eq_attr "in_branch_delay" "true") (nil) (nil)])
+
 ;; Function units of the HPPA.  The following data is for the 700 CPUs
 ;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
 ;; Scheduling instructions for PA-83 machines according to the Snake
@@ -1337,6 +1346,7 @@
    fldw%F1 %1,%0
    fstw%F0 %1,%0"
   [(set_attr "type" "move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4,4,4,4,4,4,4,4")])
 
 (define_insn ""
@@ -1356,6 +1366,7 @@
    stw%M0 %r1,%0
    mtsar %r1"
   [(set_attr "type" "move,move,move,move,load,store,move")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4,4,4,4,4")])
 
 (define_insn ""
@@ -1735,6 +1746,7 @@
    mtsar %r1
    fcpy,sgl %r1,%0"
   [(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4,4,4,4,4,4")])
 
 (define_insn ""
@@ -1896,6 +1908,7 @@
    mtsar %r1
    fcpy,sgl %r1,%0"
   [(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4,4,4,4,4,4")])
 
 (define_insn ""
@@ -2535,6 +2548,7 @@
    fstw%F0 %r1,%0
    stw%M0 %r1,%0"
   [(set_attr "type" "fpalu,move,fpload,load,fpstore,store")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4,4,4,4")])
 
 (define_insn ""
@@ -2550,6 +2564,7 @@
    ldw%M1 %1,%0
    stw%M0 %r1,%0"
   [(set_attr "type" "move,load,store")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4,4")])
 
 (define_insn ""
@@ -2932,6 +2947,7 @@
    addl %1,%2,%0
    ldo %2(%1),%0"
   [(set_attr "type" "binary,binary")
+   (set_attr "pa_combine_type" "addmove")
    (set_attr "length" "4,4")])
 
 ;; Disgusting kludge to work around reload bugs with frame pointer
@@ -3452,6 +3468,7 @@
   "! TARGET_SOFT_FLOAT"
   "fadd,dbl %1,%2,%0"
   [(set_attr "type" "fpalu")
+   (set_attr "pa_combine_type" "faddsub")
    (set_attr "length" "4")])
 
 (define_insn "addsf3"
@@ -3461,6 +3478,7 @@
   "! TARGET_SOFT_FLOAT"
   "fadd,sgl %1,%2,%0"
   [(set_attr "type" "fpalu")
+   (set_attr "pa_combine_type" "faddsub")
    (set_attr "length" "4")])
 
 (define_insn "subdf3"
@@ -3470,6 +3488,7 @@
   "! TARGET_SOFT_FLOAT"
   "fsub,dbl %1,%2,%0"
   [(set_attr "type" "fpalu")
+   (set_attr "pa_combine_type" "faddsub")
    (set_attr "length" "4")])
 
 (define_insn "subsf3"
@@ -3479,6 +3498,7 @@
   "! TARGET_SOFT_FLOAT"
   "fsub,sgl %1,%2,%0"
   [(set_attr "type" "fpalu")
+   (set_attr "pa_combine_type" "faddsub")
    (set_attr "length" "4")])
 
 (define_insn "muldf3"
@@ -3488,6 +3508,7 @@
   "! TARGET_SOFT_FLOAT"
   "fmpy,dbl %1,%2,%0"
   [(set_attr "type" "fpmuldbl")
+   (set_attr "pa_combine_type" "fmpy")
    (set_attr "length" "4")])
 
 (define_insn "mulsf3"
@@ -3497,6 +3518,7 @@
   "! TARGET_SOFT_FLOAT"
   "fmpy,sgl %1,%2,%0"
   [(set_attr "type" "fpmulsgl")
+   (set_attr "pa_combine_type" "fmpy")
    (set_attr "length" "4")])
 
 (define_insn "divdf3"
@@ -3892,6 +3914,7 @@
   ""
   "bl%* %l0,0"
   [(set_attr "type" "uncond_branch")
+   (set_attr "pa_combine_type" "uncond_branch")
    (set (attr "length")
     (cond [(eq (symbol_ref "jump_in_call_delay (insn)") (const_int 0))
            (const_int 4)
@@ -4738,64 +4761,14 @@
    (set_attr "length" "4")])
 
 (define_insn ""
-  [(set (match_operand 0 "register_operand" "=f")
-        (mult (match_operand 1 "register_operand" "f")
-              (match_operand 2 "register_operand" "f")))
-   (set (match_operand 3 "register_operand" "+f")
-        (minus (match_operand 4 "register_operand" "f")
-               (match_operand 5 "register_operand" "f")))]
-  "TARGET_SNAKE && ! TARGET_SOFT_FLOAT
-   && reload_completed && fmpysuboperands (operands)"
-  "*
-{
-  if (GET_MODE (operands[0]) == DFmode)
-    return \"fmpysub,dbl %1,%2,%0,%5,%3\";
-  else
-    return \"fmpysub,sgl %1,%2,%0,%5,%3\";
-}"
-  [(set_attr "type" "fpalu")
-   (set_attr "length" "4")])
-
-;; The next four peepholes take advantage of the new 5 operand
-;; fmpy{add,sub} instructions available on 1.1 CPUS.  Basically
-;; fmpyadd performs a multiply and add/sub of independent operands
-;; at the same time.  Because the operands must be independent
-;; combine will not try to combine such insns...  Thus we have
-;; to use a peephole.
-(define_peephole
-  [(set (match_operand 0 "register_operand" "=f")
-        (mult (match_operand 1 "register_operand" "f")
-              (match_operand 2 "register_operand" "f")))
-   (set (match_operand 3 "register_operand" "+f")
-        (plus (match_operand 4 "register_operand" "f")
-              (match_operand 5 "register_operand" "f")))]
-  "! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpyaddoperands (operands)"
-  "*
-{
-  if (GET_MODE (operands[0]) == DFmode)
-    {
-      if (rtx_equal_p (operands[5], operands[3]))
-        return \"fmpyadd,dbl %1,%2,%0,%4,%3\";
-      else
-        return \"fmpyadd,dbl %1,%2,%0,%5,%3\";
-    }
-  else
-    {
-      if (rtx_equal_p (operands[5], operands[3]))
-        return \"fmpyadd,sgl %1,%2,%0,%4,%3\";
-      else
-        return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
-    }
-}")
-
-(define_peephole
   [(set (match_operand 3 "register_operand" "+f")
         (plus (match_operand 4 "register_operand" "f")
              (match_operand 5 "register_operand" "f")))
    (set (match_operand 0 "register_operand" "=f")
        (mult (match_operand 1 "register_operand" "f")
              (match_operand 2 "register_operand" "f")))]
-  "! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpyaddoperands (operands)"
+  "TARGET_SNAKE && ! TARGET_SOFT_FLOAT
+   && reload_completed && fmpyaddoperands (operands)"
   "*
 {
   if (GET_MODE (operands[0]) == DFmode)
@@ -4812,41 +4785,47 @@
     else
       return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
   }
-}")
+}"
+  [(set_attr "type" "fpalu")
+   (set_attr "length" "4")])
 
-;; Note fsub subtracts the second operand from the first while fmpysub
-;; does the opposite for the subtraction operands!
-(define_peephole
+(define_insn ""
   [(set (match_operand 0 "register_operand" "=f")
        (mult (match_operand 1 "register_operand" "f")
              (match_operand 2 "register_operand" "f")))
    (set (match_operand 3 "register_operand" "+f")
        (minus (match_operand 4 "register_operand" "f")
              (match_operand 5 "register_operand" "f")))]
-  "! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpysuboperands (operands)"
+  "TARGET_SNAKE && ! TARGET_SOFT_FLOAT
+   && reload_completed && fmpysuboperands (operands)"
   "*
 {
   if (GET_MODE (operands[0]) == DFmode)
     return \"fmpysub,dbl %1,%2,%0,%5,%3\";
   else
     return \"fmpysub,sgl %1,%2,%0,%5,%3\";
-}")
+}"
+  [(set_attr "type" "fpalu")
+   (set_attr "length" "4")])
 
-(define_peephole
+(define_insn ""
  [(set (match_operand 3 "register_operand" "+f")
        (minus (match_operand 4 "register_operand" "f")
              (match_operand 5 "register_operand" "f")))
   (set (match_operand 0 "register_operand" "=f")
       (mult (match_operand 1 "register_operand" "f")
             (match_operand 2 "register_operand" "f")))]
-  "! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpysuboperands (operands)"
+  "TARGET_SNAKE && ! TARGET_SOFT_FLOAT
+   && reload_completed && fmpysuboperands (operands)"
  "*
 {
  if (GET_MODE (operands[0]) == DFmode)
   return \"fmpysub,dbl %1,%2,%0,%5,%3\";
  else
   return \"fmpysub,sgl %1,%2,%0,%5,%3\";
-}")
+}"
+  [(set_attr "type" "fpalu")
+   (set_attr "length" "4")])
 
 ;; Clean up turds left by reload.
 (define_peephole
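
The comment block added to pa.c above insists that the fmpy not feed the fadd/fsub. The following standalone C sketch (not GCC code or PA hardware documentation; the function names are invented for illustration) shows why that independence requirement is semantic rather than cosmetic: the fused instruction reads all of its inputs before writing either result, so fusing a dependent pair would compute a different value.

/* Separate fmpy then fadd: the add sees the multiply's result.  */
#include <stdio.h>

static void
sequential (double *t, double a, double b, double *acc)
{
  *t = a * b;           /* fmpy: t = a * b */
  *acc = *acc + *t;     /* fadd: acc = acc + t -- depends on the fmpy */
}

/* Fused execution: both halves latch their inputs up front, so the
   add sees the old value of *t.  */
static void
fused (double *t, double a, double b, double *acc)
{
  double old_t = *t;
  *t = a * b;
  *acc = *acc + old_t;
}

int
main (void)
{
  double t1 = 0.0, acc1 = 1.0;
  double t2 = 0.0, acc2 = 1.0;

  sequential (&t1, 2.0, 3.0, &acc1);
  fused (&t2, 2.0, 3.0, &acc2);

  /* Prints 7 and 1: fusing a dependent pair is not semantics
     preserving, which is why fmpyaddoperands/fmpysuboperands require
     the multiply and add/sub operands to be independent.  */
  printf ("sequential: %g  fused: %g\n", acc1, acc2);
  return 0;
}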
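pa_combine_instructions pairs "anchor" insns with "floater" insns via a backward scan followed by a forward scan. Below is a minimal sketch of that pairing loop over a toy insn list; the struct, the CT_* enum, and the helper names are made up for illustration and stand in for GCC's rtl and attribute machinery. The real pass additionally verifies operand independence (pa_can_combine_p) and consumes each floater once paired; this sketch only reports candidate pairs.

#include <stdio.h>

enum combine_type { CT_NONE, CT_FMPY, CT_FADDSUB, CT_ADDMOVE };

struct toy_insn
{
  int uid;
  enum combine_type type;
  struct toy_insn *prev, *next;
};

static int
complementary (enum combine_type a, enum combine_type f)
{
  return ((a == CT_FMPY && f == CT_FADDSUB)
          || (a == CT_FADDSUB && f == CT_FMPY));
}

/* Scan from ANCHOR in direction DIR (-1 backwards, +1 forwards) for a
   pairable floater.  */
static struct toy_insn *
find_floater (struct toy_insn *anchor, int dir)
{
  struct toy_insn *f;

  for (f = dir < 0 ? anchor->prev : anchor->next;
       f;
       f = dir < 0 ? f->prev : f->next)
    if (complementary (anchor->type, f->type))
      return f;
  return NULL;
}

int
main (void)
{
  struct toy_insn insns[4] =
    {
      { 1, CT_FMPY, NULL, NULL },
      { 2, CT_ADDMOVE, NULL, NULL },
      { 3, CT_FADDSUB, NULL, NULL },
      { 4, CT_NONE, NULL, NULL },
    };
  int i;
  struct toy_insn *a;

  for (i = 0; i < 4; i++)
    {
      insns[i].prev = i > 0 ? &insns[i - 1] : NULL;
      insns[i].next = i < 3 ? &insns[i + 1] : NULL;
    }

  /* One linear scan per anchor: O(n^2) overall, which is why the real
     pass only runs at -O2 and above.  */
  for (a = insns; a; a = a->next)
    if (a->type == CT_FMPY || a->type == CT_FADDSUB)
      {
        struct toy_insn *f = find_floater (a, -1);
        if (f == NULL)
          f = find_floater (a, 1);
        if (f)
          printf ("anchor %d pairs with floater %d\n", a->uid, f->uid);
      }
  return 0;
}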
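pa_can_combine_p accepts a pair only when the recognizer matches the combined PARALLEL and the floater's operands survive the distance to the anchor. The dataflow half of that test is sketched below over a hypothetical representation in which an insn sets one register and uses two; the struct and helpers are stand-ins for GCC's rtl and for reg_used_between_p/reg_set_between_p. The real function also walks the insns in the opposite order when the floater was found by the forward scan (the REVERSED argument).

#include <stdio.h>

struct toy_insn
{
  int set_reg;      /* register written by this insn */
  int use_reg[2];   /* registers read by this insn */
};

static int
uses_reg (const struct toy_insn *i, int reg)
{
  return i->use_reg[0] == reg || i->use_reg[1] == reg;
}

/* A (floater, anchor) pair may be fused only if, strictly between the
   two, the floater's output DEST is never used and its inputs SRC1 and
   SRC2 are never set.  */
static int
can_combine (const struct toy_insn *seq, int floater, int anchor,
             int dest, int src1, int src2)
{
  int k;

  for (k = floater + 1; k < anchor; k++)
    {
      /* The floater's output must not be consumed in between...  */
      if (uses_reg (&seq[k], dest))
        return 0;
      /* ...and neither of its inputs may be clobbered in between.  */
      if (seq[k].set_reg == src1 || seq[k].set_reg == src2)
        return 0;
    }
  return 1;
}

int
main (void)
{
  /* floater: r3 = r1 * r2;  unrelated: r9 = f(r8);  anchor: r5 += r4.  */
  struct toy_insn seq[3] =
    {
      { 3, { 1, 2 } },
      { 9, { 8, 8 } },
      { 5, { 5, 4 } },
    };

  printf ("combinable: %d\n", can_combine (seq, 0, 2, 3, 1, 2));

  /* Make the intervening insn read r3; fusing is no longer safe.  */
  seq[1].use_reg[0] = 3;
  printf ("combinable: %d\n", can_combine (seq, 0, 2, 3, 1, 2));
  return 0;
}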