diff options
| author | Kaz Kojima <kkojima@rr.iij4u.or.jp> | 2024-09-20 18:15:30 +0900 |
|---|---|---|
| committer | Oleg Endo <olegendo@gcc.gnu.org> | 2024-09-25 09:51:15 +0900 |
| commit | 8ee643e50957904d75affece056a6dd84de343d6 (patch) | |
| tree | d692318ee4d98be607b3ceb3cf8dd2eea1636668 | |
| parent | 291e20e86090e5940e2bd862ec83c7d5e0715dd5 (diff) | |
SH: Try to reduce R0 live ranges
Some move and extend patterns create long R0 live ranges, which can
confuse LRA.
gcc/ChangeLog:
* config/sh/sh-protos.h
(sh_satisfies_constraint_Sid_subreg_index): Declare.
* config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index):
New function.
* config/sh/sh.md (extend<mode>si2_short_mem_disp_z,
*mov<mode>_store_mem_index, mov<mode>_store_mem_index):
New insn and insn_and_split patterns.
(extend<mode>si2, mov<mode>): Use them for LRA.
| -rw-r--r-- | gcc/config/sh/sh-protos.h | 1 | ||||
| -rw-r--r-- | gcc/config/sh/sh.cc | 12 | ||||
| -rw-r--r-- | gcc/config/sh/sh.md | 90 |
3 files changed, 102 insertions, 1 deletions
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index b151a7c8fcc..5e5bd0aff7e 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx); extern bool nonpic_symbol_mentioned_p (rtx); extern void output_pic_addr_const (FILE *, rtx); extern bool expand_block_move (rtx *); +extern bool sh_satisfies_constraint_Sid_subreg_index (rtx); extern void prepare_move_operands (rtx[], machine_mode mode); extern bool sh_expand_cmpstr (rtx *); extern bool sh_expand_cmpnstr (rtx *); diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc index 7391b8df583..c9222c3e6ac 100644 --- a/gcc/config/sh/sh.cc +++ b/gcc/config/sh/sh.cc @@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; } +/* Test Sid constraint with subreg index. See also the comment in + prepare_move_operands. */ +bool +sh_satisfies_constraint_Sid_subreg_index (rtx op) +{ + return ((GET_CODE (op) == MEM) + && ((GET_CODE (XEXP (op, 0)) == PLUS) + && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG) + && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG) + && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == REG))))); +} + /* Prepare operands for a move define_expand; specifically, one of the operands must be in a register. */ void diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 7eee12ca6b8..6d93f5cb816 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -4801,7 +4801,38 @@ (define_expand "extend<mode>si2" [(set (match_operand:SI 0 "arith_reg_dest") - (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]) + (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))] + "" +{ + /* When the displacement addressing is used, RA will assign r0 to + the pseudo register operand for the QI/HImode load. See + the comment in sh.cc:prepare_move_operand and PR target/55212. */ + if (! lra_in_progress && ! 
reload_completed + && sh_lra_p () + && ! TARGET_SH2A + && arith_reg_dest (operands[0], <MODE>mode) + && short_displacement_mem_operand (operands[1], <MODE>mode)) + { + emit_insn (gen_extend<mode>si2_short_mem_disp_z (operands[0], + operands[1])); + DONE; + } +}) + +(define_insn_and_split "extend<mode>si2_short_mem_disp_z" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (sign_extend:SI + (match_operand:QIHI 1 "short_displacement_mem_operand" "m"))) + (clobber (reg:SI R0_REG))] + "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()" + "#" + "&& 1" + [(set (match_dup 2) (sign_extend:SI (match_dup 1))) + (set (match_dup 0) (match_dup 2))] +{ + operands[2] = gen_rtx_REG (SImode, R0_REG); +} + [(set_attr "type" "load")]) (define_insn_and_split "*extend<mode>si2_compact_reg" [(set (match_operand:SI 0 "arith_reg_dest" "=r") @@ -5343,9 +5374,50 @@ operands[1] = gen_lowpart (<MODE>mode, reg); } + if (! lra_in_progress && ! reload_completed + && sh_lra_p () + && ! TARGET_SH2A + && arith_reg_operand (operands[1], <MODE>mode) + && (satisfies_constraint_Sid (operands[0]) + || sh_satisfies_constraint_Sid_subreg_index (operands[0]))) + { + rtx adr = XEXP (operands[0], 0); + rtx base = XEXP (adr, 0); + rtx idx = XEXP (adr, 1); + emit_insn (gen_mov<mode>_store_mem_index (base, idx, + operands[1])); + DONE; + } + prepare_move_operands (operands, <MODE>mode); }) +(define_insn "*mov<mode>_store_mem_index" + [(set (mem:QIHI + (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "z"))) + (match_operand:QIHI 2 "arith_reg_operand" "r"))] + "TARGET_SH1 && ! 
TARGET_SH2A && sh_lra_p () + && REG_P (operands[1]) && REGNO (operands[1]) == R0_REG" + "mov.<bw> %2,@(%1,%0)" + [(set_attr "type" "store")]) + +(define_insn_and_split "mov<mode>_store_mem_index" + [(set (mem:QIHI + (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "^zr"))) + (match_operand:QIHI 2 "arith_reg_operand" "r")) + (clobber (reg:SI R0_REG))] + "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()" + "#" + "&& 1" + [(set (match_dup 3) (match_dup 1)) + (set (mem:QIHI (plus:SI (match_dup 0) (match_dup 3))) (match_dup 2))] +{ + operands[3] = gen_rtx_REG (SImode, R0_REG); +} + [(set_attr "type" "store")]) + ;; The pre-dec and post-inc mems must be captured by the '<' and '>' ;; constraints, otherwise wrong code might get generated. (define_insn "*mov<mode>_load_predec" @@ -5631,6 +5703,22 @@ (const_string "double") (const_string "none")))]) +;; LRA will try to satisfy the constraints in match_scratch for the memory +;; displacements and it will make issues on this target. Use R0 as a scratch +;; register for the constant load. +(define_insn "movdf_i4_F_z" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=d") + (match_operand:DF 1 "const_double_operand" "F")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (reg:SI R0_REG))] + "TARGET_FPU_DOUBLE && sh_lra_p ()" + "#" + [(set_attr "type" "pcfload") + (set (attr "length") (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))) + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "double") + (const_string "none")))]) + ;; Moving DFmode between fp/general registers through memory ;; (the top of the stack) is faster than moving through fpul even for ;; little endian. Because the type of an instruction is important for its |
