summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kaz Kojima <kkojima@rr.iij4u.or.jp> 2024-09-20 18:15:30 +0900
committer Oleg Endo <olegendo@gcc.gnu.org> 2024-09-25 09:51:15 +0900
commit 8ee643e50957904d75affece056a6dd84de343d6 (patch)
tree d692318ee4d98be607b3ceb3cf8dd2eea1636668
parent 291e20e86090e5940e2bd862ec83c7d5e0715dd5 (diff)
SH: Try to reduce R0 live ranges
Some move or extend patterns will create long R0 live ranges, which could confuse LRA.

gcc/ChangeLog:

	* config/sh/sh-protos.h (sh_satisfies_constraint_Sid_subreg_index): Declare.
	* config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index): New function.
	* config/sh/sh.md (extend<mode>si2_short_mem_disp_z, *mov<mode>_store_mem_index, mov<mode>_store_mem_index): New insn and insn_and_split patterns.
	(extend<mode>si2, mov<mode>): Use them for LRA.
-rw-r--r-- gcc/config/sh/sh-protos.h 1
-rw-r--r-- gcc/config/sh/sh.cc 12
-rw-r--r-- gcc/config/sh/sh.md 90
3 files changed, 102 insertions, 1 deletions
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index b151a7c8fcc..5e5bd0aff7e 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
extern bool nonpic_symbol_mentioned_p (rtx);
extern void output_pic_addr_const (FILE *, rtx);
extern bool expand_block_move (rtx *);
+extern bool sh_satisfies_constraint_Sid_subreg_index (rtx);
extern void prepare_move_operands (rtx[], machine_mode mode);
extern bool sh_expand_cmpstr (rtx *);
extern bool sh_expand_cmpnstr (rtx *);
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df583..c9222c3e6ac 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first)
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
+/* Test Sid constraint with subreg index.  Return true if OP has the
+   shape (mem (plus (reg) (subreg (reg)))), i.e. a MEM whose address is a
+   PLUS with a REG as its first operand and a SUBREG of a REG as its
+   second operand.  Only this operand order is accepted; a SUBREG in the
+   first position does not match.  Used in sh.md alongside
+   satisfies_constraint_Sid.  See also the comment in
+   prepare_move_operands.  */
+bool
+sh_satisfies_constraint_Sid_subreg_index (rtx op)
+{
+ return ((GET_CODE (op) == MEM)
+ && ((GET_CODE (XEXP (op, 0)) == PLUS)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG)
+ && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == REG)))));
+}
+
/* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register. */
void
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 7eee12ca6b8..6d93f5cb816 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -4801,7 +4801,38 @@
(define_expand "extend<mode>si2"
[(set (match_operand:SI 0 "arith_reg_dest")
- (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+ (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]
+ ""
+{
+ /* When the displacement addressing is used, RA will assign r0 to
+ the pseudo register operand for the QI/HImode load. See
+ the comment in sh.cc:prepare_move_operands and PR target/55212.
+ So, while LRA is not yet in progress and reload has not completed,
+ with LRA enabled on a non-SH2A target, emit the
+ extend<mode>si2_short_mem_disp_z pattern (which clobbers R0
+ explicitly) for short-displacement memory sources and finish the
+ expansion here.
+ NOTE(review): operand 0 is declared SImode by the pattern above but
+ is tested against <MODE>mode (QI/HI) below -- confirm this mode is
+ intended. */
+ if (! lra_in_progress && ! reload_completed
+ && sh_lra_p ()
+ && ! TARGET_SH2A
+ && arith_reg_dest (operands[0], <MODE>mode)
+ && short_displacement_mem_operand (operands[1], <MODE>mode))
+ {
+ emit_insn (gen_extend<mode>si2_short_mem_disp_z (operands[0],
+ operands[1]));
+ DONE;
+ }
+})
+
+;; Sign-extending QI/HImode load from a short-displacement memory operand,
+;; carrying an explicit clobber of R0.  Whenever the insn condition holds
+;; it is split ("&& 1") into a sign-extending load into R0 followed by a
+;; copy from R0 into operand 0.
+(define_insn_and_split "extend<mode>si2_short_mem_disp_z"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (sign_extend:SI
+ (match_operand:QIHI 1 "short_displacement_mem_operand" "m")))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 0) (match_dup 2))]
+{
+ operands[2] = gen_rtx_REG (SImode, R0_REG);
+}
+ [(set_attr "type" "load")])
(define_insn_and_split "*extend<mode>si2_compact_reg"
[(set (match_operand:SI 0 "arith_reg_dest" "=r")
@@ -5343,9 +5374,50 @@
operands[1] = gen_lowpart (<MODE>mode, reg);
}
+ if (! lra_in_progress && ! reload_completed
+ && sh_lra_p ()
+ && ! TARGET_SH2A
+ && arith_reg_operand (operands[1], <MODE>mode)
+ && (satisfies_constraint_Sid (operands[0])
+ || sh_satisfies_constraint_Sid_subreg_index (operands[0])))
+ {
+ rtx adr = XEXP (operands[0], 0);
+ rtx base = XEXP (adr, 0);
+ rtx idx = XEXP (adr, 1);
+ emit_insn (gen_mov<mode>_store_mem_index (base, idx,
+ operands[1]));
+ DONE;
+ }
+
prepare_move_operands (operands, <MODE>mode);
})
+;; QI/HImode store of register operand 2 to the address formed by
+;; operand 0 plus operand 1.  The insn condition additionally requires
+;; operand 1 to be the hard register R0, so this only matches once the
+;; index already lives in R0.
+(define_insn "*mov<mode>_store_mem_index"
+ [(set (mem:QIHI
+ (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "z")))
+ (match_operand:QIHI 2 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()
+ && REG_P (operands[1]) && REGNO (operands[1]) == R0_REG"
+ "mov.<bw> %2,@(%1,%0)"
+ [(set_attr "type" "store")])
+
+;; Indexed QI/HImode store carrying an explicit clobber of R0.  Whenever
+;; the insn condition holds it is split ("&& 1") into a copy of the index
+;; (operand 1) into R0 followed by the same store with R0 as the index.
+(define_insn_and_split "mov<mode>_store_mem_index"
+ [(set (mem:QIHI
+ (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "^zr")))
+ (match_operand:QIHI 2 "arith_reg_operand" "r"))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 3) (match_dup 1))
+ (set (mem:QIHI (plus:SI (match_dup 0) (match_dup 3))) (match_dup 2))]
+{
+ operands[3] = gen_rtx_REG (SImode, R0_REG);
+}
+ [(set_attr "type" "store")])
+
;; The pre-dec and post-inc mems must be captured by the '<' and '>'
;; constraints, otherwise wrong code might get generated.
(define_insn "*mov<mode>_load_predec"
@@ -5631,6 +5703,22 @@
(const_string "double")
(const_string "none")))])
+;; LRA tries to satisfy the match_scratch constraints for the memory
+;; displacements, which causes problems on this target.  Use R0 as the
+;; scratch register for the constant load instead, by clobbering it
+;; explicitly.
+;; NOTE(review): the output template is "#" and this pattern has no split
+;; of its own; presumably a separate define_split handles it -- verify.
+(define_insn "movdf_i4_F_z"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=d")
+ (match_operand:DF 1 "const_double_operand" "F"))
+ (use (reg:SI FPSCR_MODES_REG))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_FPU_DOUBLE && sh_lra_p ()"
+ "#"
+ [(set_attr "type" "pcfload")
+ (set (attr "length") (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)))
+ (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+ (const_string "double")
+ (const_string "none")))])
+
;; Moving DFmode between fp/general registers through memory
;; (the top of the stack) is faster than moving through fpul even for
;; little endian. Because the type of an instruction is important for its