From a1c6b246b55b9c50aaf5d15e180d13f01b903cf5 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 24 Oct 2007 17:46:39 +0000 Subject: [PATCH] mips.h (MOVE_MAX): Use UNITS_PER_WORD and describe MIPS-specific implementation details. gcc/ * config/mips/mips.h (MOVE_MAX): Use UNITS_PER_WORD and describe MIPS-specific implementation details. (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER): New macro. (MIPS_MAX_MOVE_BYTES_STRAIGHT): Likewise. (MOVE_RATIO): Define to MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD for targets with movmemsi. (MOVE_BY_PIECES_P): Define. * config/mips/mips.c (MAX_MOVE_REGS, MAX_MOVE_BYTES): Delete. (mips_block_move_loop): Add a bytes_per_iter argument. (mips_expand_block_move): Use MIPS_MAX_MOVE_BYTES_STRAIGHT. Update call to mips_block_move_loop. From-SVN: r129605 --- gcc/ChangeLog | 14 ++++++++++ gcc/config/mips/mips.c | 29 ++++++++++----------- gcc/config/mips/mips.h | 69 +++++++++++++++++++++++++++++++++++--------------- 3 files changed, 77 insertions(+), 35 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 397bbb7f307..6522ce36cc9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2007-10-24 Richard Sandiford + + * config/mips/mips.h (MOVE_MAX): Use UNITS_PER_WORD and describe + MIPS-specific implementation details. + (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER): New macro. + (MIPS_MAX_MOVE_BYTES_STRAIGHT): Likewise. + (MOVE_RATIO): Define to MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD + for targets with movmemsi. + (MOVE_BY_PIECES_P): Define. + * config/mips/mips.c (MAX_MOVE_REGS, MAX_MOVE_BYTES): Delete. + (mips_block_move_loop): Add a bytes_per_iter argument. + (mips_expand_block_move): Use MIPS_MAX_MOVE_BYTES_STRAIGHT. + Update call to mips_block_move_loop. + 2007-10-24 Michael Matz PR debug/33868 diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 71678605cf9..19fbbd40577 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -5622,9 +5622,6 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch) emit_insn (gen_slt_sf (dest, fp2, fp1)); } -#define MAX_MOVE_REGS 4 -#define MAX_MOVE_BYTES (MAX_MOVE_REGS * UNITS_PER_WORD) - /* Emit straight-line code to move LENGTH bytes from SRC to DEST. Assume that the areas do not overlap. */ @@ -5710,22 +5707,23 @@ mips_adjust_block_mem (rtx mem, HOST_WIDE_INT length, set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); } -/* Move LENGTH bytes from SRC to DEST using a loop that moves MAX_MOVE_BYTES - per iteration. LENGTH must be at least MAX_MOVE_BYTES. Assume that the - memory regions do not overlap. */ +/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that + the memory regions do not overlap. */ static void -mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length) +mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + HOST_WIDE_INT bytes_per_iter) { rtx label, src_reg, dest_reg, final_src; HOST_WIDE_INT leftover; - leftover = length % MAX_MOVE_BYTES; + leftover = length % bytes_per_iter; length -= leftover; /* Create registers and memory references for use within the loop. */ - mips_adjust_block_mem (src, MAX_MOVE_BYTES, &src_reg, &src); - mips_adjust_block_mem (dest, MAX_MOVE_BYTES, &dest_reg, &dest); + mips_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); + mips_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); /* Calculate the value that SRC_REG should have after the last iteration of the loop. */ @@ -5737,11 +5735,11 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length) emit_label (label); /* Emit the loop body. */ - mips_block_move_straight (dest, src, MAX_MOVE_BYTES); + mips_block_move_straight (dest, src, bytes_per_iter); /* Move on to the next block. */ - mips_emit_move (src_reg, plus_constant (src_reg, MAX_MOVE_BYTES)); - mips_emit_move (dest_reg, plus_constant (dest_reg, MAX_MOVE_BYTES)); + mips_emit_move (src_reg, plus_constant (src_reg, bytes_per_iter)); + mips_emit_move (dest_reg, plus_constant (dest_reg, bytes_per_iter)); /* Emit the loop condition. */ if (Pmode == DImode) @@ -5763,14 +5761,15 @@ mips_expand_block_move (rtx dest, rtx src, rtx length) { if (GET_CODE (length) == CONST_INT) { - if (INTVAL (length) <= 2 * MAX_MOVE_BYTES) + if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_STRAIGHT) { mips_block_move_straight (dest, src, INTVAL (length)); return true; } else if (optimize) { - mips_block_move_loop (dest, src, INTVAL (length)); + mips_block_move_loop (dest, src, INTVAL (length), + MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER); return true; } } diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e1794c2830d..b4778a8577d 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2338,9 +2338,10 @@ typedef struct mips_args { #define DEFAULT_SIGNED_CHAR 1 #endif -/* Max number of bytes we can move from memory to memory - in one reasonably fast instruction. */ -#define MOVE_MAX (TARGET_64BIT ? 8 : 4) +/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets, + we generally don't want to use them for copying arbitrary data. + A single N-word move is usually the same cost as N single-word moves. */ +#define MOVE_MAX UNITS_PER_WORD #define MAX_MOVE_MAX 8 /* Define this macro as a C expression which is nonzero if @@ -2769,6 +2770,18 @@ while (0) #undef PTRDIFF_TYPE #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") +/* The maximum number of bytes that can be copied by one iteration of + a movmemsi loop; see mips_block_move_loop. */ +#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \ + (UNITS_PER_WORD * 4) + +/* The maximum number of bytes that can be copied by a straight-line + implementation of movmemsi; see mips_block_move_straight. We want + to make sure that any loop-based implementation will iterate at + least twice. */ +#define MIPS_MAX_MOVE_BYTES_STRAIGHT \ + (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) + /* The base cost of a memcpy call, for MOVE_RATIO and friends. These values were determined experimentally by benchmarking with CSiBE. In theory, the call overhead is higher for TARGET_ABICALLS (especially @@ -2778,23 +2791,39 @@ while (0) #define MIPS_CALL_RATIO 8 -/* Define MOVE_RATIO to encourage use of movmemsi when enabled, - since it should always generate code at least as good as - move_by_pieces(). But when inline movmemsi pattern is disabled - (i.e., with -mips16 or -mmemcpy), instead use a value approximating - the length of a memcpy call sequence, so that move_by_pieces will - generate inline code if it is shorter than a function call. - Since move_by_pieces_ninsns() counts memory-to-memory moves, but - we'll have to generate a load/store pair for each, halve the value of - MIPS_CALL_RATIO to take that into account. - The default value for MOVE_RATIO when HAVE_movmemsi is true is 2. - There is no point to setting it to less than this to try to disable - move_by_pieces entirely, because that also disables some desirable - tree-level optimizations, specifically related to optimizing a - one-byte string copy into a simple move byte operation. */ - -#define MOVE_RATIO \ - ((TARGET_MIPS16 || TARGET_MEMCPY) ? MIPS_CALL_RATIO / 2 : 2) +/* Any loop-based implementation of movmemsi will have at least + MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory + moves, so allow individual copies of fewer elements. + + When movmemsi is not available, use a value approximating + the length of a memcpy call sequence, so that move_by_pieces + will generate inline code if it is shorter than a function call. + Since move_by_pieces_ninsns counts memory-to-memory moves, but + we'll have to generate a load/store pair for each, halve the + value of MIPS_CALL_RATIO to take that into account. */ + +#define MOVE_RATIO \ + (HAVE_movmemsi \ + ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ + : MIPS_CALL_RATIO / 2) + +/* movmemsi is meant to generate code that is at least as good as + move_by_pieces. However, movmemsi effectively uses a by-pieces + implementation both for moves smaller than a word and for word-aligned + moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes. We should + allow the tree-level optimisers to do such moves by pieces, as it + often exposes other optimization opportunities. We might as well + continue to use movmemsi at the rtl level though, as it produces + better code when scheduling is disabled (such as at -O). */ + +#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ + (HAVE_movmemsi \ + ? (!currently_expanding_to_rtl \ + && ((ALIGN) < BITS_PER_WORD \ + ? (SIZE) < UNITS_PER_WORD \ + : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \ + : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ + < (unsigned int) MOVE_RATIO)) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. */ -- 2.11.4.GIT