From 816d21d096f67bb822dc55d80c3b3cde9926faeb Mon Sep 17 00:00:00 2001 From: yroux Date: Sun, 10 Aug 2014 22:53:28 +0000 Subject: [PATCH] gcc/ 2014-08-11 Yvan Roux Backport from trunk r211270, r211271, r211273, r211275, r212943, r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954, r212955, r212956, r212957, r212958, r212976, r212996, r212997, r212999, r213000. 2014-07-24 Jiong Wang * config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function. (aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed. 2014-07-24 Jiong Wang * config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function. (aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed. 2014-07-24 Jiong Wang * config/aarch64/aarch64.c (aarch64_restore_callee_saves) (aarch64_save_callee_saves): New parameter "skip_wb". (aarch64_expand_prologue, aarch64_expand_epilogue): Update call site. 2014-07-24 Jiong Wang * config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and "wb_candidate2". * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above. 2014-07-24 Jiong Wang * config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't subtract outgoing area size when restoring stack_pointer_rtx. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_popwb_pair_reg) (aarch64_gen_loadwb_pair): New helper function. (aarch64_expand_epilogue): Simplify code using new helper functions. * config/aarch64/aarch64.md (loadwb_pair_): Define. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_pushwb_pair_reg) (aarch64_gen_storewb_pair): New helper function. (aarch64_expand_prologue): Simplify code using new helper functions. * config/aarch64/aarch64.md (storewb_pair_): Define. 2014-07-23 Jiong Wang * config/aarch64/aarch64.md: (aarch64_save_or_restore_callee_saves): Rename to aarch64_save_callee_saves, remove restore code. (aarch64_restore_callee_saves): New function. 
2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted. (aarch64_save_callee_saves): New function to handle reg save for both core and vector regs. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_gen_load_pair) (aarch64_gen_store_pair): New helper function. (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Use new helper functions. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_next_callee_save): New function. (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Use aarch64_next_callee_save. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Hoist calculation of register rtx. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Remove 'increment'. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Use register offset in cfun->machine->frame.reg_offset. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_save_registers) (aarch64_save_or_restore_fprs): Remove base_rtx. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_save_registers): Rename 'offset' to 'start_offset'. Remove local variable 'start_offset'. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change type to HOST_WIDE_INT. 2014-07-23 Jiong Wang * config/aarch64/aarch64.c (aarch64_expand_prologue) (aarch64_save_or_restore_fprs) (aarch64_save_or_restore_callee_save_registers): GNU-Stylize code. 2014-06-05 Marcus Shawcroft * config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and frame_size. * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize aarch64_frame hard_fp_offset and frame_size. 
(aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and frame_size; remove original_frame_size. (aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise. (aarch64_initial_elimination_offset): Remove frame_size and offset. Use aarch64_frame frame_size. 2014-06-05 Marcus Shawcroft Jiong Wang * config/aarch64/aarch64.c (aarch64_layout_frame): Correct initialization of R30 offset. Update offset. Iterate core registers up to X30. Remove X29, X30 specific code. 2014-06-05 Marcus Shawcroft Jiong Wang * config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define. (aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED. (aarch64_register_saved_on_entry): Adjust test. 2014-06-05 Marcus Shawcroft * config/aarch64/aarch64.h (machine_function): Move saved_varargs_size from here... (aarch64_frameGTY): ... to here. * config/aarch64/aarch64.c (aarch64_expand_prologue) (aarch64_expand_epilogue, aarch64_final_eh_return_addr) (aarch64_initial_elimination_offset) (aarch64_setup_incoming_varargs): Adjust location of saved_varargs_size. gcc/testsuite/ 2014-08-11 Yvan Roux Backport from trunk r212959, r212976, r212999, r213000. 2014-07-24 Jiong Wang * gcc.target/aarch64/test_frame_1.c: Match optimized instruction sequences. * gcc.target/aarch64/test_frame_2.c: Likewise. * gcc.target/aarch64/test_frame_4.c: Likewise. * gcc.target/aarch64/test_frame_6.c: Likewise. * gcc.target/aarch64/test_frame_7.c: Likewise. * gcc.target/aarch64/test_frame_8.c: Likewise. * gcc.target/aarch64/test_frame_10.c: Likewise. 2014-07-24 Jiong Wang * gcc.target/aarch64/test_frame_1.c: Match optimized instruction sequences. * gcc.target/aarch64/test_frame_10.c: Likewise. * gcc.target/aarch64/test_frame_2.c: Likewise. * gcc.target/aarch64/test_frame_4.c: Likewise. * gcc.target/aarch64/test_frame_6.c: Likewise. * gcc.target/aarch64/test_frame_7.c: Likewise. * gcc.target/aarch64/test_frame_8.c: Likewise. * gcc.target/aarch64/test_fp_attribute_1.c: Likewise. 
2014-07-24 Jiong Wang * gcc.target/aarch64/test_frame_12.c: Match optimized instruction sequences. 2014-07-23 Jiong Wang * gcc.target/aarch64/test_frame_common.h: New file. * gcc.target/aarch64/test_frame_1.c: Likewise. * gcc.target/aarch64/test_frame_2.c: Likewise. * gcc.target/aarch64/test_frame_3.c: Likewise. * gcc.target/aarch64/test_frame_4.c: Likewise. * gcc.target/aarch64/test_frame_5.c: Likewise. * gcc.target/aarch64/test_frame_6.c: Likewise. * gcc.target/aarch64/test_frame_7.c: Likewise. * gcc.target/aarch64/test_frame_8.c: Likewise. * gcc.target/aarch64/test_frame_9.c: Likewise. * gcc.target/aarch64/test_frame_10.c: Likewise. * gcc.target/aarch64/test_frame_11.c: Likewise. * gcc.target/aarch64/test_frame_12.c: Likewise. * gcc.target/aarch64/test_frame_13.c: Likewise. * gcc.target/aarch64/test_frame_14.c: Likewise. * gcc.target/aarch64/test_frame_15.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro@213799 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc-4_9-branch/gcc/ChangeLog.linaro | 152 +++++ gcc-4_9-branch/gcc/config/aarch64/aarch64.c | 680 +++++++++++---------- gcc-4_9-branch/gcc/config/aarch64/aarch64.h | 26 +- gcc-4_9-branch/gcc/config/aarch64/aarch64.md | 32 + gcc-4_9-branch/gcc/testsuite/ChangeLog.linaro | 50 ++ .../gcc.target/aarch64/test_fp_attribute_1.c | 2 +- .../testsuite/gcc.target/aarch64/test_frame_1.c | 19 + .../testsuite/gcc.target/aarch64/test_frame_10.c | 21 + .../testsuite/gcc.target/aarch64/test_frame_11.c | 16 + .../testsuite/gcc.target/aarch64/test_frame_12.c | 19 + .../testsuite/gcc.target/aarch64/test_frame_13.c | 18 + .../testsuite/gcc.target/aarch64/test_frame_14.c | 12 + .../testsuite/gcc.target/aarch64/test_frame_15.c | 19 + .../testsuite/gcc.target/aarch64/test_frame_2.c | 20 + .../testsuite/gcc.target/aarch64/test_frame_3.c | 14 + .../testsuite/gcc.target/aarch64/test_frame_4.c | 19 + .../testsuite/gcc.target/aarch64/test_frame_5.c | 13 + .../testsuite/gcc.target/aarch64/test_frame_6.c | 20 + 
.../testsuite/gcc.target/aarch64/test_frame_7.c | 20 + .../testsuite/gcc.target/aarch64/test_frame_8.c | 18 + .../testsuite/gcc.target/aarch64/test_frame_9.c | 17 + .../gcc.target/aarch64/test_frame_common.h | 94 +++ 22 files changed, 983 insertions(+), 318 deletions(-) create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_1.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_10.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_11.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_12.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_13.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_14.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_15.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_2.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_3.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_4.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_5.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_6.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_7.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_8.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_9.c create mode 100644 gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_common.h diff --git a/gcc-4_9-branch/gcc/ChangeLog.linaro b/gcc-4_9-branch/gcc/ChangeLog.linaro index 963d2cc04ce..2fe475fa143 100644 --- a/gcc-4_9-branch/gcc/ChangeLog.linaro +++ b/gcc-4_9-branch/gcc/ChangeLog.linaro @@ -1,5 +1,157 @@ 2014-08-11 Yvan Roux + Backport from trunk r211270, r211271, r211273, r211275, r212943, + r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954, + r212955, 
r212956, r212957, r212958, r212976, r212996, r212997, r212999, + r213000. + 2014-07-24 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function. + (aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed. + + 2014-07-24 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function. + (aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed. + + 2014-07-24 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_restore_callee_saves) + (aarch64_save_callee_saves): New parameter "skip_wb". + (aarch64_expand_prologue, aarch64_expand_epilogue): Update call site. + + 2014-07-24 Jiong Wang + + * config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and + "wb_candidate2". + * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above. + + 2014-07-24 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't + subtract outgoing area size when restoring stack_pointer_rtx. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_popwb_pair_reg) + (aarch64_gen_loadwb_pair): New helper function. + (aarch64_expand_epilogue): Simplify code using new helper functions. + * config/aarch64/aarch64.md (loadwb_pair_): Define. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_pushwb_pair_reg) + (aarch64_gen_storewb_pair): New helper function. + (aarch64_expand_prologue): Simplify code using new helper functions. + * config/aarch64/aarch64.md (storewb_pair_): Define. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.md: (aarch64_save_or_restore_callee_saves): + Rename to aarch64_save_callee_saves, remove restore code. + (aarch64_restore_callee_saves): New function. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted. + (aarch64_save_callee_saves): New function to handle reg save + for both core and vector regs. 
+ + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_gen_load_pair) + (aarch64_gen_store_pair): New helper function. + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Use new helper functions. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_next_callee_save): New function. + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Use aarch64_next_callee_save. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Hoist calculation of register rtx. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Remove 'increment'. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Use register offset in + cfun->machine->frame.reg_offset. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c + (aarch64_save_or_restore_callee_save_registers) + (aarch64_save_or_restore_fprs): Remove base_rtx. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c + (aarch64_save_or_restore_callee_save_registers): Rename 'offset' + to 'start_offset'. Remove local variable 'start_offset'. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change + type to HOST_WIDE_INT. + + 2014-07-23 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_expand_prologue) + (aarch64_save_or_restore_fprs) + (aarch64_save_or_restore_callee_save_registers): GNU-Stylize code. + + 2014-06-05 Marcus Shawcroft + + * config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and + frame_size. + * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize + aarch64_frame hard_fp_offset and frame_size. + (aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and + frame_size; remove original_frame_size. 
+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise. + (aarch64_initial_elimination_offset): Remove frame_size and + offset. Use aarch64_frame frame_size. + + 2014-06-05 Marcus Shawcroft + Jiong Wang + + * config/aarch64/aarch64.c (aarch64_layout_frame): Correct + initialization of R30 offset. Update offset. Iterate core + regisers upto X30. Remove X29, X30 specific code. + + 2014-06-05 Marcus Shawcroft + Jiong Wang + + * config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define. + (aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED. + (aarch64_register_saved_on_entry): Adjust test. + + 2014-06-05 Marcus Shawcroft + + * config/aarch64/aarch64.h (machine_function): Move + saved_varargs_size from here... + (aarch64_frameGTY): ... to here. + + * config/aarch64/aarch64.c (aarch64_expand_prologue) + (aarch64_expand_epilogue, aarch64_final_eh_return_addr) + (aarch64_initial_elimination_offset) + (aarch64_setup_incoming_varargs): Adjust location of + saved_varargs_size. + +2014-08-11 Yvan Roux + Backport from trunk r212753. 2014-07-17 Kyrylo Tkachov diff --git a/gcc-4_9-branch/gcc/config/aarch64/aarch64.c b/gcc-4_9-branch/gcc/config/aarch64/aarch64.c index fac5262b10a..d0c65b881fa 100644 --- a/gcc-4_9-branch/gcc/config/aarch64/aarch64.c +++ b/gcc-4_9-branch/gcc/config/aarch64/aarch64.c @@ -1814,68 +1814,88 @@ aarch64_layout_frame (void) if (reload_completed && cfun->machine->frame.laid_out) return; +#define SLOT_NOT_REQUIRED (-2) +#define SLOT_REQUIRED (-1) + + cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER; + cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER; + /* First mark all the registers that really need to be saved... 
*/ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - cfun->machine->frame.reg_offset[regno] = -1; + cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - cfun->machine->frame.reg_offset[regno] = -1; + cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; /* ... that includes the eh data registers (if needed)... */ if (crtl->calls_eh_return) for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) - cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0; + cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] + = SLOT_REQUIRED; /* ... and any callee saved register that dataflow says is live. */ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) - cfun->machine->frame.reg_offset[regno] = 0; + cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) - cfun->machine->frame.reg_offset[regno] = 0; + cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; if (frame_pointer_needed) { - cfun->machine->frame.reg_offset[R30_REGNUM] = 0; + /* FP and LR are placed in the linkage record. */ cfun->machine->frame.reg_offset[R29_REGNUM] = 0; + cfun->machine->frame.wb_candidate1 = R29_REGNUM; + cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; + cfun->machine->frame.wb_candidate2 = R30_REGNUM; cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; + offset += 2 * UNITS_PER_WORD; } /* Now assign stack slots for them. 
*/ - for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++) - if (cfun->machine->frame.reg_offset[regno] != -1) + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) { cfun->machine->frame.reg_offset[regno] = offset; + if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) + cfun->machine->frame.wb_candidate1 = regno; + else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) + cfun->machine->frame.wb_candidate2 = regno; offset += UNITS_PER_WORD; } for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (cfun->machine->frame.reg_offset[regno] != -1) + if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) { cfun->machine->frame.reg_offset[regno] = offset; + if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) + cfun->machine->frame.wb_candidate1 = regno; + else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER + && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) + cfun->machine->frame.wb_candidate2 = regno; offset += UNITS_PER_WORD; } - if (frame_pointer_needed) - { - cfun->machine->frame.reg_offset[R29_REGNUM] = offset; - offset += UNITS_PER_WORD; - } - - if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) - { - cfun->machine->frame.reg_offset[R30_REGNUM] = offset; - offset += UNITS_PER_WORD; - } - cfun->machine->frame.padding0 = (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); cfun->machine->frame.saved_regs_size = offset; + + cfun->machine->frame.hard_fp_offset + = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size + + get_frame_size () + + cfun->machine->frame.saved_regs_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + cfun->machine->frame.frame_size + = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset + + crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT); + cfun->machine->frame.laid_out = true; } @@ -1898,179 +1918,276 @@ 
aarch64_set_frame_expr (rtx frame_pattern) static bool aarch64_register_saved_on_entry (int regno) { - return cfun->machine->frame.reg_offset[regno] != -1; + return cfun->machine->frame.reg_offset[regno] >= 0; } +static unsigned +aarch64_next_callee_save (unsigned regno, unsigned limit) +{ + while (regno <= limit && !aarch64_register_saved_on_entry (regno)) + regno ++; + return regno; +} + +static void +aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno, + HOST_WIDE_INT adjustment) + { + rtx base_rtx = stack_pointer_rtx; + rtx insn, reg, mem; + + reg = gen_rtx_REG (mode, regno); + mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, + plus_constant (Pmode, base_rtx, -adjustment)); + mem = gen_rtx_MEM (mode, mem); + + insn = emit_move_insn (mem, reg); + RTX_FRAME_RELATED_P (insn) = 1; +} + +static void +aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno, + HOST_WIDE_INT adjustment) +{ + rtx base_rtx = stack_pointer_rtx; + rtx insn, reg, mem; + + reg = gen_rtx_REG (mode, regno); + mem = gen_rtx_POST_MODIFY (Pmode, base_rtx, + plus_constant (Pmode, base_rtx, adjustment)); + mem = gen_rtx_MEM (mode, mem); + + insn = emit_move_insn (reg, mem); + add_reg_note (insn, REG_CFA_RESTORE, reg); + RTX_FRAME_RELATED_P (insn) = 1; +} + +static rtx +aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, + HOST_WIDE_INT adjustment) +{ + switch (mode) + { + case DImode: + return gen_storewb_pairdi_di (base, base, reg, reg2, + GEN_INT (-adjustment), + GEN_INT (UNITS_PER_WORD - adjustment)); + case DFmode: + return gen_storewb_pairdf_di (base, base, reg, reg2, + GEN_INT (-adjustment), + GEN_INT (UNITS_PER_WORD - adjustment)); + default: + gcc_unreachable (); + } +} + +static void +aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1, + unsigned regno2, HOST_WIDE_INT adjustment) +{ + rtx insn; + rtx reg1 = gen_rtx_REG (mode, regno1); + rtx reg2 = gen_rtx_REG (mode, regno2); + + insn = emit_insn (aarch64_gen_storewb_pair (mode, 
stack_pointer_rtx, reg1, + reg2, adjustment)); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; +} + +static rtx +aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, + HOST_WIDE_INT adjustment) +{ + switch (mode) + { + case DImode: + return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), + GEN_INT (adjustment + UNITS_PER_WORD)); + case DFmode: + return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), + GEN_INT (adjustment + UNITS_PER_WORD)); + default: + gcc_unreachable (); + } +} static void -aarch64_save_or_restore_fprs (int start_offset, int increment, - bool restore, rtx base_rtx) +aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1, + unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa) +{ + rtx insn; + rtx reg1 = gen_rtx_REG (mode, regno1); + rtx reg2 = gen_rtx_REG (mode, regno2); + + insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1, + reg2, adjustment)); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; + + if (cfa) + add_reg_note (insn, REG_CFA_ADJUST_CFA, + (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, cfa, adjustment)))); + add_reg_note (insn, REG_CFA_RESTORE, reg1); + add_reg_note (insn, REG_CFA_RESTORE, reg2); +} + +static rtx +aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2, + rtx reg2) { + switch (mode) + { + case DImode: + return gen_store_pairdi (mem1, reg1, mem2, reg2); + + case DFmode: + return gen_store_pairdf (mem1, reg1, mem2, reg2); + + default: + gcc_unreachable (); + } +} + +static rtx +aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2, + rtx mem2) +{ + switch (mode) + { + case DImode: + return gen_load_pairdi (reg1, mem1, reg2, mem2); + + case DFmode: + return 
gen_load_pairdf (reg1, mem1, reg2, mem2); + + default: + gcc_unreachable (); + } +} + + +static void +aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset, + unsigned start, unsigned limit, bool skip_wb) +{ + rtx insn; + rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed + ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; - rtx insn; - rtx (*gen_mem_ref)(enum machine_mode, rtx) - = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + for (regno = aarch64_next_callee_save (start, limit); + regno <= limit; + regno = aarch64_next_callee_save (regno + 1, limit)) { - if (aarch64_register_saved_on_entry (regno)) - { - rtx mem; - mem = gen_mem_ref (DFmode, - plus_constant (Pmode, - base_rtx, - start_offset)); + rtx reg, mem; + HOST_WIDE_INT offset; - for (regno2 = regno + 1; - regno2 <= V31_REGNUM - && !aarch64_register_saved_on_entry (regno2); - regno2++) - { - /* Empty loop. */ - } + if (skip_wb + && (regno == cfun->machine->frame.wb_candidate1 + || regno == cfun->machine->frame.wb_candidate2)) + continue; - if (regno2 <= V31_REGNUM && - aarch64_register_saved_on_entry (regno2)) - { - rtx mem2; - - /* Next highest register to be saved. 
*/ - mem2 = gen_mem_ref (DFmode, - plus_constant - (Pmode, - base_rtx, - start_offset + increment)); - if (restore == false) - { - insn = emit_insn - ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), - mem2, gen_rtx_REG (DFmode, regno2))); + reg = gen_rtx_REG (mode, regno); + offset = start_offset + cfun->machine->frame.reg_offset[regno]; + mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); - } - else - { - insn = emit_insn - ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, - gen_rtx_REG (DFmode, regno2), mem2)); - - add_reg_note (insn, REG_CFA_RESTORE, - gen_rtx_REG (DFmode, regno)); - add_reg_note (insn, REG_CFA_RESTORE, - gen_rtx_REG (DFmode, regno2)); - } + regno2 = aarch64_next_callee_save (regno + 1, limit); - /* The first part of a frame-related parallel insn is - always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - regno = regno2; - start_offset += increment * 2; - } - else - { - if (restore == false) - insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); - else - { - insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); - add_reg_note (insn, REG_CFA_RESTORE, - gen_rtx_REG (DFmode, regno)); - } - start_offset += increment; - } - RTX_FRAME_RELATED_P (insn) = 1; + if (regno2 <= limit + && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) + == cfun->machine->frame.reg_offset[regno2])) + + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; + + offset = start_offset + cfun->machine->frame.reg_offset[regno2]; + mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); + insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, + reg2)); + + /* The first part of a frame-related parallel insn is + always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; } + else + insn = emit_move_insn (mem, reg); + + RTX_FRAME_RELATED_P (insn) = 1; } } - -/* offset from the stack pointer of where the saves and - restore's have to happen. */ static void -aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, - bool restore) +aarch64_restore_callee_saves (enum machine_mode mode, + HOST_WIDE_INT start_offset, unsigned start, + unsigned limit, bool skip_wb) { rtx insn; rtx base_rtx = stack_pointer_rtx; - HOST_WIDE_INT start_offset = offset; - HOST_WIDE_INT increment = UNITS_PER_WORD; - rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; - unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; + rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed + ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; + HOST_WIDE_INT offset; - for (regno = R0_REGNUM; regno <= limit; regno++) + for (regno = aarch64_next_callee_save (start, limit); + regno <= limit; + regno = aarch64_next_callee_save (regno + 1, limit)) { - if (aarch64_register_saved_on_entry (regno)) - { - rtx mem; - mem = gen_mem_ref (Pmode, - plus_constant (Pmode, - base_rtx, - start_offset)); + rtx reg, mem; - for (regno2 = regno + 1; - regno2 <= limit - && !aarch64_register_saved_on_entry (regno2); - regno2++) - { - /* Empty loop. */ - } - if (regno2 <= limit && - aarch64_register_saved_on_entry (regno2)) - { - rtx mem2; - - /* Next highest register to be saved. 
*/ - mem2 = gen_mem_ref (Pmode, - plus_constant - (Pmode, - base_rtx, - start_offset + increment)); - if (restore == false) - { - insn = emit_insn - ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), - mem2, gen_rtx_REG (DImode, regno2))); + if (skip_wb + && (regno == cfun->machine->frame.wb_candidate1 + || regno == cfun->machine->frame.wb_candidate2)) + continue; - } - else - { - insn = emit_insn - ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, - gen_rtx_REG (DImode, regno2), mem2)); + reg = gen_rtx_REG (mode, regno); + offset = start_offset + cfun->machine->frame.reg_offset[regno]; + mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); - } + regno2 = aarch64_next_callee_save (regno + 1, limit); - /* The first part of a frame-related parallel insn is - always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. 
*/ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - regno = regno2; - start_offset += increment * 2; - } - else - { - if (restore == false) - insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); - else - { - insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); - } - start_offset += increment; - } - RTX_FRAME_RELATED_P (insn) = 1; + if (regno2 <= limit + && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) + == cfun->machine->frame.reg_offset[regno2])) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; + + offset = start_offset + cfun->machine->frame.reg_offset[regno2]; + mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, + mem2)); + add_reg_note (insn, REG_CFA_RESTORE, reg); + add_reg_note (insn, REG_CFA_RESTORE, reg2); + + /* The first part of a frame-related parallel insn is + always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; + } + else + { + insn = emit_move_insn (reg, mem); + add_reg_note (insn, REG_CFA_RESTORE, reg); } - } - aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } } /* AArch64 stack frames generated by this compiler look like: @@ -2125,26 +2242,20 @@ aarch64_expand_prologue (void) sub sp, sp, */ - HOST_WIDE_INT original_frame_size; /* local variables + vararg save */ HOST_WIDE_INT frame_size, offset; - HOST_WIDE_INT fp_offset; /* FP offset from SP */ + HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. 
*/ rtx insn; aarch64_layout_frame (); - original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; - gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg) - && (cfun->stdarg || !cfun->machine->saved_varargs_size)); - frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size - + crtl->outgoing_args_size); - offset = frame_size = AARCH64_ROUND_UP (frame_size, - STACK_BOUNDARY / BITS_PER_UNIT); if (flag_stack_usage_info) - current_function_static_stack_size = frame_size; + current_function_static_stack_size = cfun->machine->frame.frame_size; - fp_offset = (offset - - original_frame_size - - cfun->machine->frame.saved_regs_size); + frame_size = cfun->machine->frame.frame_size; + offset = cfun->machine->frame.frame_size; + + fp_offset = cfun->machine->frame.frame_size + - cfun->machine->frame.hard_fp_offset; /* Store pairs and load pairs have a range only -512 to 504. */ if (offset >= 512) @@ -2155,7 +2266,7 @@ aarch64_expand_prologue (void) register area. This will allow the pre-index write-back store pair instructions to be used for setting up the stack frame efficiently. */ - offset = original_frame_size + cfun->machine->frame.saved_regs_size; + offset = cfun->machine->frame.hard_fp_offset; if (offset >= 512) offset = cfun->machine->frame.saved_regs_size; @@ -2198,12 +2309,11 @@ aarch64_expand_prologue (void) if (offset > 0) { - /* Save the frame pointer and lr if the frame pointer is needed - first. Make the frame pointer point to the location of the - old frame pointer on the stack. 
*/ + bool skip_wb = false; + if (frame_pointer_needed) { - rtx mem_fp, mem_lr; + skip_wb = true; if (fp_offset) { @@ -2212,41 +2322,14 @@ aarch64_expand_prologue (void) RTX_FRAME_RELATED_P (insn) = 1; aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, - gen_rtx_MINUS (Pmode, - stack_pointer_rtx, + gen_rtx_MINUS (Pmode, stack_pointer_rtx, GEN_INT (offset)))); - mem_fp = gen_frame_mem (DImode, - plus_constant (Pmode, - stack_pointer_rtx, - fp_offset)); - mem_lr = gen_frame_mem (DImode, - plus_constant (Pmode, - stack_pointer_rtx, - fp_offset - + UNITS_PER_WORD)); - insn = emit_insn (gen_store_pairdi (mem_fp, - hard_frame_pointer_rtx, - mem_lr, - gen_rtx_REG (DImode, - LR_REGNUM))); + + aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM, + R30_REGNUM, false); } else - { - insn = emit_insn (gen_storewb_pairdi_di - (stack_pointer_rtx, stack_pointer_rtx, - hard_frame_pointer_rtx, - gen_rtx_REG (DImode, LR_REGNUM), - GEN_INT (-offset), - GEN_INT (GET_MODE_SIZE (DImode) - offset))); - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; - } - - /* The first part of a frame-related parallel insn is always - assumed to be relevant to the frame calculations; - subsequent parts, are only frame-related if explicitly - marked. */ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - RTX_FRAME_RELATED_P (insn) = 1; + aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset); /* Set up frame pointer to point to the location of the previous frame pointer on the stack. 
*/ @@ -2264,13 +2347,35 @@ aarch64_expand_prologue (void) } else { - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (-offset))); - RTX_FRAME_RELATED_P (insn) = 1; + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + + if (fp_offset + || reg1 == FIRST_PSEUDO_REGISTER + || (reg2 == FIRST_PSEUDO_REGISTER + && offset >= 256)) + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode; + + skip_wb = true; + + if (reg2 == FIRST_PSEUDO_REGISTER) + aarch64_pushwb_single_reg (mode1, reg1, offset); + else + aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset); + } } - aarch64_save_or_restore_callee_save_registers - (fp_offset + cfun->machine->frame.hardfp_offset, 0); + aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, + skip_wb); + aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, + skip_wb); } /* when offset >= 512, @@ -2291,28 +2396,23 @@ aarch64_expand_prologue (void) void aarch64_expand_epilogue (bool for_sibcall) { - HOST_WIDE_INT original_frame_size, frame_size, offset; + HOST_WIDE_INT frame_size, offset; HOST_WIDE_INT fp_offset; rtx insn; rtx cfa_reg; aarch64_layout_frame (); - original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; - frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size - + crtl->outgoing_args_size); - offset = frame_size = AARCH64_ROUND_UP (frame_size, - STACK_BOUNDARY / BITS_PER_UNIT); - fp_offset = (offset - - original_frame_size - - cfun->machine->frame.saved_regs_size); + offset = frame_size = cfun->machine->frame.frame_size; + fp_offset = cfun->machine->frame.frame_size + - cfun->machine->frame.hard_fp_offset; cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; /* Store pairs and load pairs have a range only -512 to 504. 
*/ if (offset >= 512) { - offset = original_frame_size + cfun->machine->frame.saved_regs_size; + offset = cfun->machine->frame.hard_fp_offset; if (offset >= 512) offset = cfun->machine->frame.saved_regs_size; @@ -2338,7 +2438,8 @@ aarch64_expand_epilogue (bool for_sibcall) { insn = emit_insn (gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, - GEN_INT (- fp_offset))); + GEN_INT (0))); + offset = offset - fp_offset; RTX_FRAME_RELATED_P (insn) = 1; /* As SP is set to (FP - fp_offset), according to the rules in dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated @@ -2346,64 +2447,37 @@ aarch64_expand_epilogue (bool for_sibcall) cfa_reg = stack_pointer_rtx; } - aarch64_save_or_restore_callee_save_registers - (fp_offset + cfun->machine->frame.hardfp_offset, 1); - - /* Restore the frame pointer and lr if the frame pointer is needed. */ if (offset > 0) { + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool skip_wb = true; + if (frame_pointer_needed) + fp_offset = 0; + else if (fp_offset + || reg1 == FIRST_PSEUDO_REGISTER + || (reg2 == FIRST_PSEUDO_REGISTER + && offset >= 256)) + skip_wb = false; + + aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, + skip_wb); + aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, + skip_wb); + + if (skip_wb) { - rtx mem_fp, mem_lr; + enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; - if (fp_offset) - { - mem_fp = gen_frame_mem (DImode, - plus_constant (Pmode, - stack_pointer_rtx, - fp_offset)); - mem_lr = gen_frame_mem (DImode, - plus_constant (Pmode, - stack_pointer_rtx, - fp_offset - + UNITS_PER_WORD)); - insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, - mem_fp, - gen_rtx_REG (DImode, - LR_REGNUM), - mem_lr)); - } + if (reg2 == FIRST_PSEUDO_REGISTER) + aarch64_popwb_single_reg (mode1, reg1, offset); else { - insn = emit_insn (gen_loadwb_pairdi_di - (stack_pointer_rtx, - stack_pointer_rtx, - hard_frame_pointer_rtx, - gen_rtx_REG (DImode, LR_REGNUM), - GEN_INT (offset), - GEN_INT (GET_MODE_SIZE (DImode) + offset))); - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; - add_reg_note (insn, REG_CFA_ADJUST_CFA, - (gen_rtx_SET (Pmode, stack_pointer_rtx, - plus_constant (Pmode, cfa_reg, - offset)))); - } + if (reg1 != HARD_FRAME_POINTER_REGNUM) + cfa_reg = NULL; - /* The first part of a frame-related parallel insn - is always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. 
*/ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - RTX_FRAME_RELATED_P (insn) = 1; - add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); - add_reg_note (insn, REG_CFA_RESTORE, - gen_rtx_REG (DImode, LR_REGNUM)); - - if (fp_offset) - { - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (offset))); - RTX_FRAME_RELATED_P (insn) = 1; + aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg); } } else @@ -2477,10 +2551,10 @@ aarch64_expand_epilogue (bool for_sibcall) } } - aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, - plus_constant (Pmode, - stack_pointer_rtx, - offset))); + aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset))); } emit_use (gen_rtx_REG (DImode, LR_REGNUM)); @@ -2494,16 +2568,12 @@ aarch64_expand_epilogue (bool for_sibcall) rtx aarch64_final_eh_return_addr (void) { - HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; + HOST_WIDE_INT fp_offset; + aarch64_layout_frame (); - original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; - frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size - + crtl->outgoing_args_size); - offset = frame_size = AARCH64_ROUND_UP (frame_size, - STACK_BOUNDARY / BITS_PER_UNIT); - fp_offset = offset - - original_frame_size - - cfun->machine->frame.saved_regs_size; + + fp_offset = cfun->machine->frame.frame_size + - cfun->machine->frame.hard_fp_offset; if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) return gen_rtx_REG (DImode, LR_REGNUM); @@ -4253,42 +4323,28 @@ aarch64_can_eliminate (const int from, const int to) HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned from, unsigned to) { - HOST_WIDE_INT frame_size; - HOST_WIDE_INT offset; - aarch64_layout_frame (); - frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size - + crtl->outgoing_args_size - + cfun->machine->saved_varargs_size); - - frame_size = AARCH64_ROUND_UP 
(frame_size, STACK_BOUNDARY / BITS_PER_UNIT); - offset = frame_size; if (to == HARD_FRAME_POINTER_REGNUM) { if (from == ARG_POINTER_REGNUM) - return offset - crtl->outgoing_args_size; + return cfun->machine->frame.frame_size - crtl->outgoing_args_size; if (from == FRAME_POINTER_REGNUM) - return cfun->machine->frame.saved_regs_size + get_frame_size (); + return (cfun->machine->frame.hard_fp_offset + - cfun->machine->frame.saved_varargs_size); } if (to == STACK_POINTER_REGNUM) { if (from == FRAME_POINTER_REGNUM) - { - HOST_WIDE_INT elim = crtl->outgoing_args_size - + cfun->machine->frame.saved_regs_size - + get_frame_size (); - elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); - return elim; - } + return (cfun->machine->frame.frame_size + - cfun->machine->frame.saved_varargs_size); } - return offset; + return cfun->machine->frame.frame_size; } - /* Implement RETURN_ADDR_RTX. We do not support moving back to a previous frame. */ @@ -7015,7 +7071,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode, /* We don't save the size into *PRETEND_SIZE because we want to avoid any complication of having crtl->args.pretend_args_size changed. */ - cfun->machine->saved_varargs_size + cfun->machine->frame.saved_varargs_size = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, STACK_BOUNDARY / BITS_PER_UNIT) + vr_saved * UNITS_PER_VREG); diff --git a/gcc-4_9-branch/gcc/config/aarch64/aarch64.h b/gcc-4_9-branch/gcc/config/aarch64/aarch64.h index e0384b3adf3..cd05f68f3f2 100644 --- a/gcc-4_9-branch/gcc/config/aarch64/aarch64.h +++ b/gcc-4_9-branch/gcc/config/aarch64/aarch64.h @@ -523,23 +523,39 @@ extern enum aarch64_processor aarch64_tune; struct GTY (()) aarch64_frame { HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the + frame. This value is rounded up to a multiple of + STACK_BOUNDARY. 
*/ + HOST_WIDE_INT saved_varargs_size; + HOST_WIDE_INT saved_regs_size; /* Padding if needed after the all the callee save registers have been saved. */ HOST_WIDE_INT padding0; HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ + /* Offset from the base of the frame (incoming SP) to the + hard_frame_pointer. This value is always a multiple of + STACK_BOUNDARY. */ + HOST_WIDE_INT hard_fp_offset; + + /* The size of the frame. This value is the offset from base of the + * frame (incoming SP) to the stack_pointer. This value is always + * a multiple of STACK_BOUNDARY. */ + + unsigned wb_candidate1; + unsigned wb_candidate2; + + HOST_WIDE_INT frame_size; + bool laid_out; }; typedef struct GTY (()) machine_function { struct aarch64_frame frame; - - /* The number of extra stack bytes taken up by register varargs. - This area is allocated by the callee at the very top of the frame. */ - HOST_WIDE_INT saved_varargs_size; - } machine_function; #endif diff --git a/gcc-4_9-branch/gcc/config/aarch64/aarch64.md b/gcc-4_9-branch/gcc/config/aarch64/aarch64.md index aae56a8117a..9b7239e3002 100644 --- a/gcc-4_9-branch/gcc/config/aarch64/aarch64.md +++ b/gcc-4_9-branch/gcc/config/aarch64/aarch64.md @@ -987,6 +987,22 @@ [(set_attr "type" "load2")] ) +(define_insn "loadwb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (match_operand:GPF 2 "register_operand" "=w") + (mem:GPF (plus:P (match_dup 1) + (match_dup 4)))) + (set (match_operand:GPF 3 "register_operand" "=w") + (mem:GPF (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "ldp\\t%2, %3, [%1], %4" + [(set_attr "type" "neon_load1_2reg")] +) + ;; Store pair with writeback.
This is primarily used in function prologues ;; when saving [fp,lr] (define_insn "storewb_pair_" @@ -1005,6 +1021,22 @@ [(set_attr "type" "store2")] ) +(define_insn "storewb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=&k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (mem:GPF (plus:P (match_dup 0) + (match_dup 4))) + (match_operand:GPF 2 "register_operand" "w")) + (set (mem:GPF (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:GPF 3 "register_operand" "w"))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "stp\\t%2, %3, [%0, %4]!" + [(set_attr "type" "neon_store1_2reg")] +) + ;; ------------------------------------------------------------------- ;; Sign/Zero extension ;; ------------------------------------------------------------------- diff --git a/gcc-4_9-branch/gcc/testsuite/ChangeLog.linaro b/gcc-4_9-branch/gcc/testsuite/ChangeLog.linaro index b3478e9cb58..77aeba4be51 100644 --- a/gcc-4_9-branch/gcc/testsuite/ChangeLog.linaro +++ b/gcc-4_9-branch/gcc/testsuite/ChangeLog.linaro @@ -1,3 +1,53 @@ +2014-08-11 Yvan Roux + + Backport from trunk r212959, r212976, r212999, r213000. + 2014-07-24 Jiong Wang + + * gcc.target/aarch64/test_frame_1.c: Match optimized instruction + sequences. + * gcc.target/aarch64/test_frame_2.c: Likewise. + * gcc.target/aarch64/test_frame_4.c: Likewise. + * gcc.target/aarch64/test_frame_6.c: Likewise. + * gcc.target/aarch64/test_frame_7.c: Likewise. + * gcc.target/aarch64/test_frame_8.c: Likewise. + * gcc.target/aarch64/test_frame_10.c: Likewise. + + 2014-07-24 Jiong Wang + + * gcc.target/aarch64/test_frame_1.c: Match optimized instruction + sequences. + * gcc.target/aarch64/test_frame_10.c: Likewise. + * gcc.target/aarch64/test_frame_2.c: Likewise. + * gcc.target/aarch64/test_frame_4.c: Likewise. + * gcc.target/aarch64/test_frame_6.c: Likewise. 
+ * gcc.target/aarch64/test_frame_7.c: Likewise. + * gcc.target/aarch64/test_frame_8.c: Likewise. + * gcc.target/aarch64/test_fp_attribute_1.c: Likewise. + + 2014-07-24 Jiong Wang + + * gcc.target/aarch64/test_frame_12.c: Match optimized instruction + sequences. + + 2014-07-23 Jiong Wang + + * gcc.target/aarch64/test_frame_common.h: New file. + * gcc.target/aarch64/test_frame_1.c: Likewise. + * gcc.target/aarch64/test_frame_2.c: Likewise. + * gcc.target/aarch64/test_frame_3.c: Likewise. + * gcc.target/aarch64/test_frame_4.c: Likewise. + * gcc.target/aarch64/test_frame_5.c: Likewise. + * gcc.target/aarch64/test_frame_6.c: Likewise. + * gcc.target/aarch64/test_frame_7.c: Likewise. + * gcc.target/aarch64/test_frame_8.c: Likewise. + * gcc.target/aarch64/test_frame_9.c: Likewise. + * gcc.target/aarch64/test_frame_10.c: Likewise. + * gcc.target/aarch64/test_frame_11.c: Likewise. + * gcc.target/aarch64/test_frame_12.c: Likewise. + * gcc.target/aarch64/test_frame_13.c: Likewise. + * gcc.target/aarch64/test_frame_14.c: Likewise. + * gcc.target/aarch64/test_frame_15.c: Likewise. + 2014-08-10 Yvan Roux Backport from trunk r212023, r212024. diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c index 7538250c9f3..960174a5e20 100644 --- a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c @@ -21,6 +21,6 @@ non_leaf_2 (void) leaf (); } -/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_1.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_1.c new file mode 100644 index 00000000000..5b3c0ab32f7 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_1.c @@ -0,0 +1,19 @@ +/* Verify: + * -fomit-frame-pointer. + * without outgoing. + * total frame size <= 256. + * number of callee-save reg == 1. + * optimized code should use "str !" for stack adjustment. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test1, 200, ) +t_frame_run (test1) + +/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ +/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_10.c new file mode 100644 index 00000000000..525b49e3868 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_10.c @@ -0,0 +1,21 @@ +/* Verify: + * -fomit-frame-pointer. + * with outgoing. + * total frame size > 512. + area except outgoing <= 512 + * number of callee-saved reg >= 2. + * Split stack adjustment into two subtractions. + the first subtraction could be optimized into "stp !". */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10]) +t_frame_run (test10) + +/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!"
1 } } */ +/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_11.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_11.c new file mode 100644 index 00000000000..8b860dd31ef --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_11.c @@ -0,0 +1,16 @@ +/* Verify: + * without outgoing. + * total frame size <= 512. + * number of callee-save reg >= 2. + * optimized code should use "stp !" for stack adjustment. */ + +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test11, 400, ) +t_frame_run (test11) + +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_12.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_12.c new file mode 100644 index 00000000000..81f00706175 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_12.c @@ -0,0 +1,19 @@ +/* Verify: + * with outgoing. + * total frame size <= 512. + * number of callee-save reg >= 2. */ + +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test12, 400, , 8, a[8]) +t_frame_run (test12) + +/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ + +/* Check epilogue using write-back. 
*/ +/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_13.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_13.c new file mode 100644 index 00000000000..25df08b47d5 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_13.c @@ -0,0 +1,18 @@ +/* Verify: + * without outgoing. + * total frame size > 512. + * number of callee-save reg >= 2. + * split the stack adjustment into two subtractions, + the second could be optimized into "stp !". */ + +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test13, 700, ) +t_frame_run (test13) + +/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_14.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_14.c new file mode 100644 index 00000000000..78818dec32a --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_14.c @@ -0,0 +1,12 @@ +/* Verify: + * with outgoing. + * total frame size > 512. + * number of callee-save reg >= 2. */ + +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test14, 700, , 8, a[8]) +t_frame_run (test14) diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_15.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_15.c new file mode 100644 index 00000000000..7ab1f205785 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_15.c @@ -0,0 +1,19 @@ +/* Verify: + * with outgoing. + * total frame size > 512. + area except outgoing <= 512 + * number of callee-save reg >= 2.
+ * split the stack adjustment into two subtractions, + the first could be optimized into "stp !". */ + +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test15, 480, , 8, a[8]) +t_frame_run (test15) + +/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_2.c new file mode 100644 index 00000000000..6ec4088da03 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_2.c @@ -0,0 +1,20 @@ +/* Verify: + * -fomit-frame-pointer. + * without outgoing. + * total frame size <= 256. + * number of callee-save regs >= 2. + * optimized code should use "stp !" for stack adjustment. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test2, 200, "x19") +t_frame_run (test2) + + +/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ +/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_3.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_3.c new file mode 100644 index 00000000000..f90ea4a1ae8 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_3.c @@ -0,0 +1,14 @@ +/* Verify: + * -fomit-frame-pointer. + * without outgoing. + * total frame size <= 512 but > 256. + * number of callee-save reg == 1. + * we can't use "str !" to optimize stack adjustment.
*/ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test3, 400, ) +t_frame_run (test3) diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_4.c new file mode 100644 index 00000000000..ebfb290ef12 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_4.c @@ -0,0 +1,19 @@ +/* Verify: + * -fomit-frame-pointer. + * without outgoing. + * total frame size <= 512 but > 256. + * number of callee-save reg >= 2. + * we can use "stp !" to optimize stack adjustment. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test4, 400, "x19") +t_frame_run (test4) + +/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ +/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_5.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_5.c new file mode 100644 index 00000000000..0624b5b7473 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_5.c @@ -0,0 +1,13 @@ +/* Verify: + * -fomit-frame-pointer. + * with outgoing. + * total frame size <= 512. + * one subtraction of the whole frame size. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test5, 300, "x19", 8, a[8]) +t_frame_run (test5) diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_6.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_6.c new file mode 100644 index 00000000000..b5ea7ee08ce --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_6.c @@ -0,0 +1,20 @@ +/* Verify: + * -fomit-frame-pointer. 
+ * without outgoing. + * total frame size > 512. + * number of callee-saved reg == 1. + * split stack adjustment into two subtractions. + the second subtraction should use "str !". */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test6, 700, ) +t_frame_run (test6) + +/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ +/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_7.c new file mode 100644 index 00000000000..daa1f422e16 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_7.c @@ -0,0 +1,20 @@ +/* Verify: + * -fomit-frame-pointer. + * without outgoing. + * total frame size > 512. + * number of callee-saved reg == 2. + * split stack adjustment into two subtractions. + the second subtraction should use "stp !". */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern (test7, 700, "x19") +t_frame_run (test7) + +/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ +/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_8.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_8.c new file mode 100644 index 00000000000..4ce630ce0fe --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_8.c @@ -0,0 +1,18 @@ +/* Verify: + * -fomit-frame-pointer. + * with outgoing. + * total frame size bigger than 512. + * number of callee-saved reg == 1. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test8, 700, , 8, a[8]) +t_frame_run (test8) + +/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */ +/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_9.c b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_9.c new file mode 100644 index 00000000000..0dffbf8ad17 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_9.c @@ -0,0 +1,17 @@ +/* Verify: + * -fomit-frame-pointer. + * with outgoing. + * total frame size > 512. + area except outgoing <= 512 + * number of callee-saved reg = 1. + * Split stack adjustment into two subtractions. + the first subtraction couldn't be optimized + into "str !" as it's > 256. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +#include "test_frame_common.h" + +t_frame_pattern_outgoing (test9, 480, , 24, a[8], a[9], a[10]) +t_frame_run (test9) diff --git a/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_common.h b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_common.h new file mode 100644 index 00000000000..d7fed250ca8 --- /dev/null +++ b/gcc-4_9-branch/gcc/testsuite/gcc.target/aarch64/test_frame_common.h @@ -0,0 +1,94 @@ +extern void abort (); + +#define CVT(v) ((unsigned char)(v)) + +static void __attribute__((noinline)) +check_args_8 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, + int a8) +{ + if (a0 != 0 + || a1 != 1 + || a2 != 2 + || a3 != 3 + || a4 != 4 + || a5 != 5 + || a6 != 6 + || a7 != 7 + || a8 != 8) + abort (); +} + +static void __attribute__((noinline)) +check_args_24 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, + int a8, int a9, int a10) +{ + if (a0 != 0 + || a1 != 1 + || a2 != 2
+ || a3 != 3 + || a4 != 4 + || a5 != 5 + || a6 != 6 + || a7 != 7 + || a8 != 8 + || a9 != 9 + || a10 != 10) + abort (); +} + +void __attribute__ ((noinline)) +initialize_array (unsigned char *a, int len) +{ + int i; + + for (i = 0; i < (len / 2); i++) + { + a[i] = i; + a[len - i - 1] = i; + } + + return; +} + +#define t_frame_pattern(name, local_size, callee_saved)\ +int \ +name (void)\ +{\ + unsigned char a[local_size];\ + initialize_array (a, local_size); \ + __asm__ ("":::callee_saved); \ + if (a[0] != a[local_size - 1] \ + || a[0] != 0) \ + return 0; \ + if (a[local_size / 2 - 1] != a[local_size / 2] \ + || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ + return 0; \ + return 1; \ +} + +#define t_frame_pattern_outgoing(name, local_size, callee_saved, out_going_num, ...)\ +int \ +name (void)\ +{\ + unsigned char a[local_size];\ + initialize_array (a, local_size); \ + __asm__ ("":::callee_saved); \ + if (a[0] != a[local_size - 1] \ + || a[0] != 0) \ + return 0; \ + if (a[local_size / 2 - 1] != a[local_size / 2] \ + || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ + return 0; \ + check_args_ ## out_going_num (a[0], a[1], a[2], a[3], a[4], a[5], a[6],\ + a[7], __VA_ARGS__); \ + return 1; \ +} + +#define t_frame_run(name) \ +int \ +main (int argc, char **argv) \ +{\ + if (!name ())\ + abort ();\ + return 0;\ +} -- 2.11.4.GIT