preprocessor: Create the parser before handling command-line includes [PR115312]
[official-gcc.git] / gcc / config / aarch64 / aarch64-ldp-fusion.cc
blob b255dcbe73cbcc8f9d8c2528aba06b9c02146a26
1 // LoadPair fusion optimization pass for AArch64.
2 // Copyright (C) 2023-2024 Free Software Foundation, Inc.
3 //
4 // This file is part of GCC.
5 //
6 // GCC is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // GCC is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "memmodel.h"
26 #include "emit-rtl.h"
27 #include "tm_p.h"
28 #include "rtl-iter.h"
29 #include "tree-pass.h"
30 #include "insn-attr.h"
31 #include "pair-fusion.h"
33 static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
34 static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1));
35 static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1;
36 static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1;
38 struct aarch64_pair_fusion : public pair_fusion
40 bool fpsimd_op_p (rtx reg_op, machine_mode mem_mode,
41 bool load_p) override final
43 // Before RA, we use the modes, noting that stores of constant zero
44 // operands use GPRs (even in non-integer modes). After RA, we use
45 // the hard register numbers.
46 return reload_completed
47 ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
48 : (GET_MODE_CLASS (mem_mode) != MODE_INT
49 && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
52 bool pair_mem_insn_p (rtx_insn *rti, bool &load_p) override final;
54 bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) override final
56 return aarch64_mem_ok_with_ldpstp_policy_model (base_mem,
57 load_p,
58 GET_MODE (base_mem));
61 bool pair_operand_mode_ok_p (machine_mode mode) override final;
63 rtx gen_pair (rtx *pats, rtx writeback, bool load_p) override final;
65 bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
66 machine_mode mode) override final
68 return (load_p
69 ? aarch64_ldp_reg_operand (reg_op, mode)
70 : aarch64_stp_reg_operand (reg_op, mode));
73 int pair_mem_alias_check_limit () override final
75 return aarch64_ldp_alias_check_limit;
78 bool should_handle_writeback (writeback_type which) override final
80 if (which == writeback_type::ALL)
81 return aarch64_ldp_writeback > 1;
82 else
83 return aarch64_ldp_writeback;
86 bool track_loads_p () override final
88 return aarch64_tune_params.ldp_policy_model
89 != AARCH64_LDP_STP_POLICY_NEVER;
92 bool track_stores_p () override final
94 return aarch64_tune_params.stp_policy_model
95 != AARCH64_LDP_STP_POLICY_NEVER;
98 bool pair_mem_in_range_p (HOST_WIDE_INT offset) override final
100 return (offset >= LDP_MIN_IMM && offset <= LDP_MAX_IMM);
103 rtx gen_promote_writeback_pair (rtx wb_effect, rtx mem, rtx regs[2],
104 bool load_p) override final;
106 rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) override final;
109 bool
110 aarch64_pair_fusion::pair_mem_insn_p (rtx_insn *rti, bool &load_p)
112 rtx pat = PATTERN (rti);
113 if (GET_CODE (pat) == PARALLEL
114 && XVECLEN (pat, 0) == 2)
116 const auto attr = get_attr_ldpstp (rti);
117 if (attr == LDPSTP_NONE)
118 return false;
120 load_p = (attr == LDPSTP_LDP);
121 gcc_checking_assert (load_p || attr == LDPSTP_STP);
122 return true;
124 return false;
128 aarch64_pair_fusion::gen_pair (rtx *pats, rtx writeback, bool load_p)
130 rtx pair_pat;
132 if (writeback)
134 auto patvec = gen_rtvec (3, writeback, pats[0], pats[1]);
135 return gen_rtx_PARALLEL (VOIDmode, patvec);
137 else if (load_p)
138 return aarch64_gen_load_pair (XEXP (pats[0], 0),
139 XEXP (pats[1], 0),
140 XEXP (pats[0], 1));
141 else
142 return aarch64_gen_store_pair (XEXP (pats[0], 0),
143 XEXP (pats[0], 1),
144 XEXP (pats[1], 1));
145 return pair_pat;
148 // Return true if we should consider forming ldp/stp insns from memory
149 // accesses with operand mode MODE at this stage in compilation.
150 bool
151 aarch64_pair_fusion::pair_operand_mode_ok_p (machine_mode mode)
153 if (!aarch64_ldpstp_operand_mode_p (mode))
154 return false;
156 // We don't pair up TImode accesses before RA because TImode is
157 // special in that it can be allocated to a pair of GPRs or a single
158 // FPR, and the RA is best placed to make that decision.
159 return reload_completed || mode != TImode;
162 // Given a pair mode MODE, return a canonical mode to be used for a single
163 // operand of such a pair. Currently we only use this when promoting a
164 // non-writeback pair into a writeback pair, as it isn't otherwise clear
165 // which mode to use when storing a modeless CONST_INT.
166 static machine_mode
167 aarch64_operand_mode_for_pair_mode (machine_mode mode)
169 switch (mode)
171 case E_V2x4QImode:
172 return SImode;
173 case E_V2x8QImode:
174 return DImode;
175 case E_V2x16QImode:
176 return V16QImode;
177 default:
178 gcc_unreachable ();
182 // Given a load pair insn in PATTERN, unpack the insn, storing
183 // the registers in REGS and returning the mem.
184 static rtx
185 aarch64_destructure_load_pair (rtx regs[2], rtx pattern)
187 rtx mem = NULL_RTX;
189 for (int i = 0; i < 2; i++)
191 rtx pat = XVECEXP (pattern, 0, i);
192 regs[i] = XEXP (pat, 0);
193 rtx unspec = XEXP (pat, 1);
194 gcc_checking_assert (GET_CODE (unspec) == UNSPEC);
195 rtx this_mem = XVECEXP (unspec, 0, 0);
196 if (mem)
197 gcc_checking_assert (rtx_equal_p (mem, this_mem));
198 else
200 gcc_checking_assert (MEM_P (this_mem));
201 mem = this_mem;
205 return mem;
208 // Given a store pair insn in PATTERN, unpack the insn, storing
209 // the register operands in REGS, and returning the mem.
210 static rtx
211 aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
213 rtx mem = XEXP (pattern, 0);
214 rtx unspec = XEXP (pattern, 1);
215 gcc_checking_assert (GET_CODE (unspec) == UNSPEC);
216 for (int i = 0; i < 2; i++)
217 regs[i] = XVECEXP (unspec, 0, i);
218 return mem;
222 aarch64_pair_fusion::destructure_pair (rtx regs[2], rtx pattern, bool load_p)
224 if (load_p)
225 return aarch64_destructure_load_pair (regs, pattern);
226 else
227 return aarch64_destructure_store_pair (regs, pattern);
231 aarch64_pair_fusion::gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem,
232 rtx regs[2],
233 bool load_p)
235 auto op_mode = aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem));
237 machine_mode modes[2];
238 for (int i = 0; i < 2; i++)
240 machine_mode mode = GET_MODE (regs[i]);
241 if (load_p)
242 gcc_checking_assert (mode != VOIDmode);
243 else if (mode == VOIDmode)
244 mode = op_mode;
246 modes[i] = mode;
249 const auto op_size = GET_MODE_SIZE (modes[0]);
250 gcc_checking_assert (known_eq (op_size, GET_MODE_SIZE (modes[1])));
252 rtx pats[2];
253 for (int i = 0; i < 2; i++)
255 rtx mem = adjust_address_nv (pair_mem, modes[i], op_size * i);
256 pats[i] = load_p
257 ? gen_rtx_SET (regs[i], mem)
258 : gen_rtx_SET (mem, regs[i]);
261 return gen_rtx_PARALLEL (VOIDmode,
262 gen_rtvec (3, wb_effect, pats[0], pats[1]));
265 namespace {
267 const pass_data pass_data_ldp_fusion =
269 RTL_PASS, /* type */
270 "ldp_fusion", /* name */
271 OPTGROUP_NONE, /* optinfo_flags */
272 TV_NONE, /* tv_id */
273 0, /* properties_required */
274 0, /* properties_provided */
275 0, /* properties_destroyed */
276 0, /* todo_flags_start */
277 TODO_df_finish, /* todo_flags_finish */
280 class pass_ldp_fusion : public rtl_opt_pass
282 public:
283 pass_ldp_fusion (gcc::context *ctx)
284 : rtl_opt_pass (pass_data_ldp_fusion, ctx)
287 opt_pass *clone () override { return new pass_ldp_fusion (m_ctxt); }
289 bool gate (function *) final override
291 if (!optimize || optimize_debug)
292 return false;
294 if (reload_completed)
295 return flag_aarch64_late_ldp_fusion;
296 else
297 return flag_aarch64_early_ldp_fusion;
300 unsigned execute (function *) final override
302 aarch64_pair_fusion pass;
303 pass.run ();
304 return 0;
308 } // anon namespace
310 rtl_opt_pass *
311 make_pass_ldp_fusion (gcc::context *ctx)
313 return new pass_ldp_fusion (ctx);