gcc/config/riscv/riscv.cc
/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X) \
  (GET_CODE (X) == UNSPEC \
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching has a static frm, false otherwise.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)
/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c) \
  (TARGET_XTHEADINT \
   /* The XTheadInt extension only supports rv32.  */ \
   && !TARGET_64BIT \
   && (c)->machine->interrupt_handler_p \
   /* The XTheadInt instructions can only be executed in M-mode.  */ \
   && (c)->machine->interrupt_mode == MACHINE_MODE)
/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from the frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from the frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of the virtual frame pointer from the stack pointer/frame
     bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of the hard frame pointer from the stack pointer/frame
     bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, clearing all fields to zero.  */
  void reset(void);
};
enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};
struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding mode instruction in the
     function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};
struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if the current function is a naked function.  */
  bool naked_p;

  /* True if the current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on the current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};
/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};
/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  /* Whether this operation should use its "uw" variant (e.g. slli.uw).  */
  bool use_uw;
  /* Whether the value built so far must be kept in a temporary, so a later
     step can reuse it.  */
  bool save_temporary;
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8
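/* Editorial note, not in the original source: as an illustration of such a
   sequence, a value like 0x12345678 is reachable in two steps, LUI for bits
   31:12 and ADDI for the low 12 bits:

	lui  a0, 0x12345	# a0 = 0x12345000
	addi a0, a0, 0x678	# a0 = 0x12345678

   Because 0x678 < 0x800, the sign-extended 12-bit immediate does not
   disturb the LUI-loaded upper bits.  */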
enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};
/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool vector_unaligned_access;
  bool use_divmod_expansion;
  bool overlap_op_by_pieces;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};
/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether misaligned vector accesses are supported (i.e. do not
   throw an exception).  */
bool riscv_vector_unaligned_access_p;

/* Whether the user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;
/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,      GR_REGS,      GR_REGS,      GR_REGS,
  GR_REGS,      GR_REGS,      SIBCALL_REGS, SIBCALL_REGS,
  JALR_REGS,    JALR_REGS,    SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, JALR_REGS,    JALR_REGS,
  JALR_REGS,    JALR_REGS,    JALR_REGS,    JALR_REGS,
  JALR_REGS,    JALR_REGS,    JALR_REGS,    JALR_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FRAME_REGS,   FRAME_REGS,   NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  VM_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
};
/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* segment_permute (2) */
  1, /* segment_permute (3) */
  1, /* segment_permute (4) */
  1, /* segment_permute (5) */
  1, /* segment_permute (6) */
  1, /* segment_permute (7) */
  1, /* segment_permute (8) */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};
/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* segment_permute (2) */
    1, /* segment_permute (3) */
    1, /* segment_permute (4) */
    1, /* segment_permute (5) */
    1, /* segment_permute (6) */
    1, /* segment_permute (7) */
    1, /* segment_permute (8) */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};
/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};

/* Generic costs for vector insn classes.  These are the vector cost
   models used by default if no other cost model was specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1,			    /* scalar_int_stmt_cost */
  1,			    /* scalar_fp_stmt_cost */
  1,			    /* scalar_load_cost */
  1,			    /* scalar_store_cost */
  3,			    /* cond_taken_branch_cost */
  1,			    /* cond_not_taken_branch_cost */
  &rvv_vls_vector_cost,	    /* vls */
  &rvv_vla_vector_cost,	    /* vla */
  &rvv_regmove_vector_cost, /* regmove */
};
/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  2,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  3,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  4,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  6,						/* issue_rate */
  3,						/* branch_cost */
  3,						/* memory_cost */
  3,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,		/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)},	/* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)},	/* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  4,						/* memory_cost */
  4,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  true,						/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  true,						/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);
/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     size of the type in bits.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};
static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};
static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     size of the type in bits.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};
/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};
/* Global variable to distinguish whether we should save and restore s0/fp
   for the function.  */
static bool riscv_save_frame_pointer;

typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;
typedef insn_code (*code_for_push_pop_t) (machine_mode);

void riscv_frame_info::reset(void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}
/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}
/* Return the riscv_tune_info entry for the given name string.  If no entry
   is found, return nullptr when NULL_P is true; otherwise report an error
   and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}
/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
  int upper_trailing_ones = ctz_hwi (~value >> 32);
  int lower_leading_ones = clz_hwi (~value << 32);

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }
  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      bool use_uw = false;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS
	  && !SMALL_OPERAND (x)
	  && (LUI_OPERAND (x << IMM_BITS)
	      || (TARGET_64BIT
		  && TARGET_ZBA
		  && LUI_OPERAND ((x << IMM_BITS)
				  & ~HOST_WIDE_INT_C (0x80000000)))))
	shift -= IMM_BITS, x <<= IMM_BITS;

      /* If X has bits 32..63 clear and bit 31 set, then go ahead and mark
	 it as desiring a "uw" operation for the shift.  That way we can have
	 LUI+ADDI to generate the constant, then shift it into position
	 clearing out the undesirable bits.  */
      if (!LUI_OPERAND (x)
	  && TARGET_64BIT
	  && TARGET_ZBA
	  && clz_hwi (x) == 32)
	{
	  x = sext_hwi (x, 32);
	  use_uw = true;
	}

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = use_uw;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else if (upper_trailing_ones < 32 && lower_leading_ones < 32
	       && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = ((value << (32 - upper_trailing_ones))
			    | ((unsigned HOST_WIDE_INT) value
			       >> (32 + upper_trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 32 - upper_trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}

      /* If LUI/ADDI are going to set bits 32..63 and we need a small
	 number of them cleared, we might be able to use bclri profitably.

	 Note we may allow clearing of bit 31 using bclri.  There's a class
	 of constants with that bit clear where this helps.  */
      else if (TARGET_64BIT
	       && TARGET_ZBS
	       && (32 - popcount_hwi (value
				      & HOST_WIDE_INT_C (0xffffffff80000000)))
		  + 1 < cost)
	{
	  /* Turn on all those upper bits and synthesize the result.  */
	  HOST_WIDE_INT nval = value | HOST_WIDE_INT_C (0xffffffff80000000);
	  alt_cost = riscv_build_integer_1 (alt_codes, nval, mode);

	  /* Now iterate over the bits we want to clear until the cost is
	     too high or we're done.  */
	  nval = value ^ HOST_WIDE_INT_C (-1);
	  nval &= HOST_WIDE_INT_C (~0x7fffffff);
	  while (nval && alt_cost < cost)
	    {
	      HOST_WIDE_INT bit = ctz_hwi (nval);
	      alt_codes[alt_cost].code = AND;
	      alt_codes[alt_cost].value = ~(1UL << bit);
	      alt_codes[alt_cost].use_uw = false;
	      alt_codes[alt_cost].save_temporary = false;
	      alt_cost++;
	      nval &= ~(1UL << bit);
	    }

	  if (nval == 0 && alt_cost <= cost)
	    {
	      memcpy (codes, alt_codes, sizeof (alt_codes));
	      cost = alt_cost;
	    }
	}
    }
  if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
    {
      if ((value % 9) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 9, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 9;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 5) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 5, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 5;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 3) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 3, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 3;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
  /* We might be able to generate a constant close to our target
     then a final ADDI to get the desired constant.  */
  if (cost > 2
      && (value & 0xfff) != 0
      && (value & 0x1800) == 0x1000)
    {
      HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
      alt_cost = 1 + riscv_build_integer_1 (alt_codes,
					    value - adjustment, mode);

      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = PLUS;
	  alt_codes[alt_cost - 1].value = adjustment;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
  /* Final cases, particularly focused on bseti.  */
  if (cost > 2 && TARGET_ZBS)
    {
      int i = 0;

      /* First handle any bits set by LUI.  Be careful of the
	 SImode sign bit!  */
      if (value & 0x7ffff000)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = value & 0x7ffff000;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ffff000;
	  i++;
	}

      /* Next, any bits we can handle with addi.  */
      if (value & 0x7ff)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
	  alt_codes[i].value = value & 0x7ff;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ff;
	  i++;
	}

      /* And any residuals with bseti.  */
      while (i < cost && value)
	{
	  HOST_WIDE_INT bit = ctz_hwi (value);
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = 1UL << bit;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~(1ULL << bit);
	  i++;
	}

      /* If LUI+ADDI+BSETI resulted in a more efficient
	 sequence, then use it.  */
      if (value == 0 && i < cost)
	{
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = i;
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
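/* Editorial note, not in the original source: as an illustration of the
   ZBB rotate case above, a mostly-ones constant such as
   0xfffffffffffffeff (~value == 0x100, so trailing_ones == 8 and only one
   zero bit) is synthesized as

	li   a0, -2		# 0xfffffffffffffffe
	rori a0, a0, 56		# rotate right by 64 - 8 = 56

   i.e. two instructions instead of a longer LUI/ADDI/SLLI chain.  */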
/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* See if we can generate the inverted constant, then use
     NOT to get the desired constant.

     This can't be in riscv_build_integer_1 as it'll mutually
     recurse with another case in there.  And it has to recurse
     into riscv_build_integer so we get the trailing 0s case
     above.  */
  if (cost > 2 && value < 0)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost;

      HOST_WIDE_INT nval = ~value;
      alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = XOR;
	  alt_codes[alt_cost - 1].value = -1;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  /* With pack we can generate a 64 bit constant with the same high
     and low 32 bits trivially.  */
  if (cost > 3 && TARGET_64BIT && TARGET_ZBKB)
    {
      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
      if (hival == loval)
	{
	  cost = 1 + riscv_build_integer_1 (codes, sext_hwi (loval, 32), mode);
	  codes[cost - 1].code = CONCAT;
	  codes[cost - 1].value = 0;
	  codes[cost - 1].use_uw = false;
	  codes[cost - 1].save_temporary = false;
	}

      /* An arbitrary 64 bit constant can be synthesized in 5 instructions
	 using zbkb.  We may do better than that if the upper or lower half
	 can be synthesized with a single LUI, ADDI or BSET.  Regardless,
	 the basic steps are the same.  */
      if (cost > 3 && can_create_pseudo_p ())
	{
	  struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];
	  struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
	  int hi_cost, lo_cost;

	  /* Synthesize and get cost for each half.  */
	  lo_cost
	    = riscv_build_integer_1 (lo_codes, sext_hwi (loval, 32), mode);
	  hi_cost
	    = riscv_build_integer_1 (hi_codes, sext_hwi (hival, 32), mode);

	  /* If profitable, finish synthesis using zbkb.  */
	  if (cost > hi_cost + lo_cost + 1)
	    {
	      /* We need the low half independent of the high half.  So
		 mark it as creating a temporary we'll use later.  */
	      memcpy (codes, lo_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      codes[lo_cost - 1].save_temporary = true;

	      /* Now the high half synthesis.  */
	      memcpy (codes + lo_cost, hi_codes,
		      hi_cost * sizeof (struct riscv_integer_op));

	      /* Adjust the cost.  */
	      cost = hi_cost + lo_cost + 1;

	      /* And finally (ab)use VEC_MERGE to indicate we want to
		 merge the two parts together.  */
	      codes[cost - 1].code = VEC_MERGE;
	      codes[cost - 1].value = 0;
	      codes[cost - 1].use_uw = false;
	      codes[cost - 1].save_temporary = false;
	    }
	}
    }

  return cost;
}
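/* Editorial note, not in the original source: as an illustration of the
   Zbkb path above, a constant whose two 32-bit halves are equal, e.g.
   0x1234567812345678, can be built by synthesizing the low half and then
   packing it with itself:

	lui  a0, 0x12345
	addi a0, a0, 0x678
	pack a0, a0, a0		# concatenate the two low 32-bit halves

   which is what the CONCAT step stands for.  */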
/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}
/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}
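/* Editorial note, not in the original source: for instance, a 64-bit value
   such as 0x0000000500000005 has equal halves (loval == hival == 5), so the
   function above builds lo = 5 once, reuses it as hi = lo << 32, and
   returns (plus (ashift lo 32) lo).  */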
/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}
/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}
/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_TLSDESC: return 6; /* 4-instruction call + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}
/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling, ensuring that entries 29 and 30 of the HF table
     are the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};

/* Strings used to display the FLI floating-point values at the assembly
   level, consistent with the zfa extension of llvm:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};
/* Return the index into the FLI instruction table if rtx X is an immediate
   constant that can be moved using a single FLI instruction from the zfa
   extension.  Return -1 if not found.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P(x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* If the lower 32 bits are not all zero, the value cannot be in the
	 table.  */
      if (ival & (unsigned HOST_WIDE_INT)0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;

  /* Perform a binary search to find the target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;

  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m + 1;
      else
	r = m - 1;
    }

  return -1;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
   Return true if X cannot (or should not) be spilled to the
   constant pool.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
	return false;

      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}
/* Get the valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}
/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x)
			 + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}
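/* Editorial note, not in the original source: as an illustration, for a
   DImode access on a target with 4-byte words (rv32), an offset of 2040 is
   accepted because both 2040 and 2040 + 8 - 4 = 2044 fit in a signed
   12-bit immediate, while 2046 is rejected since 2046 + 4 = 2050 exceeds
   2047.  */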
/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
		      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting a
     decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
	 so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
	return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
	       ? DECL_ALIGN (SYMBOL_REF_DECL (x))
	       : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
	      ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      : 2*BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}
/* Return true if mode is an RVV enabled mode.  For example, 'RVVMF2SI'
   is disabled while 'RVVM1SI' is enabled when MIN_VLEN == 32.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}
/* Return true if mode is an RVV enabled tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is an RVV enabled vls mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is any of the following modes:
   1. RVV vector mode.
   2. RVV tuple mode.
   3. RVV vls mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
	 || riscv_v_ext_vls_mode_p (mode);
}
static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
				      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size < scalar_unit_size)
    return 1;

  /* Ensure the vls mode is exactly divisible by scalar_unit_size.  */
  gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

  return vls_unit_size / scalar_unit_size;
}
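/* Editorial note, not in the original source: for instance, a 16-byte VLS
   mode passed via 8-byte GPRs needs 16 / 8 = 2 registers, while any VLS
   mode smaller than one GPR still occupies a single register.  */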
static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  switch (vls_mode_size)
    {
    case 16:
      return TImode;
    case 8:
      return DImode;
    case 4:
      return SImode;
    case 2:
      return HImode;
    case 1:
      return QImode;
    default:
      gcc_unreachable ();
    }
}
1827 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
1828 NUNITS size for corresponding machine_mode. */
1830 poly_int64
1831 riscv_v_adjust_nunits (machine_mode mode, int scale)
1833 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
1834 if (riscv_v_ext_mode_p (mode))
1836 if (TARGET_MIN_VLEN == 32)
1837 scale = scale / 2;
1838 return riscv_vector_chunks * scale;
1840 return scale;
1843 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
1844 NUNITS size for corresponding machine_mode. */
1846 poly_int64
1847 riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
1849 if (riscv_v_ext_mode_p (mode))
1851 scalar_mode smode = GET_MODE_INNER (mode);
1852 int size = GET_MODE_SIZE (smode);
1853 int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
1854 if (fractional_p)
1855 return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
1856 else
1857 return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
1859 /* For disabled RVV modes, use 1 as the NUNITS by default. */
1860 return 1;
1863 /* Called from ADJUST_BYTESIZE in riscv-modes.def. Return the correct
1864 BYTE size for the corresponding machine_mode. */
1866 poly_int64
1867 riscv_v_adjust_bytesize (machine_mode mode, int scale)
1869 if (riscv_v_ext_vector_mode_p (mode))
1871 if (TARGET_XTHEADVECTOR)
1872 return BYTES_PER_RISCV_VECTOR;
1874 poly_int64 nunits = GET_MODE_NUNITS (mode);
1876 if (nunits.coeffs[0] > 8)
1877 return exact_div (nunits, 8);
1878 else if (nunits.is_constant ())
1879 return 1;
1880 else
1881 return poly_int64 (1, 1);
1884 return scale;
1887 /* Called from ADJUST_PRECISION in riscv-modes.def. Return the correct
1888 PRECISION size for the corresponding machine_mode. */
1890 poly_int64
1891 riscv_v_adjust_precision (machine_mode mode, int scale)
1893 return riscv_v_adjust_nunits (mode, scale);
1896 /* Return true if X is a valid address for machine mode MODE. If it is,
1897 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
1898 effect. */
1900 static bool
1901 riscv_classify_address (struct riscv_address_info *info, rtx x,
1902 machine_mode mode, bool strict_p)
1904 if (th_classify_address (info, x, mode, strict_p))
1905 return true;
1907 switch (GET_CODE (x))
1909 case REG:
1910 case SUBREG:
1911 info->type = ADDRESS_REG;
1912 info->reg = x;
1913 info->offset = const0_rtx;
1914 return riscv_valid_base_register_p (info->reg, mode, strict_p);
1916 case PLUS:
1917 /* RVV load/store disallow any offset. */
1918 if (riscv_v_ext_mode_p (mode))
1919 return false;
1921 info->type = ADDRESS_REG;
1922 info->reg = XEXP (x, 0);
1923 info->offset = XEXP (x, 1);
1924 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
1925 && riscv_valid_offset_p (info->offset, mode));
1927 case LO_SUM:
1928 /* RVV load/store disallow LO_SUM. */
1929 if (riscv_v_ext_mode_p (mode))
1930 return false;
1932 info->type = ADDRESS_LO_SUM;
1933 info->reg = XEXP (x, 0);
1934 info->offset = XEXP (x, 1);
1935 /* We have to trust the creator of the LO_SUM to do something vaguely
1936 sane. Target-independent code that creates a LO_SUM should also
1937 create and verify the matching HIGH. Target-independent code that
1938 adds an offset to a LO_SUM must prove that the offset will not
1939 induce a carry. Failure to do either of these things would be
1940 a bug, and we are not required to check for it here. The RISC-V
1941 backend itself should only create LO_SUMs for valid symbolic
1942 constants, with the high part being either a HIGH or a copy
1943 of _gp. */
1944 info->symbol_type
1945 = riscv_classify_symbolic_expression (info->offset);
1946 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
1947 && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));
1949 case CONST_INT:
1950 /* We only allow const0_rtx for RVV load/store. For example:
1951 +----------------------------------------------------------+
1952 | li a5,0 |
1953 | vsetvli zero,a1,e32,m1,ta,ma |
1954 | vle32.v v24,0(a5) <- propagate the const 0 to a5 here. |
1955 | vs1r.v v24,0(a0) |
1956 +----------------------------------------------------------+
1957 It can be folded to:
1958 +----------------------------------------------------------+
1959 | vsetvli zero,a1,e32,m1,ta,ma |
1960 | vle32.v v24,0(zero) |
1961 | vs1r.v v24,0(a0) |
1962 +----------------------------------------------------------+
1963 This behavior will benefit the underlying RVV auto vectorization. */
1964 if (riscv_v_ext_mode_p (mode))
1965 return x == const0_rtx;
1967 /* Small-integer addresses don't occur very often, but they
1968 are legitimate if x0 is a valid base register. */
1969 info->type = ADDRESS_CONST_INT;
1970 return SMALL_OPERAND (INTVAL (x));
1972 default:
1973 return false;
1977 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1979 static bool
1980 riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
1981 code_helper = ERROR_MARK)
1983 /* Disallow RVV modes base address.
1984 E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0). */
1985 if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
1986 return false;
1987 struct riscv_address_info addr;
1989 return riscv_classify_address (&addr, x, mode, strict_p);
1992 /* Return true if hard reg REGNO can be used in compressed instructions. */
1994 static bool
1995 riscv_compressed_reg_p (int regno)
1997 /* x8-x15/f8-f15 are compressible registers. */
1998 return ((TARGET_RVC || TARGET_ZCA)
1999 && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
2000 || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
2003 /* Return true if X is an unsigned 5-bit immediate scaled by 4. */
2005 static bool
2006 riscv_compressed_lw_offset_p (rtx x)
2008 return (CONST_INT_P (x)
2009 && (INTVAL (x) & 3) == 0
2010 && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
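/* Worked example (illustrative, assuming CSW_MAX_OFFSET == 124): c.lw/c.sw
   encode an unsigned offset in multiples of 4, so offsets 0, 4, ..., 124
   pass both checks above; 126 fails the low-two-bits check and 128 is out
   of range, so either would need a full-size lw/sw.  */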
2013 /* Return true if a load/store from/to address X can be compressed. */
2015 static bool
2016 riscv_compressed_lw_address_p (rtx x)
2018 struct riscv_address_info addr;
2019 bool result = riscv_classify_address (&addr, x, GET_MODE (x),
2020 reload_completed);
2022 /* Return false if address is not compressed_reg + small_offset. */
2023 if (!result
2024 || addr.type != ADDRESS_REG
2025 /* Before reload, assume all registers are OK. */
2026 || (reload_completed
2027 && !riscv_compressed_reg_p (REGNO (addr.reg))
2028 && addr.reg != stack_pointer_rtx)
2029 || !riscv_compressed_lw_offset_p (addr.offset))
2030 return false;
2032 return result;
2035 /* Return the number of instructions needed to load or store a value
2036 of mode MODE at address X. Return 0 if X isn't valid for MODE.
2037 Assume that multiword moves may need to be split into word moves
2038 if MIGHT_SPLIT_P, otherwise assume that a single load or store is
2039 enough. */
2042 riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
2044 struct riscv_address_info addr = {};
2045 int n = 1;
2047 if (!riscv_classify_address (&addr, x, mode, false))
2049 /* This could be a pattern from the pic.md file, in which case we want
2050 this address to always have a cost of 3 to make it as expensive as the
2051 most expensive symbol. This prevents constant propagation from
2052 preferring symbols over register plus offset. */
2053 return 3;
2056 /* BLKmode is used for single unaligned loads and stores and should
2057 not count as a multiword mode. */
2058 if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
2059 n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2061 if (addr.type == ADDRESS_LO_SUM)
2062 n += riscv_symbol_insns (addr.symbol_type) - 1;
2064 return n;
2067 /* Return the number of instructions needed to load constant X.
2068 Return 0 if X isn't a valid constant. */
2071 riscv_const_insns (rtx x)
2073 enum riscv_symbol_type symbol_type;
2074 rtx offset;
2076 switch (GET_CODE (x))
2078 case HIGH:
2079 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
2080 || !riscv_split_symbol_type (symbol_type))
2081 return 0;
2083 /* This is simply an LUI. */
2084 return 1;
2086 case CONST_INT:
2088 int cost = riscv_integer_cost (INTVAL (x));
2089 /* Force complicated constants to memory. */
2090 return cost < 4 ? cost : 0;
2093 case CONST_DOUBLE:
2094 /* See if we can use FMV directly. */
2095 if (satisfies_constraint_zfli (x))
2096 return 1;
2098 /* We can use x0 to load floating-point zero. */
2099 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2100 case CONST_VECTOR:
2102 /* TODO: This is not accurate; we will need to
2103 adapt the COST of CONST_VECTOR in the future
2104 for the following cases:
2106 - 1. const duplicate vector with element value
2107 in the range [-16, 15].
2108 - 2. const duplicate vector with element value
2109 outside the range [-16, 15].
2110 - 3. const series vector.
2111 ...etc. */
2112 if (riscv_v_ext_mode_p (GET_MODE (x)))
2114 /* const series vector. */
2115 rtx base, step;
2116 if (const_vec_series_p (x, &base, &step))
2118 /* This is not accurate; we will need to adapt the COST
2119 according to BASE and STEP. */
2120 return 1;
2123 rtx elt;
2124 if (const_vec_duplicate_p (x, &elt))
2126 /* We don't allow CONST_VECTOR for a DI vector on an RV32
2127 system, since the ELT constant value cannot be held
2128 within a single register; this keeps reload from turning
2129 a DI register vec_duplicate into vmv.v.x. */
2130 scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
2131 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
2132 && !immediate_operand (elt, Pmode))
2133 return 0;
2134 /* Constants from -16 to 15 can be loaded with vmv.v.i.
2135 The Wc0, Wc1 constraints are already covered by the
2136 vi constraint so we do not need to check them here
2137 separately. */
2138 if (satisfies_constraint_vi (x))
2139 return 1;
2141 /* Any int/FP constants can always be broadcast from a
2142 scalar register. Loading of a floating-point
2143 constant incurs a literal-pool access. Allow this in
2144 order to increase vectorization possibilities. */
2145 int n = riscv_const_insns (elt);
2146 if (CONST_DOUBLE_P (elt))
2147 return 1 + 4; /* vfmv.v.f + memory access. */
2148 else
2150 /* We need as many insns as it takes to load the constant
2151 into a GPR and one vmv.v.x. */
2152 if (n != 0)
2153 return 1 + n;
2154 else
2155 return 1 + 4; /* vmv.v.x + memory access. */
2160 /* TODO: We may support more const vectors in the future. */
2161 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2164 case CONST:
2165 /* See if we can refer to X directly. */
2166 if (riscv_symbolic_constant_p (x, &symbol_type))
2167 return riscv_symbol_insns (symbol_type);
2169 /* Otherwise try splitting the constant into a base and offset. */
2170 split_const (x, &x, &offset);
2171 if (offset != 0)
2173 int n = riscv_const_insns (x);
2174 if (n != 0)
2175 return n + riscv_integer_cost (INTVAL (offset));
2177 return 0;
2179 case SYMBOL_REF:
2180 case LABEL_REF:
2181 return riscv_symbol_insns (riscv_classify_symbol (x));
2183 /* TODO: In RVV, we get a CONST_POLY_INT by using a csrr VLENB
2184 instruction and several scalar shift or mult instructions, so the
2185 exact cost is so far unknown. We set it to 4 temporarily. */
2186 case CONST_POLY_INT:
2187 return 4;
2189 default:
2190 return 0;
2194 /* X is a doubleword constant that can be handled by splitting it into
2195 two words and loading each word separately. Return the number of
2196 instructions required to do this. */
2199 riscv_split_const_insns (rtx x)
2201 unsigned int low, high;
2203 low = riscv_const_insns (riscv_subword (x, false));
2204 high = riscv_const_insns (riscv_subword (x, true));
2205 gcc_assert (low > 0 && high > 0);
2206 return low + high;
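/* Illustrative example: on RV32, the DImode constant 0x100000001 is
   split into the SImode words 0x1 (high) and 0x1 (low), each loadable
   with a single li, so riscv_split_const_insns returns 2.  */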
2209 /* Return the number of instructions needed to implement INSN,
2210 given that it loads from or stores to MEM. */
2213 riscv_load_store_insns (rtx mem, rtx_insn *insn)
2215 machine_mode mode;
2216 bool might_split_p;
2217 rtx set;
2219 gcc_assert (MEM_P (mem));
2220 mode = GET_MODE (mem);
2222 /* Try to prove that INSN does not need to be split. */
2223 might_split_p = true;
2224 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
2225 might_split_p = false;
2226 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
2228 set = single_set (insn);
2229 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
2230 might_split_p = false;
2233 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
2236 /* Emit a move from SRC to DEST. Assume that the move expanders can
2237 handle all moves if !can_create_pseudo_p (). The distinction is
2238 important because, unlike emit_move_insn, the move expanders know
2239 how to force Pmode objects into the constant pool even when the
2240 constant pool address is not itself legitimate. */
2243 riscv_emit_move (rtx dest, rtx src)
2245 return (can_create_pseudo_p ()
2246 ? emit_move_insn (dest, src)
2247 : emit_move_insn_1 (dest, src));
2250 /* Emit an instruction of the form (set TARGET SRC). */
2252 static rtx
2253 riscv_emit_set (rtx target, rtx src)
2255 emit_insn (gen_rtx_SET (target, src));
2256 return target;
2259 /* Emit an instruction of the form (set DEST (CODE X)). */
2262 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
2264 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
2267 /* Emit an instruction of the form (set DEST (CODE X Y)). */
2270 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
2272 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
2275 /* Compute (CODE X Y) and store the result in a new register
2276 of mode MODE. Return that new register. */
2278 static rtx
2279 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
2281 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
2284 static rtx
2285 riscv_swap_instruction (rtx inst)
2287 gcc_assert (GET_MODE (inst) == SImode);
2288 if (BYTES_BIG_ENDIAN)
2289 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
2290 return inst;
2293 /* Copy VALUE to a register and return that register. If new pseudos
2294 are allowed, copy it into a new register, otherwise use DEST. */
2296 static rtx
2297 riscv_force_temporary (rtx dest, rtx value)
2299 if (can_create_pseudo_p ())
2300 return force_reg (Pmode, value);
2301 else
2303 riscv_emit_move (dest, value);
2304 return dest;
2308 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2309 then add CONST_INT OFFSET to the result. */
2311 static rtx
2312 riscv_unspec_address_offset (rtx base, rtx offset,
2313 enum riscv_symbol_type symbol_type)
2315 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2316 UNSPEC_ADDRESS_FIRST + symbol_type);
2317 if (offset != const0_rtx)
2318 base = gen_rtx_PLUS (Pmode, base, offset);
2319 return gen_rtx_CONST (Pmode, base);
2322 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2323 type SYMBOL_TYPE. */
2326 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2328 rtx base, offset;
2330 split_const (address, &base, &offset);
2331 return riscv_unspec_address_offset (base, offset, symbol_type);
2334 /* If OP is an UNSPEC address, return the address to which it refers,
2335 otherwise return OP itself. */
2337 static rtx
2338 riscv_strip_unspec_address (rtx op)
2340 rtx base, offset;
2342 split_const (op, &base, &offset);
2343 if (UNSPEC_ADDRESS_P (base))
2344 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2345 return op;
2348 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
2349 high part to BASE and return the result. Just return BASE otherwise.
2350 TEMP is as for riscv_force_temporary.
2352 The returned expression can be used as the first operand to a LO_SUM. */
2354 static rtx
2355 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2357 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2358 return riscv_force_temporary (temp, addr);
2361 /* Load an entry from the GOT for a TLS GD access. */
2363 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2365 if (Pmode == DImode)
2366 return gen_got_load_tls_gddi (dest, sym);
2367 else
2368 return gen_got_load_tls_gdsi (dest, sym);
2371 /* Load an entry from the GOT for a TLS IE access. */
2373 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2375 if (Pmode == DImode)
2376 return gen_got_load_tls_iedi (dest, sym);
2377 else
2378 return gen_got_load_tls_iesi (dest, sym);
2381 /* Add in the thread pointer for a TLS LE access. */
2383 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2385 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2386 if (Pmode == DImode)
2387 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2388 else
2389 return gen_tls_add_tp_lesi (dest, base, tp, sym);
2392 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2393 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2394 constant in that context and can be split into high and low parts.
2395 If so, and if LOW_OUT is nonnull, emit the high part and store the
2396 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2398 TEMP is as for riscv_force_temporary and is used to load the high
2399 part into a register.
2401 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2402 a legitimate SET_SRC for an .md pattern, otherwise the low part
2403 is guaranteed to be a legitimate address for mode MODE. */
2405 bool
2406 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2408 enum riscv_symbol_type symbol_type;
2410 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2411 || !riscv_symbolic_constant_p (addr, &symbol_type)
2412 || riscv_symbol_insns (symbol_type) == 0
2413 || !riscv_split_symbol_type (symbol_type))
2414 return false;
2416 if (low_out)
2417 switch (symbol_type)
2419 case SYMBOL_FORCE_TO_MEM:
2420 return false;
2422 case SYMBOL_ABSOLUTE:
2424 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2425 high = riscv_force_temporary (temp, high);
2426 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2428 break;
2430 case SYMBOL_PCREL:
2432 static unsigned seqno;
2433 char buf[32];
2434 rtx label;
2436 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2437 gcc_assert ((size_t) bytes < sizeof (buf));
2439 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2440 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2441 /* ??? Ugly hack to make weak symbols work. May need to change the
2442 RTL for the auipc and/or low patterns to get a better fix for
2443 this. */
2444 if (! nonzero_address_p (addr))
2445 SYMBOL_REF_WEAK (label) = 1;
2447 if (temp == NULL)
2448 temp = gen_reg_rtx (Pmode);
2450 if (Pmode == DImode)
2451 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2452 else
2453 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2455 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2457 seqno++;
2459 break;
2461 default:
2462 gcc_unreachable ();
2465 return true;
2468 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2469 riscv_force_temporary; it is only needed when OFFSET is not a
2470 SMALL_OPERAND. */
2472 static rtx
2473 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2475 if (!SMALL_OPERAND (offset))
2477 rtx high;
2479 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2480 The addition inside the macro CONST_HIGH_PART may cause an
2481 overflow, so we need to force a sign-extension check. */
2482 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2483 offset = CONST_LOW_PART (offset);
2484 high = riscv_force_temporary (temp, high);
2485 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2487 return plus_constant (Pmode, reg, offset);
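/* Worked example (illustrative): for OFFSET == 6144 (0x1800), which is
   not a SMALL_OPERAND, CONST_LOW_PART yields the sign-extended low 12
   bits, -2048, and CONST_HIGH_PART yields 8192, so we materialize
   (reg + 8192) and the result is that register plus -2048, i.e.
   reg + 6144 with both parts in range.  */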
2490 /* The __tls_get_addr symbol. */
2491 static GTY(()) rtx riscv_tls_symbol;
2493 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2494 the TLS symbol we are referencing, which is accessed using the
2495 global dynamic model. RESULT is an RTX for the
2496 return value location. */
2498 static rtx_insn *
2499 riscv_call_tls_get_addr (rtx sym, rtx result)
2501 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2502 rtx_insn *insn;
2504 if (!riscv_tls_symbol)
2505 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2506 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2508 start_sequence ();
2510 emit_insn (riscv_got_load_tls_gd (a0, sym));
2511 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2512 gen_int_mode (RISCV_CC_BASE, SImode)));
2513 RTL_CONST_CALL_P (insn) = 1;
2514 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2515 insn = get_insns ();
2517 end_sequence ();
2519 return insn;
2522 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2523 its address. The return value will be both a valid address and a valid
2524 SET_SRC (either a REG or a LO_SUM). */
2526 static rtx
2527 riscv_legitimize_tls_address (rtx loc)
2529 rtx dest, tp, tmp, a0;
2530 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2532 #if 0
2533 /* TLS copy relocs are now deprecated and should not be used. */
2534 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2535 if (!flag_pic)
2536 model = TLS_MODEL_LOCAL_EXEC;
2537 #endif
2539 switch (model)
2541 case TLS_MODEL_LOCAL_DYNAMIC:
2542 /* Rely on section anchors for the optimization that LDM TLS
2543 provides. The anchor's address is loaded with GD TLS. */
2544 case TLS_MODEL_GLOBAL_DYNAMIC:
2545 if (TARGET_TLSDESC)
2547 static unsigned seqno;
2548 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2549 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2550 dest = gen_reg_rtx (Pmode);
2552 emit_insn (gen_tlsdesc (Pmode, loc, GEN_INT (seqno)));
2553 emit_insn (gen_add3_insn (dest, a0, tp));
2554 seqno++;
2556 else
2558 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2559 dest = gen_reg_rtx (Pmode);
2560 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp,
2561 loc);
2563 break;
2565 case TLS_MODEL_INITIAL_EXEC:
2566 /* la.tls.ie; tp-relative add */
2567 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2568 tmp = gen_reg_rtx (Pmode);
2569 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2570 dest = gen_reg_rtx (Pmode);
2571 emit_insn (gen_add3_insn (dest, tmp, tp));
2572 break;
2574 case TLS_MODEL_LOCAL_EXEC:
2575 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2576 dest = gen_reg_rtx (Pmode);
2577 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2578 dest = gen_rtx_LO_SUM (Pmode, dest,
2579 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2580 break;
2582 default:
2583 gcc_unreachable ();
2585 return dest;
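/* Sketch of the expected instruction sequences (illustrative, using the
   standard RISC-V TLS relocations and assembler pseudoinstructions):
     LE:  lui   a5, %tprel_hi(x)
          add   a5, a5, tp, %tprel_add(x)
          addi  a0, a5, %tprel_lo(x)
     IE:  la.tls.ie a5, x        # auipc + load of the GOT entry
          add   a0, a5, tp
     GD:  la.tls.gd a0, x
          call  __tls_get_addr@plt  */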
2588 /* If X is not a valid address for mode MODE, force it into a register. */
2590 static rtx
2591 riscv_force_address (rtx x, machine_mode mode)
2593 if (!riscv_legitimate_address_p (mode, x, false))
2595 if (can_create_pseudo_p ())
2596 return force_reg (Pmode, x);
2597 else
2599 /* It's only safe for the thunk function.
2600 Use ra as the temp register. */
2601 gcc_assert (riscv_in_thunk_func);
2602 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2603 riscv_emit_move (reg, x);
2604 return reg;
2608 return x;
2611 /* Modify base + offset so that offset fits within a compressed load/store insn
2612 and the excess is added to base. */
2614 static rtx
2615 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2617 rtx addr, high;
2618 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2619 into HIGH. */
2620 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2621 offset &= CSW_MAX_OFFSET;
2622 if (!SMALL_OPERAND (INTVAL (high)))
2623 high = force_reg (Pmode, high);
2624 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2625 addr = plus_constant (Pmode, base, offset);
2626 return addr;
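/* Worked example (illustrative, assuming CSW_MAX_OFFSET == 124): for
   OFFSET == 200, HIGH becomes 200 & ~124 == 128 and the residual
   offset is 200 & 124 == 72, so the access becomes (base + 128) + 72,
   where 72 fits in a compressed load/store offset.  */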
2629 /* Helper for riscv_legitimize_address. Given X, return true if it
2630 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2632 These respectively represent canonical shift-add rtxs or scaled
2633 memory addresses. */
2634 static bool
2635 mem_shadd_or_shadd_rtx_p (rtx x)
2637 return ((GET_CODE (x) == ASHIFT
2638 || GET_CODE (x) == MULT)
2639 && register_operand (XEXP (x, 0), GET_MODE (x))
2640 && CONST_INT_P (XEXP (x, 1))
2641 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2642 || (GET_CODE (x) == MULT
2643 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
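/* For example, both (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) match here: each scales the register
   by 4, which maps onto a sh2add-style address computation.  */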
2646 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2647 be legitimized in a way that the generic machinery might not expect,
2648 return a new address, otherwise return NULL. MODE is the mode of
2649 the memory being accessed. */
2651 static rtx
2652 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2653 machine_mode mode)
2655 rtx addr;
2657 if (riscv_tls_symbol_p (x))
2658 return riscv_legitimize_tls_address (x);
2660 /* See if the address can split into a high part and a LO_SUM. */
2661 if (riscv_split_symbol (NULL, x, mode, &addr))
2662 return riscv_force_address (addr, mode);
2664 /* Handle BASE + OFFSET. */
2665 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2666 && INTVAL (XEXP (x, 1)) != 0)
2668 rtx base = XEXP (x, 0);
2669 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2671 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2672 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2673 && SMALL_OPERAND (offset))
2675 rtx index = XEXP (base, 0);
2676 rtx fp = XEXP (base, 1);
2677 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2680 /* If we were given a MULT, we must fix the constant
2681 as we're going to create the ASHIFT form. */
2682 int shift_val = INTVAL (XEXP (index, 1));
2683 if (GET_CODE (index) == MULT)
2684 shift_val = exact_log2 (shift_val);
2686 rtx reg1 = gen_reg_rtx (Pmode);
2687 rtx reg2 = gen_reg_rtx (Pmode);
2688 rtx reg3 = gen_reg_rtx (Pmode);
2689 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2690 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2691 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2693 return reg3;
2697 if (!riscv_valid_base_register_p (base, mode, false))
2698 base = copy_to_mode_reg (Pmode, base);
2699 if (optimize_function_for_size_p (cfun)
2700 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2701 && mode == SImode)
2702 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2703 possible compressed load/store. */
2704 addr = riscv_shorten_lw_offset (base, offset);
2705 else
2706 addr = riscv_add_offset (NULL, base, offset);
2707 return riscv_force_address (addr, mode);
2710 return x;
2713 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2714 is the original src mode before promotion. */
2716 void
2717 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2718 machine_mode orig_mode)
2720 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2721 machine_mode mode;
2722 int i, num_ops;
2723 rtx x = NULL_RTX;
2725 mode = GET_MODE (dest);
2726 /* We use the original mode for the riscv_build_integer call, because HImode
2727 values are given special treatment. */
2728 num_ops = riscv_build_integer (codes, value, orig_mode);
2730 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2731 && num_ops >= riscv_split_integer_cost (value))
2732 x = riscv_split_integer (value, mode);
2733 else
2735 rtx old_value = NULL_RTX;
2736 for (i = 0; i < num_ops; i++)
2738 if (i != 0 && !can_create_pseudo_p ())
2739 x = riscv_emit_set (temp, x);
2740 else if (i != 0)
2741 x = force_reg (mode, x);
2742 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2743 if (codes[i].code == UNKNOWN)
2745 /* UNKNOWN means load the constant value into X. */
2746 x = GEN_INT (codes[i].value);
2748 else if (codes[i].use_uw)
2750 /* If the sequence requires using a "uw" form of an insn, we're
2751 going to have to construct the RTL ourselves and put it in
2752 a register to keep force_reg/force_operand from mucking
2753 things up. */
2754 gcc_assert (TARGET_64BIT || TARGET_ZBA);
2755 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2757 /* Create the proper mask for the slli.uw instruction. */
2758 unsigned HOST_WIDE_INT value = 0xffffffff;
2759 value <<= codes[i].value;
2761 /* Right now the only "uw" form we use is slli, we may add more
2762 in the future. */
2763 x = gen_rtx_fmt_ee (codes[i].code, mode,
2764 x, GEN_INT (codes[i].value));
2765 x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
2766 x = riscv_emit_set (t, x);
2768 else if (codes[i].code == FMA)
2770 HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
2771 rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
2772 x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
2773 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2774 x = riscv_emit_set (t, x);
2776 else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
2778 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2779 rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
2780 gcc_assert (t2);
2781 t2 = gen_lowpart (SImode, t2);
2782 emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
2783 x = t;
2785 else
2786 x = gen_rtx_fmt_ee (codes[i].code, mode,
2787 x, GEN_INT (codes[i].value));
2789 /* If this entry in the code table indicates we should save away
2790 the temporary holding the current value of X, then do so. */
2791 if (codes[i].save_temporary)
2793 gcc_assert (old_value == NULL_RTX);
2794 x = force_reg (mode, x);
2795 old_value = x;
2800 riscv_emit_set (dest, x);
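/* Illustrative example: moving 0x12345678 typically builds the value as
   lui (0x12345 << 12) followed by addi 0x678; the exact sequence is
   whatever riscv_build_integer selected above.  */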
2803 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2804 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2805 move_operand. */
2807 static void
2808 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2810 rtx base, offset;
2812 /* Split moves of big integers into smaller pieces. */
2813 if (splittable_const_int_operand (src, mode))
2815 riscv_move_integer (dest, dest, INTVAL (src), mode);
2816 return;
2819 if (satisfies_constraint_zfli (src))
2821 riscv_emit_set (dest, src);
2822 return;
2825 /* Split moves of symbolic constants into high/low pairs. */
2826 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2828 riscv_emit_set (dest, src);
2829 return;
2832 /* Generate the appropriate access sequences for TLS symbols. */
2833 if (riscv_tls_symbol_p (src))
2835 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2836 return;
2839 /* If we have (const (plus symbol offset)), and that expression cannot
2840 be forced into memory, load the symbol first and add in the offset. Also
2841 prefer to do this even if the constant _can_ be forced into memory, as it
2842 usually produces better code. */
2843 split_const (src, &base, &offset);
2844 if (offset != const0_rtx
2845 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2847 base = riscv_force_temporary (dest, base);
2848 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2849 return;
2852 /* Handle the format below.
2853 (const:DI
2854 (plus:DI
2855 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2856 (const_poly_int:DI [16, 16]) // <- op_1
2859 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2860 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2862 rtx dest_tmp = gen_reg_rtx (mode);
2863 rtx tmp = gen_reg_rtx (mode);
2865 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2866 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2868 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2869 return;
2872 src = force_const_mem (mode, src);
2874 /* When using explicit relocs, constant pool references are sometimes
2875 not legitimate addresses. */
2876 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2877 riscv_emit_move (dest, src);
2880 /* Report when we try to do something that requires vector when vector is
2881 disabled. This is an error of last resort and isn't very high-quality. It
2882 usually involves attempts to measure the vector length in some way. */
2884 static void
2885 riscv_report_v_required (void)
2887 static bool reported_p = false;
2889 /* Avoid reporting a slew of messages for a single oversight. */
2890 if (reported_p)
2891 return;
2893 error ("this operation requires the RVV ISA extension");
2894 inform (input_location, "you can enable RVV using the command-line"
2895 " option %<-march%>, or by using the %<target%>"
2896 " attribute or pragma");
2897 reported_p = true;
2900 /* Helper function to emit an operation for rtx_code CODE. */
2901 static void
2902 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2903 rtx op2)
2905 if (can_create_pseudo_p ())
2907 rtx result;
2908 if (GET_RTX_CLASS (code) == RTX_UNARY)
2909 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2910 else
2911 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2912 OPTAB_DIRECT);
2913 riscv_emit_move (op0, result);
2915 else
2917 rtx pat;
2918 /* The following implementation is for the prologue and epilogue.
2919 Because the prologue and epilogue cannot use pseudo registers,
2920 we can't use expand_simple_binop or expand_simple_unop. */
2921 if (GET_RTX_CLASS (code) == RTX_UNARY)
2922 pat = gen_rtx_fmt_e (code, mode, op1);
2923 else
2924 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2925 emit_insn (gen_rtx_SET (op0, pat));
2929 /* Expand a mult operation with a constant integer; the multiplicand
2930 is also used as a temporary register. */
2932 static void
2933 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2934 HOST_WIDE_INT multiplier)
2936 if (multiplier == 0)
2938 riscv_emit_move (dest, GEN_INT (0));
2939 return;
2942 bool neg_p = multiplier < 0;
2943 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier);
2945 if (multiplier_abs == 1)
2947 if (neg_p)
2948 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2949 else
2950 riscv_emit_move (dest, multiplicand);
2952 else
2954 if (pow2p_hwi (multiplier_abs))
2957 multiplicand = [BYTES_PER_RISCV_VECTOR].
2958 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2959 Sequence:
2960 csrr a5, vlenb
2961 slli a5, a5, 3
2962 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2963 Sequence:
2964 csrr a5, vlenb
2965 slli a5, a5, 3
2966 neg a5, a5
2968 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2969 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2970 if (neg_p)
2971 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2973 else if (pow2p_hwi (multiplier_abs + 1))
2976 multiplicand = [BYTES_PER_RISCV_VECTOR].
2977 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2978 Sequence:
2979 csrr a5, vlenb
2980 slli a4, a5, 3
2981 sub a5, a4, a5
2982 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2983 Sequence:
2984 csrr a5, vlenb
2985 slli a4, a5, 3
2986 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
2988 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2989 gen_int_mode (exact_log2 (multiplier_abs + 1),
2990 QImode));
2991 if (neg_p)
2992 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
2993 else
2994 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
2996 else if (pow2p_hwi (multiplier_abs - 1))
2999 multiplicand = [BYTES_PER_RISCV_VECTOR].
3000 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
3001 Sequence:
3002 csrr a5, vlenb
3003 slli a4, a5, 3
3004 add a5, a4, a5
3005 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
3006 Sequence:
3007 csrr a5, vlenb
3008 slli a4, a5, 3
3009 add a5, a4, a5
3010 neg a5, a5
3012 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3013 gen_int_mode (exact_log2 (multiplier_abs - 1),
3014 QImode));
3015 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
3016 if (neg_p)
3017 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
3019 else
3021 /* We use multiplication for the remaining cases. */
3022 gcc_assert (
3023 TARGET_MUL
3024 && "M-extension must be enabled to calculate the poly_int "
3025 "size/offset.");
3026 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
3027 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
3032 /* Analyze SRC and emit a const_poly_int move sequence. */
3034 void
3035 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
3037 poly_int64 value = rtx_to_poly_int64 (src);
3038 /* We use HOST_WIDE_INT instead of int since a 32-bit type is not enough
3039 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
3040 HOST_WIDE_INT offset = value.coeffs[0];
3041 HOST_WIDE_INT factor = value.coeffs[1];
3042 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
3043 int div_factor = 0;
3044 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
3045 For any (const_poly_int:MODE [m, n]), the calculation formula is as
3046 follows.
3047 constant = m - n.
3048 When minimum VLEN = 32, poly of VLENB = (4, 4).
3049 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
3050 When minimum VLEN > 32, poly of VLENB = (8, 8).
3051 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
3052 magn = (n, n) / base.
3053 (m, n) = base * magn + constant.
3054 This calculation doesn't need a div operation. */
3056 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
3057 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
3058 else
3060 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
3061 emit_move_insn (gen_lowpart (Pmode, tmp),
3062 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
3065 if (BYTES_PER_RISCV_VECTOR.is_constant ())
3067 gcc_assert (value.is_constant ());
3068 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
3069 return;
3071 else
3073 int max_power = exact_log2 (MAX_POLY_VARIANT);
3074 for (int i = 0; i <= max_power; i++)
3076 int possible_div_factor = 1 << i;
3077 if (factor % (vlenb / possible_div_factor) == 0)
3079 div_factor = possible_div_factor;
3080 break;
3083 gcc_assert (div_factor != 0);
3086 if (div_factor != 1)
3087 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
3088 gen_int_mode (exact_log2 (div_factor), QImode));
3090 riscv_expand_mult_with_const_int (mode, dest, tmp,
3091 factor / (vlenb / div_factor));
3092 HOST_WIDE_INT constant = offset - factor;
3094 if (constant == 0)
3095 return;
3096 else if (SMALL_OPERAND (constant))
3097 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
3098 else
3100 /* Handle a constant value that does not fit in a 12-bit immediate. */
3101 rtx high;
3103 /* Leave CONSTANT as a 12-bit value and put the excess in HIGH.
3104 The addition inside the macro CONST_HIGH_PART may cause an
3105 overflow, so we need to force a sign-extension check. */
3106 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
3107 constant = CONST_LOW_PART (constant);
3108 riscv_emit_move (tmp, high);
3109 riscv_expand_op (PLUS, mode, dest, tmp, dest);
3110 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
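/* Worked example (illustrative, MIN_VLEN > 32, so vlenb == (8, 8)): for
   (const_poly_int:DI [20, 16]) we have offset == 20 and factor == 16,
   div_factor == 1, so DEST = vlenb * (16 / 8) and the residual constant
   is 20 - 16 == 4:
     csrr  tmp, vlenb
     slli  dest, tmp, 1
     addi  dest, dest, 4  */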
3114 /* Adjust the scalable vector frame for the prologue and epilogue. */
3116 static void
3117 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
3119 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
3120 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
3121 rtx insn, dwarf, adjust_frame_rtx;
3123 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
3124 gen_int_mode (offset, Pmode));
3126 if (epilogue)
3127 insn = gen_add3_insn (target, target, adjust_size);
3128 else
3129 insn = gen_sub3_insn (target, target, adjust_size);
3131 insn = emit_insn (insn);
3133 RTX_FRAME_RELATED_P (insn) = 1;
3135 adjust_frame_rtx
3136 = gen_rtx_SET (target,
3137 plus_constant (Pmode, target, epilogue ? offset : -offset));
3139 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
3140 NULL_RTX);
3142 REG_NOTES (insn) = dwarf;
3145 /* Take care of the subreg const_poly_int move below:
3147 1. (set (subreg:DI (reg:TI 237) 8)
3148 (subreg:DI (const_poly_int:TI [4, 2]) 8))
3150 (set (subreg:DI (reg:TI 237) 8)
3151 (const_int 0)) */
3153 static bool
3154 riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src)
3156 gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)));
3157 gcc_assert (SUBREG_BYTE (src).is_constant ());
3159 int byte_offset = SUBREG_BYTE (src).to_constant ();
3160 rtx const_poly = SUBREG_REG (src);
3161 machine_mode subreg_mode = GET_MODE (const_poly);
3163 if (subreg_mode != TImode) /* Only TImode is needed for now. */
3164 return false;
3166 if (byte_offset == 8)
3168 /* The const_poly_int value cannot exceed int64, so just set zero here. */
3169 emit_move_insn (dest, CONST0_RTX (mode));
3170 return true;
3173 /* The transform below will be covered somewhere else,
3174 so ignore it here.
3175 (set (subreg:DI (reg:TI 237) 0)
3176 (subreg:DI (const_poly_int:TI [4, 2]) 0))
3178 (set (subreg:DI (reg:TI 237) 0)
3179 (const_poly_int:DI [4, 2])) */
3181 return false;
3184 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
3185 sequence that is valid. */
3187 bool
3188 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
3190 if (CONST_POLY_INT_P (src))
3193 Handle:
3194 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
3195 (const_int 96 [0x60])) [0 S1 A8])
3196 (const_poly_int:QI [8, 8]))
3197 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
3199 if (MEM_P (dest))
3201 emit_move_insn (dest, force_reg (mode, src));
3202 return true;
3204 poly_int64 value = rtx_to_poly_int64 (src);
3205 if (!value.is_constant () && !TARGET_VECTOR)
3207 riscv_report_v_required ();
3208 return false;
3211 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
3212 return false;
3214 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
3216 /* In RV32 system, handle (const_poly_int:QI [m, n])
3217 (const_poly_int:HI [m, n]).
3218 In RV64 system, handle (const_poly_int:QI [m, n])
3219 (const_poly_int:HI [m, n])
3220 (const_poly_int:SI [m, n]). */
3221 rtx tmp = gen_reg_rtx (Pmode);
3222 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
3223 src);
3225 else
3227 /* In RV32 system, handle (const_poly_int:SI [m, n])
3228 (const_poly_int:DI [m, n]).
3229 In RV64 system, handle (const_poly_int:DI [m, n]).
3230 FIXME: Maybe we could generate SImode in RV32 and then sign-extend to DImode;
3231 the offset should not exceed 4 GiB in general. */
3232 rtx tmp = gen_reg_rtx (mode);
3233 riscv_legitimize_poly_move (mode, dest, tmp, src);
3235 return true;
3238 if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))
3239 && riscv_legitimize_subreg_const_poly_move (mode, dest, src))
3240 return true;
3242 /* Expand
3243 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3244 Expand this data movement instead of simply forbidding it, since
3245 we can improve the code generation for the following scenario
3246 generated by RVV auto-vectorization:
3247 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
3248 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3249 Since RVV mode and scalar mode are in different REG_CLASS,
3250 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
3251 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
3253 machine_mode vmode = GET_MODE (SUBREG_REG (src));
3254 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
3255 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
3256 /* We should be able to handle both partial and paradoxical subreg. */
3257 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
3258 scalar_mode smode = as_a<scalar_mode> (mode);
3259 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
3260 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
3261 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
3262 bool need_int_reg_p = false;
3264 if (num == 2)
3266 /* If we want to extract a 64-bit value but ELEN < 64,
3267 we use an RVV vector mode with EEW = 32 to extract
3268 the highpart and the lowpart.
3269 need_int_reg_p = smode == DFmode;
3270 smode = SImode;
3271 nunits = nunits * 2;
3274 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
3276 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
3277 rtx int_reg = dest;
3279 if (need_int_reg_p)
3281 int_reg = gen_reg_rtx (DImode);
3282 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
3285 for (unsigned int i = 0; i < num; i++)
3287 rtx result;
3288 if (num == 1)
3289 result = int_reg;
3290 else if (i == 0)
3291 result = gen_lowpart (smode, int_reg);
3292 else
3293 result = gen_reg_rtx (smode);
3295 riscv_vector::emit_vec_extract (result, v,
3296 gen_int_mode (index + i, Pmode));
3298 if (i == 1)
3300 if (UNITS_PER_WORD < mode_size)
3301 /* If Pmode = SImode and mode = DImode, we just need to
3302 extract the element at index 1 from the vector and move it
3303 into the highpart of DEST, since DEST consists of 2
3304 scalar registers. */
3305 emit_move_insn (gen_highpart (smode, int_reg), result);
3306 else
3308 rtx tmp = expand_binop (Pmode, ashl_optab,
3309 gen_lowpart (Pmode, result),
3310 gen_int_mode (32, Pmode),
3311 NULL_RTX, 0, OPTAB_DIRECT);
3312 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
3313 NULL_RTX, 0, OPTAB_DIRECT);
3314 emit_move_insn (int_reg, tmp2);
3319 if (need_int_reg_p)
3320 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
3321 else
3322 emit_move_insn (dest, int_reg);
3324 else
3325 gcc_unreachable ();
3327 return true;
3329 /* Expand
3330 (set (reg:QI target) (mem:QI (address)))
3332 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
3333 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
3334 with auto-sign/zero extend. */
3335 if (GET_MODE_CLASS (mode) == MODE_INT
3336 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
3337 && can_create_pseudo_p ()
3338 && MEM_P (src))
3340 rtx temp_reg;
3341 int zero_extend_p;
3343 temp_reg = gen_reg_rtx (word_mode);
3344 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
3345 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
3346 zero_extend_p));
3347 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
3348 return true;
3351 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
3353 rtx reg;
3355 if (GET_CODE (src) == CONST_INT)
3357 /* Apply the equivalent of PROMOTE_MODE here for constants to
3358 improve cse. */
3359 machine_mode promoted_mode = mode;
3360 if (GET_MODE_CLASS (mode) == MODE_INT
3361 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
3362 promoted_mode = word_mode;
3364 if (splittable_const_int_operand (src, mode))
3366 reg = gen_reg_rtx (promoted_mode);
3367 riscv_move_integer (reg, reg, INTVAL (src), mode);
3369 else
3370 reg = force_reg (promoted_mode, src);
3372 if (promoted_mode != mode)
3373 reg = gen_lowpart (mode, reg);
3375 else
3376 reg = force_reg (mode, src);
3377 riscv_emit_move (dest, reg);
3378 return true;
3381 /* In order to fit NaN boxing, expand
3382 (set FP_REG (reg:HF/BF src))
3384 (set (reg:SI/DI mask) (const_int -65536)
3385 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
3386 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
3387 (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ] UNSPEC_FMV_FP16_X))
3390 if (TARGET_HARD_FLOAT
3391 && !TARGET_ZFHMIN
3392 && (mode == HFmode || mode == BFmode)
3393 && REG_P (dest) && FP_REG_P (REGNO (dest))
3394 && REG_P (src) && !FP_REG_P (REGNO (src))
3395 && can_create_pseudo_p ())
3397 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
3398 rtx temp = gen_reg_rtx (word_mode);
3399 emit_insn (gen_extend_insn (temp,
3400 simplify_gen_subreg (HImode, src, mode, 0),
3401 word_mode, HImode, 1));
3402 if (word_mode == SImode)
3403 emit_insn (gen_iorsi3 (temp, mask, temp));
3404 else
3405 emit_insn (gen_iordi3 (temp, mask, temp));
3407 riscv_emit_move (dest, gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
3408 UNSPEC_FMV_FP16_X));
3410 return true;
3413 /* We need to deal with constants that would be legitimate
3414 immediate_operands but aren't legitimate move_operands. */
3415 if (CONSTANT_P (src) && !move_operand (src, mode))
3417 riscv_legitimize_const_move (mode, dest, src);
3418 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3419 return true;
3422 /* RISC-V GCC may generate a non-legitimate address because we provide
3423 patterns to optimize access to PIC local symbols, which can make GCC
3424 generate unrecognizable insns during optimization.
3426 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
3427 reload_completed))
3429 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
3432 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
3433 reload_completed))
3435 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
3438 return false;
3441 /* Return true if there is an instruction that implements CODE and accepts
3442 X as an immediate operand. */
3444 static int
3445 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3447 switch (code)
3449 case ASHIFT:
3450 case ASHIFTRT:
3451 case LSHIFTRT:
3452 /* All shift counts are truncated to a valid constant. */
3453 return true;
3455 case AND:
3456 case IOR:
3457 case XOR:
3458 case PLUS:
3459 case LT:
3460 case LTU:
3461 /* These instructions take 12-bit signed immediates. */
3462 return SMALL_OPERAND (x);
3464 case LE:
3465 /* We add 1 to the immediate and use SLT. */
3466 return SMALL_OPERAND (x + 1);
3468 case LEU:
3469 /* Likewise SLTU, but reject the always-true case. */
3470 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3472 case GE:
3473 case GEU:
3474 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3475 return x == 1;
3477 default:
3478 /* By default assume that x0 can be used for 0. */
3479 return x == 0;
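/* For example, (le (reg) (const_int 100)) is handled as
   slti rd, rs, 101, which is why the LE case checks
   SMALL_OPERAND (x + 1) rather than SMALL_OPERAND (x).  */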
3483 /* Return the cost of binary operation X, given that the instruction
3484 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3485 instructions and that the sequence for a double-word operation takes
3486 DOUBLE_INSNS instructions. */
3488 static int
3489 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3491 if (!riscv_v_ext_mode_p (GET_MODE (x))
3492 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3493 return COSTS_N_INSNS (double_insns);
3494 return COSTS_N_INSNS (single_insns);
3497 /* Return the cost of sign- or zero-extending OP. */
3499 static int
3500 riscv_extend_cost (rtx op, bool unsigned_p)
3502 if (MEM_P (op))
3503 return 0;
3505 if (unsigned_p && GET_MODE (op) == QImode)
3506 /* We can use ANDI. */
3507 return COSTS_N_INSNS (1);
3509 /* ZBA provides zext.w. */
3510 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3511 return COSTS_N_INSNS (1);
3513 /* ZBB provides zext.h, sext.b and sext.h. */
3514 if (TARGET_ZBB)
3516 if (!unsigned_p && GET_MODE (op) == QImode)
3517 return COSTS_N_INSNS (1);
3519 if (GET_MODE (op) == HImode)
3520 return COSTS_N_INSNS (1);
3523 if (!unsigned_p && GET_MODE (op) == SImode)
3524 /* We can use SEXT.W. */
3525 return COSTS_N_INSNS (1);
3527 /* We need to use a shift left and a shift right. */
3528 return COSTS_N_INSNS (2);
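/* E.g. zero-extending QImode is a single andi; without ZBB,
   sign-extending HImode falls through to the final case and costs
   two insns (a shift left followed by a shift right).  */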
3531 /* Implement TARGET_RTX_COSTS. */
3533 #define SINGLE_SHIFT_COST 1
3535 static bool
3536 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3537 int *total, bool speed)
3539 /* TODO: We set the RVV instruction cost to 1 by default.
3540 The cost model needs to be well analyzed and supported in the future. */
3541 if (riscv_v_ext_mode_p (mode))
3543 *total = COSTS_N_INSNS (1);
3544 return true;
3547 bool float_mode_p = FLOAT_MODE_P (mode);
3548 int cost;
3550 switch (GET_CODE (x))
3552 case SET:
3553 /* If we are called for an INSN that's a simple set of a register,
3554 then base the cost on the SET_SRC alone. */
3555 if (outer_code == INSN
3556 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3558 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
3559 return true;
3562 /* Otherwise return FALSE indicating we should recurse into both the
3563 SET_DEST and SET_SRC combining the cost of both. */
3564 return false;
3566 case CONST_INT:
3567 /* Trivial constants are checked using OUTER_CODE in case they are
3568 encodable in the insn itself without needing additional insn(s). */
3569 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3571 *total = 0;
3572 return true;
3574 /* Fall through. */
3576 case SYMBOL_REF:
3577 case LABEL_REF:
3578 case CONST_DOUBLE:
3579 /* With TARGET_SUPPORTS_WIDE_INT, a const int can't be in a CONST_DOUBLE
3580 rtl object. This odd recheck is due to the switch-case fall-through above. */
3581 if (GET_CODE (x) == CONST_DOUBLE)
3582 gcc_assert (GET_MODE (x) != VOIDmode);
3583 /* Fall through. */
3585 case CONST:
3586 /* A non-trivial CONST_INT falls through to here: check whether it needs multiple insns. */
3587 if ((cost = riscv_const_insns (x)) > 0)
3589 /* 1. Hoist will GCSE constants only if TOTAL returned is non-zero.
3590 2. For constants loaded more than once, the approach so far has
3591 been to duplicate the operation rather than to CSE the constant.
3592 3. TODO: make the cost more accurate, especially if
3593 riscv_const_insns returns > 1. */
3594 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3595 *total = COSTS_N_INSNS (1);
3597 else /* The instruction will be fetched from the constant pool. */
3598 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3599 return true;
3601 case MEM:
3602 /* If the address is legitimate, return the number of
3603 instructions it needs. */
3604 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3606 /* When optimizing for size, make uncompressible 32-bit addresses
3607 more expensive so that compressible 32-bit addresses are
3608 preferred. */
3609 if ((TARGET_RVC || TARGET_ZCA)
3610 && !speed && riscv_mshorten_memrefs && mode == SImode
3611 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3612 cost++;
3614 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3615 return true;
3617 /* Otherwise use the default handling. */
3618 return false;
3620 case IF_THEN_ELSE:
3621 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3622 && reg_or_0_operand (XEXP (x, 1), mode)
3623 && sfb_alu_operand (XEXP (x, 2), mode)
3624 && comparison_operator (XEXP (x, 0), VOIDmode))
3626 /* For predicated conditional-move operations we assume the cost
3627 of a single instruction even though there are actually two. */
3628 *total = COSTS_N_INSNS (1);
3629 return true;
3631 else if (TARGET_ZICOND_LIKE
3632 && outer_code == SET
3633 && ((GET_CODE (XEXP (x, 1)) == REG
3634 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3635 || (GET_CODE (XEXP (x, 2)) == REG
3636 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3637 || (GET_CODE (XEXP (x, 1)) == REG
3638 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3639 || (GET_CODE (XEXP (x, 1)) == REG
3640 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3642 *total = COSTS_N_INSNS (1);
3643 return true;
3645 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3647 if (equality_operator (XEXP (x, 0), mode)
3648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3650 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3651 return true;
3653 if (ordered_comparison_operator (XEXP (x, 0), mode))
3655 *total = COSTS_N_INSNS (1);
3656 return true;
3659 return false;
3661 case NOT:
3662 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3663 return false;
3665 case AND:
3666 /* slli.uw pattern for zba. */
3667 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3668 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3670 rtx and_rhs = XEXP (x, 1);
3671 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3672 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3673 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3674 && CONST_INT_P (ashift_rhs)
3675 && CONST_INT_P (and_rhs)
3676 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3677 *total = COSTS_N_INSNS (1);
3678 return true;
3680 /* bclri pattern for zbs. */
3681 if (TARGET_ZBS
3682 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3684 *total = COSTS_N_INSNS (1);
3685 return true;
3687 /* bclr pattern for zbs. */
3688 if (TARGET_ZBS
3689 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3690 && GET_CODE (XEXP (x, 0)) == ROTATE
3691 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3692 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3694 *total = COSTS_N_INSNS (1);
3695 return true;
3698 gcc_fallthrough ();
3699 case IOR:
3700 case XOR:
3701 /* orn, andn and xnor patterns for zbb. */
3702 if (TARGET_ZBB
3703 && GET_CODE (XEXP (x, 0)) == NOT)
3705 *total = riscv_binary_cost (x, 1, 2);
3706 return true;
3709 /* bset[i] and binv[i] pattern for zbs. */
3710 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3711 && TARGET_ZBS
3712 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3713 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3714 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3716 *total = COSTS_N_INSNS (1);
3717 return true;
3720 /* Double-word operations use two single-word operations. */
3721 *total = riscv_binary_cost (x, 1, 2);
3722 return false;
3724 case ZERO_EXTRACT:
3725 /* This is an SImode shift. */
3726 if (outer_code == SET
3727 && CONST_INT_P (XEXP (x, 1))
3728 && CONST_INT_P (XEXP (x, 2))
3729 && (INTVAL (XEXP (x, 2)) > 0)
3730 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3732 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3733 return true;
3735 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3736 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3737 && GET_CODE (XEXP (x, 1)) == CONST_INT
3738 && INTVAL (XEXP (x, 1)) == 1)
3740 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3741 return true;
3743 gcc_fallthrough ();
3744 case SIGN_EXTRACT:
3745 if (TARGET_XTHEADBB && outer_code == SET
3746 && CONST_INT_P (XEXP (x, 1))
3747 && CONST_INT_P (XEXP (x, 2)))
3749 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3750 return true;
3752 return false;
3754 case ASHIFT:
3755 /* bset pattern for zbs. */
3756 if (TARGET_ZBS
3757 && CONST_INT_P (XEXP (x, 0))
3758 && INTVAL (XEXP (x, 0)) == 1)
3760 *total = COSTS_N_INSNS (1);
3761 return true;
3763 gcc_fallthrough ();
3764 case ASHIFTRT:
3765 case LSHIFTRT:
3766 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3767 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3768 return false;
3770 case ABS:
3771 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3772 return false;
3774 case LO_SUM:
3775 *total = set_src_cost (XEXP (x, 0), mode, speed);
3776 return true;
3778 case LT:
3779 /* This is an SImode shift. */
3780 if (outer_code == SET && GET_MODE (x) == DImode
3781 && GET_MODE (XEXP (x, 0)) == SImode)
3783 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3784 return true;
3786 /* Fall through. */
3787 case LTU:
3788 case LE:
3789 case LEU:
3790 case GT:
3791 case GTU:
3792 case GE:
3793 case GEU:
3794 case EQ:
3795 case NE:
3796 /* Branch comparisons have VOIDmode, so use the first operand's
3797 mode instead. */
3798 mode = GET_MODE (XEXP (x, 0));
3799 if (float_mode_p)
3800 *total = tune_param->fp_add[mode == DFmode];
3801 else
3802 *total = riscv_binary_cost (x, 1, 3);
3803 return false;
3805 case UNORDERED:
3806 case ORDERED:
3807 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3808 mode = GET_MODE (XEXP (x, 0));
3809 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3810 return false;
3812 case UNEQ:
3813 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3814 mode = GET_MODE (XEXP (x, 0));
3815 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3816 return false;
3818 case LTGT:
3819 /* (FLT(A, A) || FGT(B, B)). */
3820 mode = GET_MODE (XEXP (x, 0));
3821 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3822 return false;
3824 case UNGE:
3825 case UNGT:
3826 case UNLE:
3827 case UNLT:
3828 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3829 mode = GET_MODE (XEXP (x, 0));
3830 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3831 return false;
3833 case MINUS:
3834 if (float_mode_p)
3835 *total = tune_param->fp_add[mode == DFmode];
3836 else
3837 *total = riscv_binary_cost (x, 1, 4);
3838 return false;
3840 case PLUS:
3841 /* add.uw pattern for zba. */
3842 if (TARGET_ZBA
3843 && (TARGET_64BIT && (mode == DImode))
3844 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3845 && register_operand (XEXP (XEXP (x, 0), 0),
3846 GET_MODE (XEXP (XEXP (x, 0), 0)))
3847 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3849 *total = COSTS_N_INSNS (1);
3850 return true;
3852 /* shNadd pattern for zba. */
3853 if (TARGET_ZBA
3854 && ((!TARGET_64BIT && (mode == SImode)) ||
3855 (TARGET_64BIT && (mode == DImode)))
3856 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3857 && register_operand (XEXP (XEXP (x, 0), 0),
3858 GET_MODE (XEXP (XEXP (x, 0), 0)))
3859 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3860 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3862 *total = COSTS_N_INSNS (1);
3863 return true;
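/* For illustration (hypothetical source, not from this file): indexing such
   as

     long f (long *p, long i) { return p[i]; }

   forms the address as (plus (ashift i 3) p) on RV64, which matches the
   shNadd pattern above and is costed as a single sh3add instruction.  */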
3865 /* Before strength-reduction, the shNadd can be expressed as the addition
3866 of a multiplication with a power-of-two. If this case is not handled,
3867 the strength-reduction in expmed.c will calculate an inflated cost. */
3868 if (TARGET_ZBA
3869 && mode == word_mode
3870 && GET_CODE (XEXP (x, 0)) == MULT
3871 && register_operand (XEXP (XEXP (x, 0), 0),
3872 GET_MODE (XEXP (XEXP (x, 0), 0)))
3873 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3874 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3875 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3877 *total = COSTS_N_INSNS (1);
3878 return true;
3880 /* shNadd.uw pattern for zba.
3881 [(set (match_operand:DI 0 "register_operand" "=r")
3882 (plus:DI
3883 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3884 (match_operand:QI 2 "immediate_operand" "I"))
3885 (match_operand 3 "immediate_operand" ""))
3886 (match_operand:DI 4 "register_operand" "r")))]
3887 "TARGET_64BIT && TARGET_ZBA
3888 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3889 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3891 if (TARGET_ZBA
3892 && (TARGET_64BIT && (mode == DImode))
3893 && (GET_CODE (XEXP (x, 0)) == AND)
3894 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
3896 do {
3897 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3898 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3899 if (GET_CODE (and_lhs) != ASHIFT)
3900 break;
3901 if (!CONST_INT_P (and_rhs))
3902 break;
3904 rtx ashift_rhs = XEXP (and_lhs, 1);
3906 if (!CONST_INT_P (ashift_rhs)
3907 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3908 break;
3910 if (CONST_INT_P (and_rhs)
3911 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3913 *total = COSTS_N_INSNS (1);
3914 return true;
3916 } while (false);
3919 if (float_mode_p)
3920 *total = tune_param->fp_add[mode == DFmode];
3921 else
3922 *total = riscv_binary_cost (x, 1, 4);
3923 return false;
3925 case NEG:
3927 rtx op = XEXP (x, 0);
3928 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3930 *total = (tune_param->fp_mul[mode == DFmode]
3931 + set_src_cost (XEXP (op, 0), mode, speed)
3932 + set_src_cost (XEXP (op, 1), mode, speed)
3933 + set_src_cost (XEXP (op, 2), mode, speed));
3934 return true;
3938 if (float_mode_p)
3939 *total = tune_param->fp_add[mode == DFmode];
3940 else
3941 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3942 return false;
3944 case MULT:
3945 if (float_mode_p)
3946 *total = tune_param->fp_mul[mode == DFmode];
3947 else if (!(TARGET_MUL || TARGET_ZMMUL))
3948 /* Estimate the cost of a library call. */
3949 *total = COSTS_N_INSNS (speed ? 32 : 6);
3950 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3951 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3952 else if (!speed)
3953 *total = COSTS_N_INSNS (1);
3954 else
3955 *total = tune_param->int_mul[mode == DImode];
3956 return false;
3958 case DIV:
3959 case SQRT:
3960 case MOD:
3961 if (float_mode_p)
3963 *total = tune_param->fp_div[mode == DFmode];
3964 return false;
3966 /* Fall through. */
3968 case UDIV:
3969 case UMOD:
3970 if (!TARGET_DIV)
3971 /* Estimate the cost of a library call. */
3972 *total = COSTS_N_INSNS (speed ? 32 : 6);
3973 else if (speed)
3974 *total = tune_param->int_div[mode == DImode];
3975 else
3976 *total = COSTS_N_INSNS (1);
3977 return false;
3979 case ZERO_EXTEND:
3980 /* This is an SImode shift. */
3981 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3983 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3984 return true;
3986 /* Fall through. */
3987 case SIGN_EXTEND:
3988 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
3989 return false;
3991 case BSWAP:
3992 if (TARGET_ZBB)
3994 /* RISC-V only defines rev8 for XLEN, so we will need an extra
3995 shift-right instruction for smaller modes. */
3996 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
3997 return true;
3999 return false;
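/* For illustration (assuming Zbb on RV64): a 32-bit byte swap cannot use
   rev8 alone, since rev8 reverses all eight bytes of the register; the
   swapped value lands in the upper half and needs one extra right shift
   by 32, hence the cost of two instructions for sub-word_mode BSWAP
   above.  */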
4001 case FLOAT:
4002 case UNSIGNED_FLOAT:
4003 case FIX:
4004 case FLOAT_EXTEND:
4005 case FLOAT_TRUNCATE:
4006 *total = tune_param->fp_add[mode == DFmode];
4007 return false;
4009 case FMA:
4010 *total = (tune_param->fp_mul[mode == DFmode]
4011 + set_src_cost (XEXP (x, 0), mode, speed)
4012 + set_src_cost (XEXP (x, 1), mode, speed)
4013 + set_src_cost (XEXP (x, 2), mode, speed));
4014 return true;
4016 case UNSPEC:
4017 if (XINT (x, 1) == UNSPEC_AUIPC)
4019 /* Make AUIPC cheap to avoid spilling its result to the stack. */
4020 *total = 1;
4021 return true;
4023 return false;
4025 default:
4026 return false;
4030 /* Implement TARGET_ADDRESS_COST. */
4032 static int
4033 riscv_address_cost (rtx addr, machine_mode mode,
4034 addr_space_t as ATTRIBUTE_UNUSED,
4035 bool speed ATTRIBUTE_UNUSED)
4037 /* When optimizing for size, make uncompressible 32-bit addresses more
4038 expensive so that compressible 32-bit addresses are preferred. */
4039 if ((TARGET_RVC || TARGET_ZCA)
4040 && !speed && riscv_mshorten_memrefs && mode == SImode
4041 && !riscv_compressed_lw_address_p (addr))
4042 return riscv_address_insns (addr, mode, false) + 1;
4043 return riscv_address_insns (addr, mode, false);
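/* For illustration (c.lw encoding facts; the example offset is arbitrary):
   a compressed c.lw can only encode a base register in x8..x15 and a byte
   offset in [0, 124] that is a multiple of 4, so with -mshorten-memrefs an
   SImode load at, say, offset 2048 is uncompressible and receives the
   extra unit of cost above.  */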
4046 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
4047 calculation for conditional branches: one unit is considered the cost
4048 of microarchitecture-dependent actual branch execution and therefore
4049 multiplied by BRANCH_COST and any remaining units are considered fixed
4050 branch overhead. Branches on a floating-point condition incur an extra
4051 instruction cost as they will be split into an FCMP operation followed
4052 by a branch on an integer condition. */
4054 static int
4055 riscv_insn_cost (rtx_insn *insn, bool speed)
4057 rtx x = PATTERN (insn);
4058 int cost = pattern_cost (x, speed);
4060 if (JUMP_P (insn))
4062 if (GET_CODE (x) == PARALLEL)
4063 x = XVECEXP (x, 0, 0);
4064 if (GET_CODE (x) == SET
4065 && GET_CODE (SET_DEST (x)) == PC
4066 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
4068 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
4069 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
4070 cost += COSTS_N_INSNS (1);
4073 return cost;
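/* Worked example (illustrative numbers): with BRANCH_COST evaluating to 3,
   a conditional branch is charged its pattern cost plus
   COSTS_N_INSNS (3 - 1), and a branch on a floating-point condition one
   COSTS_N_INSNS (1) more for the separate FCMP instruction it will be
   split into.  */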
4076 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
4077 but we consider cost units of branch instructions equal to cost units of
4078 other instructions. */
4080 static unsigned int
4081 riscv_max_noce_ifcvt_seq_cost (edge e)
4083 bool predictable_p = predictable_edge_p (e);
4085 if (predictable_p)
4087 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
4088 return param_max_rtl_if_conversion_predictable_cost;
4090 else
4092 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
4093 return param_max_rtl_if_conversion_unpredictable_cost;
4096 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
4099 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
4100 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
4101 by our actual conditional branch cost, observing that our branches test
4102 conditions directly, so there is no preparatory extra condition-set
4103 instruction. */
4105 static bool
4106 riscv_noce_conversion_profitable_p (rtx_insn *seq,
4107 struct noce_if_info *if_info)
4109 struct noce_if_info riscv_if_info = *if_info;
4111 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
4112 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
4114 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
4115 to emit a conditional set operation on DImode output it comes up
4116 with a sequence such as:
4118 (insn 26 0 27 (set (reg:SI 140)
4119 (eq:SI (reg/v:DI 137 [ c ])
4120 (const_int 0 [0]))) 302 {*seq_zero_disi}
4121 (nil))
4122 (insn 27 26 28 (set (reg:DI 139)
4123 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
4124 (nil))
4126 because our `cstore<mode>4' pattern expands to an insn that gives
4127 a SImode output. The output of conditional set is 0 or 1 boolean,
4128 so it is valid for input in any scalar integer mode and therefore
4129 combine later folds the zero extend operation into an equivalent
4130 conditional set operation that produces a DImode output, however
4131 this redundant zero extend operation counts towards the cost of
4132 the replacement sequence. Compensate for that by incrementing the
4133 cost of the original sequence as well as the maximum sequence cost
4134 accordingly. Likewise for sign extension. */
4135 rtx last_dest = NULL_RTX;
4136 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
4138 if (!NONDEBUG_INSN_P (insn))
4139 continue;
4141 rtx x = PATTERN (insn);
4142 if (NONJUMP_INSN_P (insn)
4143 && GET_CODE (x) == SET)
4145 rtx src = SET_SRC (x);
4146 enum rtx_code code = GET_CODE (src);
4147 if (last_dest != NULL_RTX
4148 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
4149 && REG_P (XEXP (src, 0))
4150 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
4152 riscv_if_info.original_cost += COSTS_N_INSNS (1);
4153 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
4155 last_dest = NULL_RTX;
4156 rtx dest = SET_DEST (x);
4157 if (COMPARISON_P (src)
4158 && REG_P (dest)
4159 && GET_MODE (dest) == SImode)
4160 last_dest = dest;
4162 else
4163 last_dest = NULL_RTX;
4166 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
4169 /* Return one word of double-word value OP. HIGH_P is true to select the
4170 high part or false to select the low part. */
4172 rtx
4173 riscv_subword (rtx op, bool high_p)
4175 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
4176 machine_mode mode = GET_MODE (op);
4178 if (mode == VOIDmode)
4179 mode = TARGET_64BIT ? TImode : DImode;
4181 if (MEM_P (op))
4182 return adjust_address (op, word_mode, byte);
4184 if (REG_P (op))
4185 gcc_assert (!FP_REG_RTX_P (op));
4187 return simplify_gen_subreg (word_mode, op, mode, byte);
4190 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
4192 bool
4193 riscv_split_64bit_move_p (rtx dest, rtx src)
4195 if (TARGET_64BIT)
4196 return false;
4198 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
4199 if (satisfies_constraint_zfli (src))
4200 return false;
4202 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
4203 of zeroing an FPR with FCVT.D.W. */
4204 if (TARGET_DOUBLE_FLOAT
4205 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4206 || (FP_REG_RTX_P (dest) && MEM_P (src))
4207 || (FP_REG_RTX_P (src) && MEM_P (dest))
4208 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
4209 return false;
4211 return true;
4214 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
4215 this function handles 64-bit moves for which riscv_split_64bit_move_p
4216 holds. For 64-bit targets, this function handles 128-bit moves. */
4218 void
4219 riscv_split_doubleword_move (rtx dest, rtx src)
4221 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
4222 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
4224 if (FP_REG_RTX_P (dest))
4226 rtx low_src = riscv_subword (src, false);
4227 rtx high_src = riscv_subword (src, true);
4229 if (TARGET_ZFA)
4230 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
4231 else
4232 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
4233 return;
4235 if (FP_REG_RTX_P (src))
4237 rtx low_dest = riscv_subword (dest, false);
4238 rtx high_dest = riscv_subword (dest, true);
4240 if (TARGET_ZFA)
4242 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
4243 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
4244 return;
4246 else
4248 emit_insn (gen_th_fmv_x_w (low_dest, src));
4249 emit_insn (gen_th_fmv_x_hw (high_dest, src));
4251 return;
4255 /* The operation can be split into two normal moves. Decide in
4256 which order to do them. */
4257 rtx low_dest = riscv_subword (dest, false);
4258 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
4260 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
4261 riscv_emit_move (low_dest, riscv_subword (src, false));
4263 else
4265 riscv_emit_move (low_dest, riscv_subword (src, false));
4266 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
4270 /* Constant VAL is known to be sum of two S12 constants. Break it into
4271 comprising BASE and OFF.
4272 Numerically S12 is -2048 to 2047, however it uses the more conservative
4273 range -2048 to 2032 as offsets pertain to stack related registers. */
4275 void
4276 riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
4277 HOST_WIDE_INT *off)
4279 if (SUM_OF_TWO_S12_N (val))
4281 *base = -2048;
4282 *off = val - (-2048);
4284 else if (SUM_OF_TWO_S12_P_ALGN (val))
4286 *base = 2032;
4287 *off = val - 2032;
4289 else
4291 gcc_unreachable ();
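/* Worked example (value chosen for illustration): val = 3000 does not fit
   in a single signed 12-bit immediate, but SUM_OF_TWO_S12_P_ALGN holds, so
   it splits as base = 2032 and off = 968; each part then fits in the
   immediate field of an addi.  */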
4296 /* Return the appropriate instructions to move SRC into DEST. Assume
4297 that SRC is operand 1 and DEST is operand 0. */
4299 const char *
4300 riscv_output_move (rtx dest, rtx src)
4302 enum rtx_code dest_code, src_code;
4303 machine_mode mode;
4304 bool dbl_p;
4305 unsigned width;
4306 const char *insn;
4308 if ((insn = th_output_move (dest, src)))
4309 return insn;
4311 dest_code = GET_CODE (dest);
4312 src_code = GET_CODE (src);
4313 mode = GET_MODE (dest);
4314 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
4315 width = GET_MODE_SIZE (mode).to_constant ();
4317 if (dbl_p && riscv_split_64bit_move_p (dest, src))
4318 return "#";
4320 if (dest_code == REG && GP_REG_P (REGNO (dest)))
4322 if (src_code == REG && FP_REG_P (REGNO (src)))
4323 switch (width)
4325 case 2:
4326 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4327 return "fmv.x.h\t%0,%1";
4328 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
4329 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
4330 case 4:
4331 return "fmv.x.s\t%0,%1";
4332 case 8:
4333 return "fmv.x.d\t%0,%1";
4336 if (src_code == MEM)
4337 switch (width)
4339 case 1: return "lbu\t%0,%1";
4340 case 2: return "lhu\t%0,%1";
4341 case 4: return "lw\t%0,%1";
4342 case 8: return "ld\t%0,%1";
4345 if (src_code == CONST_INT)
4347 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
4348 return "li\t%0,%1";
4350 if (TARGET_ZBS
4351 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
4352 return "bseti\t%0,zero,%S1";
4354 /* Should never reach here. */
4355 abort ();
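/* For illustration (hypothetical constant, register name arbitrary): with
   Zbs, loading the 64-bit constant 1 << 42, which neither li nor lui can
   materialize directly, is emitted as "bseti a0,zero,42".  */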
4358 if (src_code == HIGH)
4359 return "lui\t%0,%h1";
4361 if (symbolic_operand (src, VOIDmode))
4362 switch (riscv_classify_symbolic_expression (src))
4364 case SYMBOL_GOT_DISP: return "la\t%0,%1";
4365 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
4366 case SYMBOL_PCREL: return "lla\t%0,%1";
4367 default: gcc_unreachable ();
4370 if ((src_code == REG && GP_REG_P (REGNO (src)))
4371 || (src == CONST0_RTX (mode)))
4373 if (dest_code == REG)
4375 if (GP_REG_P (REGNO (dest)))
4376 return "mv\t%0,%z1";
4378 if (FP_REG_P (REGNO (dest)))
4379 switch (width)
4381 case 2:
4382 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4383 return "fmv.h.x\t%0,%z1";
4384 /* The high 16 bits should be all-1; otherwise the hardware will treat it
4385 as a canonical NaN, but that does not matter for soft-float. */
4386 return "fmv.s.x\t%0,%1";
4387 case 4:
4388 return "fmv.s.x\t%0,%z1";
4389 case 8:
4390 if (TARGET_64BIT)
4391 return "fmv.d.x\t%0,%z1";
4392 /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w. */
4393 gcc_assert (src == CONST0_RTX (mode));
4394 return "fcvt.d.w\t%0,x0";
4397 if (dest_code == MEM)
4398 switch (width)
4400 case 1: return "sb\t%z1,%0";
4401 case 2: return "sh\t%z1,%0";
4402 case 4: return "sw\t%z1,%0";
4403 case 8: return "sd\t%z1,%0";
4406 if (src_code == REG && FP_REG_P (REGNO (src)))
4408 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4409 switch (width)
4411 case 2:
4412 if (TARGET_ZFH)
4413 return "fmv.h\t%0,%1";
4414 return "fmv.s\t%0,%1";
4415 case 4:
4416 return "fmv.s\t%0,%1";
4417 case 8:
4418 return "fmv.d\t%0,%1";
4421 if (dest_code == MEM)
4422 switch (width)
4424 case 2:
4425 return "fsh\t%1,%0";
4426 case 4:
4427 return "fsw\t%1,%0";
4428 case 8:
4429 return "fsd\t%1,%0";
4432 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4434 if (src_code == MEM)
4435 switch (width)
4437 case 2:
4438 return "flh\t%0,%1";
4439 case 4:
4440 return "flw\t%0,%1";
4441 case 8:
4442 return "fld\t%0,%1";
4445 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
4446 switch (width)
4448 case 2:
4449 return "fli.h\t%0,%1";
4450 case 4:
4451 return "fli.s\t%0,%1";
4452 case 8:
4453 return "fli.d\t%0,%1";
4456 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
4458 /* We only want a single full vector register VLEN read after reload. */
4459 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
4460 return "csrr\t%0,vlenb";
4462 gcc_unreachable ();
4465 const char *
4466 riscv_output_return ()
4468 if (cfun->machine->naked_p)
4469 return "";
4471 return "ret";
4475 /* Return true if CMP1 is a suitable second operand for integer ordering
4476 test CODE. See also the *sCC patterns in riscv.md. */
4478 static bool
4479 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4481 switch (code)
4483 case GT:
4484 case GTU:
4485 return reg_or_0_operand (cmp1, VOIDmode);
4487 case GE:
4488 case GEU:
4489 return cmp1 == const1_rtx;
4491 case LT:
4492 case LTU:
4493 return arith_operand (cmp1, VOIDmode);
4495 case LE:
4496 return sle_operand (cmp1, VOIDmode);
4498 case LEU:
4499 return sleu_operand (cmp1, VOIDmode);
4501 default:
4502 gcc_unreachable ();
4506 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4507 integer ordering test *CODE, or if an equivalent combination can
4508 be formed by adjusting *CODE and *CMP1. When returning true, update
4509 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4510 them alone. */
4512 static bool
4513 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4514 machine_mode mode)
4516 HOST_WIDE_INT plus_one;
4518 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4519 return true;
4521 if (CONST_INT_P (*cmp1))
4522 switch (*code)
4524 case LE:
4525 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4526 if (INTVAL (*cmp1) < plus_one)
4528 *code = LT;
4529 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4530 return true;
4532 break;
4534 case LEU:
4535 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4536 if (plus_one != 0)
4538 *code = LTU;
4539 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4540 return true;
4542 break;
4544 default:
4545 break;
4547 return false;
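/* For illustration: (LE x 5) has no direct instruction, but is rewritten
   here as (LT x 6) with the new constant forced into a register, so a
   single slt suffices; for LEU against the all-ones constant the increment
   would wrap to zero, so the rewrite is rejected and the caller falls back
   to the inverted test.  */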
4550 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4551 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4552 is nonnull, it's OK to set TARGET to the inverse of the result and
4553 flip *INVERT_PTR instead. */
4555 static void
4556 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4557 rtx target, rtx cmp0, rtx cmp1)
4559 machine_mode mode;
4561 /* First see if there is a RISCV instruction that can do this operation.
4562 If not, try doing the same for the inverse operation. If that also
4563 fails, force CMP1 into a register and try again. */
4564 mode = GET_MODE (cmp0);
4565 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4566 riscv_emit_binary (code, target, cmp0, cmp1);
4567 else
4569 enum rtx_code inv_code = reverse_condition (code);
4570 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4572 cmp1 = force_reg (mode, cmp1);
4573 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4575 else if (invert_ptr == 0)
4577 rtx inv_target = riscv_force_binary (word_mode,
4578 inv_code, cmp0, cmp1);
4579 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4581 else
4583 *invert_ptr = !*invert_ptr;
4584 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4589 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4590 The register will have the same mode as CMP0. */
4592 static rtx
4593 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4595 if (cmp1 == const0_rtx)
4596 return cmp0;
4598 return expand_binop (GET_MODE (cmp0), sub_optab,
4599 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
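/* For illustration (register names arbitrary): a test such as x == y is
   reduced by the helper above to "sub t0,x,y" followed by a comparison of
   t0 against zero, e.g. seqz for EQ.  */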
4602 /* Helper function for riscv_extend_comparands to Sign-extend the OP.
4603 However if the OP is SI subreg promoted with an inner DI, such as
4604 (subreg/s/v:SI (reg/v:DI) 0)
4605 just peel off the SUBREG to get DI, avoiding extraneous extension. */
4607 static void
4608 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4610 if (GET_CODE (*op) == SUBREG
4611 && SUBREG_PROMOTED_VAR_P (*op)
4612 && SUBREG_PROMOTED_SIGNED_P (*op)
4613 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4614 == GET_MODE_SIZE (word_mode)))
4615 *op = XEXP (*op, 0);
4616 else
4617 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4620 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4622 static void
4623 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4625 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4626 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4628 /* It is more profitable to zero-extend QImode values. But not if the
4629 first operand has already been sign-extended, and the second one is
4630 a constant or has also been sign-extended. */
4631 if (unsigned_condition (code) == code
4632 && (GET_MODE (*op0) == QImode
4633 && ! (GET_CODE (*op0) == SUBREG
4634 && SUBREG_PROMOTED_VAR_P (*op0)
4635 && SUBREG_PROMOTED_SIGNED_P (*op0)
4636 && (CONST_INT_P (*op1)
4637 || (GET_CODE (*op1) == SUBREG
4638 && SUBREG_PROMOTED_VAR_P (*op1)
4639 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4641 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4642 if (CONST_INT_P (*op1))
4643 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4644 else
4645 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4647 else
4649 riscv_sign_extend_if_not_subreg_prom (op0);
4651 if (*op1 != const0_rtx)
4652 riscv_sign_extend_if_not_subreg_prom (op1);
4657 /* Convert a comparison into something that can be used in a branch or
4658 conditional move. On entry, *OP0 and *OP1 are the values being
4659 compared and *CODE is the code used to compare them.
4661 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4662 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4663 emitted. */
4665 static void
4666 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4667 bool need_eq_ne_p = false)
4669 if (need_eq_ne_p)
4671 rtx cmp_op0 = *op0;
4672 rtx cmp_op1 = *op1;
4673 if (*code == EQ || *code == NE)
4675 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4676 *op1 = const0_rtx;
4677 return;
4679 gcc_unreachable ();
4682 if (splittable_const_int_operand (*op1, VOIDmode))
4684 HOST_WIDE_INT rhs = INTVAL (*op1);
4686 if (*code == EQ || *code == NE)
4688 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4689 if (SMALL_OPERAND (-rhs))
4691 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4692 GEN_INT (-rhs));
4693 *op1 = const0_rtx;
4696 else
4698 static const enum rtx_code mag_comparisons[][2] = {
4699 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4702 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4703 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4705 HOST_WIDE_INT new_rhs;
4706 bool increment = *code == mag_comparisons[i][0];
4707 bool decrement = *code == mag_comparisons[i][1];
4708 if (!increment && !decrement)
4709 continue;
4711 new_rhs = rhs + (increment ? 1 : -1);
4712 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4713 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4714 && (rhs < 0) == (new_rhs < 0))
4716 *op1 = GEN_INT (new_rhs);
4717 *code = mag_comparisons[i][increment];
4719 break;
4724 riscv_extend_comparands (*code, op0, op1);
4726 *op0 = force_reg (word_mode, *op0);
4727 if (*op1 != const0_rtx)
4728 *op1 = force_reg (word_mode, *op1);
4731 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4733 static void
4734 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4735 bool *invert_ptr = nullptr)
4737 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4738 enum rtx_code fp_code = *code;
4739 *code = NE;
4741 switch (fp_code)
4743 case UNORDERED:
4744 *code = EQ;
4745 /* Fall through. */
4747 case ORDERED:
4748 /* a == a && b == b */
4749 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4750 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4751 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4752 *op1 = const0_rtx;
4753 break;
4755 case UNEQ:
4756 /* ordered(a, b) > (a == b) */
4757 *code = EQ;
4758 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4759 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4760 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4761 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4762 break;
4764 #define UNORDERED_COMPARISON(CODE, CMP) \
4765 case CODE: \
4766 *code = EQ; \
4767 *op0 = gen_reg_rtx (word_mode); \
4768 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4769 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4770 else if (GET_MODE (cmp_op0) == SFmode) \
4771 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4772 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4773 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4774 else if (GET_MODE (cmp_op0) == DFmode) \
4775 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4776 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4777 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4778 else if (GET_MODE (cmp_op0) == HFmode) \
4779 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4780 else \
4781 gcc_unreachable (); \
4782 *op1 = const0_rtx; \
4783 break;
4785 case UNLT:
4786 std::swap (cmp_op0, cmp_op1);
4787 gcc_fallthrough ();
4789 UNORDERED_COMPARISON(UNGT, le)
4791 case UNLE:
4792 std::swap (cmp_op0, cmp_op1);
4793 gcc_fallthrough ();
4795 UNORDERED_COMPARISON(UNGE, lt)
4796 #undef UNORDERED_COMPARISON
4798 case NE:
4799 fp_code = EQ;
4800 if (invert_ptr != nullptr)
4801 *invert_ptr = !*invert_ptr;
4802 else
4804 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4805 cmp_op1 = const0_rtx;
4807 gcc_fallthrough ();
4809 case EQ:
4810 case LE:
4811 case LT:
4812 case GE:
4813 case GT:
4814 /* We have instructions for these cases. */
4815 *code = fp_code;
4816 *op0 = cmp_op0;
4817 *op1 = cmp_op1;
4818 break;
4820 case LTGT:
4821 /* (a < b) | (a > b) */
4822 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4823 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4824 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4825 *op1 = const0_rtx;
4826 break;
4828 default:
4829 gcc_unreachable ();
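/* For illustration (register names arbitrary): an ORDERED test on two
   SFmode values roughly becomes

     feq.s t0,fa0,fa0
     feq.s t1,fa1,fa1
     and   t0,t0,t1

   with t0 then compared against zero: NE for ORDERED, EQ for UNORDERED,
   matching the cases at the top of the switch above.  */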
4833 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4835 void
4836 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4838 riscv_extend_comparands (code, &op0, &op1);
4839 op0 = force_reg (word_mode, op0);
4841 if (code == EQ || code == NE)
4843 rtx zie = riscv_zero_if_equal (op0, op1);
4844 riscv_emit_binary (code, target, zie, const0_rtx);
4846 else
4847 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4850 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4852 void
4853 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4854 bool *invert_ptr)
4856 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4858 machine_mode mode = GET_MODE (target);
4859 if (mode != word_mode)
4861 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4862 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4864 else
4865 riscv_emit_binary (code, target, op0, op1);
4868 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4870 void
4871 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4873 if (FLOAT_MODE_P (GET_MODE (op1)))
4874 riscv_emit_float_compare (&code, &op0, &op1);
4875 else
4876 riscv_emit_int_compare (&code, &op0, &op1);
4878 if (FLOAT_MODE_P (GET_MODE (op0)))
4880 op0 = riscv_force_binary (word_mode, code, op0, op1);
4881 op1 = const0_rtx;
4882 code = NE;
4885 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4886 emit_jump_insn (gen_condjump (condition, label));
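/* For illustration (register names arbitrary): a branch on a < b for
   SFmode operands is expanded by the above into "flt.s t0,fa0,fa1"
   followed by "bnez t0,label", since RISC-V has no direct branch on a
   floating-point condition.  */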
4889 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
4890 Return false if expansion failed. */
4892 bool
4893 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4895 machine_mode mode = GET_MODE (dest);
4896 rtx_code code = GET_CODE (op);
4897 rtx op0 = XEXP (op, 0);
4898 rtx op1 = XEXP (op, 1);
4900 if (((TARGET_ZICOND_LIKE
4901 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4902 && (GET_MODE_CLASS (mode) == MODE_INT))
4903 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4905 machine_mode mode0 = GET_MODE (op0);
4906 machine_mode mode1 = GET_MODE (op1);
4908 /* An integer comparison must be comparing WORD_MODE objects. We
4909 must enforce that so that we don't strip away a sign_extension
4910 thinking it is unnecessary. We might consider using
4911 riscv_extend_operands if they are not already properly extended. */
4912 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4913 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4914 return false;
4916 /* In the fallback generic case use MODE rather than WORD_MODE for
4917 the output of the SCC instruction, to match the mode of the NEG
4918 operation below. The output of SCC is 0 or 1 boolean, so it is
4919 valid for input in any scalar integer mode. */
4920 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4921 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4922 ? word_mode : mode);
4923 bool invert = false;
4925 /* Canonicalize the comparison. It must be an equality comparison
4926 of integer operands, or with SFB it can be any comparison of
4927 integer operands. If it isn't, then emit an SCC instruction
4928 so that we can then use an equality comparison against zero. */
4929 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4930 || !INTEGRAL_MODE_P (mode0))
4932 bool *invert_ptr = nullptr;
4934 /* If riscv_expand_int_scc inverts the condition, then it will
4935 flip the value of INVERT. We need to know where so that
4936 we can adjust it for our needs. */
4937 if (code == LE || code == LEU || code == GE || code == GEU)
4938 invert_ptr = &invert;
4940 /* Emit an SCC-like instruction into a temporary so that we can
4941 use an EQ/NE comparison. We can support both FP and integer
4942 conditional moves. */
4943 if (INTEGRAL_MODE_P (mode0))
4944 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4945 else if (FLOAT_MODE_P (mode0)
4946 && fp_scc_comparison (op, GET_MODE (op)))
4947 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4948 else
4949 return false;
4951 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4953 /* We've generated a new comparison. Update the local variables. */
4954 code = GET_CODE (op);
4955 op0 = XEXP (op, 0);
4956 op1 = XEXP (op, 1);
4958 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4959 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4961 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4963 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4964 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4966 /* The expander is a bit loose in its specification of the true
4967 arm of the conditional move. That allows us to support more
4968 cases for extensions which are more general than SFB. But it
4969 does mean we need to force CONS into a register at this point. */
4970 cons = force_reg (mode, cons);
4971 /* With XTheadCondMov we need to force ALT into a register too. */
4972 alt = force_reg (mode, alt);
4973 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4974 cons, alt)));
4975 return true;
4977 else if (!TARGET_ZICOND_LIKE)
4979 if (invert)
4980 std::swap (cons, alt);
4982 rtx reg1 = gen_reg_rtx (mode);
4983 rtx reg2 = gen_reg_rtx (mode);
4984 rtx reg3 = gen_reg_rtx (mode);
4985 rtx reg4 = gen_reg_rtx (mode);
4987 riscv_emit_unary (NEG, reg1, tmp);
4988 riscv_emit_binary (AND, reg2, reg1, cons);
4989 riscv_emit_unary (NOT, reg3, reg1);
4990 riscv_emit_binary (AND, reg4, reg3, alt);
4991 riscv_emit_binary (IOR, dest, reg2, reg4);
4992 return true;
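/* The sequence above is a branchless select; for illustration, with TMP
   holding the 0/1 comparison result it is roughly

     neg  reg1, tmp            # 0 or an all-ones mask
     and  reg2, reg1, cons
     not  reg3, reg1
     and  reg4, reg3, alt
     or   dest, reg2, reg4

   (register names arbitrary), yielding CONS when the condition holds and
   ALT otherwise.  */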
4994 /* 0, reg or 0, imm */
4995 else if (cons == CONST0_RTX (mode)
4996 && (REG_P (alt)
4997 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
4999 riscv_emit_int_compare (&code, &op0, &op1, true);
5000 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5001 alt = force_reg (mode, alt);
5002 emit_insn (gen_rtx_SET (dest,
5003 gen_rtx_IF_THEN_ELSE (mode, cond,
5004 cons, alt)));
5005 return true;
5007 /* imm, imm */
5008 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
5009 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5011 riscv_emit_int_compare (&code, &op0, &op1, true);
5012 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5013 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
5014 alt = force_reg (mode, gen_int_mode (t, mode));
5015 emit_insn (gen_rtx_SET (dest,
5016 gen_rtx_IF_THEN_ELSE (mode, cond,
5017 CONST0_RTX (mode),
5018 alt)));
5019 /* CONS might not fit into a signed 12 bit immediate suitable
5020 for an addi instruction. If that's the case, force it
5021 into a register. */
5022 if (!SMALL_OPERAND (INTVAL (cons)))
5023 cons = force_reg (mode, cons);
5024 riscv_emit_binary (PLUS, dest, dest, cons);
5025 return true;
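/* Worked example (immediates chosen for illustration): for cond ? 5 : 9
   the code above materializes alt - cons = 4, emits a conditional select
   of 0 or 4 into DEST, and then adds 5, producing 5 when the condition
   holds and 9 otherwise.  */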
5027 /* imm, reg */
5028 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
5030 /* Optimize for register value of 0. */
5031 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
5033 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5034 cons = force_reg (mode, cons);
5035 emit_insn (gen_rtx_SET (dest,
5036 gen_rtx_IF_THEN_ELSE (mode, cond,
5037 cons, alt)));
5038 return true;
5041 riscv_emit_int_compare (&code, &op0, &op1, true);
5042 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5044 rtx temp1 = gen_reg_rtx (mode);
5045 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
5047 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
5048 suitable for an addi instruction. If that's the case, force it
5049 into a register. */
5050 if (!SMALL_OPERAND (INTVAL (temp2)))
5051 temp2 = force_reg (mode, temp2);
5052 if (!SMALL_OPERAND (INTVAL (cons)))
5053 cons = force_reg (mode, cons);
5055 riscv_emit_binary (PLUS, temp1, alt, temp2);
5056 emit_insn (gen_rtx_SET (dest,
5057 gen_rtx_IF_THEN_ELSE (mode, cond,
5058 CONST0_RTX (mode),
5059 temp1)));
5060 riscv_emit_binary (PLUS, dest, dest, cons);
5061 return true;
5063 /* reg, 0 or imm, 0 */
5064 else if ((REG_P (cons)
5065 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
5066 && alt == CONST0_RTX (mode))
5068 riscv_emit_int_compare (&code, &op0, &op1, true);
5069 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5070 cons = force_reg (mode, cons);
5071 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
5072 cons, alt)));
5073 return true;
5075 /* reg, imm */
5076 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5078 /* Optimize for register value of 0. */
5079 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
5081 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5082 alt = force_reg (mode, alt);
5083 emit_insn (gen_rtx_SET (dest,
5084 gen_rtx_IF_THEN_ELSE (mode, cond,
5085 cons, alt)));
5086 return true;
5089 riscv_emit_int_compare (&code, &op0, &op1, true);
5090 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5092 rtx temp1 = gen_reg_rtx (mode);
5093 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
5095 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
5096 suitable for an addi instruction. If that's the case, force it
5097 into a register. */
5098 if (!SMALL_OPERAND (INTVAL (temp2)))
5099 temp2 = force_reg (mode, temp2);
5100 if (!SMALL_OPERAND (INTVAL (alt)))
5101 alt = force_reg (mode, alt);
5103 riscv_emit_binary (PLUS, temp1, cons, temp2);
5104 emit_insn (gen_rtx_SET (dest,
5105 gen_rtx_IF_THEN_ELSE (mode, cond,
5106 temp1,
5107 CONST0_RTX (mode))));
5108 riscv_emit_binary (PLUS, dest, dest, alt);
5109 return true;
5111 /* reg, reg */
5112 else if (REG_P (cons) && REG_P (alt))
5114 if (((code == EQ && rtx_equal_p (cons, op0))
5115 || (code == NE && rtx_equal_p (alt, op0)))
5116 && op1 == CONST0_RTX (mode))
5118 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5119 alt = force_reg (mode, alt);
5120 emit_insn (gen_rtx_SET (dest,
5121 gen_rtx_IF_THEN_ELSE (mode, cond,
5122 cons, alt)));
5123 return true;
5126 rtx reg1 = gen_reg_rtx (mode);
5127 rtx reg2 = gen_reg_rtx (mode);
5128 riscv_emit_int_compare (&code, &op0, &op1, true);
5129 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5130 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
5131 GET_MODE (op0), op0, op1);
5132 emit_insn (gen_rtx_SET (reg2,
5133 gen_rtx_IF_THEN_ELSE (mode, cond2,
5134 CONST0_RTX (mode),
5135 cons)));
5136 emit_insn (gen_rtx_SET (reg1,
5137 gen_rtx_IF_THEN_ELSE (mode, cond1,
5138 CONST0_RTX (mode),
5139 alt)));
5140 riscv_emit_binary (PLUS, dest, reg1, reg2);
5141 return true;
5145 return false;
5148 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
5149 least PARM_BOUNDARY bits of alignment, but will be given anything up
5150 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
5152 static unsigned int
5153 riscv_function_arg_boundary (machine_mode mode, const_tree type)
5155 unsigned int alignment;
5157 /* Use natural alignment if the type is not aggregate data. */
5158 if (type && !AGGREGATE_TYPE_P (type))
5159 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
5160 else
5161 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
5163 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
5166 /* If MODE represents an argument that can be passed or returned in
5167 floating-point registers, return the number of registers, else 0. */
5169 static unsigned
5170 riscv_pass_mode_in_fpr_p (machine_mode mode)
5172 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
5174 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5175 return 1;
5177 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5178 return 2;
5181 return 0;
5184 typedef struct {
5185 const_tree type;
5186 HOST_WIDE_INT offset;
5187 } riscv_aggregate_field;
5189 /* Identify subfields of aggregates that are candidates for passing in
5190 floating-point registers. */
5192 static int
5193 riscv_flatten_aggregate_field (const_tree type,
5194 riscv_aggregate_field fields[2],
5195 int n, HOST_WIDE_INT offset,
5196 bool ignore_zero_width_bit_field_p)
5198 switch (TREE_CODE (type))
5200 case RECORD_TYPE:
5201 /* Can't handle incomplete types nor sizes that are not fixed. */
5202 if (!COMPLETE_TYPE_P (type)
5203 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5204 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
5205 return -1;
5207 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
5208 if (TREE_CODE (f) == FIELD_DECL)
5210 if (!TYPE_P (TREE_TYPE (f)))
5211 return -1;
5213 /* The C++ front end strips zero-length bit-fields from structs.
5214 So we need to ignore them in the C front end to make C code
5215 compatible with C++ code. */
5216 if (ignore_zero_width_bit_field_p
5217 && DECL_BIT_FIELD (f)
5218 && (DECL_SIZE (f) == NULL_TREE
5219 || integer_zerop (DECL_SIZE (f))))
5221 else
5223 HOST_WIDE_INT pos = offset + int_byte_position (f);
5224 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
5225 fields, n, pos,
5226 ignore_zero_width_bit_field_p);
5228 if (n < 0)
5229 return -1;
5231 return n;
5233 case ARRAY_TYPE:
5235 HOST_WIDE_INT n_elts;
5236 riscv_aggregate_field subfields[2];
5237 tree index = TYPE_DOMAIN (type);
5238 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
5239 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
5240 subfields, 0, offset,
5241 ignore_zero_width_bit_field_p);
5243 /* Can't handle incomplete types nor sizes that are not fixed. */
5244 if (n_subfields <= 0
5245 || !COMPLETE_TYPE_P (type)
5246 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5247 || !index
5248 || !TYPE_MAX_VALUE (index)
5249 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5250 || !TYPE_MIN_VALUE (index)
5251 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5252 || !tree_fits_uhwi_p (elt_size))
5253 return -1;
5255 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5256 - tree_to_uhwi (TYPE_MIN_VALUE (index));
5257 gcc_assert (n_elts >= 0);
5259 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
5260 for (int j = 0; j < n_subfields; j++)
5262 if (n >= 2)
5263 return -1;
5265 fields[n] = subfields[j];
5266 fields[n++].offset += i * tree_to_uhwi (elt_size);
5269 return n;
5272 case COMPLEX_TYPE:
5274 /* A complex type needs to consume two fields, so N must be 0. */
5275 if (n != 0)
5276 return -1;
5278 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
5280 if (elt_size <= UNITS_PER_FP_ARG)
5282 fields[0].type = TREE_TYPE (type);
5283 fields[0].offset = offset;
5284 fields[1].type = TREE_TYPE (type);
5285 fields[1].offset = offset + elt_size;
5287 return 2;
5290 return -1;
5293 default:
5294 if (n < 2
5295 && ((SCALAR_FLOAT_TYPE_P (type)
5296 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
5297 || (INTEGRAL_TYPE_P (type)
5298 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
5300 fields[n].type = type;
5301 fields[n].offset = offset;
5302 return n + 1;
5304 else
5305 return -1;
5309 /* Identify candidate aggregates for passing in floating-point registers.
5310 Candidates have at most two fields after flattening. */
5312 static int
5313 riscv_flatten_aggregate_argument (const_tree type,
5314 riscv_aggregate_field fields[2],
5315 bool ignore_zero_width_bit_field_p)
5317 if (!type || TREE_CODE (type) != RECORD_TYPE)
5318 return -1;
5320 return riscv_flatten_aggregate_field (type, fields, 0, 0,
5321 ignore_zero_width_bit_field_p);
5324 /* See whether TYPE is a record whose fields should be returned in one or
5325 two floating-point registers. If so, populate FIELDS accordingly. */
5327 static unsigned
5328 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
5329 riscv_aggregate_field fields[2])
5331 static int warned = 0;
5333 /* This is the old ABI, which differs for C++ and C. */
5334 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5335 for (int i = 0; i < n_old; i++)
5336 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5338 n_old = -1;
5339 break;
5342 /* This is the new ABI, which is the same for C++ and C. */
5343 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5344 for (int i = 0; i < n_new; i++)
5345 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5347 n_new = -1;
5348 break;
5351 if ((n_old != n_new) && (warned == 0))
5353 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5354 "bit-fields changed in GCC 10");
5355 warned = 1;
5358 return n_new > 0 ? n_new : 0;
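/* For illustration (hypothetical type): under a hard-float ABI a struct
   such as

     struct point { float x; float y; };

   flattens to two SFmode fields and is passed or returned in a pair of
   FPRs, e.g. fa0/fa1, when enough argument FPRs remain.  */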
5361 /* See whether TYPE is a record whose fields should be returned in one
5362 floating-point register and one integer register. If so, populate
5363 FIELDS accordingly. */
5365 static bool
5366 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
5367 riscv_aggregate_field fields[2])
5369 static int warned = 0;
5371 /* This is the old ABI, which differs for C++ and C. */
5372 unsigned num_int_old = 0, num_float_old = 0;
5373 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5374 for (int i = 0; i < n_old; i++)
5376 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
5377 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
5380 /* This is the new ABI, which is the same for C++ and C. */
5381 unsigned num_int_new = 0, num_float_new = 0;
5382 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5383 for (int i = 0; i < n_new; i++)
5385 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
5386 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
5389 if (((num_int_old == 1 && num_float_old == 1
5390 && (num_int_old != num_int_new || num_float_old != num_float_new))
5391 || (num_int_new == 1 && num_float_new == 1
5392 && (num_int_old != num_int_new || num_float_old != num_float_new)))
5393 && (warned == 0))
5395 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5396 "bit-fields changed in GCC 10");
5397 warned = 1;
5400 return num_int_new == 1 && num_float_new == 1;
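/* For illustration (hypothetical type): a struct such as

     struct s { float f; int i; };

   flattens to one float and one integer field, so under a hard-float ABI
   it is passed in one FPR plus one GPR, e.g. fa0 and a0, when both kinds
   of argument registers remain.  */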
5403 /* Return the representation of an argument passed or returned in an FPR
5404 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
5405 two modes may be different for structures like:
5407 struct __attribute__((packed)) foo { float f; }
5409 where the SFmode value "f" is passed in REGNO but the struct itself
5410 has mode BLKmode. */
5412 static rtx
5413 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
5414 machine_mode value_mode,
5415 HOST_WIDE_INT offset)
5417 rtx x = gen_rtx_REG (value_mode, regno);
5419 if (type_mode != value_mode)
5421 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
5422 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
5424 return x;
5427 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
5428 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
5429 byte offset for the first value, likewise MODE2 and OFFSET2 for the
5430 second value. */
5432 static rtx
5433 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
5434 machine_mode mode1, HOST_WIDE_INT offset1,
5435 unsigned regno2, machine_mode mode2,
5436 HOST_WIDE_INT offset2)
5438 return gen_rtx_PARALLEL
5439 (mode,
5440 gen_rtvec (2,
5441 gen_rtx_EXPR_LIST (VOIDmode,
5442 gen_rtx_REG (mode1, regno1),
5443 GEN_INT (offset1)),
5444 gen_rtx_EXPR_LIST (VOIDmode,
5445 gen_rtx_REG (mode2, regno2),
5446 GEN_INT (offset2))));
5449 static rtx
5450 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
5451 unsigned gpr_base)
5453 gcc_assert (riscv_v_ext_vls_mode_p (mode));
5455 unsigned count = 0;
5456 unsigned regnum = 0;
5457 machine_mode gpr_mode = VOIDmode;
5458 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
5459 unsigned gpr_size = GET_MODE_SIZE (Xmode);
5461 if (IN_RANGE (vls_size, 0, gpr_size * 2))
5463 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
5465 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5467 regnum = gpr_base + info->gpr_offset;
5468 info->num_gprs = count;
5469 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5473 if (!regnum)
5474 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5476 gcc_assert (gpr_mode != VOIDmode);
5478 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5479 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5481 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
5484 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5485 for a call to a function whose data type is FNTYPE.
5486 For a library call, FNTYPE is 0. */
5488 void
5489 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5491 memset (cum, 0, sizeof (*cum));
5493 if (fntype)
5494 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5495 else
5496 cum->variant_cc = RISCV_CC_BASE;
5499 /* Return true if TYPE is a vector type that can be passed in vector registers. */
5502 static bool
5503 riscv_vector_type_p (const_tree type)
5505 /* Currently, only built-in scalable vector types are allowed; in the future,
5506 more vector types may be allowed, such as GNU vector types. */
5507 return riscv_vector::builtin_type_p (type);
5510 static unsigned int
5511 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5513 /* Subroutine of riscv_get_arg_info. */
5515 static rtx
5516 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5517 machine_mode mode, bool return_p)
5519 gcc_assert (riscv_v_ext_mode_p (mode));
5521 info->mr_offset = cum->num_mrs;
5522 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5524 /* For scalable mask return value. */
5525 if (return_p)
5526 return gen_rtx_REG (mode, V_REG_FIRST);
5528 /* For the first scalable mask argument. */
5529 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5531 info->num_mrs = 1;
5532 return gen_rtx_REG (mode, V_REG_FIRST);
5534 else
5536 /* The remaining scalable mask arguments are treated as scalable data
5537 arguments. */
5541 /* The number and alignment of vector registers needed for this scalable
5542 vector argument. When the mode size is less than a full vector, we use
5543 one vector register to pass it. Just call TARGET_HARD_REGNO_NREGS for
5544 the number information. */
5545 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5546 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5547 ? nregs / riscv_vector::get_nf (mode)
5548 : nregs;
5549 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5550 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5551 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5553 /* For scalable data and scalable tuple return value. */
5554 if (return_p)
5555 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5557 /* Iterate through the USED_VRS array to find vector register groups that have
5558 not been allocated and the first register is aligned with LMUL. */
5559 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5561 /* The index in USED_VRS array. */
5562 int idx = i - arg_reg_start;
5563 /* Find the first register unused. */
5564 if (!cum->used_vrs[idx])
5566 bool find_set = true;
5567 /* Ensure there are NREGS continuous unused registers. */
5568 for (int j = 1; j < nregs; j++)
5569 if (cum->used_vrs[idx + j])
5571 find_set = false;
5572 /* Update I to the last aligned register which
5573 cannot be used and the next iteration will add
5574 LMUL step to I. */
5575 i += (j / LMUL) * LMUL;
5576 break;
5579 if (find_set)
5581 info->num_vrs = nregs;
5582 info->vr_offset = idx;
5583 return gen_rtx_REG (mode, i + V_REG_FIRST);
5588 return NULL_RTX;
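/* For illustration (assuming the usual v8..v23 argument window): an
   argument of an LMUL=2 mode occupies two vector registers and must start
   at an even register number, so the loop above tries v8, v10, and so on,
   skipping any group that overlaps registers already marked in
   USED_VRS.  */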
5591 /* Fill INFO with information about a single argument, and return an RTL
5592 pattern to pass or return the argument. Return NULL_RTX if the argument
5593 cannot be passed or returned in registers; in that case it may be passed
5594 by reference or through the stack. CUM is the cumulative state for earlier arguments.
5595 MODE is the mode of this argument and TYPE is its type (if known). NAMED is
5596 true if this is a named (fixed) argument rather than a variable one. RETURN_P
5597 is true if returning the argument, or false if passing the argument. */
5599 static rtx
5600 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5601 machine_mode mode, const_tree type, bool named,
5602 bool return_p)
5604 unsigned num_bytes, num_words;
5605 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5606 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5607 unsigned alignment = riscv_function_arg_boundary (mode, type);
5609 memset (info, 0, sizeof (*info));
5610 info->gpr_offset = cum->num_gprs;
5611 info->fpr_offset = cum->num_fprs;
5613 /* Passed by reference when the scalable vector argument is anonymous. */
5614 if (riscv_v_ext_mode_p (mode) && !named)
5615 return NULL_RTX;
5617 if (named)
5619 riscv_aggregate_field fields[2];
5620 unsigned fregno = fpr_base + info->fpr_offset;
5621 unsigned gregno = gpr_base + info->gpr_offset;
5623 /* Pass one- or two-element floating-point aggregates in FPRs. */
5624 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5625 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5626 switch (info->num_fprs)
5628 case 1:
5629 return riscv_pass_fpr_single (mode, fregno,
5630 TYPE_MODE (fields[0].type),
5631 fields[0].offset);
5633 case 2:
5634 return riscv_pass_fpr_pair (mode, fregno,
5635 TYPE_MODE (fields[0].type),
5636 fields[0].offset,
5637 fregno + 1,
5638 TYPE_MODE (fields[1].type),
5639 fields[1].offset);
5641 default:
5642 gcc_unreachable ();
5645 /* Pass real and complex floating-point numbers in FPRs. */
5646 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5647 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5648 switch (GET_MODE_CLASS (mode))
5650 case MODE_FLOAT:
5651 return gen_rtx_REG (mode, fregno);
5653 case MODE_COMPLEX_FLOAT:
5654 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5655 fregno + 1, GET_MODE_INNER (mode),
5656 GET_MODE_UNIT_SIZE (mode));
5658 default:
5659 gcc_unreachable ();
5662 /* Pass structs with one float and one integer in an FPR and a GPR. */
5663 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5664 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5665 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5667 info->num_gprs = 1;
5668 info->num_fprs = 1;
5670 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5671 std::swap (fregno, gregno);
5673 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5674 fields[0].offset,
5675 gregno, TYPE_MODE (fields[1].type),
5676 fields[1].offset);
5679 /* For scalable vector argument. */
5680 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5681 return riscv_get_vector_arg (info, cum, mode, return_p);
5683 /* For vls mode aggregated in gpr. */
5684 if (riscv_v_ext_vls_mode_p (mode))
5685 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5688 /* Work out the size of the argument. */
5689 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5690 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5692 /* Doubleword-aligned varargs start on an even register boundary. */
5693 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5694 info->gpr_offset += info->gpr_offset & 1;
5696 /* Partition the argument between registers and stack. */
5697 info->num_fprs = 0;
5698 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5699 info->stack_p = (num_words - info->num_gprs) != 0;
5701 if (info->num_gprs || return_p)
5702 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5704 return NULL_RTX;
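/* As a worked example of the logic above (illustrative, assuming the
   hard-float ABI and enough free argument registers): a plain 'double'
   lands in the first free FPR; a struct such as

       struct df { float a; float b; };   /* two FPRs */

   is passed in an FPR pair, while

       struct mix { float f; int i; };    /* one FPR plus one GPR */

   is split between an FPR and a GPR.  Anything that exhausts the
   register budget falls through to the GPR/stack partitioning above. */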
5707 /* Implement TARGET_FUNCTION_ARG. */
5709 static rtx
5710 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5712 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5713 struct riscv_arg_info info;
5715 if (arg.end_marker_p ())
5716 /* Return the calling convention used by the current function. */
5717 return gen_int_mode (cum->variant_cc, SImode);
5719 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5722 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5724 static void
5725 riscv_function_arg_advance (cumulative_args_t cum_v,
5726 const function_arg_info &arg)
5728 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5729 struct riscv_arg_info info;
5731 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5733 /* Mark the corresponding registers in USED_VRS as used. */
5734 for (unsigned int i = 0; i < info.num_vrs; i++)
5736 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5737 cum->used_vrs[info.vr_offset + i] = true;
5740 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5742 error ("RVV type %qT cannot be passed to an unprototyped function",
5743 arg.type);
5744 /* Avoid repeating the message. */
5745 cum->variant_cc = RISCV_CC_V;
5748 /* Advance the register count. This has the effect of setting
5749 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5750 argument required us to skip the final GPR and pass the whole
5751 argument on the stack. */
5752 cum->num_fprs = info.fpr_offset + info.num_fprs;
5753 cum->num_gprs = info.gpr_offset + info.num_gprs;
5754 cum->num_mrs = info.mr_offset + info.num_mrs;
5757 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5759 static int
5760 riscv_arg_partial_bytes (cumulative_args_t cum,
5761 const function_arg_info &generic_arg)
5763 struct riscv_arg_info arg;
5765 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5766 generic_arg.type, generic_arg.named, false);
5767 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
5770 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5771 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5772 VALTYPE is null and MODE is the mode of the return value. */
5775 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5777 struct riscv_arg_info info;
5778 CUMULATIVE_ARGS args;
5780 if (type)
5782 int unsigned_p = TYPE_UNSIGNED (type);
5784 mode = TYPE_MODE (type);
5786 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5787 return values, promote the mode here too. */
5788 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5791 memset (&args, 0, sizeof args);
5793 return riscv_get_arg_info (&info, &args, mode, type, true, true);
5796 /* Implement TARGET_PASS_BY_REFERENCE. */
5798 static bool
5799 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5801 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5802 struct riscv_arg_info info;
5803 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5805 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5806 never pass variadic arguments in floating-point and vector registers,
5807 so we can avoid the call to riscv_get_arg_info in this case. */
5808 if (cum != NULL)
5810 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5812 /* Don't pass by reference if we can use a floating-point register. */
5813 if (info.num_fprs)
5814 return false;
5816 /* Don't pass by reference if we can use general registers for VLS modes. */
5817 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
5818 return false;
5820 /* Don't pass by reference if we can use vector register groups. */
5821 if (info.num_vrs > 0 || info.num_mrs > 0)
5822 return false;
5825 /* Passed by reference when:
5826 1. The scalable vector argument is anonymous.
5827 2. Args cannot be passed through vector registers. */
5828 if (riscv_v_ext_mode_p (arg.mode))
5829 return true;
5831 /* Pass by reference if the data do not fit in two integer registers. */
5832 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
5835 /* Implement TARGET_RETURN_IN_MEMORY. */
5837 static bool
5838 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5840 CUMULATIVE_ARGS args;
5841 cumulative_args_t cum = pack_cumulative_args (&args);
5843 /* The rules for returning in memory are the same as for passing the
5844 first named argument by reference. */
5845 memset (&args, 0, sizeof args);
5846 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5847 return riscv_pass_by_reference (cum, arg);
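/* For instance, under this rule a 12-byte struct is returned in memory on
   RV32 (it does not fit in 2 * UNITS_PER_WORD = 8 bytes) but in registers
   on RV64, where two 8-byte GPRs suffice.  This mirrors the
   pass-by-reference size check used above. */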
5850 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5852 static void
5853 riscv_setup_incoming_varargs (cumulative_args_t cum,
5854 const function_arg_info &arg,
5855 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5857 CUMULATIVE_ARGS local_cum;
5858 int gp_saved;
5860 /* The caller has advanced CUM up to, but not beyond, the last named
5861 argument. Advance a local copy of CUM past the last "real" named
5862 argument, to find out how many registers are left over. */
5863 local_cum = *get_cumulative_args (cum);
5864 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
5865 || arg.type != NULL_TREE)
5866 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5868 /* Find out how many registers we need to save. */
5869 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5871 if (!no_rtl && gp_saved > 0)
5873 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5874 REG_PARM_STACK_SPACE (cfun->decl)
5875 - gp_saved * UNITS_PER_WORD);
5876 rtx mem = gen_frame_mem (BLKmode, ptr);
5877 set_mem_alias_set (mem, get_varargs_alias_set ());
5879 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5880 mem, gp_saved);
5882 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5883 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
5886 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5888 static const predefined_function_abi &
5889 riscv_v_abi ()
5891 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5892 if (!v_abi.initialized_p ())
5894 HARD_REG_SET full_reg_clobbers
5895 = default_function_abi.full_reg_clobbers ();
5896 /* Callee-saved vector registers: v1-v7, v24-v31. */
5897 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5898 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5899 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5900 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5901 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5903 return v_abi;
5906 static bool
5907 riscv_vector_int_type_p (const_tree type)
5909 machine_mode mode = TYPE_MODE (type);
5911 if (VECTOR_MODE_P (mode))
5912 return INTEGRAL_MODE_P (GET_MODE_INNER (mode));
5914 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5915 return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
5918 static bool
5919 riscv_vector_float_type_p (const_tree type)
5921 machine_mode mode = TYPE_MODE (type);
5923 if (VECTOR_MODE_P (mode))
5924 return FLOAT_MODE_P (GET_MODE_INNER (mode));
5926 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5927 return strstr (name, "vfloat") != NULL;
5930 static int
5931 riscv_vector_element_bitsize (const_tree type)
5933 machine_mode mode = TYPE_MODE (type);
5935 if (VECTOR_MODE_P (mode))
5936 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
5938 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5940 if (strstr (name, "bool") != NULL)
5941 return 1;
5942 else if (strstr (name, "int8") != NULL)
5943 return 8;
5944 else if (strstr (name, "int16") != NULL || strstr (name, "float16") != NULL)
5945 return 16;
5946 else if (strstr (name, "int32") != NULL || strstr (name, "float32") != NULL)
5947 return 32;
5948 else if (strstr (name, "int64") != NULL || strstr (name, "float64") != NULL)
5949 return 64;
5951 gcc_unreachable ();
5954 static int
5955 riscv_vector_required_min_vlen (const_tree type)
5957 machine_mode mode = TYPE_MODE (type);
5959 if (riscv_v_ext_mode_p (mode))
5960 return TARGET_MIN_VLEN;
5962 int element_bitsize = riscv_vector_element_bitsize (type);
5963 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5965 if (strstr (name, "bool64") != NULL)
5966 return element_bitsize * 64;
5967 else if (strstr (name, "bool32") != NULL)
5968 return element_bitsize * 32;
5969 else if (strstr (name, "bool16") != NULL)
5970 return element_bitsize * 16;
5971 else if (strstr (name, "bool8") != NULL)
5972 return element_bitsize * 8;
5973 else if (strstr (name, "bool4") != NULL)
5974 return element_bitsize * 4;
5975 else if (strstr (name, "bool2") != NULL)
5976 return element_bitsize * 2;
5978 if (strstr (name, "mf8") != NULL)
5979 return element_bitsize * 8;
5980 else if (strstr (name, "mf4") != NULL)
5981 return element_bitsize * 4;
5982 else if (strstr (name, "mf2") != NULL)
5983 return element_bitsize * 2;
5985 return element_bitsize;
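/* Examples of the name-based computation above (illustrative, derived from
   the strstr checks): 'vint32mf2_t' has 32-bit elements in an mf2 group,
   so it requires VLEN >= 32 * 2 = 64; 'vbool64_t' has 1-bit elements and
   requires VLEN >= 1 * 64 = 64; 'vint8m1_t' only needs VLEN >= 8. */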
5988 static void
5989 riscv_validate_vector_type (const_tree type, const char *hint)
5991 gcc_assert (riscv_vector_type_p (type));
5993 if (!TARGET_VECTOR)
5995 error_at (input_location, "%s %qT requires the V ISA extension",
5996 hint, type);
5997 return;
6000 int element_bitsize = riscv_vector_element_bitsize (type);
6001 bool int_type_p = riscv_vector_int_type_p (type);
6003 if (int_type_p && element_bitsize == 64
6004 && !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
6006 error_at (input_location,
6007 "%s %qT requires the zve64x, zve64f, zve64d or v ISA extension",
6008 hint, type);
6009 return;
6012 bool float_type_p = riscv_vector_float_type_p (type);
6014 if (float_type_p && element_bitsize == 16
6015 && !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
6017 error_at (input_location,
6018 "%s %qT requires the zvfhmin or zvfh ISA extension",
6019 hint, type);
6020 return;
6023 if (float_type_p && element_bitsize == 32
6024 && !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
6026 error_at (input_location,
6027 "%s %qT requires the zve32f, zve64f, zve64d or v ISA extension",
6028 hint, type);
6029 return;
6032 if (float_type_p && element_bitsize == 64
6033 && !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
6035 error_at (input_location,
6036 "%s %qT requires the zve64d or v ISA extension", hint, type);
6037 return;
6040 int required_min_vlen = riscv_vector_required_min_vlen (type);
6042 if (TARGET_MIN_VLEN < required_min_vlen)
6044 error_at (
6045 input_location,
6046 "%s %qT requires the minimal vector length %qd but %qd is given",
6047 hint, type, required_min_vlen, TARGET_MIN_VLEN);
6048 return;
6052 /* Return true if a function with type FNTYPE returns its value in
6053 RISC-V V registers. */
6055 static bool
6056 riscv_return_value_is_vector_type_p (const_tree fntype)
6058 tree return_type = TREE_TYPE (fntype);
6060 if (riscv_vector_type_p (return_type))
6062 riscv_validate_vector_type (return_type, "return type");
6063 return true;
6065 else
6066 return false;
6069 /* Return true if a function with type FNTYPE takes arguments in
6070 RISC-V V registers. */
6072 static bool
6073 riscv_arguments_is_vector_type_p (const_tree fntype)
6075 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
6076 chain = TREE_CHAIN (chain))
6078 tree arg_type = TREE_VALUE (chain);
6079 if (riscv_vector_type_p (arg_type))
6081 riscv_validate_vector_type (arg_type, "argument type");
6082 return true;
6086 return false;
6089 /* Return true if FUNC is a riscv_vector_cc function.
6090 For more details, see the link below.
6091 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
6092 static bool
6093 riscv_vector_cc_function_p (const_tree fntype)
6095 tree attr = TYPE_ATTRIBUTES (fntype);
6096 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
6097 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
6099 if (vector_cc_p && !TARGET_VECTOR)
6100 error_at (input_location,
6101 "function attribute %qs requires the V ISA extension",
6102 "riscv_vector_cc");
6104 return vector_cc_p;
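/* A declaration opting into this variant calling convention might look
   like (illustrative):

       void f (vint32m1_t v) __attribute__ ((riscv_vector_cc));

   which makes the lookup_attribute check above succeed. */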
6107 /* Implement TARGET_FNTYPE_ABI. */
6109 static const predefined_function_abi &
6110 riscv_fntype_abi (const_tree fntype)
6112 /* Implement the vector calling convention. For more details, see
6113 the link below.
6114 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
6115 if (riscv_return_value_is_vector_type_p (fntype)
6116 || riscv_arguments_is_vector_type_p (fntype)
6117 || riscv_vector_cc_function_p (fntype))
6118 return riscv_v_abi ();
6120 return default_function_abi;
6123 /* Return the RISC-V calling convention of a call insn, given its UNSPEC_CALLEE_CC USE rtx. */
6124 riscv_cc
6125 get_riscv_cc (const rtx use)
6127 gcc_assert (GET_CODE (use) == USE);
6128 rtx unspec = XEXP (use, 0);
6129 gcc_assert (GET_CODE (unspec) == UNSPEC
6130 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
6131 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
6132 gcc_assert (cc < RISCV_CC_UNKNOWN);
6133 return cc;
6136 /* Implement TARGET_INSN_CALLEE_ABI. */
6138 const predefined_function_abi &
6139 riscv_insn_callee_abi (const rtx_insn *insn)
6141 rtx pat = PATTERN (insn);
6142 gcc_assert (GET_CODE (pat) == PARALLEL);
6143 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
6144 return function_abis[cc];
6147 /* Handle an attribute requiring a FUNCTION_DECL;
6148 arguments as in struct attribute_spec.handler. */
6149 static tree
6150 riscv_handle_fndecl_attribute (tree *node, tree name,
6151 tree args ATTRIBUTE_UNUSED,
6152 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6154 if (TREE_CODE (*node) != FUNCTION_DECL)
6156 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6157 name);
6158 *no_add_attrs = true;
6161 return NULL_TREE;
6164 /* Verify type-based attributes. NODE is what the attribute is being
6165 applied to. NAME is the attribute name. ARGS are the attribute args.
6166 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
6167 the attribute should be ignored. */
6169 static tree
6170 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6171 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6173 /* Check for an argument. */
6174 if (is_attribute_p ("interrupt", name))
6176 if (args)
6178 tree cst = TREE_VALUE (args);
6179 const char *string;
6181 if (TREE_CODE (cst) != STRING_CST)
6183 warning (OPT_Wattributes,
6184 "%qE attribute requires a string argument",
6185 name);
6186 *no_add_attrs = true;
6187 return NULL_TREE;
6190 string = TREE_STRING_POINTER (cst);
6191 if (strcmp (string, "user") && strcmp (string, "supervisor")
6192 && strcmp (string, "machine"))
6194 warning (OPT_Wattributes,
6195 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
6196 "or %<\"machine\"%>", name);
6197 *no_add_attrs = true;
6202 return NULL_TREE;
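/* An accepted use of the attribute validated above would be
   (illustrative):

       void handler (void) __attribute__ ((interrupt ("machine")));

   Any string other than "user", "supervisor" or "machine" triggers the
   warning and drops the attribute. */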
6205 static tree
6206 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
6207 ATTRIBUTE_UNUSED int flags,
6208 bool *no_add_attrs)
6210 if (!is_attribute_p ("riscv_rvv_vector_bits", name))
6211 return NULL_TREE;
6213 *no_add_attrs = true;
6215 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL)
6217 error (
6218 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified",
6219 "riscv_rvv_vector_bits");
6220 return NULL_TREE;
6223 tree type = *node;
6225 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type))
6227 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type);
6228 return NULL_TREE;
6231 tree size = TREE_VALUE (args);
6233 if (TREE_CODE (size) != INTEGER_CST)
6235 error ("%qs requires an integer constant", "riscv_rvv_vector_bits");
6236 return NULL_TREE;
6239 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size);
6240 unsigned HOST_WIDE_INT type_mode_bits
6241 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant ();
6243 if (args_in_bits != type_mode_bits)
6245 error ("invalid RVV vector size %qd, "
6246 "expected size is %qd based on LMUL of type and %qs",
6247 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl");
6248 return NULL_TREE;
6251 type = build_distinct_type_copy (type);
6252 TYPE_ATTRIBUTES (type)
6253 = remove_attribute ("RVV sizeless type",
6254 copy_list (TYPE_ATTRIBUTES (type)));
6256 /* Operations like ALU/compare on vbool*_t are not well defined;
6257 continue to treat vbool*_t as indivisible. */
6258 if (!VECTOR_BOOLEAN_TYPE_P (type))
6259 TYPE_INDIVISIBLE_P (type) = 0;
6261 *node = type;
6263 return NULL_TREE;
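/* For example, when compiling with -mrvv-vector-bits=zvl on a target where
   VLEN is 256, the following (illustrative) typedef passes the size check
   above, since vint32m1_t then occupies exactly 256 bits:

       typedef vint32m1_t fixed_vint32m1_t
	 __attribute__ ((riscv_rvv_vector_bits (256)));

   The copied type loses the "RVV sizeless type" attribute and so behaves
   like an ordinary fixed-length vector. */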
6266 /* Return true if function TYPE is an interrupt function. */
6267 static bool
6268 riscv_interrupt_type_p (tree type)
6270 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
6273 /* Return true if FUNC is a naked function. */
6274 static bool
6275 riscv_naked_function_p (tree func)
6277 tree func_decl = func;
6278 if (func == NULL_TREE)
6279 func_decl = current_function_decl;
6280 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
6283 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
6284 static bool
6285 riscv_allocate_stack_slots_for_args ()
6287 /* Naked functions should not allocate stack slots for arguments. */
6288 return !riscv_naked_function_p (current_function_decl);
6291 /* Implement TARGET_WARN_FUNC_RETURN. */
6292 static bool
6293 riscv_warn_func_return (tree decl)
6295 /* Naked functions are implemented entirely in assembly, including the
6296 return sequence, so suppress warnings about this. */
6297 return !riscv_naked_function_p (decl);
6300 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6302 static void
6303 riscv_va_start (tree valist, rtx nextarg)
6305 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
6306 std_expand_builtin_va_start (valist, nextarg);
6309 /* Make ADDR suitable for use as a call or sibcall target. */
6312 riscv_legitimize_call_address (rtx addr)
6314 if (!call_insn_operand (addr, VOIDmode))
6316 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
6317 riscv_emit_move (reg, addr);
6318 return reg;
6320 return addr;
6323 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
6324 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
6326 static void
6327 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
6329 const char *reloc;
6331 switch (riscv_classify_symbolic_expression (op))
6333 case SYMBOL_ABSOLUTE:
6334 reloc = hi_reloc ? "%hi" : "%lo";
6335 break;
6337 case SYMBOL_PCREL:
6338 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
6339 break;
6341 case SYMBOL_TLS_LE:
6342 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
6343 break;
6345 default:
6346 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
6347 return;
6350 fprintf (file, "%s(", reloc);
6351 output_addr_const (file, riscv_strip_unspec_address (op));
6352 fputc (')', file);
6355 /* Return the memory model that encapsulates both given models. */
6357 enum memmodel
6358 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
6360 model1 = memmodel_base (model1);
6361 model2 = memmodel_base (model2);
6363 enum memmodel weaker = model1 <= model2 ? model1: model2;
6364 enum memmodel stronger = model1 > model2 ? model1: model2;
6366 switch (stronger)
6368 case MEMMODEL_SEQ_CST:
6369 case MEMMODEL_ACQ_REL:
6370 return stronger;
6371 case MEMMODEL_RELEASE:
6372 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
6373 return MEMMODEL_ACQ_REL;
6374 else
6375 return stronger;
6376 case MEMMODEL_ACQUIRE:
6377 case MEMMODEL_CONSUME:
6378 case MEMMODEL_RELAXED:
6379 return stronger;
6380 default:
6381 gcc_unreachable ();
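/* For example, combining MEMMODEL_ACQUIRE with MEMMODEL_RELEASE yields
   MEMMODEL_ACQ_REL, and combining anything with MEMMODEL_SEQ_CST yields
   MEMMODEL_SEQ_CST; e.g. (illustrative)
     riscv_union_memmodels (MEMMODEL_RELEASE, MEMMODEL_ACQUIRE)
   returns MEMMODEL_ACQ_REL, as needed when merging the success and
   failure orderings of a compare-and-swap. */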
6385 /* Return true if the .AQ suffix should be added to an AMO to implement the
6386 acquire portion of memory model MODEL. */
6388 static bool
6389 riscv_memmodel_needs_amo_acquire (enum memmodel model)
6391 /* ZTSO amo mappings require no annotations. */
6392 if (TARGET_ZTSO)
6393 return false;
6395 switch (model)
6397 case MEMMODEL_ACQ_REL:
6398 case MEMMODEL_SEQ_CST:
6399 case MEMMODEL_ACQUIRE:
6400 case MEMMODEL_CONSUME:
6401 return true;
6403 case MEMMODEL_RELEASE:
6404 case MEMMODEL_RELAXED:
6405 return false;
6407 default:
6408 gcc_unreachable ();
6412 /* Return true if the .RL suffix should be added to an AMO to implement the
6413 release portion of memory model MODEL. */
6415 static bool
6416 riscv_memmodel_needs_amo_release (enum memmodel model)
6418 /* ZTSO amo mappings require no annotations. */
6419 if (TARGET_ZTSO)
6420 return false;
6422 switch (model)
6424 case MEMMODEL_ACQ_REL:
6425 case MEMMODEL_SEQ_CST:
6426 case MEMMODEL_RELEASE:
6427 return true;
6429 case MEMMODEL_ACQUIRE:
6430 case MEMMODEL_CONSUME:
6431 case MEMMODEL_RELAXED:
6432 return false;
6434 default:
6435 gcc_unreachable ();
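/* Taken together, the two predicates above give the AMO annotations:
   seq_cst and acq_rel -> ".aqrl", acquire/consume -> ".aq",
   release -> ".rl", relaxed -> no suffix; and no suffix at all under
   Ztso, whose hardware ordering already provides the semantics. */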
6439 /* Get the REGNO alignment of a vector mode.
6440 The alignment equals LMUL when LMUL >= 1.
6441 Otherwise, the alignment is 1. */
6443 riscv_get_v_regno_alignment (machine_mode mode)
6445 /* Per the vector spec (section 3.3.2), when LMUL = 2, 4 or 8, register
6446 numbers must be multiples of 2, 4 or 8, but mask vector registers may use any register number. */
6447 int lmul = 1;
6448 machine_mode rvv_mode = mode;
6449 if (riscv_v_ext_vls_mode_p (rvv_mode))
6451 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
6452 if (size < TARGET_MIN_VLEN)
6453 return 1;
6454 else
6455 return size / TARGET_MIN_VLEN;
6457 if (riscv_v_ext_tuple_mode_p (rvv_mode))
6458 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
6459 poly_int64 size = GET_MODE_SIZE (rvv_mode);
6460 if (known_gt (size, UNITS_PER_V_REG))
6461 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
6462 return lmul;
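/* Illustrative results: an LMUL=8 mode such as RVVM8SImode spans eight
   vector registers, so the alignment is 8 (register numbers v0, v8, v16,
   v24); fractional-LMUL and mask modes get alignment 1; a VLS mode twice
   the size of TARGET_MIN_VLEN gets alignment 2. */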
6465 /* Define ASM_OUTPUT_OPCODE to do anything special before
6466 emitting an opcode. */
6467 const char *
6468 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
6470 if (TARGET_XTHEADVECTOR)
6471 return th_asm_output_opcode (asm_out_file, p);
6473 return p;
6476 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
6478 'h' Print the high-part relocation associated with OP, after stripping
6479 any outermost HIGH.
6480 'R' Print the low-part relocation associated with OP.
6481 'C' Print the integer branch condition for comparison OP.
6482 'N' Print the inverse of the integer branch condition for comparison OP.
6483 'A' Print the atomic operation suffix for memory model OP.
6484 'I' Print the LR suffix for memory model OP.
6485 'J' Print the SC suffix for memory model OP.
6486 'z' Print x0 if OP is zero, otherwise print OP normally.
6487 'i' Print i if the operand is not a register.
6488 'S' Print shift-index of single-bit mask OP.
6489 'T' Print shift-index of inverted single-bit mask OP.
6490 '~' Print w if TARGET_64BIT is true; otherwise print nothing.
6492 Please keep this list and the list in riscv.md in sync. */
6494 static void
6495 riscv_print_operand (FILE *file, rtx op, int letter)
6497 /* `~` does not take an operand, so OP will be null.
6498 Check for this before accessing OP. */
6500 if (letter == '~')
6502 if (TARGET_64BIT)
6503 fputc('w', file);
6504 return;
6506 machine_mode mode = GET_MODE (op);
6507 enum rtx_code code = GET_CODE (op);
6509 switch (letter)
6511 case 'o': {
6512 /* Print 'OP' variant for RVV instructions.
6513 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
6514 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
6515 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
6516 if (riscv_v_ext_mode_p (mode))
6518 if (REG_P (op))
6519 asm_fprintf (file, "v");
6520 else if (CONST_VECTOR_P (op))
6521 asm_fprintf (file, "i");
6522 else
6523 output_operand_lossage ("invalid vector operand");
6525 else
6527 if (CONST_INT_P (op))
6528 asm_fprintf (file, "i");
6529 else
6530 asm_fprintf (file, "x");
6532 break;
6534 case 'v': {
6535 rtx elt;
6537 if (REG_P (op))
6538 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
6539 else
6541 if (!const_vec_duplicate_p (op, &elt))
6542 output_operand_lossage ("invalid vector constant");
6543 else if (satisfies_constraint_Wc0 (op))
6544 asm_fprintf (file, "0");
6545 else if (satisfies_constraint_vi (op)
6546 || satisfies_constraint_vj (op)
6547 || satisfies_constraint_vk (op))
6548 asm_fprintf (file, "%wd", INTVAL (elt));
6549 else
6550 output_operand_lossage ("invalid vector constant");
6552 break;
6554 case 'V': {
6555 rtx elt;
6556 if (!const_vec_duplicate_p (op, &elt))
6557 output_operand_lossage ("invalid vector constant");
6558 else if (satisfies_constraint_vj (op))
6559 asm_fprintf (file, "%wd", -INTVAL (elt));
6560 else
6561 output_operand_lossage ("invalid vector constant");
6562 break;
6564 case 'm': {
6565 if (riscv_v_ext_mode_p (mode))
6567 /* Calculate lmul according to mode and print the value. */
6568 int lmul = riscv_get_v_regno_alignment (mode);
6569 asm_fprintf (file, "%d", lmul);
6571 else if (code == CONST_INT)
6573 /* If it is a const_int value, it denotes the VLMUL field enum. */
6574 unsigned int vlmul = UINTVAL (op);
6575 switch (vlmul)
6577 case riscv_vector::LMUL_1:
6578 asm_fprintf (file, "%s", "m1");
6579 break;
6580 case riscv_vector::LMUL_2:
6581 asm_fprintf (file, "%s", "m2");
6582 break;
6583 case riscv_vector::LMUL_4:
6584 asm_fprintf (file, "%s", "m4");
6585 break;
6586 case riscv_vector::LMUL_8:
6587 asm_fprintf (file, "%s", "m8");
6588 break;
6589 case riscv_vector::LMUL_F8:
6590 asm_fprintf (file, "%s", "mf8");
6591 break;
6592 case riscv_vector::LMUL_F4:
6593 asm_fprintf (file, "%s", "mf4");
6594 break;
6595 case riscv_vector::LMUL_F2:
6596 asm_fprintf (file, "%s", "mf2");
6597 break;
6598 default:
6599 gcc_unreachable ();
6602 else
6603 output_operand_lossage ("invalid vector constant");
6604 break;
6606 case 'p': {
6607 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
6609 /* Print for RVV mask operand.
6610 If op is reg, print ",v0.t".
6611 Otherwise, don't print anything. */
6612 if (code == REG)
6613 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
6615 else if (code == CONST_INT)
6617 /* Tail && Mask policy. */
6618 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
6620 else
6621 output_operand_lossage ("invalid vector constant");
6622 break;
6624 case 'h':
6625 if (code == HIGH)
6626 op = XEXP (op, 0);
6627 riscv_print_operand_reloc (file, op, true);
6628 break;
6630 case 'R':
6631 riscv_print_operand_reloc (file, op, false);
6632 break;
6634 case 'C':
6635 /* The RTL names match the instruction names. */
6636 fputs (GET_RTX_NAME (code), file);
6637 break;
6639 case 'N':
6640 /* The RTL names match the instruction names. */
6641 fputs (GET_RTX_NAME (reverse_condition (code)), file);
6642 break;
6644 case 'A': {
6645 const enum memmodel model = memmodel_base (INTVAL (op));
6646 if (riscv_memmodel_needs_amo_acquire (model)
6647 && riscv_memmodel_needs_amo_release (model))
6648 fputs (".aqrl", file);
6649 else if (riscv_memmodel_needs_amo_acquire (model))
6650 fputs (".aq", file);
6651 else if (riscv_memmodel_needs_amo_release (model))
6652 fputs (".rl", file);
6653 break;
6656 case 'I': {
6657 const enum memmodel model = memmodel_base (INTVAL (op));
6658 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
6659 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
6660 break;
6661 else if (model == MEMMODEL_SEQ_CST)
6662 fputs (".aqrl", file);
6663 else if (riscv_memmodel_needs_amo_acquire (model))
6664 fputs (".aq", file);
6665 break;
6668 case 'J': {
6669 const enum memmodel model = memmodel_base (INTVAL (op));
6670 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
6671 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
6672 fputs (".rl", file);
6673 else if (TARGET_ZTSO)
6674 break;
6675 else if (riscv_memmodel_needs_amo_release (model))
6676 fputs (".rl", file);
6677 break;
6680 case 'i':
6681 if (code != REG)
6682 fputs ("i", file);
6683 break;
6685 case 'B':
6686 fputs (GET_RTX_NAME (code), file);
6687 break;
6689 case 'S':
6691 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6692 output_addr_const (file, newop);
6693 break;
6695 case 'T':
6697 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6698 output_addr_const (file, newop);
6699 break;
6701 case 'X':
6703 int ival = INTVAL (op) + 1;
6704 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6705 output_addr_const (file, newop);
6706 break;
6708 case 'Y':
6710 unsigned int imm = (UINTVAL (op) & 63);
6711 gcc_assert (imm <= 63);
6712 rtx newop = GEN_INT (imm);
6713 output_addr_const (file, newop);
6714 break;
6716 default:
6717 switch (code)
6719 case REG:
6720 if (letter && letter != 'z')
6721 output_operand_lossage ("invalid use of '%%%c'", letter);
6722 fprintf (file, "%s", reg_names[REGNO (op)]);
6723 break;
6725 case MEM:
6726 if (letter && letter != 'z')
6727 output_operand_lossage ("invalid use of '%%%c'", letter);
6728 else
6729 output_address (mode, XEXP (op, 0));
6730 break;
6732 case CONST_DOUBLE:
6734 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6736 fputs (reg_names[GP_REG_FIRST], file);
6737 break;
6740 int fli_index = riscv_float_const_rtx_index_for_fli (op);
6741 if (fli_index == -1 || fli_index > 31)
6743 output_operand_lossage ("invalid use of '%%%c'", letter);
6744 break;
6746 asm_fprintf (file, "%s", fli_value_print[fli_index]);
6747 break;
6750 default:
6751 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6752 fputs (reg_names[GP_REG_FIRST], file);
6753 else if (letter && letter != 'z')
6754 output_operand_lossage ("invalid use of '%%%c'", letter);
6755 else
6756 output_addr_const (file, riscv_strip_unspec_address (op));
6757 break;
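/* As an (illustrative, hypothetical) example of the punctuation code
   handled first: an insn template such as "sll%~\t%0,%1,%2" would print
   "sllw" on RV64 (where SImode shifts need the w form) and plain "sll"
   on RV32, since '%~' expands to "w" only when TARGET_64BIT. */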
6762 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
6763 static bool
6764 riscv_print_operand_punct_valid_p (unsigned char code)
6766 return (code == '~');
6769 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6771 static void
6772 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6774 struct riscv_address_info addr;
6776 if (th_print_operand_address (file, mode, x))
6777 return;
6779 if (riscv_classify_address (&addr, x, word_mode, true))
6780 switch (addr.type)
6782 case ADDRESS_REG:
6783 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
6784 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6785 return;
6787 case ADDRESS_LO_SUM:
6788 riscv_print_operand_reloc (file, addr.offset, false);
6789 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6790 return;
6792 case ADDRESS_CONST_INT:
6793 output_addr_const (file, x);
6794 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
6795 return;
6797 case ADDRESS_SYMBOLIC:
6798 output_addr_const (file, riscv_strip_unspec_address (x));
6799 return;
6801 default:
6802 gcc_unreachable ();
6805 gcc_unreachable ();
6808 static bool
6809 riscv_size_ok_for_small_data_p (int size)
6811 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
6814 /* Return true if EXP should be placed in the small data section. */
6816 static bool
6817 riscv_in_small_data_p (const_tree x)
6819 /* default_use_anchors_for_symbol_p doesn't gather small data under an
6820 anchor symbol for addressing nearby objects. In the large code model,
6821 we get better results from the anchor optimization, so avoid small data. */
6822 if (riscv_cmodel == CM_LARGE)
6823 return false;
6825 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
6826 return false;
6828 if (VAR_P (x) && DECL_SECTION_NAME (x))
6830 const char *sec = DECL_SECTION_NAME (x);
6831 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
6834 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
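/* For example, with -msmall-data-limit=8 (which sets g_switch_value), a
   4-byte global ends up in .sdata/.sbss and can be addressed gp-relative,
   while strings, functions, and anything above the limit stay in the
   ordinary sections. */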
6837 /* Switch to the appropriate section for output of DECL. */
6839 static section *
6840 riscv_select_section (tree decl, int reloc,
6841 unsigned HOST_WIDE_INT align)
6843 switch (categorize_decl_for_section (decl, reloc))
6845 case SECCAT_SRODATA:
6846 return get_named_section (decl, ".srodata", reloc);
6848 default:
6849 return default_elf_select_section (decl, reloc, align);
6853 /* Pick a unique section name for DECL, handling small read-only data. */
6855 static void
6856 riscv_unique_section (tree decl, int reloc)
6858 const char *prefix = NULL;
6859 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6861 switch (categorize_decl_for_section (decl, reloc))
6863 case SECCAT_SRODATA:
6864 prefix = one_only ? ".sr" : ".srodata";
6865 break;
6867 default:
6868 break;
6870 if (prefix)
6872 const char *name, *linkonce;
6873 char *string;
6875 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6876 name = targetm.strip_name_encoding (name);
6878 /* If we're using one_only, then there needs to be a .gnu.linkonce
6879 prefix to the section name. */
6880 linkonce = one_only ? ".gnu.linkonce" : "";
6882 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6884 set_decl_section_name (decl, string);
6885 return;
6887 default_unique_section (decl, reloc);
6890 /* Constant pools are per-function in the large code model. */
6892 static inline bool
6893 riscv_can_use_per_function_literal_pools_p (void)
6895 return riscv_cmodel == CM_LARGE;
6898 static bool
6899 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6901 /* We can't use blocks for constants when we're using a per-function
6902 constant pool. */
6903 return !riscv_can_use_per_function_literal_pools_p ();
6906 /* Return a section for X, handling small data. */
6908 static section *
6909 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6910 unsigned HOST_WIDE_INT align)
6912 /* The literal pool stays with the function. */
6913 if (riscv_can_use_per_function_literal_pools_p ())
6914 return function_section (current_function_decl);
6916 section *s = default_elf_select_rtx_section (mode, x, align);
6918 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6920 if (startswith (s->named.name, ".rodata.cst"))
6922 /* Rename .rodata.cst* to .srodata.cst*. */
6923 char *name = (char *) alloca (strlen (s->named.name) + 2);
6924 sprintf (name, ".s%s", s->named.name + 1);
6925 return get_section (name, s->named.common.flags, NULL);
6928 if (s == data_section)
6929 return sdata_section;
6932 return s;
6935 /* Make the last instruction frame-related and note that it performs
6936 the operation described by FRAME_PATTERN. */
6938 static void
6939 riscv_set_frame_expr (rtx frame_pattern)
6941 rtx insn;
6943 insn = get_last_insn ();
6944 RTX_FRAME_RELATED_P (insn) = 1;
6945 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6946 frame_pattern,
6947 REG_NOTES (insn));
6950 /* Return a frame-related rtx that stores REG at MEM.
6951 REG must be a single register. */
6953 static rtx
6954 riscv_frame_set (rtx mem, rtx reg)
6956 rtx set = gen_rtx_SET (mem, reg);
6957 RTX_FRAME_RELATED_P (set) = 1;
6958 return set;
6961 /* Returns true if the current function might contain a far jump. */
6963 static bool
6964 riscv_far_jump_used_p ()
6966 size_t func_size = 0;
6968 if (cfun->machine->far_jump_used)
6969 return true;
6971 /* We can't change far_jump_used during or after reload, as there is
6972 no chance to change stack frame layout. So we must rely on the
6973 conservative heuristic below having done the right thing. */
6974 if (reload_in_progress || reload_completed)
6975 return false;
6977 /* Estimate the function length. */
6978 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6979 func_size += get_attr_length (insn);
6981 /* Conservatively determine whether some jump might exceed 1 MiB
6982 displacement. */
6983 if (func_size * 2 >= 0x100000)
6984 cfun->machine->far_jump_used = true;
6986 return cfun->machine->far_jump_used;
6989 /* Return true if the current function must save the incoming return
6990 address. */
6992 static bool
6993 riscv_save_return_addr_reg_p (void)
6995 /* The $ra register is call-clobbered: if this is not a leaf function,
6996 save it. */
6997 if (!crtl->is_leaf)
6998 return true;
7000 /* We need to save the incoming return address if __builtin_eh_return
7001 is being used to set a different return address. */
7002 if (crtl->calls_eh_return)
7003 return true;
7005 /* Far jumps/branches use $ra as a temporary to set up the target jump
7006 location (clobbering the incoming return address). */
7007 if (riscv_far_jump_used_p ())
7008 return true;
7010 /* We need to save the return address if anything has used it. */
7011 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
7012 return true;
7014 /* A leaf function must also save ra when the frame pointer is needed,
7015 unless -momit-leaf-frame-pointer omits the leaf frame pointer. */
7016 if (frame_pointer_needed && crtl->is_leaf
7017 && !TARGET_OMIT_LEAF_FRAME_POINTER)
7018 return true;
7020 return false;
7023 /* Return true if the current function must save register REGNO. */
7025 static bool
7026 riscv_save_reg_p (unsigned int regno)
7028 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
7029 bool might_clobber = crtl->saves_all_registers
7030 || df_regs_ever_live_p (regno);
7032 if (call_saved && might_clobber)
7033 return true;
7035 /* Save callee-saved V registers. */
7036 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
7037 && might_clobber)
7038 return true;
7040 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
7041 return true;
7043 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
7044 return true;
7046 /* If this is an interrupt handler, then must save extra registers. */
7047 if (cfun->machine->interrupt_handler_p)
7049 /* zero register is always zero. */
7050 if (regno == GP_REG_FIRST)
7051 return false;
7053 /* The function will return the stack pointer to its original value. */
7054 if (regno == STACK_POINTER_REGNUM)
7055 return false;
7057 /* By convention, we assume that gp and tp are safe. */
7058 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
7059 return false;
7061 /* We must save every register used in this function. If this is not a
7062 leaf function, then we must save all temporary registers. */
7063 if (df_regs_ever_live_p (regno)
7064 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7065 return true;
7068 return false;
7071 /* Return TRUE if Zcmp push and pop insns should be
7072 avoided, FALSE otherwise.
7073 Only use multi push & pop if all masked GPRs can be covered,
7074 stack accesses are SP-based,
7075 the GPRs sit at the top of the stack frame,
7076 and there are no conflicts in stack allocation with other features. */
7077 static bool
7078 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
7080 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
7081 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
7082 || crtl->args.pretend_args_size != 0
7083 || (use_shrink_wrapping_separate ()
7084 && !riscv_avoid_shrink_wrapping_separate ())
7085 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
7086 return true;
7088 return false;
7091 /* Determine whether to use multi push insn. */
7092 static bool
7093 riscv_use_multi_push (const struct riscv_frame_info *frame)
7095 if (riscv_avoid_multi_push (frame))
7096 return false;
7098 return (frame->multi_push_adj_base != 0);
7101 /* Return TRUE if a libcall to save/restore GPRs should be
7102 avoided. FALSE otherwise. */
7103 static bool
7104 riscv_avoid_save_libcall (void)
7106 if (!TARGET_SAVE_RESTORE
7107 || crtl->calls_eh_return
7108 || frame_pointer_needed
7109 || cfun->machine->interrupt_handler_p
7110 || cfun->machine->varargs_size != 0
7111 || crtl->args.pretend_args_size != 0)
7112 return true;
7114 return false;
7117 /* Determine whether to call GPR save/restore routines. */
7118 static bool
7119 riscv_use_save_libcall (const struct riscv_frame_info *frame)
7121 if (riscv_avoid_save_libcall ())
7122 return false;
7124 return frame->save_libcall_adjustment != 0;
7127 /* Determine which GPR save/restore routine to call. */
7129 static unsigned
7130 riscv_save_libcall_count (unsigned mask)
7132 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
7133 if (BITSET_P (mask, n))
7134 return CALLEE_SAVED_REG_NUMBER (n) + 1;
7135 abort ();
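/* For instance, if the highest saved s-register in MASK is s2, the count
   is CALLEE_SAVED_REG_NUMBER (s2) + 1 = 3, which selects the
   __riscv_save_3/__riscv_restore_3 routines (an illustrative pairing;
   the routine names are emitted elsewhere). */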
7138 /* Calculate the number of s-registers in a multi push/pop.
7139 Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
7140 static unsigned
7141 riscv_multi_push_sregs_count (unsigned mask)
7143 unsigned num = riscv_save_libcall_count (mask);
7144 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
7145 : num;
7148 /* Calculate the number of registers (ra, s0-sx) in a multi push/pop. */
7149 static unsigned
7150 riscv_multi_push_regs_count (unsigned mask)
7152 /* 1 is for ra */
7153 return riscv_multi_push_sregs_count (mask) + 1;
7156 /* Handle 16-byte alignment for poly_int. */
7157 static poly_int64
7158 riscv_16bytes_align (poly_int64 value)
7160 return aligned_upper_bound (value, 16);
7163 static HOST_WIDE_INT
7164 riscv_16bytes_align (HOST_WIDE_INT value)
7166 return ROUND_UP (value, 16);
7169 /* Handle stack alignment for poly_int. */
7170 static poly_int64
7171 riscv_stack_align (poly_int64 value)
7173 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
7176 static HOST_WIDE_INT
7177 riscv_stack_align (HOST_WIDE_INT value)
7179 return RISCV_STACK_ALIGN (value);
7182 /* Populate the current function's riscv_frame_info structure.
7184 RISC-V stack frames grow downward. High addresses are at the top.
7186 +-------------------------------+
7188 | incoming stack arguments |
7190 +-------------------------------+ <-- incoming stack pointer
7192 | callee-allocated save area |
7193 | for arguments that are |
7194 | split between registers and |
7195 | the stack |
7197 +-------------------------------+ <-- arg_pointer_rtx
7199 | callee-allocated save area |
7200 | for register varargs |
7202 +-------------------------------+ <-- hard_frame_pointer_rtx;
7203 | | stack_pointer_rtx + gp_sp_offset
7204 | GPR save area | + UNITS_PER_WORD
7206 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
7207 | | + UNITS_PER_FP_REG
7208 | FPR save area |
7210 +-------------------------------+ <-- stack_pointer_rtx
7211 | | + v_sp_offset_top
7212 | Vector Registers save area |
7214 | ----------------------------- | <-- stack_pointer_rtx
7215 | padding | + v_sp_offset_bottom
7216 +-------------------------------+ <-- frame_pointer_rtx (virtual)
7218 | local variables |
7220 P +-------------------------------+
7222 | outgoing stack arguments |
7224 +-------------------------------+ <-- stack_pointer_rtx
7226 Dynamic stack allocations such as alloca insert data at point P.
7227 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
7228 hard_frame_pointer_rtx unchanged. */
7230 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
7232 static void
7233 riscv_compute_frame_info (void)
7235 struct riscv_frame_info *frame;
7236 poly_int64 offset;
7237 bool interrupt_save_prologue_temp = false;
7238 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
7239 unsigned int num_v_saved = 0;
7241 frame = &cfun->machine->frame;
7243 /* In an interrupt function, there are two cases in which t0 needs to be used:
7244 1. If we have a large frame, then we need t0 to save/restore it. We check
7245 for this before clearing the frame struct.
7246 2. If we need to save and restore some CSRs in the frame. */
7247 if (cfun->machine->interrupt_handler_p)
7249 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
7250 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
7251 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
7252 interrupt_save_prologue_temp = true;
7255 frame->reset();
7257 if (!cfun->machine->naked_p)
7259 /* Find out which GPRs we need to save. */
7260 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7261 if (riscv_save_reg_p (regno)
7262 || (interrupt_save_prologue_temp
7263 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
7264 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7266 /* If this function calls eh_return, we must also save and restore the
7267 EH data registers. */
7268 if (crtl->calls_eh_return)
7269 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7270 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7272 /* Find out which FPRs we need to save. This loop must iterate over
7273 the same space as its companion in riscv_for_each_saved_reg. */
7274 if (TARGET_HARD_FLOAT)
7275 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7276 if (riscv_save_reg_p (regno))
7277 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
7279 /* Find out which V registers we need to save. */
7280 if (TARGET_VECTOR)
7281 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7282 if (riscv_save_reg_p (regno))
7284 frame->vmask |= 1 << (regno - V_REG_FIRST);
7285 num_v_saved++;
7289 if (frame->mask)
7291 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
7293 /* 1 is for ra */
7294 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
7295 /* Only use save/restore routines if they don't alter the stack size. */
7296 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
7297 && !riscv_avoid_save_libcall ())
7299 /* The libcall saves/restores 3 registers at once, so we need to
7300 allocate 3 words for the callee-saved registers. */
7301 if (TARGET_RVE)
7302 x_save_size = 3 * UNITS_PER_WORD;
7304 frame->save_libcall_adjustment = x_save_size;
7307 if (!riscv_avoid_multi_push (frame))
7309 /* num(ra, s0-sx) */
7310 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
7311 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
7312 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
7316 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
7317 if (cfun->machine->interrupt_handler_p
7318 && ((TARGET_HARD_FLOAT && frame->fmask)
7319 || (TARGET_ZFINX
7320 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
7321 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7322 /* Save and restore FCSR. */
7323 /* TODO: When P or V extensions support interrupts, some of their CSRs
7324 may also need to be saved and restored. */
7325 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
7327 /* At the bottom of the frame are any outgoing stack arguments. */
7328 offset = riscv_stack_align (crtl->outgoing_args_size);
7329 /* Next are local stack variables. */
7330 offset += riscv_stack_align (get_frame_size ());
7331 /* The virtual frame pointer points above the local variables. */
7332 frame->frame_pointer_offset = offset;
7333 /* Next are the callee-saved VRs. */
7334 if (frame->vmask)
7335 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
7336 frame->v_sp_offset_top = offset;
7337 frame->v_sp_offset_bottom
7338 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
7339 /* Next are the callee-saved FPRs. */
7340 if (frame->fmask)
7341 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
7342 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
7343 /* Next are the callee-saved GPRs. */
7344 if (frame->mask)
7346 offset += x_save_size;
7347 /* Align to 16 bytes and add padding to the GPR part to honor
7348 both the stack alignment and the Zcmp push/pop size alignment. */
7349 if (riscv_use_multi_push (frame)
7350 && known_lt (offset, frame->multi_push_adj_base
7351 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
7352 offset = riscv_16bytes_align (offset);
7354 frame->gp_sp_offset = offset - UNITS_PER_WORD;
7355 /* The hard frame pointer points above the callee-saved GPRs. */
7356 frame->hard_frame_pointer_offset = offset;
7357 /* Above the hard frame pointer is the callee-allocated varargs save area. */
7358 offset += riscv_stack_align (cfun->machine->varargs_size);
7359 /* Next is the callee-allocated area for pretend stack arguments. */
7360 offset += riscv_stack_align (crtl->args.pretend_args_size);
7361 /* Arg pointer must be below pretend args, but must be above alignment
7362 padding. */
7363 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
7364 frame->total_size = offset;
7366 /* Above that are the incoming stack pointer and any incoming arguments. */
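/* A small worked example of the layout above (illustrative, RV64, no FPRs
   or VRs): a function with 16 bytes of locals that saves ra and s0 gets
   x_save_size = 16, frame_pointer_offset = 16, gp_sp_offset = 24 and
   total_size = 32, i.e. ra is stored at sp+24 and s0 at sp+16. */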
7369 /* Make sure that we're not trying to eliminate to the wrong hard frame
7370 pointer. */
7372 static bool
7373 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7375 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
7378 /* Helper to determine whether register X pertains to the stack. */
7379 bool
7380 riscv_reg_frame_related (rtx x)
7382 return REG_P (x)
7383 && (REGNO (x) == FRAME_POINTER_REGNUM
7384 || REGNO (x) == HARD_FRAME_POINTER_REGNUM
7385 || REGNO (x) == ARG_POINTER_REGNUM
7386 || REGNO (x) == VIRTUAL_STACK_VARS_REGNUM);
7389 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
7390 or argument pointer. TO is either the stack pointer or hard frame
7391 pointer. */
7393 poly_int64
7394 riscv_initial_elimination_offset (int from, int to)
7396 poly_int64 src, dest;
7398 riscv_compute_frame_info ();
7400 if (to == HARD_FRAME_POINTER_REGNUM)
7401 dest = cfun->machine->frame.hard_frame_pointer_offset;
7402 else if (to == STACK_POINTER_REGNUM)
7403 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
7404 else
7405 gcc_unreachable ();
7407 if (from == FRAME_POINTER_REGNUM)
7408 src = cfun->machine->frame.frame_pointer_offset;
7409 else if (from == ARG_POINTER_REGNUM)
7410 src = cfun->machine->frame.arg_pointer_offset;
7411 else
7412 gcc_unreachable ();
7414 return src - dest;
7417 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
7418 previous frame. */
7421 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7423 if (count != 0)
7424 return const0_rtx;
7426 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
7429 /* Emit code to change the current function's return address to
7430 ADDRESS. SCRATCH is available as a scratch register, if needed.
7431 ADDRESS and SCRATCH are both word-mode GPRs. */
7433 void
7434 riscv_set_return_address (rtx address, rtx scratch)
7436 rtx slot_address;
7438 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
7439 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
7440 cfun->machine->frame.gp_sp_offset.to_constant());
7441 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
7444 /* Save register REG to MEM. Make the instruction frame-related. */
7446 static void
7447 riscv_save_reg (rtx reg, rtx mem)
7449 riscv_emit_move (mem, reg);
7450 riscv_set_frame_expr (riscv_frame_set (mem, reg));
7453 /* Restore register REG from MEM. */
7455 static void
7456 riscv_restore_reg (rtx reg, rtx mem)
7458 rtx insn = riscv_emit_move (reg, mem);
7459 rtx dwarf = NULL_RTX;
7460 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7462 if (known_gt (epilogue_cfa_sp_offset, 0)
7463 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
7465 rtx cfa_adjust_rtx
7466 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7467 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
7468 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7471 REG_NOTES (insn) = dwarf;
7472 RTX_FRAME_RELATED_P (insn) = 1;
7475 /* A function to save or store a register. The first argument is the
7476 register and the second is the stack slot. */
7477 typedef void (*riscv_save_restore_fn) (rtx, rtx);
7479 /* Use FN to save or restore register REGNO. MODE is the register's
7480 mode and OFFSET is the offset of its save slot from the current
7481 stack pointer. */
7483 static void
7484 riscv_save_restore_reg (machine_mode mode, int regno,
7485 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
7487 rtx mem;
7489 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
7490 fn (gen_rtx_REG (mode, regno), mem);
7493 /* Return the next register, from REGNO up to LIMIT, for the callee
7494 to save or restore. OFFSET will be adjusted accordingly.
7495 If INC is set, then REGNO will be incremented first.
7496 Returns INVALID_REGNUM if there is no such next register. */
7498 static unsigned int
7499 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
7500 HOST_WIDE_INT *offset, bool inc = true)
7502 if (inc)
7503 regno++;
7505 while (regno <= limit)
7507 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7509 *offset = *offset - UNITS_PER_WORD;
7510 return regno;
7513 regno++;
7515 return INVALID_REGNUM;
7518 /* Return TRUE if the provided REGNO is an EH return data register. */
7520 static bool
7521 riscv_is_eh_return_data_register (unsigned int regno)
7523 unsigned int i, regnum;
7525 if (!crtl->calls_eh_return)
7526 return false;
7528 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7529 if (regno == regnum)
7531 return true;
7534 return false;
7537 /* Call FN for each register that is saved by the current function.
7538 SP_OFFSET is the offset of the current stack pointer from the start
7539 of the frame. */
7541 static void
7542 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
7543 bool epilogue, bool maybe_eh_return)
7545 HOST_WIDE_INT offset, first_fp_offset;
7546 unsigned int regno, num_masked_fp = 0;
7547 unsigned int start = GP_REG_FIRST;
7548 unsigned int limit = GP_REG_LAST;
7550 /* Save the link register and s-registers. */
7551 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
7552 + UNITS_PER_WORD;
7553 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
7554 regno != INVALID_REGNUM;
7555 regno = riscv_next_saved_reg (regno, limit, &offset))
7557 if (cfun->machine->reg_is_wrapped_separately[regno])
7558 continue;
7560 /* If this is a normal return in a function that calls the eh_return
7561 builtin, then do not restore the eh return data registers as that
7562 would clobber the return value. But we do still need to save them
7563 in the prologue, and restore them for an exception return, so we
7564 need special handling here. */
7565 if (epilogue && !maybe_eh_return
7566 && riscv_is_eh_return_data_register (regno))
7567 continue;
7569 /* In an interrupt function, save and restore the necessary CSRs on the
7570 stack so that their values are preserved. */
7571 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
7572 && cfun->machine->interrupt_handler_p
7573 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
7574 || (TARGET_ZFINX
7575 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7577 /* Always assume FCSR occupies UNITS_PER_WORD, to prevent the stack
7578 offset from becoming misaligned later. */
7579 unsigned int fcsr_size = UNITS_PER_WORD;
7580 if (!epilogue)
7582 riscv_save_restore_reg (word_mode, regno, offset, fn);
7583 offset -= fcsr_size;
7584 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
7585 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7586 offset, riscv_save_reg);
7588 else
7590 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7591 offset - fcsr_size, riscv_restore_reg);
7592 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
7593 riscv_save_restore_reg (word_mode, regno, offset, fn);
7594 offset -= fcsr_size;
7596 continue;
7599 if (TARGET_XTHEADMEMPAIR)
7601 /* Get the next reg/offset pair. */
7602 HOST_WIDE_INT offset2 = offset;
7603 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
7605 /* Validate everything before emitting a mempair instruction. */
7606 if (regno2 != INVALID_REGNUM
7607 && !cfun->machine->reg_is_wrapped_separately[regno2]
7608 && !(epilogue && !maybe_eh_return
7609 && riscv_is_eh_return_data_register (regno2)))
7611 bool load_p = (fn == riscv_restore_reg);
7612 rtx operands[4];
7613 th_mempair_prepare_save_restore_operands (operands,
7614 load_p, word_mode,
7615 regno, offset,
7616 regno2, offset2);
7618 /* If the operands fit into a mempair insn, then emit one. */
7619 if (th_mempair_operands_p (operands, load_p, word_mode))
7621 th_mempair_save_restore_regs (operands, load_p, word_mode);
7622 offset = offset2;
7623 regno = regno2;
7624 continue;
7629 riscv_save_restore_reg (word_mode, regno, offset, fn);
7632 /* This loop must iterate over the same space as its companion in
7633 riscv_compute_frame_info. */
7634 first_fp_offset
7635 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
7636 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7637 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7639 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7640 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7641 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
7642 ? CALLEE_SAVED_FREG_NUMBER (regno)
7643 : num_masked_fp;
7644 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
7645 if (handle_reg)
7646 riscv_save_restore_reg (mode, regno, offset, fn);
7647 num_masked_fp++;
7651 /* Call FN for each V register that is saved by the current function. */
7653 static void
7654 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
7655 riscv_save_restore_fn fn, bool prologue)
7657 rtx vlen = NULL_RTX;
7658 if (cfun->machine->frame.vmask != 0)
7660 if (UNITS_PER_V_REG.is_constant ()
7661 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
7662 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
7663 else
7665 vlen = RISCV_PROLOGUE_TEMP (Pmode);
7666 rtx insn
7667 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
7668 RTX_FRAME_RELATED_P (insn) = 1;
7672 /* Select the mode where LMUL is 1 and SEW is the largest.  */
7673 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
7675 if (prologue)
7677 /* This loop must iterate over the same space as its companion in
7678 riscv_compute_frame_info. */
7679 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7680 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7682 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7683 if (handle_reg)
7685 rtx insn = NULL_RTX;
7686 if (CONST_INT_P (vlen))
7688 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
7689 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7690 stack_pointer_rtx,
7691 GEN_INT (-INTVAL (vlen))));
7693 else
7694 insn = emit_insn (
7695 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7696 gcc_assert (insn != NULL_RTX);
7697 RTX_FRAME_RELATED_P (insn) = 1;
7698 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7699 remaining_size -= UNITS_PER_V_REG;
7703 else
7705 /* This loop must iterate over the same space as its companion in
7706 riscv_compute_frame_info. */
7707 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
7708 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7710 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7711 if (handle_reg)
7713 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7714 rtx insn = emit_insn (
7715 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7716 gcc_assert (insn != NULL_RTX);
7717 RTX_FRAME_RELATED_P (insn) = 1;
7718 remaining_size -= UNITS_PER_V_REG;
7724 /* For stack frames that can't be allocated with a single ADDI instruction,
7725 compute the best value to initially allocate. It must at a minimum
7726 allocate enough space to spill the callee-saved registers.  If TARGET_RVC
7727 or TARGET_ZCA, try to pick a value that will allow compression of the
7728 register saves without adding extra instructions.  */
7730 static HOST_WIDE_INT
7731 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
7733 HOST_WIDE_INT remaining_const_size;
7734 if (!remaining_size.is_constant ())
7735 remaining_const_size
7736 = riscv_stack_align (remaining_size.coeffs[0])
7737 - riscv_stack_align (remaining_size.coeffs[1]);
7738 else
7739 remaining_const_size = remaining_size.to_constant ();
7741 /* The first step must be set to the top of the vector register save area
7742 if any vector registers need to be preserved.  */
7743 if (frame->vmask != 0)
7744 return (remaining_size - frame->v_sp_offset_top).to_constant ();
7746 if (SMALL_OPERAND (remaining_const_size))
7747 return remaining_const_size;
7749 poly_int64 callee_saved_first_step =
7750 remaining_size - frame->frame_pointer_offset;
7751 gcc_assert (callee_saved_first_step.is_constant ());
7752 HOST_WIDE_INT min_first_step =
7753 riscv_stack_align (callee_saved_first_step.to_constant ());
7754 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
7755 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
7756 gcc_assert (min_first_step <= max_first_step);
7758 /* As an optimization, use the least-significant bits of the total frame
7759 size, so that the second adjustment step is just LUI + ADD. */
7760 if (!SMALL_OPERAND (min_second_step)
7761 && remaining_const_size % IMM_REACH <= max_first_step
7762 && remaining_const_size % IMM_REACH >= min_first_step)
7763 return remaining_const_size % IMM_REACH;
7765 if (TARGET_RVC || TARGET_ZCA)
7767 /* If we need two subtracts, and one is small enough to allow compressed
7768 loads and stores, then put that one first. */
7769 if (IN_RANGE (min_second_step, 0,
7770 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
7771 return MAX (min_second_step, min_first_step);
7773 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
7774 with the minimum first step, so that we can get compressed loads and
7775 stores. */
7776 else if (!SMALL_OPERAND (min_second_step))
7777 return min_first_step;
7780 return max_first_step;
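/* Worked example (illustrative, assuming IMM_REACH == 4096, a 16-byte
preferred stack boundary so max_first_step == 2032, and a callee-save
area under 64 bytes): a constant frame of 4160 bytes takes
4160 % 4096 == 64 as the first step, leaving 4096 bytes that the second
step can allocate with a single LUI + ADD.  */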
7783 static rtx
7784 riscv_adjust_libcall_cfi_prologue ()
7786 rtx dwarf = NULL_RTX;
7787 rtx adjust_sp_rtx, reg, mem, insn;
7788 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7789 int offset;
7791 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7792 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7794 /* The save order is ra, s0, s1, s2 to s11. */
7795 if (regno == RETURN_ADDR_REGNUM)
7796 offset = saved_size - UNITS_PER_WORD;
7797 else if (regno == S0_REGNUM)
7798 offset = saved_size - UNITS_PER_WORD * 2;
7799 else if (regno == S1_REGNUM)
7800 offset = saved_size - UNITS_PER_WORD * 3;
7801 else
7802 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
7804 reg = gen_rtx_REG (Pmode, regno);
7805 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
7806 stack_pointer_rtx,
7807 offset));
7809 insn = gen_rtx_SET (mem, reg);
7810 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7813 /* Debug info for the sp adjustment.  */
7814 adjust_sp_rtx =
7815 gen_rtx_SET (stack_pointer_rtx,
7816 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
7817 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7818 dwarf);
7819 return dwarf;
7822 static rtx
7823 riscv_adjust_multi_push_cfi_prologue (int saved_size)
7825 rtx dwarf = NULL_RTX;
7826 rtx adjust_sp_rtx, reg, mem, insn;
7827 unsigned int mask = cfun->machine->frame.mask;
7828 int offset;
7829 int saved_cnt = 0;
7831 if (mask & S10_MASK)
7832 mask |= S11_MASK;
7834 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
7835 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
7837 /* The save order is s11-s0, then ra,
7838 from high to low address.  */
7839 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
7841 reg = gen_rtx_REG (Pmode, regno);
7842 mem = gen_frame_mem (Pmode,
7843 plus_constant (Pmode, stack_pointer_rtx, offset));
7845 insn = gen_rtx_SET (mem, reg);
7846 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7849 /* Debug info for the sp adjustment.  */
7850 adjust_sp_rtx
7851 = gen_rtx_SET (stack_pointer_rtx,
7852 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
7853 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7854 return dwarf;
7857 static void
7858 riscv_emit_stack_tie (void)
7860 if (Pmode == SImode)
7861 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7862 else
7863 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
7866 /* Zcmp multi push/pop: table of code_for_push_pop function pointers.  */
7867 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7868 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7869 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7870 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7871 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7872 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7873 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7874 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7875 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7876 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7877 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7878 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7879 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7880 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7881 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7882 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7883 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7884 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7885 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7886 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7887 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7888 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7889 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7890 {nullptr, nullptr, nullptr, nullptr},
7891 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7892 code_for_gpr_multi_popret_up_to_s11,
7893 code_for_gpr_multi_popretz_up_to_s11}};
7895 static rtx
7896 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7897 unsigned int regs_num)
7899 gcc_assert (op < ZCMP_OP_NUM);
7900 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
7901 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra.  */
7902 rtx stack_adj = GEN_INT (adj_size);
7903 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
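/* For instance (illustrative): op == PUSH_IDX, regs_num == 3 and
adj_size == -64 select code_for_gpr_multi_push_up_to_s1 and emit
something like "cm.push {ra, s0-s1}, -64".  */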
7906 static unsigned
7907 get_multi_push_fpr_mask (unsigned max_fprs_push)
7909 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7910 for (unsigned regno = FP_REG_FIRST;
7911 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7912 if (riscv_save_reg_p (regno))
7913 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7914 return mask_fprs_push;
7917 /* Expand the "prologue" pattern. */
7919 void
7920 riscv_expand_prologue (void)
7922 struct riscv_frame_info *frame = &cfun->machine->frame;
7923 poly_int64 remaining_size = frame->total_size;
7924 unsigned mask = frame->mask;
7925 unsigned fmask = frame->fmask;
7926 int spimm, multi_push_additional, stack_adj;
7927 rtx insn, dwarf = NULL_RTX;
7928 unsigned th_int_mask = 0;
7930 if (flag_stack_usage_info)
7931 current_function_static_stack_size = constant_lower_bound (remaining_size);
7933 if (cfun->machine->naked_p)
7934 return;
7936 /* Prefer multi-push to the save-restore libcall.  */
7937 if (riscv_use_multi_push (frame))
7939 remaining_size -= frame->multi_push_adj_base;
7940 /* If any vector registers need to be saved, the SP can be lowered
7941 here at most to the frame->v_sp_offset_top position, since the
7942 vector registers will be saved one by one later by decrementing
7943 the SP.  */
7944 poly_int64 remaining_size_above_varea
7945 = frame->vmask != 0
7946 ? remaining_size - frame->v_sp_offset_top
7947 : remaining_size;
7949 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7950 spimm = 3;
7951 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7952 spimm = 2;
7953 else if (known_gt (remaining_size_above_varea, 0))
7954 spimm = 1;
7955 else
7956 spimm = 0;
7957 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7958 frame->multi_push_adj_addi = multi_push_additional;
7959 remaining_size -= multi_push_additional;
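/* Illustrative: assuming ZCMP_SP_INC_STEP is 16, a remaining size of 40
bytes above the vector save area selects spimm == 3, so the cm.push
emitted below adjusts the SP by an extra 48 bytes.  */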
7961 /* Emit the multi-push insn and the DWARF info that goes with it.  */
7962 stack_adj = frame->multi_push_adj_base + multi_push_additional;
7963 insn = emit_insn (riscv_gen_multi_push_pop_insn (
7964 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
7965 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
7966 RTX_FRAME_RELATED_P (insn) = 1;
7967 REG_NOTES (insn) = dwarf;
7969 /* Temporarily fib that we need not save GPRs. */
7970 frame->mask = 0;
7972 /* Push FPRs into the additional space reserved by cm.push.  */
7973 if (fmask)
7975 unsigned mask_fprs_push
7976 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
7977 frame->fmask &= mask_fprs_push;
7978 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
7979 false);
7980 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs.  */
7983 /* When optimizing for size, call a subroutine to save the registers. */
7984 else if (riscv_use_save_libcall (frame))
7986 rtx dwarf = NULL_RTX;
7987 dwarf = riscv_adjust_libcall_cfi_prologue ();
7989 remaining_size -= frame->save_libcall_adjustment;
7990 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
7991 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
7993 RTX_FRAME_RELATED_P (insn) = 1;
7994 REG_NOTES (insn) = dwarf;
7997 th_int_mask = th_int_get_mask (frame->mask);
7998 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8000 frame->mask &= ~th_int_mask;
8002 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
8003 interrupts, such as fcsr.  */
8004 if ((TARGET_HARD_FLOAT && frame->fmask)
8005 || (TARGET_ZFINX && frame->mask))
8006 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
8008 unsigned save_adjustment = th_int_get_save_adjustment ();
8009 frame->gp_sp_offset -= save_adjustment;
8010 remaining_size -= save_adjustment;
8012 insn = emit_insn (gen_th_int_push ());
8014 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
8015 RTX_FRAME_RELATED_P (insn) = 1;
8016 REG_NOTES (insn) = dwarf;
8019 /* Save the GP, FP registers. */
8020 if ((frame->mask | frame->fmask) != 0)
8022 if (known_gt (remaining_size, frame->frame_pointer_offset))
8024 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
8025 remaining_size -= step1;
8026 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8027 GEN_INT (-step1));
8028 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8030 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
8033 /* Undo the above fib. */
8034 frame->mask = mask;
8035 frame->fmask = fmask;
8037 /* Set up the frame pointer, if we're using one. */
8038 if (frame_pointer_needed)
8040 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8041 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
8042 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8044 riscv_emit_stack_tie ();
8047 /* Save the V registers. */
8048 if (frame->vmask != 0)
8049 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
8051 /* Allocate the rest of the frame. */
8052 if (known_gt (remaining_size, 0))
8054 /* Two-step adjustment:
8055 1. scalable frame;  2. constant frame.  */
8056 poly_int64 scalable_frame (0, 0);
8057 if (!remaining_size.is_constant ())
8059 /* First step: the scalable frame.  */
8060 poly_int64 scalable_frame = remaining_size;
8061 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
8062 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
8063 remaining_size -= scalable_frame;
8066 /* Second step: the constant frame.  */
8067 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
8068 if (constant_frame == 0)
8070 /* We must have allocated stack space for the scalable frame.
8071 Emit a stack tie if we have a frame pointer so that the
8072 allocation is ordered WRT fp setup and subsequent writes
8073 into the frame. */
8074 if (frame_pointer_needed)
8075 riscv_emit_stack_tie ();
8076 return;
8079 if (SMALL_OPERAND (-constant_frame))
8081 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8082 GEN_INT (-constant_frame));
8083 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8085 else if (SUM_OF_TWO_S12_ALGN (-constant_frame))
8087 HOST_WIDE_INT one, two;
8088 riscv_split_sum_of_two_s12 (-constant_frame, &one, &two);
8089 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8090 GEN_INT (one));
8091 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8092 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8093 GEN_INT (two));
8094 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8096 else
8098 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
8099 emit_insn (gen_add3_insn (stack_pointer_rtx,
8100 stack_pointer_rtx,
8101 RISCV_PROLOGUE_TEMP (Pmode)));
8103 /* Describe the effect of the previous instructions. */
8104 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
8105 insn = gen_rtx_SET (stack_pointer_rtx, insn);
8106 riscv_set_frame_expr (insn);
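/* Illustrative examples of the three cases above: -2000 fits a single
ADDI; -4000 can be split into two signed-12-bit pieces, e.g.
-2048 + -1952, giving two ADDIs; anything larger goes through the
temporary, e.g. "li t0, -1000000; add sp, sp, t0".  */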
8109 /* We must have allocated the remainder of the stack frame.
8110 Emit a stack tie if we have a frame pointer so that the
8111 allocation is ordered WRT fp setup and subsequent writes
8112 into the frame. */
8113 if (frame_pointer_needed)
8114 riscv_emit_stack_tie ();
8118 static rtx
8119 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
8121 rtx dwarf = NULL_RTX;
8122 rtx adjust_sp_rtx, reg;
8123 unsigned int mask = cfun->machine->frame.mask;
8125 if (mask & S10_MASK)
8126 mask |= S11_MASK;
8128 /* Debug info for the sp adjustment.  */
8129 adjust_sp_rtx
8130 = gen_rtx_SET (stack_pointer_rtx,
8131 plus_constant (Pmode, stack_pointer_rtx, saved_size));
8132 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
8134 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8135 if (BITSET_P (mask, regno - GP_REG_FIRST))
8137 reg = gen_rtx_REG (Pmode, regno);
8138 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8141 return dwarf;
8144 static rtx
8145 riscv_adjust_libcall_cfi_epilogue ()
8147 rtx dwarf = NULL_RTX;
8148 rtx adjust_sp_rtx, reg;
8149 int saved_size = cfun->machine->frame.save_libcall_adjustment;
8151 /* Debug info for the sp adjustment.  */
8152 adjust_sp_rtx =
8153 gen_rtx_SET (stack_pointer_rtx,
8154 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
8155 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
8156 dwarf);
8158 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8159 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8161 reg = gen_rtx_REG (Pmode, regno);
8162 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8165 return dwarf;
8168 /* Return the insn that clears a0 if the popretz pattern can be
8169 matched, i.e. the insns before the epilogue are
8170 set (reg 10 a0) (const_int 0); use (reg 10 a0);
8171 NOTE_INSN_EPILOGUE_BEG.  Otherwise return NULL.  */
8172 static rtx_insn *
8173 riscv_zcmp_can_use_popretz (void)
8175 rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
8177 /* Sequence stack for NOTE_INSN_EPILOGUE_BEG.  */
8178 struct sequence_stack *outer_seq = get_current_sequence ()->next;
8179 if (!outer_seq)
8180 return NULL;
8181 insn = outer_seq->first;
8182 if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
8183 return NULL;
8185 /* Sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG.  */
8186 outer_seq = outer_seq->next;
8187 if (outer_seq)
8188 insn = outer_seq->last;
8190 /* Skip notes.  */
8191 while (insn && NOTE_P (insn))
8193 insn = PREV_INSN (insn);
8195 use = insn;
8197 /* Match use (reg 10 a0).  */
8198 if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
8199 || !REG_P (XEXP (PATTERN (use), 0))
8200 || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
8201 return NULL;
8203 /* Match set (reg 10 a0) (const_int 0 [0]).  */
8204 clear = PREV_INSN (use);
8205 if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
8206 && REG_P (SET_DEST (PATTERN (clear)))
8207 && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
8208 && SET_SRC (PATTERN (clear)) == const0_rtx)
8209 return clear;
8211 return NULL;
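/* When a match is found, the epilogue's "li a0,0" plus return can be
folded into a single "cm.popretz {...}, N" (illustrative), which restores
the saved GPRs, zeroes a0 and returns in one instruction.  */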
8214 static void
8215 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
8216 unsigned multipop_size)
8218 rtx insn;
8219 unsigned regs_count = riscv_multi_push_regs_count (mask);
8221 if (!use_multi_pop_normal)
8222 insn = emit_insn (
8223 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
8224 else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
8226 delete_insn (NEXT_INSN (clear_a0_insn));
8227 delete_insn (clear_a0_insn);
8228 insn = emit_jump_insn (
8229 riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
8231 else
8232 insn = emit_jump_insn (
8233 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
8235 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
8236 RTX_FRAME_RELATED_P (insn) = 1;
8237 REG_NOTES (insn) = dwarf;
8240 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
8241 style says which. */
8243 void
8244 riscv_expand_epilogue (int style)
8246 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
8247 deallocate before restoring the registers. STEP2 is the amount we
8248 should deallocate afterwards including the callee saved regs. STEP3
8249 is the amount deallocated by the save-restore libcall.
8251 Start off by assuming that no registers need to be restored. */
8252 struct riscv_frame_info *frame = &cfun->machine->frame;
8253 unsigned mask = frame->mask;
8254 unsigned fmask = frame->fmask;
8255 unsigned mask_fprs_push = 0;
8256 poly_int64 step2 = 0;
8257 bool use_multi_pop_normal
8258 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
8259 bool use_multi_pop_sibcall
8260 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
8261 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
8263 bool use_restore_libcall
8264 = !use_multi_pop
8265 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
8266 unsigned libcall_size = use_restore_libcall && !use_multi_pop
8267 ? frame->save_libcall_adjustment
8268 : 0;
8269 unsigned multipop_size
8270 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
8271 : 0;
8272 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
8273 unsigned th_int_mask = 0;
8274 rtx insn;
8276 /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
8277 bool need_barrier_p = known_ne (get_frame_size ()
8278 + cfun->machine->frame.arg_pointer_offset, 0);
8280 if (cfun->machine->naked_p)
8282 gcc_assert (style == NORMAL_RETURN);
8284 emit_jump_insn (gen_return ());
8286 return;
8289 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
8291 emit_jump_insn (gen_return ());
8292 return;
8295 /* Reset the epilogue cfa info before starting to emit the epilogue. */
8296 epilogue_cfa_sp_offset = 0;
8298 /* Move past any dynamic stack allocations. */
8299 if (cfun->calls_alloca)
8301 /* Emit a barrier to prevent loads from a deallocated stack. */
8302 riscv_emit_stack_tie ();
8303 need_barrier_p = false;
8305 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
8306 rtx dwarf_adj = gen_int_mode (adjust_offset, Pmode);
8307 rtx adjust = NULL_RTX;
8308 bool sum_of_two_s12 = false;
8309 HOST_WIDE_INT one, two;
8311 if (!adjust_offset.is_constant ())
8313 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
8314 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
8315 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
8316 gen_int_mode (adjust_offset, Pmode));
8317 adjust = tmp1;
8319 else
8321 HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
8322 if (SMALL_OPERAND (adj_off_value))
8324 adjust = GEN_INT (adj_off_value);
8326 else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
8328 riscv_split_sum_of_two_s12 (adj_off_value, &one, &two);
8329 dwarf_adj = adjust = GEN_INT (one);
8330 sum_of_two_s12 = true;
8332 else
8334 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
8335 GEN_INT (adj_off_value));
8336 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8340 insn = emit_insn (
8341 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
8342 adjust));
8344 rtx dwarf = NULL_RTX;
8345 rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
8346 dwarf_adj);
8347 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
8348 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
8350 RTX_FRAME_RELATED_P (insn) = 1;
8352 REG_NOTES (insn) = dwarf;
8354 if (sum_of_two_s12)
8356 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8357 GEN_INT (two)));
8358 RTX_FRAME_RELATED_P (insn) = 1;
8362 if (use_restore_libcall || use_multi_pop)
8363 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8365 /* If we need to restore registers, deallocate as much stack as
8366 possible in the second step without going out of range. */
8367 if (use_multi_pop)
8369 if (frame->fmask
8370 && known_gt (frame->total_size - multipop_size,
8371 frame->frame_pointer_offset))
8372 step2
8373 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
8375 else if ((frame->mask | frame->fmask) != 0)
8376 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
8378 if (use_restore_libcall || use_multi_pop)
8379 frame->mask = mask; /* Undo the above fib. */
8381 poly_int64 step1;
8382 /* STEP1 must be set to the bottom of the vector register save area
8383 if any vector registers need to be preserved.  */
8384 if (frame->vmask != 0)
8386 step1 = frame->v_sp_offset_bottom;
8387 step2 = frame->total_size - step1 - libcall_size - multipop_size;
8389 else
8390 step1 = frame->total_size - step2 - libcall_size - multipop_size;
8392 /* Deallocate the first STEP1 bytes: SP = SP + STEP1.  */
8393 if (known_gt (step1, 0))
8395 /* Emit a barrier to prevent loads from a deallocated stack. */
8396 riscv_emit_stack_tie ();
8397 need_barrier_p = false;
8399 /* Restore the scalable frame that was allocated in the prologue.  */
8400 if (!step1.is_constant ())
8402 poly_int64 scalable_frame = step1;
8403 scalable_frame.coeffs[0] = step1.coeffs[1];
8404 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
8405 true);
8406 step1 -= scalable_frame;
8409 /* Get an rtx for STEP1 that we can add to the stack pointer;
8410 skip if the adjustment is zero.  */
8411 HOST_WIDE_INT step1_value = step1.to_constant ();
8412 if (step1_value != 0)
8414 rtx adjust = GEN_INT (step1_value);
8415 if (SUM_OF_TWO_S12_ALGN (step1_value))
8417 HOST_WIDE_INT one, two;
8418 riscv_split_sum_of_two_s12 (step1_value, &one, &two);
8419 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8420 stack_pointer_rtx,
8421 GEN_INT (one)));
8422 RTX_FRAME_RELATED_P (insn) = 1;
8423 adjust = GEN_INT (two);
8425 else if (!SMALL_OPERAND (step1_value))
8427 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
8428 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8431 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8432 stack_pointer_rtx,
8433 adjust));
8434 rtx dwarf = NULL_RTX;
8435 rtx cfa_adjust_rtx
8436 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8437 gen_int_mode (step2 + libcall_size + multipop_size,
8438 Pmode));
8440 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8441 RTX_FRAME_RELATED_P (insn) = 1;
8443 REG_NOTES (insn) = dwarf;
8446 else if (frame_pointer_needed)
8448 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
8449 old value of FP. */
8450 epilogue_cfa_sp_offset = step2;
8453 if (use_multi_pop)
8455 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8456 if (fmask)
8458 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
8459 / UNITS_PER_WORD);
8460 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push */
8463 else if (use_restore_libcall)
8464 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8466 th_int_mask = th_int_get_mask (frame->mask);
8467 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8469 frame->mask &= ~th_int_mask;
8471 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
8472 interrupts, such as fcsr.  */
8473 if ((TARGET_HARD_FLOAT && frame->fmask)
8474 || (TARGET_ZFINX && frame->mask))
8475 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
8478 /* Restore the registers. */
8479 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
8480 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
8481 - multipop_size,
8482 riscv_restore_reg, true, style == EXCEPTION_RETURN);
8484 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8486 frame->mask = mask; /* Undo the above fib. */
8487 unsigned save_adjustment = th_int_get_save_adjustment ();
8488 gcc_assert (step2.to_constant () >= save_adjustment);
8489 step2 -= save_adjustment;
8492 if (use_restore_libcall)
8493 frame->mask = mask; /* Undo the above fib. */
8495 if (need_barrier_p)
8496 riscv_emit_stack_tie ();
8498 /* Deallocate the final bit of the frame. */
8499 if (step2.to_constant () > 0)
8501 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8502 GEN_INT (step2.to_constant ())));
8504 rtx dwarf = NULL_RTX;
8505 rtx cfa_adjust_rtx
8506 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8507 GEN_INT (libcall_size + multipop_size));
8508 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8509 RTX_FRAME_RELATED_P (insn) = 1;
8511 REG_NOTES (insn) = dwarf;
8514 if (use_multi_pop)
8516 /* Restore the FPRs pushed by cm.push.  */
8517 frame->fmask = fmask & mask_fprs_push;
8518 if (frame->fmask)
8519 riscv_for_each_saved_reg (frame->total_size - libcall_size
8520 - multipop_size,
8521 riscv_restore_reg, true,
8522 style == EXCEPTION_RETURN);
8523 /* Undo the above fib. */
8524 frame->mask = mask;
8525 frame->fmask = fmask;
8526 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
8527 multipop_size);
8528 if (use_multi_pop_normal)
8529 return;
8531 else if (use_restore_libcall)
8533 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
8534 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
8535 RTX_FRAME_RELATED_P (insn) = 1;
8536 REG_NOTES (insn) = dwarf;
8538 emit_jump_insn (gen_gpr_restore_return (ra));
8539 return;
8542 /* Add in the __builtin_eh_return stack adjustment. */
8543 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
8544 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8545 EH_RETURN_STACKADJ_RTX));
8547 /* Return from interrupt. */
8548 if (cfun->machine->interrupt_handler_p)
8550 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
8552 gcc_assert (mode != UNKNOWN_MODE);
8554 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8555 emit_jump_insn (gen_th_int_pop ());
8556 else if (mode == MACHINE_MODE)
8557 emit_jump_insn (gen_riscv_mret ());
8558 else if (mode == SUPERVISOR_MODE)
8559 emit_jump_insn (gen_riscv_sret ());
8560 else
8561 emit_jump_insn (gen_riscv_uret ());
8563 else if (style != SIBCALL_RETURN)
8564 emit_jump_insn (gen_simple_return_internal (ra));
8567 /* Implement EPILOGUE_USES. */
8569 bool
8570 riscv_epilogue_uses (unsigned int regno)
8572 if (regno == RETURN_ADDR_REGNUM)
8573 return true;
8575 if (epilogue_completed && cfun->machine->interrupt_handler_p)
8577 /* An interrupt function restores temp regs, so we must indicate that
8578 they are live at function end. */
8579 if (df_regs_ever_live_p (regno)
8580 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
8581 return true;
8584 return false;
8587 static bool
8588 riscv_avoid_shrink_wrapping_separate ()
8590 if (riscv_use_save_libcall (&cfun->machine->frame)
8591 || cfun->machine->interrupt_handler_p
8592 || !cfun->machine->frame.gp_sp_offset.is_constant ())
8593 return true;
8595 return false;
8598 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
8600 static sbitmap
8601 riscv_get_separate_components (void)
8603 HOST_WIDE_INT offset;
8604 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8605 bitmap_clear (components);
8607 if (riscv_avoid_shrink_wrapping_separate ())
8608 return components;
8610 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8611 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8612 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8614 /* We can only wrap registers that have small operand offsets.
8615 For large offsets a pseudo register might be needed which
8616 cannot be created during the shrink wrapping pass. */
8617 if (SMALL_OPERAND (offset))
8618 bitmap_set_bit (components, regno);
8620 offset -= UNITS_PER_WORD;
8623 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8624 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8625 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8627 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8629 /* We can only wrap registers that have small operand offsets.
8630 For large offsets a pseudo register might be needed which
8631 cannot be created during the shrink wrapping pass. */
8632 if (SMALL_OPERAND (offset))
8633 bitmap_set_bit (components, regno);
8635 offset -= GET_MODE_SIZE (mode).to_constant ();
8638 /* Don't mess with the hard frame pointer. */
8639 if (frame_pointer_needed)
8640 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
8642 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
8644 return components;
8647 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
8649 static sbitmap
8650 riscv_components_for_bb (basic_block bb)
8652 bitmap in = DF_LIVE_IN (bb);
8653 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
8654 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
8656 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8657 bitmap_clear (components);
8659 function_abi_aggregator callee_abis;
8660 rtx_insn *insn;
8661 FOR_BB_INSNS (bb, insn)
8662 if (CALL_P (insn))
8663 callee_abis.note_callee_abi (insn_callee_abi (insn));
8664 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
8666 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
8667 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8668 if (!fixed_regs[regno]
8669 && !crtl->abi->clobbers_full_reg_p (regno)
8670 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8671 || bitmap_bit_p (in, regno)
8672 || bitmap_bit_p (gen, regno)
8673 || bitmap_bit_p (kill, regno)))
8674 bitmap_set_bit (components, regno);
8676 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8677 if (!fixed_regs[regno]
8678 && !crtl->abi->clobbers_full_reg_p (regno)
8679 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8680 || bitmap_bit_p (in, regno)
8681 || bitmap_bit_p (gen, regno)
8682 || bitmap_bit_p (kill, regno)))
8683 bitmap_set_bit (components, regno);
8685 return components;
8688 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
8690 static void
8691 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
8693 /* Nothing to do for riscv. */
8696 static void
8697 riscv_process_components (sbitmap components, bool prologue_p)
8699 HOST_WIDE_INT offset;
8700 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
8702 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8703 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8704 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8706 if (bitmap_bit_p (components, regno))
8707 riscv_save_restore_reg (word_mode, regno, offset, fn);
8709 offset -= UNITS_PER_WORD;
8712 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8713 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8714 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8716 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8718 if (bitmap_bit_p (components, regno))
8719 riscv_save_restore_reg (mode, regno, offset, fn);
8721 offset -= GET_MODE_SIZE (mode).to_constant ();
8725 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
8727 static void
8728 riscv_emit_prologue_components (sbitmap components)
8730 riscv_process_components (components, true);
8733 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
8735 static void
8736 riscv_emit_epilogue_components (sbitmap components)
8738 riscv_process_components (components, false);
8741 static void
8742 riscv_set_handled_components (sbitmap components)
8744 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8745 if (bitmap_bit_p (components, regno))
8746 cfun->machine->reg_is_wrapped_separately[regno] = true;
8748 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8749 if (bitmap_bit_p (components, regno))
8750 cfun->machine->reg_is_wrapped_separately[regno] = true;
8753 /* Return nonzero if this function is known to have a null epilogue.
8754 This allows the optimizer to omit jumps to jumps if no stack
8755 was created. */
8757 bool
8758 riscv_can_use_return_insn (void)
8760 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
8761 && ! cfun->machine->interrupt_handler_p);
8764 /* Given that there exists at least one variable that is set (produced)
8765 by OUT_INSN and read (consumed) by IN_INSN, return true iff
8766 IN_INSN represents one or more memory store operations and none of
8767 the variables set by OUT_INSN is used by IN_INSN as the address of a
8768 store operation. If either IN_INSN or OUT_INSN does not represent
8769 a "single" RTL SET expression (as loosely defined by the
8770 implementation of the single_set function) or a PARALLEL with only
8771 SETs, CLOBBERs, and USEs inside, this function returns false.
8773 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
8774 conditions that result in assertion failures in the generic
8775 store_data_bypass_p function and returns FALSE in such cases.
8777 This is required to make -msave-restore work with the sifive-7
8778 pipeline description. */
8780 bool
8781 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
8783 rtx out_set, in_set;
8784 rtx out_pat, in_pat;
8785 rtx out_exp, in_exp;
8786 int i, j;
8788 in_set = single_set (in_insn);
8789 if (in_set)
8791 if (MEM_P (SET_DEST (in_set)))
8793 out_set = single_set (out_insn);
8794 if (!out_set)
8796 out_pat = PATTERN (out_insn);
8797 if (GET_CODE (out_pat) == PARALLEL)
8799 for (i = 0; i < XVECLEN (out_pat, 0); i++)
8801 out_exp = XVECEXP (out_pat, 0, i);
8802 if ((GET_CODE (out_exp) == CLOBBER)
8803 || (GET_CODE (out_exp) == USE))
8804 continue;
8805 else if (GET_CODE (out_exp) != SET)
8806 return false;
8812 else
8814 in_pat = PATTERN (in_insn);
8815 if (GET_CODE (in_pat) != PARALLEL)
8816 return false;
8818 for (i = 0; i < XVECLEN (in_pat, 0); i++)
8820 in_exp = XVECEXP (in_pat, 0, i);
8821 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
8822 continue;
8823 else if (GET_CODE (in_exp) != SET)
8824 return false;
8826 if (MEM_P (SET_DEST (in_exp)))
8828 out_set = single_set (out_insn);
8829 if (!out_set)
8831 out_pat = PATTERN (out_insn);
8832 if (GET_CODE (out_pat) != PARALLEL)
8833 return false;
8834 for (j = 0; j < XVECLEN (out_pat, 0); j++)
8836 out_exp = XVECEXP (out_pat, 0, j);
8837 if ((GET_CODE (out_exp) == CLOBBER)
8838 || (GET_CODE (out_exp) == USE))
8839 continue;
8840 else if (GET_CODE (out_exp) != SET)
8841 return false;
8848 return store_data_bypass_p (out_insn, in_insn);
8851 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
8853 When floating-point registers are wider than integer ones, moves between
8854 them must go through memory. */
8856 static bool
8857 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
8858 reg_class_t class2)
8860 return (!riscv_v_ext_mode_p (mode)
8861 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
8862 && (class1 == FP_REGS) != (class2 == FP_REGS)
8863 && !TARGET_XTHEADFMV
8864 && !TARGET_ZFA);
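/* Example (illustrative): on RV32 with the D extension a DFmode value is
wider than a GPR, so moving it between FP_REGS and GR_REGS must go
through memory unless XTheadFmv or Zfa provide suitable high/low-word
move instructions.  */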
8867 /* Implement TARGET_REGISTER_MOVE_COST. */
8869 static int
8870 riscv_register_move_cost (machine_mode mode,
8871 reg_class_t from, reg_class_t to)
8873 if ((from == FP_REGS && to == GR_REGS) ||
8874 (from == GR_REGS && to == FP_REGS))
8875 return tune_param->fmv_cost;
8877 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
8880 /* Implement TARGET_HARD_REGNO_NREGS. */
8882 static unsigned int
8883 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
8885 if (riscv_v_ext_vector_mode_p (mode))
8887 /* Handle fractional LMUL: the mode occupies only part of a vector
8888 register but still needs one whole vector register to hold it.  */
8889 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
8890 return 1;
8892 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
8895 /* For tuple modes, the number of registers = NF * LMUL.  */
8896 if (riscv_v_ext_tuple_mode_p (mode))
8898 unsigned int nf = riscv_vector::get_nf (mode);
8899 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
8900 poly_int64 size = GET_MODE_SIZE (subpart_mode);
8901 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
8902 if (maybe_lt (size, UNITS_PER_V_REG))
8903 return nf;
8904 else
8906 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
8907 return nf * lmul;
8911 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
8912 if (riscv_v_ext_vls_mode_p (mode))
8914 int size = GET_MODE_BITSIZE (mode).to_constant ();
8915 if (size < TARGET_MIN_VLEN)
8916 return 1;
8917 else
8918 return size / TARGET_MIN_VLEN;
8921 /* Modes for VL or VTYPE are just markers and do not hold values,
8922 so they always consume one register.  */
8923 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8924 || FRM_REG_P (regno))
8925 return 1;
8927 /* Assume every valid non-vector mode fits in one vector register. */
8928 if (V_REG_P (regno))
8929 return 1;
8931 if (FP_REG_P (regno))
8932 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
8934 /* All other registers are word-sized. */
8935 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
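/* Illustrative examples: RVVM2SImode (LMUL == 2) takes two vector
registers; a tuple of three such values (NF == 3) takes 3 * 2 == 6; a
fractional-LMUL mode like RVVMF2SImode still takes one.  */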
8938 /* Implement TARGET_HARD_REGNO_MODE_OK. */
8940 static bool
8941 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8943 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
8945 if (GP_REG_P (regno))
8947 if (riscv_v_ext_mode_p (mode))
8948 return false;
8950 if (!GP_REG_P (regno + nregs - 1))
8951 return false;
8953 else if (FP_REG_P (regno))
8955 if (riscv_v_ext_mode_p (mode))
8956 return false;
8958 if (!FP_REG_P (regno + nregs - 1))
8959 return false;
8961 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8962 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8963 return false;
8965 /* Only use callee-saved registers if a potential callee is guaranteed
8966 to spill the requisite width. */
8967 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8968 || (!call_used_or_fixed_reg_p (regno)
8969 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8970 return false;
8972 else if (V_REG_P (regno))
8974 if (!riscv_v_ext_mode_p (mode))
8975 return false;
8977 if (!V_REG_P (regno + nregs - 1))
8978 return false;
8980 int regno_alignment = riscv_get_v_regno_alignment (mode);
8981 if (regno_alignment != 1)
8982 return ((regno % regno_alignment) == 0);
8984 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8985 || FRM_REG_P (regno))
8986 return true;
8987 else
8988 return false;
8990 /* Require same callee-savedness for all registers. */
8991 for (unsigned i = 1; i < nregs; i++)
8992 if (call_used_or_fixed_reg_p (regno)
8993 != call_used_or_fixed_reg_p (regno + i))
8994 return false;
8996 /* Only use even registers in RV32 ZDINX.  */
8997 if (!TARGET_64BIT && TARGET_ZDINX){
8998 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8999 && GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
9000 return !(regno & 1);
9003 return true;
9006 /* Implement TARGET_MODES_TIEABLE_P.
9008 Don't allow floating-point modes to be tied, since type punning of
9009 single-precision and double-precision is implementation defined. */
9011 static bool
9012 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9014 /* We don't allow modes of different register classes to be tied,
9015 since that would cause an ICE in register allocation (RA).
9016 E.g. V2SI and DI are not tieable.  */
9017 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
9018 return false;
9019 return (mode1 == mode2
9020 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
9021 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
9024 /* Implement TARGET_CLASS_MAX_NREGS. */
9026 static unsigned char
9027 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
9029 if (reg_class_subset_p (rclass, FP_REGS))
9030 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
9032 if (reg_class_subset_p (rclass, GR_REGS))
9033 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
9035 if (reg_class_subset_p (rclass, V_REGS))
9036 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
9038 return 0;
9041 /* Implement TARGET_MEMORY_MOVE_COST. */
9043 static int
9044 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
9046 return (tune_param->memory_cost
9047 + memory_move_secondary_cost (mode, rclass, in));
9050 /* Return the number of instructions that can be issued per cycle. */
9052 static int
9053 riscv_issue_rate (void)
9055 return tune_param->issue_rate;
9058 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
9059 static int
9060 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
9062 if (DEBUG_INSN_P (insn))
9063 return more;
9065 rtx_code code = GET_CODE (PATTERN (insn));
9066 if (code == USE || code == CLOBBER)
9067 return more;
9069 /* GHOST insns are used for blockage and similar cases which
9070 effectively end a cycle. */
9071 if (get_attr_type (insn) == TYPE_GHOST)
9072 return 0;
9074 /* If we ever encounter an insn with an unknown type, trip
9075 an assert so we can find and fix this problem. */
9076 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
9078 /* If we ever encounter an insn without an insn reservation, trip
9079 an assert so we can find and fix this problem. */
9080 gcc_assert (insn_has_dfa_reservation_p (insn));
9082 return more - 1;
9085 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
9086 instruction fusion of some sort. */
9088 static bool
9089 riscv_macro_fusion_p (void)
9091 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
9094 /* Return true iff the instruction fusion described by OP is enabled. */
9096 static bool
9097 riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
9099 return tune_param->fusible_ops & op;
9102 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
9103 should be kept together during scheduling. */
9105 static bool
9106 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
9108 rtx prev_set = single_set (prev);
9109 rtx curr_set = single_set (curr);
9110 /* PREV and CURR are simple SET insns, i.e. no flag setting or branching.  */
9111 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
9113 if (!riscv_macro_fusion_p ())
9114 return false;
9116 if (simple_sets_p
9117 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
9118 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
9120 /* We are trying to match the following:
9121 prev (slli) == (set (reg:DI rD)
9122 (ashift:DI (reg:DI rS) (const_int 32)))
9123 curr (srli) == (set (reg:DI rD)
9124 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
9125 with <shift> being either 32 for FUSE_ZEXTW, or
9126 less than 32 for FUSE_ZEXTWS.  */
9128 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9129 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9130 && REG_P (SET_DEST (prev_set))
9131 && REG_P (SET_DEST (curr_set))
9132 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9133 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9134 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9135 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9136 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
9137 && ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
9138 && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW))
9139 || (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
9140 && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))))
9141 return true;
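/* This corresponds to the canonical zero-extension idiom (illustrative):
"slli rD, rS, 32" followed by "srli rD, rD, 32".  */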
9144 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
9146 /* We are trying to match the following:
9147 prev (slli) == (set (reg:DI rD)
9148 (ashift:DI (reg:DI rS) (const_int 48)))
9149 curr (srli) == (set (reg:DI rD)
9150 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
9152 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9153 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9154 && REG_P (SET_DEST (prev_set))
9155 && REG_P (SET_DEST (curr_set))
9156 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9157 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9158 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9159 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9160 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
9161 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
9162 return true;
9165 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
9167 /* We are trying to match the following:
9168 prev (add) == (set (reg:DI rD)
9169 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9170 curr (ld) == (set (reg:DI rD)
9171 (mem:DI (reg:DI rD))) */
9173 if (MEM_P (SET_SRC (curr_set))
9174 && REG_P (XEXP (SET_SRC (curr_set), 0))
9175 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9176 && GET_CODE (SET_SRC (prev_set)) == PLUS
9177 && REG_P (XEXP (SET_SRC (prev_set), 0))
9178 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9179 return true;
9181 /* We are trying to match the following:
9182 prev (add) == (set (reg:DI rD)
9183 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9184 curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
9186 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9187 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
9188 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9189 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
9190 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
9191 && GET_CODE (SET_SRC (prev_set)) == PLUS
9192 && REG_P (XEXP (SET_SRC (prev_set), 0))
9193 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9194 return true;
9197 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
9199 /* We are trying to match the following:
9200 prev (add) == (set (reg:DI rS)
9201 (plus:DI (reg:DI rS) (const_int)))
9202 curr (ld) == (set (reg:DI rD)
9203 (mem:DI (reg:DI rS))) */
9205 if (MEM_P (SET_SRC (curr_set))
9206 && REG_P (XEXP (SET_SRC (curr_set), 0))
9207 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9208 && GET_CODE (SET_SRC (prev_set)) == PLUS
9209 && REG_P (XEXP (SET_SRC (prev_set), 0))
9210 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
9211 return true;
9214 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
9216 /* We are trying to match the following:
9217 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9218 curr (addi) == (set (reg:DI rD)
9219 (plus:DI (reg:DI rD) (const_int IMM12))) */
9221 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
9222 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9223 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9224 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
9225 && (GET_CODE (SET_SRC (prev_set)) == HIGH
9226 || (CONST_INT_P (SET_SRC (prev_set))
9227 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
9228 return true;
9231 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
9233 /* We are trying to match the following:
9234 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9235 curr (addi) == (set (reg:DI rD)
9236 (plus:DI (reg:DI rD) (const_int IMM12)))
9238 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9239 curr (addi) == (set (reg:DI rD)
9240 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
9242 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9243 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
9244 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
9245 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9246 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9247 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
9249 return true;
9252 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
9254 /* We are trying to match the following:
9255 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9256 curr (ld) == (set (reg:DI rD)
9257 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9259 if (CONST_INT_P (SET_SRC (prev_set))
9260 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
9261 && MEM_P (SET_SRC (curr_set))
9262 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9263 return true;
9265 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9266 && MEM_P (SET_SRC (curr_set))
9267 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
9268 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
9269 return true;
9271 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9272 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9273 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
9274 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9275 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
9276 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
9277 return true;
9280 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
9282 /* We are trying to match the following:
9283 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9284 curr (ld) == (set (reg:DI rD)
9285 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9287 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9288 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
9289 && MEM_P (SET_SRC (curr_set))
9290 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9291 return true;
9294 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
9296 /* We are trying to match the following:
9297 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9298 (reg rS1))
9299 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9300 (reg rS2)) */
9302 if (MEM_P (SET_DEST (prev_set))
9303 && MEM_P (SET_DEST (curr_set))
9304 /* We can probably relax this condition. The documentation is a bit
9305 unclear about sub-word cases. So we just model DImode for now. */
9306 && GET_MODE (SET_DEST (curr_set)) == DImode
9307 && GET_MODE (SET_DEST (prev_set)) == DImode)
9309 rtx base_prev, base_curr, offset_prev, offset_curr;
9311 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
9312 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
9314 /* Fail if we did not find both bases. */
9315 if (base_prev == NULL_RTX || base_curr == NULL_RTX)
9316 return false;
9318 /* Fail if either base is not a register. */
9319 if (!REG_P (base_prev) || !REG_P (base_curr))
9320 return false;
9322 /* Fail if the bases are not the same register. */
9323 if (REGNO (base_prev) != REGNO (base_curr))
9324 return false;
9326 /* Originally the thought was to check MEM_ALIGN, but that was
9327 reporting incorrect alignments, even for SP/FP accesses, so we
9328 gave up on that approach. Instead just check for stack/hfp
9329 which we know are aligned. */
9330 if (REGNO (base_prev) != STACK_POINTER_REGNUM
9331 && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM)
9332 return false;
9334 /* The two stores must be contained within opposite halves of the
9335 same 16 byte aligned block of memory. We know that the stack
9336 pointer and the frame pointer have suitable alignment. So we
9337 just need to check the offsets of the two stores for suitable
9338 alignment. */
9339 /* Get the smaller offset into OFFSET_PREV. */
9340 if (INTVAL (offset_prev) > INTVAL (offset_curr))
9341 std::swap (offset_prev, offset_curr);
9343 /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned,
9344 then fail. */
9345 if ((INTVAL (offset_prev) % 16) != 0)
9346 return false;
9348 /* The higher offset must be 8 bytes more than the lower
9349 offset. */
9350 return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr));
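/* E.g. (illustrative) "sd s0,16(sp)" and "sd s1,24(sp)" may fuse: the
lower offset is 16-byte aligned and the offsets differ by 8, so both
stores fall within the same aligned 16-byte block; "sd s0,8(sp)" and
"sd s1,16(sp)" do not.  */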
9354 return false;
9357 /* Adjust the cost/latency of instructions for scheduling.
9358 For now this is just used to change the latency of vector instructions
9359 according to their LMUL. We assume that an insn with LMUL == 8 requires
9360 eight times more execution cycles than the same insn with LMUL == 1.
9361 As this may cause very high latencies that lead to scheduling artifacts,
9362 we currently only perform the adjustment when -madjust-lmul-cost is given.  */
9364 static int
9365 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
9366 unsigned int)
9368 /* Only do adjustments for the generic out-of-order scheduling model. */
9369 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
9370 return cost;
9372 if (recog_memoized (insn) < 0)
9373 return cost;
9375 enum attr_type type = get_attr_type (insn);
9377 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
9379 /* TODO: For ordered reductions scale the base cost relative to the
9380 number of units. */
9384 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
9385 if (!TARGET_ADJUST_LMUL_COST)
9386 return cost;
9388 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
9389 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
9390 return cost;
9392 enum riscv_vector::vlmul_type lmul =
9393 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
9395 double factor = 1;
9396 switch (lmul)
9398 case riscv_vector::LMUL_2:
9399 factor = 2;
9400 break;
9401 case riscv_vector::LMUL_4:
9402 factor = 4;
9403 break;
9404 case riscv_vector::LMUL_8:
9405 factor = 8;
9406 break;
9407 case riscv_vector::LMUL_F2:
9408 factor = 0.5;
9409 break;
9410 case riscv_vector::LMUL_F4:
9411 factor = 0.25;
9412 break;
9413 case riscv_vector::LMUL_F8:
9414 factor = 0.125;
9415 break;
9416 default:
9417 factor = 1;
9420 /* If the latency was nonzero, keep it that way. */
9421 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
9423 return new_cost;
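/* Worked example (illustrative): with -madjust-lmul-cost on the
   generic out-of-order model, a vector insn with base latency 4 and
   LMUL == 8 is assigned MAX (1, 4 * 8) = 32, the same insn with
   LMUL == 1/2 gets MAX (1, 4 * 0.5) = 2, and a zero latency stays
   zero because the first MAX operand is 0 in that case.  */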
9426 /* Auxiliary function to emit RISC-V ELF attributes. */
9427 static void
9428 riscv_emit_attribute ()
9430 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
9431 riscv_arch_str ().c_str ());
9433 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
9434 TARGET_STRICT_ALIGN ? 0 : 1);
9436 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
9437 riscv_stack_boundary / 8);
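/* For example, a plain rv64gc compile emits something like the
   following (the exact arch string depends on the selected extensions
   and their versions):
     .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_..."
     .attribute unaligned_access, 0
     .attribute stack_align, 16  */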
9440 /* Output a .variant_cc directive for function symbols which follow the
9441 vector calling convention. */
9443 static void
9444 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
9446 if (TREE_CODE (decl) == FUNCTION_DECL)
9448 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
9449 if (cc == RISCV_CC_V)
9451 fprintf (stream, "\t.variant_cc\t");
9452 assemble_name (stream, name);
9453 fprintf (stream, "\n");
9458 /* Implement ASM_DECLARE_FUNCTION_NAME. */
9460 void
9461 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
9463 riscv_asm_output_variant_cc (stream, fndecl, name);
9464 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
9465 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
9466 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
9468 fprintf (stream, "\t.option push\n");
9470 std::string *target_name = riscv_func_target_get (fndecl);
9471 std::string isa = target_name != NULL
9472 ? *target_name
9473 : riscv_cmdline_subset_list ()->to_string (true);
9474 fprintf (stream, "\t.option arch, %s\n", isa.c_str ());
9475 riscv_func_target_remove_and_destory (fndecl);
9477 struct cl_target_option *local_cl_target =
9478 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
9479 struct cl_target_option *global_cl_target =
9480 TREE_TARGET_OPTION (target_option_default_node);
9481 const char *local_tune_str = get_tune_str (local_cl_target);
9482 const char *global_tune_str = get_tune_str (global_cl_target);
9483 if (strcmp (local_tune_str, global_tune_str) != 0)
9484 fprintf (stream, "\t# tune = %s\n", local_tune_str);
9488 void
9489 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
9491 if (!flag_inhibit_size_directive)
9492 ASM_OUTPUT_MEASURED_SIZE (stream, name);
9494 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
9496 fprintf (stream, "\t.option pop\n");
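/* Taken together with riscv_declare_function_name, a function carrying
   a target attribute is bracketed by push/pop so the arch override
   does not leak into the rest of the file.  Illustrative output for a
   hypothetical foo with __attribute__ ((target ("arch=+zbb"))):
     .option push
     .option arch, rv64i2p1_..._zbb1p0
   foo:
     ...
     .option pop  */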
9500 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
9502 void
9503 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
9505 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
9506 const char *value = IDENTIFIER_POINTER (target);
9507 riscv_asm_output_variant_cc (stream, decl, name);
9508 ASM_OUTPUT_DEF (stream, name, value);
9511 /* Implement ASM_OUTPUT_EXTERNAL. */
9513 void
9514 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
9516 default_elf_asm_output_external (stream, decl, name);
9517 riscv_asm_output_variant_cc (stream, decl, name);
9520 /* Implement TARGET_ASM_FILE_START. */
9522 static void
9523 riscv_file_start (void)
9525 default_file_start ();
9527 /* Instruct GAS to generate position-[in]dependent code. */
9528 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
9530 /* If the user specifies "-mno-relax" on the command line then disable linker
9531 relaxation in the assembler. */
9532 if (! riscv_mrelax)
9533 fprintf (asm_out_file, "\t.option norelax\n");
9535 /* If the user specifies "-mcsr-check" on the command line then enable csr
9536 check in the assembler. */
9537 if (riscv_mcsr_check)
9538 fprintf (asm_out_file, "\t.option csr-check\n");
9540 if (riscv_emit_attribute_p)
9541 riscv_emit_attribute ();
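/* The file preamble produced here therefore begins with something like
   ".option nopic" (or ".option pic" when -fpic/-fPIC is given),
   optionally followed by ".option norelax" under -mno-relax and
   ".option csr-check" under -mcsr-check, and finally the ELF
   attributes emitted by riscv_emit_attribute above.  */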
9544 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
9545 in order to avoid duplicating too much logic from elsewhere. */
9547 static void
9548 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9549 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9550 tree function)
9552 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
9553 rtx this_rtx, temp1, temp2, fnaddr;
9554 rtx_insn *insn;
9556 riscv_in_thunk_func = true;
9558 /* Pretend to be a post-reload pass while generating rtl. */
9559 reload_completed = 1;
9561 /* Mark the end of the (empty) prologue. */
9562 emit_note (NOTE_INSN_PROLOGUE_END);
9564 /* Determine if we can use a sibcall to call FUNCTION directly. */
9565 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
9567 /* We need two temporary registers in some cases. */
9568 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
9569 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
9571 /* Find out which register contains the "this" pointer. */
9572 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9573 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
9574 else
9575 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
9577 /* Add DELTA to THIS_RTX. */
9578 if (delta != 0)
9580 rtx offset = GEN_INT (delta);
9581 if (!SMALL_OPERAND (delta))
9583 riscv_emit_move (temp1, offset);
9584 offset = temp1;
9586 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
9589 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
9590 if (vcall_offset != 0)
9592 rtx addr;
9594 /* Set TEMP1 to *THIS_RTX. */
9595 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
9597 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
9598 addr = riscv_add_offset (temp2, temp1, vcall_offset);
9600 /* Load the offset and add it to THIS_RTX. */
9601 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
9602 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
9605 /* Jump to the target function. */
9606 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
9607 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
9608 SIBLING_CALL_P (insn) = 1;
9610 /* Run just enough of rest_of_compilation. This sequence was
9611 "borrowed" from alpha.cc. */
9612 insn = get_insns ();
9613 split_all_insns_noflow ();
9614 shorten_branches (insn);
9615 assemble_start_function (thunk_fndecl, fnname);
9616 final_start_function (insn, file, 1);
9617 final (insn, file, 1);
9618 final_end_function ();
9619 assemble_end_function (thunk_fndecl, fnname);
9621 /* Clean up the vars set above. Note that final_end_function resets
9622 the global pointer for us. */
9623 reload_completed = 0;
9624 riscv_in_thunk_func = false;
9627 /* Allocate a chunk of memory for per-function machine-dependent data. */
9629 static struct machine_function *
9630 riscv_init_machine_status (void)
9632 return ggc_cleared_alloc<machine_function> ();
9635 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
9636 TODO: So far we only support length-agnostic values. */
9637 static poly_uint16
9638 riscv_convert_vector_chunks (struct gcc_options *opts)
9640 int chunk_num;
9641 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
9642 if (min_vlen > 32)
9644 /* When targeting minimum VLEN > 32, we should use a 64-bit chunk size.
9645 Otherwise we cannot include SEW = 64 bits.
9646 Runtime invariant: The single indeterminate represents the
9647 number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
9648 Thus the number of bytes in a vector is 8 + 8 * x1, which is
9649 riscv_vector_chunks * 8 = poly_int (8, 8). */
9650 riscv_bytes_per_vector_chunk = 8;
9651 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
9652 - TARGET_MIN_VLEN = 64bit: [8,8]
9653 - TARGET_MIN_VLEN = 128bit: [16,16]
9654 - TARGET_MIN_VLEN = 256bit: [32,32]
9655 - TARGET_MIN_VLEN = 512bit: [64,64]
9656 - TARGET_MIN_VLEN = 1024bit: [128,128]
9657 - TARGET_MIN_VLEN = 2048bit: [256,256]
9658 - TARGET_MIN_VLEN = 4096bit: [512,512]
9659 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
9660 chunk_num = min_vlen / 64;
9662 else
9664 /* When targeting minimum VLEN = 32, we should use a 32-bit
9665 chunk size. Runtime invariant: The single indeterminate represents the
9666 number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
9667 Thus the number of bytes in a vector is 4 + 4 * x1, which is
9668 riscv_vector_chunks * 4 = poly_int (4, 4). */
9669 riscv_bytes_per_vector_chunk = 4;
9670 chunk_num = 1;
9673 /* Set riscv_vector_chunks to the run-time constant poly (1, 1) if TARGET_VECTOR
9674 is enabled, or to the compile-time constant 1 if
9675 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
9676 to set the RVV mode sizes. The RVV machine mode sizes are run-time constants if
9677 TARGET_VECTOR is enabled, and remain the default
9678 compile-time constants if TARGET_VECTOR is disabled. */
9679 if (TARGET_VECTOR_OPTS_P (opts))
9681 switch (opts->x_rvv_vector_bits)
9683 case RVV_VECTOR_BITS_SCALABLE:
9684 return poly_uint16 (chunk_num, chunk_num);
9685 case RVV_VECTOR_BITS_ZVL:
9686 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
9687 default:
9688 gcc_unreachable ();
9691 else
9692 return 1;
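/* Worked example (illustrative): for an -march including zvl128b,
   min_vlen == 128 and the chunk size is 64 bits, so
   chunk_num == 128 / 64 == 2.  With -mrvv-vector-bits=scalable this
   returns poly_uint16 (2, 2), i.e. a vector holds 2 + 2 * x1
   eight-byte chunks; with -mrvv-vector-bits=zvl it returns the
   compile-time constant 128 / (8 * 8) == 2.  */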
9695 /* 'Unpack' the internal tuning structs and update the options
9696 in OPTS. The caller must have set up selected_tune and selected_arch
9697 as all the other target-specific codegen decisions are
9698 derived from them. */
9699 void
9700 riscv_override_options_internal (struct gcc_options *opts)
9702 const struct riscv_tune_info *cpu;
9704 /* The presence of the M extension implies that division instructions
9705 are present, so include them unless explicitly disabled. */
9706 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
9707 opts->x_target_flags |= MASK_DIV;
9708 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
9709 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
9711 /* Likewise floating-point division and square root. */
9712 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
9713 && ((target_flags_explicit & MASK_FDIV) == 0))
9714 opts->x_target_flags |= MASK_FDIV;
9716 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
9717 if both -mtune and -mcpu are not given. */
9718 const char *tune_string = get_tune_str (opts);
9719 cpu = riscv_parse_tune (tune_string, false);
9720 riscv_microarchitecture = cpu->microarchitecture;
9721 tune_param = opts->x_optimize_size
9722 ? &optimize_size_tune_info
9723 : cpu->tune_param;
9725 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
9726 for size. For architectures that trap and emulate unaligned accesses,
9727 the performance cost is too great, even for -Os. Similarly, if
9728 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
9729 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
9730 || TARGET_STRICT_ALIGN);
9732 /* By default, when -mno-vector-strict-align is not specified, do not allow
9733 unaligned vector memory accesses except if -mtune's setting explicitly
9734 allows it. */
9735 riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
9736 || cpu->tune_param->vector_unaligned_access;
9738 /* Make a note if the user explicitly passed -mstrict-align for later
9739 builtin macro generation. Can't use target_flags_explicit since
9740 it is set even for -mno-strict-align. */
9741 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
9743 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
9744 && cpu->tune_param->slow_unaligned_access)
9745 opts->x_target_flags |= MASK_STRICT_ALIGN;
9747 /* If the user hasn't specified a branch cost, use the processor's
9748 default. */
9749 if (opts->x_riscv_branch_cost == 0)
9750 opts->x_riscv_branch_cost = tune_param->branch_cost;
9752 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
9753 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
9755 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
9756 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
9757 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
9758 "'V' Extension");
9760 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
9761 big-endian after finishing full coverage testing. */
9762 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
9763 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
9765 /* Convert -march and -mrvv-vector-bits to a chunks count. */
9766 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
9769 /* Implement TARGET_OPTION_OVERRIDE. */
9771 void
9772 riscv_option_override (void)
9774 #ifdef SUBTARGET_OVERRIDE_OPTIONS
9775 SUBTARGET_OVERRIDE_OPTIONS;
9776 #endif
9778 flag_pcc_struct_return = 0;
9780 if (flag_pic)
9781 g_switch_value = 0;
9783 /* Always prefer medlow to medany for RV32, since medlow can access the
9784 full address space. */
9785 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
9786 riscv_cmodel = CM_MEDLOW;
9788 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
9789 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
9791 if (riscv_cmodel == CM_LARGE && flag_pic)
9792 sorry ("code model %qs with %qs", "large",
9793 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
9795 if (flag_pic)
9796 riscv_cmodel = CM_PIC;
9798 /* With -fno-omit-frame-pointer combined with -momit-leaf-frame-pointer we
9799 need to save fp together with ra for non-leaf functions, and neither fp
9800 nor ra for leaf functions. x_flag_omit_frame_pointer takes priority in
9801 determining whether the frame pointer is needed, so if we did not
9802 override it, fp and ra would also be stored for leaf functions, which is
9803 not what we want. */
9804 riscv_save_frame_pointer = false;
9805 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
9807 if (!global_options.x_flag_omit_frame_pointer)
9808 riscv_save_frame_pointer = true;
9810 global_options.x_flag_omit_frame_pointer = 1;
9813 /* We get better code with explicit relocs for CM_MEDLOW, but
9814 worse code for the others (for now). Pick the best default. */
9815 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
9816 if (riscv_cmodel == CM_MEDLOW)
9817 target_flags |= MASK_EXPLICIT_RELOCS;
9819 /* Require that the ISA supports the requested floating-point ABI. */
9820 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
9821 error ("requested ABI requires %<-march%> to subsume the %qc extension",
9822 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
9824 /* RVE requires a specific ABI. */
9825 if (TARGET_RVE)
9827 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
9828 error ("rv32e requires ilp32e ABI");
9829 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
9830 error ("rv64e requires lp64e ABI");
9833 /* Zfinx requires one of the ABIs ilp32, ilp32e, lp64 or lp64e. */
9834 if (TARGET_ZFINX
9835 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
9836 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
9837 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
9839 /* We do not yet support ILP32 on RV64. */
9840 if (BITS_PER_WORD != POINTER_SIZE)
9841 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
9843 /* Validate -mpreferred-stack-boundary= value. */
9844 riscv_stack_boundary = ABI_STACK_BOUNDARY;
9845 if (riscv_preferred_stack_boundary_arg)
9847 int min = ctz_hwi (STACK_BOUNDARY / 8);
9848 int max = 8;
9850 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
9851 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
9852 riscv_preferred_stack_boundary_arg, min, max);
9854 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
9857 if (riscv_emit_attribute_p < 0)
9858 #ifdef HAVE_AS_RISCV_ATTRIBUTE
9859 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
9860 #else
9861 riscv_emit_attribute_p = 0;
9863 if (riscv_emit_attribute_p)
9864 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
9865 " [%<-mriscv-attribute%>]");
9866 #endif
9868 if (riscv_stack_protector_guard == SSP_GLOBAL
9869 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9871 error ("incompatible options %<-mstack-protector-guard=global%> and "
9872 "%<-mstack-protector-guard-offset=%s%>",
9873 riscv_stack_protector_guard_offset_str);
9876 if (riscv_stack_protector_guard == SSP_TLS
9877 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
9878 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
9880 error ("both %<-mstack-protector-guard-offset%> and "
9881 "%<-mstack-protector-guard-reg%> must be used "
9882 "with %<-mstack-protector-guard=sysreg%>");
9885 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
9887 const char *str = riscv_stack_protector_guard_reg_str;
9888 int reg = decode_reg_name (str);
9890 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
9891 error ("%qs is not a valid base register in %qs", str,
9892 "-mstack-protector-guard-reg=");
9894 riscv_stack_protector_guard_reg = reg;
9897 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9899 char *end;
9900 const char *str = riscv_stack_protector_guard_offset_str;
9901 errno = 0;
9902 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
9904 if (!*str || *end || errno)
9905 error ("%qs is not a valid number in %qs", str,
9906 "-mstack-protector-guard-offset=");
9908 if (!SMALL_OPERAND (offs))
9909 error ("%qs is not a valid offset in %qs", str,
9910 "-mstack-protector-guard-offset=");
9912 riscv_stack_protector_guard_offset = offs;
9915 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
9916 param_sched_pressure_algorithm,
9917 SCHED_PRESSURE_MODEL);
9919 /* Function to allocate machine-dependent function status. */
9920 init_machine_status = &riscv_init_machine_status;
9922 riscv_override_options_internal (&global_options);
9924 /* Save these options as the default ones in case we push and pop them later
9925 while processing functions with potential target attributes. */
9926 target_option_default_node = target_option_current_node
9927 = build_target_option_node (&global_options, &global_options_set);
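/* As a concrete example of the stack boundary handling above (assuming
   the default 128-bit STACK_BOUNDARY): -mpreferred-stack-boundary=4
   passes the [4, 8] range check and yields
   riscv_stack_boundary = 8 << 4 = 128 bits, i.e. a 16-byte preferred
   stack alignment.  */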
9930 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9931 Used by riscv_set_current_function to
9932 make sure optab availability predicates are recomputed when necessary. */
9934 void
9935 riscv_save_restore_target_globals (tree new_tree)
9937 if (TREE_TARGET_GLOBALS (new_tree))
9938 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9939 else if (new_tree == target_option_default_node)
9940 restore_target_globals (&default_target_globals);
9941 else
9942 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9945 /* Implement TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9946 using the information saved in PTR. */
9948 static void
9949 riscv_option_restore (struct gcc_options *opts,
9950 struct gcc_options * /* opts_set */,
9951 struct cl_target_option * /* ptr */)
9953 riscv_override_options_internal (opts);
9956 static GTY (()) tree riscv_previous_fndecl;
9958 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9960 static void
9961 riscv_conditional_register_usage (void)
9963 /* We have only x0~x15 on RV32E/RV64E. */
9964 if (TARGET_RVE)
9966 for (int r = 16; r <= 31; r++)
9967 fixed_regs[r] = 1;
9970 if (riscv_abi == ABI_ILP32E)
9972 for (int r = 16; r <= 31; r++)
9973 call_used_regs[r] = 1;
9976 if (!TARGET_HARD_FLOAT)
9978 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9979 fixed_regs[regno] = call_used_regs[regno] = 1;
9982 /* In the soft-float ABI, there are no callee-saved FP registers. */
9983 if (UNITS_PER_FP_ARG == 0)
9985 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9986 call_used_regs[regno] = 1;
9989 if (!TARGET_VECTOR)
9991 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
9992 fixed_regs[regno] = call_used_regs[regno] = 1;
9994 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
9995 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
9996 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9997 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
10001 /* Return a register priority for hard reg REGNO. */
10003 static int
10004 riscv_register_priority (int regno)
10006 /* Favor compressed registers to improve the odds of RVC instruction
10007 selection. */
10008 if (riscv_compressed_reg_p (regno))
10009 return 1;
10011 return 0;
10014 /* Implement TARGET_TRAMPOLINE_INIT. */
10016 static void
10017 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10019 rtx addr, end_addr, mem;
10020 uint32_t trampoline[4];
10021 unsigned int i;
10022 HOST_WIDE_INT static_chain_offset, target_function_offset;
10024 /* Work out the offsets of the pointers from the start of the
10025 trampoline code. */
10026 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
10028 /* Get pointers to the beginning and end of the code block. */
10029 addr = force_reg (Pmode, XEXP (m_tramp, 0));
10030 end_addr = riscv_force_binary (Pmode, PLUS, addr,
10031 GEN_INT (TRAMPOLINE_CODE_SIZE));
10034 if (Pmode == SImode)
10036 chain_value = force_reg (Pmode, chain_value);
10038 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10039 /* lui t2, hi(chain)
10040 lui t0, hi(func)
10041 addi t2, t2, lo(chain)
10042 jr t0, lo(func)
10044 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
10045 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
10047 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
10049 /* 0xfff. */
10050 rtx imm12_mask = gen_reg_rtx (SImode);
10051 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
10053 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
10055 /* Gen lui t2, hi(chain). */
10056 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
10057 fixup_value);
10058 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
10059 uimm_mask);
10060 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10061 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
10062 gen_int_mode (lui_hi_chain_code, SImode));
10064 mem = adjust_address (m_tramp, SImode, 0);
10065 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
10067 /* Gen lui t0, hi(func). */
10068 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
10069 fixup_value);
10070 hi_func = riscv_force_binary (SImode, AND, hi_func,
10071 uimm_mask);
10072 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
10073 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
10074 gen_int_mode (lui_hi_func_code, SImode));
10076 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
10077 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
10079 /* Gen addi t2, t2, lo(chain). */
10080 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
10081 imm12_mask);
10082 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
10084 lo_chain_code = OPCODE_ADDI
10085 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10086 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
10088 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
10089 force_reg (SImode, GEN_INT (lo_chain_code)));
10091 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
10092 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
10094 /* Gen jr t0, lo(func). */
10095 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
10096 imm12_mask);
10097 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
10099 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
10101 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
10102 force_reg (SImode, GEN_INT (lo_func_code)));
10104 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
10105 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
10107 else
10109 static_chain_offset = TRAMPOLINE_CODE_SIZE;
10110 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
10112 /* auipc t2, 0
10113 l[wd] t0, target_function_offset(t2)
10114 l[wd] t2, static_chain_offset(t2)
10115 jr t0
10117 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10118 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10119 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
10120 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10121 | (target_function_offset << SHIFT_IMM);
10122 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10123 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10124 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10125 | (static_chain_offset << SHIFT_IMM);
10126 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
10128 /* Copy the trampoline code. */
10129 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
10131 if (BYTES_BIG_ENDIAN)
10132 trampoline[i] = __builtin_bswap32(trampoline[i]);
10133 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
10134 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
10137 /* Set up the static chain pointer field. */
10138 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
10139 riscv_emit_move (mem, chain_value);
10141 /* Set up the target function field. */
10142 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
10143 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
10146 /* Flush the code part of the trampoline. */
10147 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
10148 emit_insn (gen_clear_cache (addr, end_addr));
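/* A trampoline is only needed when the address of a nested function
   escapes, e.g. in GNU C (illustrative):
     int outer (int x)
     {
       int inner (int y) { return x + y; }  // needs the static chain
       int (*fp) (int) = inner;             // address taken
       return fp (41);
     }
   On the RV64 path the block built above holds the four code words
   followed by the static chain value and the target function address;
   the RV32 path instead patches both values directly into the
   lui/addi/jr instruction words.  */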
10151 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
10153 static bool
10154 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
10155 tree exp ATTRIBUTE_UNUSED)
10157 /* Don't use sibcalls when using the save-restore routines. */
10158 if (TARGET_SAVE_RESTORE)
10159 return false;
10161 /* Don't use sibcall for naked functions. */
10162 if (cfun->machine->naked_p)
10163 return false;
10165 /* Don't use sibcall for interrupt functions. */
10166 if (cfun->machine->interrupt_handler_p)
10167 return false;
10169 /* Don't use sibcalls in the large code model, because sibcall expansion
10170 and epilogue expansion both use the RISCV_PROLOGUE_TEMP
10171 register. */
10172 if (riscv_cmodel == CM_LARGE)
10173 return false;
10175 return true;
10178 /* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
10179 interrupt function. */
10180 static enum riscv_privilege_levels
10181 riscv_get_interrupt_type (tree decl)
10183 gcc_assert (decl != NULL_TREE);
10185 if ((TREE_CODE(decl) != FUNCTION_DECL)
10186 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
10187 return UNKNOWN_MODE;
10189 tree attr_args
10190 = TREE_VALUE (lookup_attribute ("interrupt",
10191 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
10193 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
10195 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
10197 if (!strcmp (string, "user"))
10198 return USER_MODE;
10199 else if (!strcmp (string, "supervisor"))
10200 return SUPERVISOR_MODE;
10201 else /* Must be "machine". */
10202 return MACHINE_MODE;
10204 else
10205 /* Interrupt attributes are machine mode by default. */
10206 return MACHINE_MODE;
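/* Example (illustrative):
     void __attribute__ ((interrupt ("supervisor"))) isr (void);
   yields SUPERVISOR_MODE here, a bare
     void __attribute__ ((interrupt)) isr2 (void);
   defaults to MACHINE_MODE, and any non-interrupt decl yields
   UNKNOWN_MODE.  */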
10209 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
10210 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
10211 of the function, if one exists. This function may be called multiple
10212 times on a single function, so use riscv_previous_fndecl to avoid
10213 setting up identical state. */
10215 /* Sanity checking for the above function attributes. */
10216 static void
10217 riscv_set_current_function (tree decl)
10219 if (decl == NULL_TREE
10220 || current_function_decl == NULL_TREE
10221 || current_function_decl == error_mark_node
10222 || ! cfun->machine)
10223 return;
10225 if (!cfun->machine->attributes_checked_p)
10227 cfun->machine->naked_p = riscv_naked_function_p (decl);
10228 cfun->machine->interrupt_handler_p
10229 = riscv_interrupt_type_p (TREE_TYPE (decl));
10231 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
10232 error ("function attributes %qs and %qs are mutually exclusive",
10233 "interrupt", "naked");
10235 if (cfun->machine->interrupt_handler_p)
10237 tree ret = TREE_TYPE (TREE_TYPE (decl));
10238 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
10240 if (TREE_CODE (ret) != VOID_TYPE)
10241 error ("%qs function cannot return a value", "interrupt");
10243 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
10244 error ("%qs function cannot have arguments", "interrupt");
10246 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
10248 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
10251 /* Don't print the above diagnostics more than once. */
10252 cfun->machine->attributes_checked_p = 1;
10255 if (!decl || decl == riscv_previous_fndecl)
10256 return;
10258 tree old_tree = (riscv_previous_fndecl
10259 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
10260 : NULL_TREE);
10262 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
10264 /* If current function has no attributes but the previous one did,
10265 use the default node. */
10266 if (!new_tree && old_tree)
10267 new_tree = target_option_default_node;
10269 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
10270 to the default have already been handled by
10271 riscv_save_restore_target_globals from the target pragma handling. */
10272 if (old_tree == new_tree)
10273 return;
10275 riscv_previous_fndecl = decl;
10277 /* First set the target options. */
10278 cl_target_option_restore (&global_options, &global_options_set,
10279 TREE_TARGET_OPTION (new_tree));
10281 /* The ISA extensions can vary from function to function via the target
10282 attribute. Thus, make sure that the machine modes are reflected correctly here. */
10283 init_adjust_machine_modes ();
10285 riscv_save_restore_target_globals (new_tree);
10288 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
10289 static tree
10290 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
10292 tree combined_attrs;
10294 enum riscv_privilege_levels old_interrupt_type
10295 = riscv_get_interrupt_type (olddecl);
10296 enum riscv_privilege_levels new_interrupt_type
10297 = riscv_get_interrupt_type (newdecl);
10299 /* Check that the old and new decls have the same interrupt type. */
10300 if ((old_interrupt_type != UNKNOWN_MODE)
10301 && (new_interrupt_type != UNKNOWN_MODE)
10302 && (old_interrupt_type != new_interrupt_type))
10303 error ("%qs function cannot have different interrupt type", "interrupt");
10305 /* Create combined attributes. */
10306 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
10307 DECL_ATTRIBUTES (newdecl));
10309 return combined_attrs;
10312 /* Implement TARGET_CANNOT_COPY_INSN_P. */
10314 static bool
10315 riscv_cannot_copy_insn_p (rtx_insn *insn)
10317 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
10320 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
10322 static bool
10323 riscv_slow_unaligned_access (machine_mode, unsigned int)
10325 return riscv_slow_unaligned_access_p;
10328 static bool
10329 riscv_overlap_op_by_pieces (void)
10331 return tune_param->overlap_op_by_pieces;
10334 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10336 static bool
10337 riscv_can_change_mode_class (machine_mode from, machine_mode to,
10338 reg_class_t rclass)
10340 /* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
10341 In the 'cprop_hardreg' stage, we will try to do hard reg copy propagation
10342 between a wider mode (FROM) and a narrower mode (TO).
10344 E.g. we should not allow copy propagation
10345 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
10346 since we can't order their sizes, which would cause an ICE in regcprop.
10348 TODO: Even though they have different sizes, they always change
10349 the whole register. We may enhance such cases in regcprop to optimize
10350 them in the future. */
10351 if (reg_classes_intersect_p (V_REGS, rclass)
10352 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
10353 return false;
10354 return !reg_classes_intersect_p (FP_REGS, rclass);
10357 /* Implement TARGET_CONSTANT_ALIGNMENT. */
10359 static HOST_WIDE_INT
10360 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
10362 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
10363 && (riscv_align_data_type == riscv_align_data_type_xlen))
10364 return MAX (align, BITS_PER_WORD);
10365 return align;
10368 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
10370 /* This function is equivalent to default_promote_function_mode_always_promote
10371 except that it returns a promoted mode even if type is NULL_TREE. This is
10372 needed by libcalls which have no type (only a mode) such as fixed conversion
10373 routines that take a signed or unsigned char/short/int argument and convert
10374 it to a fixed type. */
10376 static machine_mode
10377 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10378 machine_mode mode,
10379 int *punsignedp ATTRIBUTE_UNUSED,
10380 const_tree fntype ATTRIBUTE_UNUSED,
10381 int for_return ATTRIBUTE_UNUSED)
10383 int unsignedp;
10385 if (type != NULL_TREE)
10386 return promote_mode (type, mode, punsignedp);
10388 unsignedp = *punsignedp;
10389 scalar_mode smode = as_a <scalar_mode> (mode);
10390 PROMOTE_MODE (smode, unsignedp, type);
10391 *punsignedp = unsignedp;
10392 return smode;
10395 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
10397 static void
10398 riscv_reorg (void)
10400 /* Do nothing unless we have -msave-restore. */
10401 if (TARGET_SAVE_RESTORE)
10402 riscv_remove_unneeded_save_restore_calls ();
10405 /* Return nonzero if register FROM_REGNO can be renamed to register
10406 TO_REGNO. */
10408 bool
10409 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
10410 unsigned to_regno)
10412 /* Interrupt functions can only use registers that have already been
10413 saved by the prologue, even if they would normally be
10414 call-clobbered. */
10415 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
10418 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
10420 bool
10421 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
10423 /* Prefer old address if it is less expensive. */
10424 addr_space_t as = MEM_ADDR_SPACE (memref);
10425 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
10426 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
10427 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
10428 return new_cost <= old_cost;
10431 /* Helper function for generating gpr_save pattern. */
10434 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
10436 unsigned count = riscv_save_libcall_count (frame->mask);
10437 /* 1 for the unspec, 2 for the t0/t1 clobbers and 1 for ra. */
10438 unsigned veclen = 1 + 2 + 1 + count;
10439 rtvec vec = rtvec_alloc (veclen);
10441 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
10443 RTVEC_ELT (vec, 0) =
10444 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
10445 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
10447 for (unsigned i = 1; i < veclen; ++i)
10449 unsigned regno = gpr_save_reg_order[i];
10450 rtx reg = gen_rtx_REG (Pmode, regno);
10451 rtx elt;
10453 /* t0 and t1 are CLOBBERs, others are USEs. */
10454 if (i < 3)
10455 elt = gen_rtx_CLOBBER (Pmode, reg);
10456 else
10457 elt = gen_rtx_USE (Pmode, reg);
10459 RTVEC_ELT (vec, i) = elt;
10462 /* The highest-numbered entry used from gpr_save_reg_order must be set
10463 in the mask if we are not using __riscv_save_0. */
10464 gcc_assert ((count == 0) ||
10465 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
10467 return gen_rtx_PARALLEL (VOIDmode, vec);
10470 static HOST_WIDE_INT
10471 zcmp_base_adj (int regs_num)
10473 return riscv_16bytes_align (regs_num * GET_MODE_SIZE (word_mode));
10476 static HOST_WIDE_INT
10477 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
10479 return total - zcmp_base_adj (regs_num);
10482 bool
10483 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
10485 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
10486 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
10487 || additional_bytes == 2 * ZCMP_SP_INC_STEP
10488 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
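/* Hedged example, assuming ZCMP_SP_INC_STEP is 16 bytes on RV64: for
   regs_num == 3 the base adjustment is riscv_16bytes_align (3 * 8)
   == 32, so totals whose additional part is 0, 16, 32 or
   ZCMP_MAX_SPIMM * 16 bytes are accepted, while e.g. a total of 40 is
   rejected because the extra 8 bytes are not a multiple of the spimm
   step.  */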
10491 /* Return true if OP is a valid gpr_save pattern. */
10493 bool
10494 riscv_gpr_save_operation_p (rtx op)
10496 unsigned len = XVECLEN (op, 0);
10498 if (len > ARRAY_SIZE (gpr_save_reg_order))
10499 return false;
10501 for (unsigned i = 0; i < len; i++)
10503 rtx elt = XVECEXP (op, 0, i);
10504 if (i == 0)
10506 /* First element in parallel is unspec. */
10507 if (GET_CODE (elt) != UNSPEC_VOLATILE
10508 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
10509 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
10510 return false;
10512 else
10514 /* Elements 1 and 2 are CLOBBERs, the rest are USEs; check the order. */
10515 unsigned expect_code = i < 3 ? CLOBBER : USE;
10516 if (GET_CODE (elt) != expect_code
10517 || !REG_P (XEXP (elt, 0))
10518 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
10519 return false;
10523 return true;
10526 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
10528 static unsigned HOST_WIDE_INT
10529 riscv_asan_shadow_offset (void)
10531 /* We only have libsanitizer support for RV64 at present.
10533 This number must match ASAN_SHADOW_OFFSET_CONST in the file
10534 libsanitizer/asan/asan_mapping.h. */
10535 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
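/* With the usual ASan mapping this means an RV64 shadow address is
   computed as (illustrative):
     shadow = (addr >> 3) + 0xd55550000;
   where 3 is the shadow granularity shift; the offset here must stay
   in sync with libsanitizer's asan_mapping.h.  */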
10538 /* Implement TARGET_MANGLE_TYPE. */
10540 static const char *
10541 riscv_mangle_type (const_tree type)
10543 /* Half-precision float, _Float16 is "DF16_" and __bf16 is "DF16b". */
10544 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
10546 if (TYPE_MODE (type) == HFmode)
10547 return "DF16_";
10549 if (TYPE_MODE (type) == BFmode)
10550 return "DF16b";
10552 gcc_unreachable ();
10555 /* Mangle all vector types for the vector extension. */
10556 /* The mangled name follows the RVV LLVM rule,
10557 i.e. "u" + length of abi_name + abi_name. */
10558 if (TYPE_NAME (type) != NULL)
10560 const char *res = riscv_vector::mangle_builtin_type (type);
10561 if (res)
10562 return res;
10565 /* Use the default mangling. */
10566 return NULL;
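/* Examples of the resulting manglings (illustrative):
     void f (_Float16);   // parameter mangles as DF16_  -> _Z1fDF16_
     void g (__bf16);     // parameter mangles as DF16b  -> _Z1gDF16b
     void h (vint32m1_t); // "u" + strlen ("__rvv_int32m1_t") + name,
                          // i.e. u15__rvv_int32m1_t  */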
10569 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
10571 static bool
10572 riscv_scalar_mode_supported_p (scalar_mode mode)
10574 if (mode == HFmode || mode == BFmode)
10575 return true;
10576 else
10577 return default_scalar_mode_supported_p (mode);
10580 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
10581 if MODE is HFmode or BFmode, and punt to the generic implementation
10582 otherwise. */
10584 static bool
10585 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
10587 if (mode == HFmode || mode == BFmode)
10588 return true;
10589 else
10590 return default_libgcc_floating_mode_supported_p (mode);
10593 /* Set the value of FLT_EVAL_METHOD.
10594 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
10596 0: evaluate all operations and constants, whose semantic type has at
10597 most the range and precision of type float, to the range and
10598 precision of float; evaluate all other operations and constants to
10599 the range and precision of the semantic type;
10601 N, where _FloatN is a supported interchange floating type
10602 evaluate all operations and constants, whose semantic type has at
10603 most the range and precision of _FloatN type, to the range and
10604 precision of the _FloatN type; evaluate all other operations and
10605 constants to the range and precision of the semantic type;
10607 If we have the zfh/zhinx/zvfh extensions then we support _Float16
10608 in native precision, so we should set this to 16. */
10609 static enum flt_eval_method
10610 riscv_excess_precision (enum excess_precision_type type)
10612 switch (type)
10614 case EXCESS_PRECISION_TYPE_FAST:
10615 case EXCESS_PRECISION_TYPE_STANDARD:
10616 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
10617 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
10618 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
10619 case EXCESS_PRECISION_TYPE_IMPLICIT:
10620 case EXCESS_PRECISION_TYPE_FLOAT16:
10621 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
10622 default:
10623 gcc_unreachable ();
10625 return FLT_EVAL_METHOD_UNPREDICTABLE;
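/* Concretely: when compiling with zfh, the expression a + b for
   _Float16 a, b is evaluated directly in _Float16 and FLT_EVAL_METHOD
   is 16; without any of zfh/zhinx/zvfh the operands are promoted and
   the operation is evaluated in float instead.  */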
10628 /* Implement TARGET_FLOATN_MODE. */
10629 static opt_scalar_float_mode
10630 riscv_floatn_mode (int n, bool extended)
10632 if (!extended && n == 16)
10633 return HFmode;
10635 return default_floatn_mode (n, extended);
10638 /* Record that we have no arithmetic or comparison libfuncs for
10639 machine_mode MODE. */
10640 static void
10641 riscv_block_arith_comp_libfuncs_for_mode (machine_mode mode)
10643 /* Half-precision float or Brain float operations. The compiler handles all
10644 operations with NULL libfuncs by converting to SFmode. */
10646 /* Arithmetic. */
10647 set_optab_libfunc (add_optab, mode, NULL);
10648 set_optab_libfunc (sdiv_optab, mode, NULL);
10649 set_optab_libfunc (smul_optab, mode, NULL);
10650 set_optab_libfunc (neg_optab, mode, NULL);
10651 set_optab_libfunc (sub_optab, mode, NULL);
10653 /* Comparisons. */
10654 set_optab_libfunc (eq_optab, mode, NULL);
10655 set_optab_libfunc (ne_optab, mode, NULL);
10656 set_optab_libfunc (lt_optab, mode, NULL);
10657 set_optab_libfunc (le_optab, mode, NULL);
10658 set_optab_libfunc (ge_optab, mode, NULL);
10659 set_optab_libfunc (gt_optab, mode, NULL);
10660 set_optab_libfunc (unord_optab, mode, NULL);
10663 static void
10664 riscv_init_libfuncs (void)
10666 riscv_block_arith_comp_libfuncs_for_mode (HFmode);
10667 riscv_block_arith_comp_libfuncs_for_mode (BFmode);
10669 /* Convert between BFmode and HFmode using only trunc libfunc if needed. */
10670 set_conv_libfunc (sext_optab, BFmode, HFmode, "__trunchfbf2");
10671 set_conv_libfunc (sext_optab, HFmode, BFmode, "__truncbfhf2");
10672 set_conv_libfunc (trunc_optab, BFmode, HFmode, "__trunchfbf2");
10673 set_conv_libfunc (trunc_optab, HFmode, BFmode, "__truncbfhf2");
10676 #if CHECKING_P
10677 void
10678 riscv_reinit (void)
10680 riscv_option_override ();
10681 init_adjust_machine_modes ();
10682 init_derived_machine_modes ();
10683 reinit_regs ();
10684 init_optabs ();
10686 #endif
10688 #if CHECKING_P
10689 #undef TARGET_RUN_TARGET_SELFTESTS
10690 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
10691 #endif /* #if CHECKING_P */
10693 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
10695 static bool
10696 riscv_vector_mode_supported_p (machine_mode mode)
10698 if (TARGET_VECTOR)
10699 return riscv_v_ext_mode_p (mode);
10701 return false;
10704 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
10706 static bool
10707 riscv_verify_type_context (location_t loc, type_context_kind context,
10708 const_tree type, bool silent_p)
10710 return riscv_vector::verify_type_context (loc, context, type, silent_p);
10713 /* Implement TARGET_VECTOR_ALIGNMENT. */
10715 static HOST_WIDE_INT
10716 riscv_vector_alignment (const_tree type)
10718 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
10719 be set for non-predicate vectors of booleans. Modes are the most
10720 direct way we have of identifying real RVV predicate types. */
10721 /* FIXME: The RVV spec doesn't specify the alignment of bool, so we
10722 use one-byte alignment. */
10723 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
10724 return 8;
10726 widest_int min_size
10727 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
10728 return wi::umin (min_size, 128).to_uhwi ();
10731 /* Implement REGMODE_NATURAL_SIZE. */
10733 poly_uint64
10734 riscv_regmode_natural_size (machine_mode mode)
10736 /* The natural size for RVV data modes is one RVV data vector,
10737 and similarly for predicates. We can't independently modify
10738 anything smaller than that. */
10739 /* ??? For now, only do this for variable-width RVV registers.
10740 Doing it for constant-sized registers breaks lower-subreg.c. */
10742 if (riscv_v_ext_mode_p (mode))
10744 poly_uint64 size = GET_MODE_SIZE (mode);
10745 if (riscv_v_ext_tuple_mode_p (mode))
10747 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
10748 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
10749 return size;
10751 else if (riscv_v_ext_vector_mode_p (mode))
10753 /* RVV mask modes always consume a single register. */
10754 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
10755 return BYTES_PER_RISCV_VECTOR;
10757 if (!size.is_constant ())
10758 return BYTES_PER_RISCV_VECTOR;
10759 else if (!riscv_v_ext_vls_mode_p (mode))
10760 /* For -march=rv64gc_zve32f, the natural vector register size
10761 is 32 bits, which is smaller than the scalar register size, so we
10762 return the minimum of the vector register size and the scalar
10763 register size. */
10764 return MIN (size.to_constant (), UNITS_PER_WORD);
10766 return UNITS_PER_WORD;
10769 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
10771 static unsigned int
10772 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
10773 int *offset)
10775 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
10776 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
10777 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
10779 gcc_assert (i == 1);
10780 *factor = riscv_bytes_per_vector_chunk;
10781 *offset = 1;
10782 return RISCV_DWARF_VLENB;
10785 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
10787 static HOST_WIDE_INT
10788 riscv_estimated_poly_value (poly_int64 val,
10789 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
10791 if (TARGET_VECTOR)
10792 return riscv_vector::estimated_poly_value (val, kind);
10793 return default_estimated_poly_value (val, kind);
10796 /* Return true if the vector misalignment factor is supported by the
10797 target. */
10798 bool
10799 riscv_support_vector_misalignment (machine_mode mode,
10800 const_tree type ATTRIBUTE_UNUSED,
10801 int misalignment,
10802 bool is_packed ATTRIBUTE_UNUSED)
10804 /* Depend on movmisalign pattern. */
10805 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10806 is_packed);
10809 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
10811 static opt_machine_mode
10812 riscv_get_mask_mode (machine_mode mode)
10814 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
10815 return riscv_vector::get_mask_mode (mode);
10817 return default_get_mask_mode (mode);
10820 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
10821 it isn't worth branching around empty masked ops (including masked
10822 stores). */
10824 static bool
10825 riscv_empty_mask_is_expensive (unsigned)
10827 return false;
10830 /* Return true if a shift-amount matches the trailing cleared bits on
10831 a bitmask. */
10833 bool
10834 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
10836 return shamt == ctz_hwi (mask);
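/* E.g. riscv_shamt_matches_mask_p (3, 0xfffffff8) is true because the
   mask has exactly three trailing zero bits, while
   riscv_shamt_matches_mask_p (4, 0xfffffff8) is false.  */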
10839 static HARD_REG_SET
10840 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10842 HARD_REG_SET zeroed_hardregs;
10843 CLEAR_HARD_REG_SET (zeroed_hardregs);
10845 /* Find a register to hold vl. */
10846 unsigned vl_regno = INVALID_REGNUM;
10847 /* Skip the first GPR (x0): a vsetvl whose rd and rs1 are both x0 keeps
10848 the existing vl rather than setting a new one. */
10849 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
10851 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10853 vl_regno = regno;
10854 break;
10858 if (vl_regno > GP_REG_LAST)
10859 sorry ("cannot allocate vl register for %qs on this target",
10860 "-fzero-call-used-regs");
10862 /* Vector configurations need not be saved and restored here. The
10863 -fzero-call-used-regs=* option will zero all vector registers and
10864 then return, so there are no vector operations in between. */
10866 bool emitted_vlmax_vsetvl = false;
10867 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
10868 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
10870 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10872 rtx target = regno_reg_rtx[regno];
10873 machine_mode mode = GET_MODE (target);
10875 if (!emitted_vlmax_vsetvl)
10877 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
10878 emitted_vlmax_vsetvl = true;
10881 rtx ops[] = {target, CONST0_RTX (mode)};
10882 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
10883 riscv_vector::UNARY_OP, ops, vl);
10885 SET_HARD_REG_BIT (zeroed_hardregs, regno);
10889 return zeroed_hardregs;
10892 /* Generate a sequence of instructions that zero registers specified by
10893 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
10894 zeroed. */
10895 HARD_REG_SET
10896 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10898 HARD_REG_SET zeroed_hardregs;
10899 CLEAR_HARD_REG_SET (zeroed_hardregs);
10901 if (TARGET_VECTOR)
10902 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
10904 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
10905 & ~zeroed_hardregs);
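/* For the vector registers this typically expands to a single VLMAX
   vsetvli on a scratch GPR followed by one zeroing move per register,
   e.g. (illustrative):
     vsetvli t0, zero, e8, m1, ta, ma
     vmv.v.i v1, 0
     vmv.v.i v2, 0
   before the remaining scalar registers are zeroed by the default
   hook.  */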
10908 /* Implement target hook TARGET_ARRAY_MODE. */
10910 static opt_machine_mode
10911 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
10913 machine_mode vmode;
10914 if (TARGET_VECTOR
10915 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
10916 return vmode;
10918 return opt_machine_mode ();
10921 /* Given memory reference MEM, expand code to compute the aligned
10922 memory address, shift and mask values and store them into
10923 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
10925 void
10926 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
10927 rtx *not_mask)
10929 /* Align the memory address to a word. */
10930 rtx addr = force_reg (Pmode, XEXP (mem, 0));
10932 rtx addr_mask = gen_int_mode (-4, Pmode);
10934 rtx aligned_addr = gen_reg_rtx (Pmode);
10935 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
10937 *aligned_mem = change_address (mem, SImode, aligned_addr);
10939 /* Calculate the shift amount. */
10940 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
10941 gen_int_mode (3, SImode)));
10942 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
10943 gen_int_mode (3, SImode)));
10945 /* Calculate the mask. */
10946 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
10948 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
10950 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
10951 gen_lowpart (QImode, *shift)));
10953 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
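/* Worked example: for a QImode access at address A with A & 3 == 3,
   the aligned address is A & -4, *SHIFT becomes 3 * 8 == 24, *MASK is
   0xff << 24 and *NOT_MASK its complement, which is what the subword
   atomic expansions need to isolate the byte within the word.  */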
10956 /* Leftshift a subword within an SImode register. */
10958 void
10959 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
10960 rtx *shifted_value)
10962 rtx value_reg = gen_reg_rtx (SImode);
10963 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
10964 mode, 0));
10966 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
10967 gen_lowpart (QImode, shift)));
10970 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
10971 allows the behavior to be tuned for specific implementations as well as
10972 when optimizing for size. */
10974 bool
10975 riscv_use_divmod_expander (void)
10977 return tune_param->use_divmod_expansion;
10980 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
10982 static machine_mode
10983 riscv_preferred_simd_mode (scalar_mode mode)
10985 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10986 return riscv_vector::preferred_simd_mode (mode);
10988 return word_mode;
10991 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
10993 static poly_uint64
10994 riscv_vectorize_preferred_vector_alignment (const_tree type)
10996 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
10997 return TYPE_ALIGN (TREE_TYPE (type));
10998 return TYPE_ALIGN (type);
11001 /* Return true if MODE is a static FRM rounding mode. */
11003 static bool
11004 riscv_static_frm_mode_p (int mode)
11006 switch (mode)
11008 case riscv_vector::FRM_RDN:
11009 case riscv_vector::FRM_RUP:
11010 case riscv_vector::FRM_RTZ:
11011 case riscv_vector::FRM_RMM:
11012 case riscv_vector::FRM_RNE:
11013 return true;
11014 default:
11015 return false;
11018 gcc_unreachable ();
11021 /* Emit the mode set insns for the floating-point rounding mode (frm). */
11023 static void
11024 riscv_emit_frm_mode_set (int mode, int prev_mode)
11026 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
11028 if (prev_mode == riscv_vector::FRM_DYN_CALL)
11029 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
11031 if (mode != prev_mode)
11033 rtx frm = gen_int_mode (mode, SImode);
11035 if (mode == riscv_vector::FRM_DYN_CALL
11036 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
11037 /* No need to emit when prev mode is DYN already. */
11038 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11039 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
11040 && prev_mode != riscv_vector::FRM_DYN
11041 && prev_mode != riscv_vector::FRM_DYN_CALL)
11042 /* No need to emit when prev mode is DYN or DYN_CALL already. */
11043 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11044 else if (mode == riscv_vector::FRM_DYN
11045 && prev_mode != riscv_vector::FRM_DYN_CALL)
11046 /* Restore frm value from backup when switch to DYN mode. */
11047 emit_insn (gen_fsrmsi_restore (backup_reg));
11048 else if (riscv_static_frm_mode_p (mode))
11049 /* Set frm value when switch to static mode. */
11050 emit_insn (gen_fsrmsi_restore (frm));
11054 /* Implement Mode switching. */
11056 static void
11057 riscv_emit_mode_set (int entity, int mode, int prev_mode,
11058 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
11060 switch (entity)
11062 case RISCV_VXRM:
11063 if (mode != VXRM_MODE_NONE && mode != prev_mode)
11064 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
11065 break;
11066 case RISCV_FRM:
11067 riscv_emit_frm_mode_set (mode, prev_mode);
11068 break;
11069 default:
11070 gcc_unreachable ();
11074 /* After a call, adjust an insn's FRM_NONE mode to FRM_DYN so that the
11075 underlying emit restores the backed-up frm. */
11077 static int
11078 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
11080 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
11082 if (insn && CALL_P (insn))
11083 return riscv_vector::FRM_DYN;
11085 return mode;
11088 /* Insert the frm backup insn at the end of the bb if and only if the call
11089 is the last insn of this bb. */
11091 static void
11092 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
11094 edge eg;
11095 bool abnormal_edge_p = false;
11096 edge_iterator eg_iterator;
11097 basic_block bb = BLOCK_FOR_INSN (cur_insn);
11099 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
11101 if (eg->flags & EDGE_ABNORMAL)
11102 abnormal_edge_p = true;
11103 else
11105 start_sequence ();
11106 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11107 rtx_insn *backup_insn = get_insns ();
11108 end_sequence ();
11110 insert_insn_on_edge (backup_insn, eg);
11114 if (abnormal_edge_p)
11116 start_sequence ();
11117 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11118 rtx_insn *backup_insn = get_insns ();
11119 end_sequence ();
11121 insert_insn_end_basic_block (backup_insn, bb);
11124 commit_edge_insertions ();
11127 /* Return the mode that frm must be switched into
11128 prior to the execution of CUR_INSN. */
11130 static int
11131 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
11133 if (!DYNAMIC_FRM_RTL(cfun))
11135 /* The dynamic frm will be initialized only once per function. */
11136 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
11137 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11140 if (CALL_P (cur_insn))
11142 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
11144 if (!insn)
11145 riscv_frm_emit_after_bb_end (cur_insn);
11147 return riscv_vector::FRM_DYN_CALL;
11150 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
11152 if (mode == riscv_vector::FRM_NONE)
11153 /* After a call, we need to back up the frm because it may have been
11154 updated during the call. Here, for each insn, we check whether
11155 the previous insn was a call. When it was, there are
11156 two cases for the emit mode set.
11158 1. The current insn is not MODE_NONE; the mode-switching framework
11159 will do the switch from MODE_CALL to MODE_NONE natively.
11160 2. The current insn is MODE_NONE; we need to adjust MODE_NONE to
11161 MODE_DYN, and leave it to the mode-switching framework to perform
11162 the emit mode set.
11164 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
11166 return mode;
11169 /* Return the mode that ENTITY must be switched into
11170 prior to the execution of INSN. */
11172 static int
11173 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
11175 int code = recog_memoized (insn);
11177 switch (entity)
11179 case RISCV_VXRM:
11180 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
11181 case RISCV_FRM:
11182 return riscv_frm_mode_needed (insn, code);
11183 default:
11184 gcc_unreachable ();
/* Return TRUE if an insn is an asm.  */

static bool
asm_insn_p (rtx_insn *insn)
{
  extract_insn (insn);

  return recog_data.is_asm;
}
/* Return TRUE if an insn is unknown for VXRM.  */

static bool
vxrm_unknown_p (rtx_insn *insn)
{
  /* Return true if there is a definition of VXRM.  */
  if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
    return true;

  /* A called function may contain an instruction that modifies the VXRM,
     return true in this situation.  */
  if (CALL_P (insn))
    return true;

  /* Return true for all assembly since users may hardcode an assembly
     like this: asm volatile ("csrwi vxrm, 0").  */
  if (asm_insn_p (insn))
    return true;

  return false;
}
/* Return TRUE if an insn is unknown dynamic for FRM.  */

static bool
frm_unknown_dynamic_p (rtx_insn *insn)
{
  /* Return true if there is a definition of FRM.  */
  if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
    return true;

  return false;
}
/* Return the mode that an insn results in for VXRM.  */

static int
riscv_vxrm_mode_after (rtx_insn *insn, int mode)
{
  if (vxrm_unknown_p (insn))
    return VXRM_MODE_NONE;

  if (recog_memoized (insn) < 0)
    return mode;

  if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
    return get_attr_vxrm_mode (insn);
  else
    return mode;
}
/* Return the mode that an insn results in for FRM.  */

static int
riscv_frm_mode_after (rtx_insn *insn, int mode)
{
  STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);

  if (CALL_P (insn))
    return mode;

  if (frm_unknown_dynamic_p (insn))
    return riscv_vector::FRM_DYN;

  if (recog_memoized (insn) < 0)
    return mode;

  if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
    return get_attr_frm_mode (insn);
  else
    return mode;
}
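/* Illustrative summary (assumption, not original source text): a call
   leaves the tracked mode unchanged (calls are handled via FRM_DYN_CALL
   in the mode-needed hook); a direct write of the FRM register forces
   FRM_DYN; a recognized insn whose pattern mentions FRM reports its
   frm_mode attribute; everything else keeps the incoming mode.  */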
/* Return the mode that an insn results in.  */

static int
riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return riscv_vxrm_mode_after (insn, mode);
    case RISCV_FRM:
      return riscv_frm_mode_after (insn, mode);
    default:
      gcc_unreachable ();
    }
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
riscv_mode_entry (int entity)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return VXRM_MODE_NONE;
    case RISCV_FRM:
      {
	/* According to RVV 1.0 spec, all vector floating-point operations use
	   the dynamic rounding mode in the frm register.  Likewise in other
	   similar places.  */
	return riscv_vector::FRM_DYN;
      }
    default:
      gcc_unreachable ();
    }
}
/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
riscv_mode_exit (int entity)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return VXRM_MODE_NONE;
    case RISCV_FRM:
      return riscv_vector::FRM_DYN_EXIT;
    default:
      gcc_unreachable ();
    }
}
static int
riscv_mode_priority (int, int n)
{
  return n;
}
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */

unsigned int
riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
    return riscv_vector::autovectorize_vector_modes (modes, all);

  return default_autovectorize_vector_modes (modes, all);
}
/* Implement TARGET_VECTORIZE_RELATED_MODE.  */

opt_machine_mode
riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
			      poly_uint64 nunits)
{
  if (TARGET_VECTOR)
    return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
						 nunits);
  return default_vectorize_related_mode (vector_mode, element_mode, nunits);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
				rtx target, rtx op0, rtx op1,
				const vec_perm_indices &sel)
{
  if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
    return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
						op1, sel);

  return false;
}
static bool
riscv_frame_pointer_required (void)
{
  return riscv_save_frame_pointer && !crtl->is_leaf;
}
/* Return the appropriate common costs according to VECTYPE from COSTS.  */

static const common_vector_cost *
get_common_costs (const cpu_vector_cost *costs, tree vectype)
{
  gcc_assert (costs);

  if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
    return costs->vls;
  return costs->vla;
}
/* Return the CPU vector costs according to -mtune if tune info has non-NULL
   vector cost.  Otherwise, return the default generic vector costs.  */

const cpu_vector_cost *
get_vector_costs ()
{
  const cpu_vector_cost *costs = tune_param->vec_costs;
  if (!costs)
    return &generic_vector_cost;
  return costs;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				  tree vectype, int misalign ATTRIBUTE_UNUSED)
{
  const cpu_vector_cost *costs = get_vector_costs ();
  bool fp = false;

  if (vectype != NULL)
    fp = FLOAT_TYPE_P (vectype);

  const common_vector_cost *common_costs = get_common_costs (costs, vectype);
  gcc_assert (common_costs != NULL);
  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;

    case scalar_load:
      return costs->scalar_load_cost;

    case scalar_store:
      return costs->scalar_store_cost;

    case vector_stmt:
      return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;

    case vector_load:
      return common_costs->align_load_cost;

    case vector_store:
      return common_costs->align_store_cost;

    case vec_to_scalar:
      return common_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return common_costs->scalar_to_vec_cost;

    case unaligned_load:
      return common_costs->unalign_load_cost;
    case vector_gather_load:
      return common_costs->gather_load_cost;

    case unaligned_store:
      return common_costs->unalign_store_cost;
    case vector_scatter_store:
      return common_costs->scatter_store_cost;

    case cond_branch_taken:
      return costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return costs->cond_not_taken_branch_cost;

    case vec_perm:
      return common_costs->permute_cost;

    case vec_promote_demote:
      return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;

    case vec_construct:
      return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));

    default:
      gcc_unreachable ();
    }

  return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
}
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  if (TARGET_VECTOR)
    return new riscv_vector::costs (vinfo, costing_for_scalar);
  /* Default vector costs.  */
  return new vector_costs (vinfo, costing_for_scalar);
}
/* Implement TARGET_PREFERRED_ELSE_VALUE.  */

static tree
riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
			    tree *ops)
{
  if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
    return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));

  return default_preferred_else_value (ifn, vectype, nops, ops);
}
/* If MEM is in the form of "base+offset", extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
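/* For example (illustrative only): (mem (plus (reg a0) (const_int 8)))
   yields *base == (reg a0) and *offset == (const_int 8), while a bare
   (mem (reg a0)) yields *base == (reg a0) and *offset == const0_rtx.  */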
/* Implements target hook vector_mode_supported_any_target_p.  */

static bool
riscv_vector_mode_supported_any_target_p (machine_mode)
{
  if (TARGET_XTHEADVECTOR)
    return false;
  return true;
}
/* Implements hook TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
riscv_function_value_regno_p (const unsigned regno)
{
  if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST)
    return true;

  if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
    return true;

  if (TARGET_VECTOR && regno == V_RETURN)
    return true;

  return false;
}
/* Implements hook TARGET_GET_RAW_RESULT_MODE.  */

static fixed_size_mode
riscv_get_raw_result_mode (int regno)
{
  if (!is_a <fixed_size_mode> (reg_raw_mode[regno]))
    return as_a <fixed_size_mode> (VOIDmode);

  return default_get_reg_raw_mode (regno);
}
/* Implements the unsigned saturation add standard name usadd for int mode.

   z = SAT_ADD(x, y).

   1. sum = x + y.
   2. sum = truncate (sum) for QI and HI only.
   3. lt = sum < x.
   4. lt = -lt.
   5. z = sum | lt.  */

void
riscv_expand_usadd (rtx dest, rtx x, rtx y)
{
  machine_mode mode = GET_MODE (dest);
  rtx xmode_sum = gen_reg_rtx (Xmode);
  rtx xmode_lt = gen_reg_rtx (Xmode);
  rtx xmode_x = gen_lowpart (Xmode, x);
  rtx xmode_y = gen_lowpart (Xmode, y);
  rtx xmode_dest = gen_reg_rtx (Xmode);

  /* Step-1: sum = x + y  */
  if (mode == SImode && mode != Xmode)
    {
      /* Take addw to avoid the sum truncate.  */
      rtx simode_sum = gen_reg_rtx (SImode);
      riscv_emit_binary (PLUS, simode_sum, x, y);
      emit_move_insn (xmode_sum, gen_lowpart (Xmode, simode_sum));
    }
  else
    riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);

  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  */
  if (mode == HImode || mode == QImode)
    {
      int shift_bits = GET_MODE_BITSIZE (Xmode)
	- GET_MODE_BITSIZE (mode).to_constant ();

      gcc_assert (shift_bits > 0);

      riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
      riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
    }

  /* Step-2: lt = sum < x  */
  riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);

  /* Step-3: lt = -lt  */
  riscv_emit_unary (NEG, xmode_lt, xmode_lt);

  /* Step-4: xmode_dest = sum | lt  */
  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);

  /* Step-5: dest = xmode_dest  */
  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
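/* Worked example of the steps above (illustrative, QImode on rv64):
   SAT_ADD (0xfe, 0x03): sum = 0x101; the shift pair truncates it to
   0x01; lt = (0x01 < 0xfe) = 1; -lt = all-ones; sum | -lt = all-ones,
   whose low byte 0xff is the saturated result moved into DEST.  */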
/* Implements the unsigned saturation sub standard name ussub for int mode.

   z = SAT_SUB(x, y).

   1. minus = x - y.
   2. lt = x < y.
   3. lt = lt - 1.
   4. z = minus & lt.  */

void
riscv_expand_ussub (rtx dest, rtx x, rtx y)
{
  machine_mode mode = GET_MODE (dest);
  rtx pmode_x = gen_lowpart (Pmode, x);
  rtx pmode_y = gen_lowpart (Pmode, y);
  rtx pmode_lt = gen_reg_rtx (Pmode);
  rtx pmode_minus = gen_reg_rtx (Pmode);
  rtx pmode_dest = gen_reg_rtx (Pmode);

  /* Step-1: minus = x - y  */
  riscv_emit_binary (MINUS, pmode_minus, pmode_x, pmode_y);

  /* Step-2: lt = x < y  */
  riscv_emit_binary (LTU, pmode_lt, pmode_x, pmode_y);

  /* Step-3: lt = lt - 1 (lt + (-1))  */
  riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));

  /* Step-4: pmode_dest = minus & lt  */
  riscv_emit_binary (AND, pmode_dest, pmode_lt, pmode_minus);

  /* Step-5: dest = pmode_dest  */
  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
}
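/* Worked example of the steps above (illustrative): SAT_SUB (5, 9):
   minus = -4; lt = (5 < 9) = 1; lt - 1 = 0; minus & 0 = 0 (clamped to
   zero).  SAT_SUB (9, 5): minus = 4; lt = 0; lt - 1 = all-ones;
   minus & all-ones = 4 (unchanged).  */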
/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
   TI_LONG_DOUBLE_TYPE which is for long double type, go with the
   default one for the others.  */

static machine_mode
riscv_c_mode_for_floating_type (enum tree_index ti)
{
  if (ti == TI_LONG_DOUBLE_TYPE)
    return TFmode;
  return default_mode_for_floating_type (ti);
}
/* Initialize the GCC target structure.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE riscv_option_override

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE riscv_option_restore

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS riscv_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST riscv_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST riscv_insn_cost

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START riscv_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END file_end_indicate_exec_stack

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY riscv_return_in_memory

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND riscv_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG riscv_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
#undef TARGET_FNTYPE_ABI
#define TARGET_FNTYPE_ABI riscv_fntype_abi
#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  riscv_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  riscv_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  riscv_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  riscv_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  riscv_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  riscv_set_handled_components

/* The generic ELF target does not always have TLS support.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE riscv_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT riscv_trampoline_init

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p

#undef TARGET_HAVE_SRODATA_SECTION
#define TARGET_HAVE_SRODATA_SECTION true

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION riscv_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION riscv_unique_section

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY riscv_register_priority

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS riscv_init_builtins

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL riscv_builtin_decl

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN riscv_expand_builtin

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment

#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE riscv_attribute_table

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN riscv_warn_func_return

/* The low bit is ignored by jump instructions so is safe to use.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg

#undef TARGET_NEW_ADDRESS_PROFITABLE_P
#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE riscv_mangle_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  riscv_libgcc_floating_mode_supported_p

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS riscv_init_libfuncs

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION riscv_excess_precision

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE riscv_floatn_mode

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p

#undef TARGET_VERIFY_TYPE_CONTEXT
#define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode

#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE riscv_array_mode

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  riscv_vectorize_preferred_vector_alignment

/* Mode switching hooks.  */

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT riscv_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED riscv_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER riscv_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY riscv_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT riscv_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY riscv_mode_priority

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  riscv_autovectorize_vector_modes

#undef TARGET_VECTORIZE_RELATED_MODE
#define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  riscv_builtin_vectorization_cost

#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs

#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value

#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p

#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-riscv.h"