[committed] Fix RISC-V missing stack tie
[official-gcc.git] / gcc / config / riscv / riscv.cc
blob 1358c243898af62e2bc5389213d9dd7f98c0e63d
/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"

/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
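
/* Illustrative sketch (not part of the original source): after
   riscv_unspec_address (near the end of this file) wraps a symbol, the
   macros above pick the wrapper apart.  For a hypothetical
   SYMBOL_ABSOLUTE reference the RTL looks roughly like:

     (const (unspec [(symbol_ref "foo")]
		     UNSPEC_ADDRESS_FIRST + SYMBOL_ABSOLUTE))

   so UNSPEC_ADDRESS_P is true for the inner UNSPEC, UNSPEC_ADDRESS
   yields the SYMBOL_REF, and UNSPEC_ADDRESS_TYPE yields
   SYMBOL_ABSOLUTE.  */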
/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching has static frm, or false.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)

/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)

/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, setting all fields to zero.  */
  void reset (void);
};

enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding-mode instruction in
     the function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};

struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8
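
/* Worked example (illustrative only, not from the original source):
   loading the constant 0x1234567 takes two entries in the CODES array:

     A = 0x1234000		-- LUI;  codes[0].code == UNKNOWN
     A = A + 0x567		-- ADDI; codes[1].code == PLUS

   so riscv_build_integer_1 below would report a cost of 2.  */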
enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};
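
/* Illustrative note (not from the original source): each flag names an
   instruction pair the tuned core can macro-fuse.  RISCV_FUSE_LUI_ADDI,
   for instance, covers the classic absolute-address idiom

     lui	a0, %hi(sym)
     addi	a0, a0, %lo(sym)

   which a fusing core can issue as if it were a single instruction.  */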
/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool use_divmod_expansion;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};

/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS, GR_REGS, GR_REGS, GR_REGS,
  GR_REGS, GR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  JALR_REGS, JALR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FRAME_REGS, FRAME_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  VM_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
};
/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};

/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};

/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};

/* Generic costs for vector insn classes.  These are the vector cost
   models used by default if no other cost model is specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* cond_taken_branch_cost */
  1, /* cond_not_taken_branch_cost */
  &rvv_vls_vector_cost, /* vls */
  &rvv_vla_vector_cost, /* vla */
  &rvv_regmove_vector_cost, /* regmove */
};
/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  2,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  3,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  4,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  6,						/* issue_rate */
  3,						/* branch_cost */
  3,						/* memory_cost */
  3,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,		/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)},	/* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)},	/* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  4,						/* memory_cost */
  4,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);

/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute indicating that a function has no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL}
};

static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};

static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL}
};

static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};

/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};

/* Global variable to distinguish whether we should save and restore s0/fp
   for the function.  */
static bool riscv_save_frame_pointer;

typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;
typedef insn_code (*code_for_push_pop_t) (machine_mode);

void
riscv_frame_info::reset (void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}

/* Return the riscv_tune_info entry for the given name string.  If the name
   is not found, return nullptr when NULL_P is true; otherwise report an
   error and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}
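
/* Usage sketch (illustrative, not from the original source): parsing a
   known tuning string returns its table entry, while an unknown string
   with NULL_P == true comes back as nullptr without a diagnostic:

     const riscv_tune_info *ti = riscv_parse_tune ("rocket", true);
     const riscv_tune_info *bad = riscv_parse_tune ("no-such-cpu", true);
     // ti != nullptr (assuming "rocket" is listed in riscv-cores.def);
     // bad == nullptr.  */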
/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
	shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  cost = 2;
	}
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else
	{
	  int upper_trailing_ones = ctz_hwi (~value >> 32);
	  int lower_leading_ones = clz_hwi (~value << 32);

	  if (upper_trailing_ones < 32 && lower_leading_ones < 32
	      && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	    {
	      codes[0].code = UNKNOWN;
	      /* The sign-bit might be zero, so just rotate to be safe.  */
	      codes[0].value = ((value << (32 - upper_trailing_ones))
				| ((unsigned HOST_WIDE_INT) value
				   >> (32 + upper_trailing_ones)));
	      codes[1].code = ROTATERT;
	      codes[1].value = 32 - upper_trailing_ones;
	      cost = 2;
	    }
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  return cost;
}
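
/* Worked example (illustrative, not from the original source): on RV64,
   VALUE == 0x00000000ffffffff costs 3 when built directly, but with
   shift == clz_hwi (value) == 32 the "fill trailing bits with 1s"
   variant produces shifted_val == -1, a single ADDI, giving

     A = -1			-- ADDI
     A = A >> 32 (logical)	-- SRLI

   for a total cost of 2.  */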
/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}

/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}
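
/* Worked example (illustrative, not from the original source): for
   VAL == 0x1234567812345678 the two halves are identical
   (loval == hival == 0x12345678), so only the low half is synthesized
   and the result is reassembled as

     lo = 0x12345678
     result = (lo << 32) + lo

   saving the cost of building the high half separately.  */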
/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}
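
/* Illustrative note (not from the original source): the SYMBOL_ABSOLUTE
   count of 2 corresponds to the usual medlow access sequence, e.g.

     lui	a5, %hi(sym)
     lw		a0, %lo(sym)(a5)

   where the second instruction is "the reference" itself.  */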
/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling, ensuring that entries 29 and 30 of HF are
     the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};

/* Display floating-point values at the assembly level, which is consistent
   with the zfa extension of llvm:
   https://reviews.llvm.org/D145645.  */
const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};
/* Return the index into the FLI instruction tables if rtx X is an immediate
   constant that can be moved using a single FLI instruction from the zfa
   extension.  Return -1 if not found.  */

static int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P (x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* When the lower 32 bits are not all 0, the value cannot be in the
	 table.  */
      if (ival & (unsigned HOST_WIDE_INT) 0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;

  /* Perform a binary search to find the target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;
  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m + 1;
      else
	r = m - 1;
    }

  return -1;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
	return false;

      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}
/* Get the valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}

/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
		      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting
     a decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
	 so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
	return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
	       ? DECL_ALIGN (SYMBOL_REF_DECL (x))
	       : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
	      ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      : 2 * BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}
/* Return true if MODE is an enabled RVV vector mode.
   For example, 'RVVMF2SI' is disabled whereas 'RVVM1SI' is enabled
   if MIN_VLEN == 32.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...)		\
  case MODE##mode:				\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is an enabled RVV tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...)	\
  case MODE##mode:				\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is an enabled RVV VLS mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT)		\
  case MODE##mode:				\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is any of the following:
   1. An RVV vector mode.
   2. An RVV tuple mode.
   3. An RVV VLS mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
	 || riscv_v_ext_vls_mode_p (mode);
}

static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
				      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size < scalar_unit_size)
    return 1;

  /* Ensure that the VLS unit size is exactly divisible by
     scalar_unit_size.  */
  gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

  return vls_unit_size / scalar_unit_size;
}

static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  switch (vls_mode_size)
    {
    case 16:
      return TImode;
    case 8:
      return DImode;
    case 4:
      return SImode;
    case 2:
      return HImode;
    case 1:
      return QImode;
    default:
      gcc_unreachable ();
    }
}

/* Called from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, int scale)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
  if (riscv_v_ext_mode_p (mode))
    {
      if (TARGET_MIN_VLEN == 32)
	scale = scale / 2;
      return riscv_vector_chunks * scale;
    }
  return scale;
}

/* Called from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
{
  if (riscv_v_ext_mode_p (mode))
    {
      scalar_mode smode = GET_MODE_INNER (mode);
      int size = GET_MODE_SIZE (smode);
      int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
      if (fractional_p)
	return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
      else
	return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
    }

  /* Set the disabled RVV modes size as 1 by default.  */
  return 1;
}
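
/* Worked example (illustrative, not from the original source, and
   assuming riscv_bytes_per_vector_chunk == 8, i.e. MIN_VLEN > 32): for
   an SImode element (size 4) there are 8 / 4 == 2 elements per chunk,
   so a non-fractional mode with LMUL == 2 and NF == 1 gets
   2 * 2 * riscv_vector_chunks == 4 * riscv_vector_chunks elements.  */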
/* Called from ADJUST_BYTESIZE in riscv-modes.def.  Return the correct
   BYTE size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_bytesize (machine_mode mode, int scale)
{
  if (riscv_v_ext_vector_mode_p (mode))
    {
      if (TARGET_XTHEADVECTOR)
	return BYTES_PER_RISCV_VECTOR;

      poly_int64 nunits = GET_MODE_NUNITS (mode);

      if (nunits.coeffs[0] > 8)
	return exact_div (nunits, 8);
      else if (nunits.is_constant ())
	return 1;
      else
	return poly_int64 (1, 1);
    }

  return scale;
}

/* Called from ADJUST_PRECISION in riscv-modes.def.  Return the correct
   PRECISION size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_precision (machine_mode mode, int scale)
{
  return riscv_v_adjust_nunits (mode, scale);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  if (th_classify_address (info, x, mode, strict_p))
    return true;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      /* RVV load/store disallow any offset.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      /* RVV load/store disallow LO_SUM.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
	 sane.  Target-independent code that creates a LO_SUM should also
	 create and verify the matching HIGH.  Target-independent code that
	 adds an offset to a LO_SUM must prove that the offset will not
	 induce a carry.  Failure to do either of these things would be
	 a bug, and we are not required to check for it here.  The RISC-V
	 backend itself should only create LO_SUMs for valid symbolic
	 constants, with the high part being either a HIGH or a copy
	 of _gp.  */
      info->symbol_type
	= riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));

    case CONST_INT:
      /* We only allow the const0_rtx for the RVV load/store.  For example:
	 +----------------------------------------------------------+
	 | li a5,0                                                  |
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(a5)  <- propagate the const 0 to a5 here.  |
	 | vs1r.v v24,0(a0)                                         |
	 +----------------------------------------------------------+
	 It can be folded to:
	 +----------------------------------------------------------+
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(zero)                                      |
	 | vs1r.v v24,0(a0)                                         |
	 +----------------------------------------------------------+
	 This behavior will benefit the underlying RVV auto vectorization.  */
      if (riscv_v_ext_mode_p (mode))
	return x == const0_rtx;

      /* Small-integer addresses don't occur very often, but they
	 are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
			    code_helper = ERROR_MARK)
{
  /* Disallow RVV modes as the base address.
     E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0)).  */
  if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
    return false;
  struct riscv_address_info addr;

  return riscv_classify_address (&addr, x, mode, strict_p);
}
/* Return true if hard reg REGNO can be used in compressed instructions.  */

static bool
riscv_compressed_reg_p (int regno)
{
  /* x8-x15/f8-f15 are compressible registers.  */
  return ((TARGET_RVC || TARGET_ZCA)
	  && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
	      || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
}

/* Return true if X is an unsigned 5-bit immediate scaled by 4.  */

static bool
riscv_compressed_lw_offset_p (rtx x)
{
  return (CONST_INT_P (x)
	  && (INTVAL (x) & 3) == 0
	  && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
}
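
/* Illustrative note (not from the original source): c.lw encodes a
   5-bit unsigned immediate scaled by 4, so the offsets accepted here
   are 0, 4, 8, ... up to CSW_MAX_OFFSET.  An offset of 6 fails the
   (INTVAL (x) & 3) == 0 test, and anything past CSW_MAX_OFFSET falls
   outside the encodable range.  */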
/* Return true if the load/store from/to address X can be compressed.  */

static bool
riscv_compressed_lw_address_p (rtx x)
{
  struct riscv_address_info addr;
  bool result = riscv_classify_address (&addr, x, GET_MODE (x),
					reload_completed);

  /* Return false if the address is not compressed_reg + small_offset.  */
  if (!result
      || addr.type != ADDRESS_REG
      /* Before reload, assume all registers are OK.  */
      || (reload_completed
	  && !riscv_compressed_reg_p (REGNO (addr.reg))
	  && addr.reg != stack_pointer_rtx)
      || !riscv_compressed_lw_offset_p (addr.offset))
    return false;

  return result;
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr = {};
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    {
      /* This could be a pattern from the pic.md file.  In which case we want
	 this address to always have a cost of 3 to make it as expensive as
	 the most expensive symbol.  This prevents constant propagation from
	 preferring symbols over register plus offset.  */
      return 3;
    }

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}

/* Return the number of instructions needed to load constant X.
   Return 0 if X isn't a valid constant.  */

int
riscv_const_insns (rtx x)
{
  enum riscv_symbol_type symbol_type;
  rtx offset;

  switch (GET_CODE (x))
    {
    case HIGH:
      if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
	  || !riscv_split_symbol_type (symbol_type))
	return 0;

      /* This is simply an LUI.  */
      return 1;

    case CONST_INT:
      {
	int cost = riscv_integer_cost (INTVAL (x));
	/* Force complicated constants to memory.  */
	return cost < 4 ? cost : 0;
      }

    case CONST_DOUBLE:
      /* See if we can use FMV directly.  */
      if (satisfies_constraint_zfli (x))
	return 1;

      /* We can use x0 to load floating-point zero.  */
      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;

    case CONST_VECTOR:
      {
	/* TODO: This is not accurate, we will need to
	   adapt the COST of CONST_VECTOR in the future
	   for the following cases:

	   - 1. const duplicate vector with element value
		in range of [-16, 15].
	   - 2. const duplicate vector with element value
		out of range of [-16, 15].
	   - 3. const series vector.
	   ...etc.  */
	if (riscv_v_ext_mode_p (GET_MODE (x)))
	  {
	    /* const series vector.  */
	    rtx base, step;
	    if (const_vec_series_p (x, &base, &step))
	      {
		/* This is not accurate, we will need to adapt the COST
		   accurately according to BASE && STEP.  */
		return 1;
	      }

	    rtx elt;
	    if (const_vec_duplicate_p (x, &elt))
	      {
		/* We don't allow a CONST_VECTOR of DI elements on RV32,
		   since the element value cannot be held in a single
		   register; this keeps reload from turning a DI
		   vec_duplicate into vmv.v.x.  */
		scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
		if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
		    && !immediate_operand (elt, Pmode))
		  return 0;
		/* Constants from -16 to 15 can be loaded with vmv.v.i.
		   The Wc0, Wc1 constraints are already covered by the
		   vi constraint so we do not need to check them here
		   separately.  */
		if (satisfies_constraint_vi (x))
		  return 1;

		/* Any int/FP constants can always be broadcast from a
		   scalar register.  Loading of a floating-point
		   constant incurs a literal-pool access.  Allow this in
		   order to increase vectorization possibilities.  */
		int n = riscv_const_insns (elt);
		if (CONST_DOUBLE_P (elt))
		  return 1 + 4; /* vfmv.v.f + memory access.  */
		else
		  {
		    /* We need as many insns as it takes to load the
		       constant into a GPR and one vmv.v.x.  */
		    if (n != 0)
		      return 1 + n;
		    else
		      return 1 + 4; /* vmv.v.x + memory access.  */
		  }
	      }
	  }

	/* TODO: We may support more const vectors in the future.  */
	return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
      }

    case CONST:
      /* See if we can refer to X directly.  */
      if (riscv_symbolic_constant_p (x, &symbol_type))
	return riscv_symbol_insns (symbol_type);

      /* Otherwise try splitting the constant into a base and offset.  */
      split_const (x, &x, &offset);
      if (offset != 0)
	{
	  int n = riscv_const_insns (x);
	  if (n != 0)
	    return n + riscv_integer_cost (INTVAL (offset));
	}
      return 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return riscv_symbol_insns (riscv_classify_symbol (x));

    /* TODO: In RVV, we get CONST_POLY_INT by using csrr VLENB
       instruction and several scalar shift or mult instructions,
       it is so far unknown.  We set it to 4 temporarily.  */
    case CONST_POLY_INT:
      return 4;

    default:
      return 0;
    }
}
1866 /* X is a doubleword constant that can be handled by splitting it into
1867 two words and loading each word separately. Return the number of
1868 instructions required to do this. */
1871 riscv_split_const_insns (rtx x)
1873 unsigned int low, high;
1875 low = riscv_const_insns (riscv_subword (x, false));
1876 high = riscv_const_insns (riscv_subword (x, true));
1877 gcc_assert (low > 0 && high > 0);
1878 return low + high;
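/* Illustrative example (not from the original sources): on RV32, a
   64-bit constant such as 0x1234567855667788 is loaded one word at a
   time, so the count above would be 2 (lui+addi for 0x55667788) plus
   2 (lui+addi for 0x12345678), i.e. 4 instructions.  */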
1881 /* Return the number of instructions needed to implement INSN,
1882 given that it loads from or stores to MEM. */
1885 riscv_load_store_insns (rtx mem, rtx_insn *insn)
1887 machine_mode mode;
1888 bool might_split_p;
1889 rtx set;
1891 gcc_assert (MEM_P (mem));
1892 mode = GET_MODE (mem);
1894 /* Try to prove that INSN does not need to be split. */
1895 might_split_p = true;
1896 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
1897 might_split_p = false;
1898 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
1900 set = single_set (insn);
1901 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
1902 might_split_p = false;
1905 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
1908 /* Emit a move from SRC to DEST. Assume that the move expanders can
1909 handle all moves if !can_create_pseudo_p (). The distinction is
1910 important because, unlike emit_move_insn, the move expanders know
1911 how to force Pmode objects into the constant pool even when the
1912 constant pool address is not itself legitimate. */
1915 riscv_emit_move (rtx dest, rtx src)
1917 return (can_create_pseudo_p ()
1918 ? emit_move_insn (dest, src)
1919 : emit_move_insn_1 (dest, src));
1922 /* Emit an instruction of the form (set TARGET SRC). */
1924 static rtx
1925 riscv_emit_set (rtx target, rtx src)
1927 emit_insn (gen_rtx_SET (target, src));
1928 return target;
1931 /* Emit an instruction of the form (set DEST (CODE X)). */
1934 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
1936 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
1939 /* Emit an instruction of the form (set DEST (CODE X Y)). */
1942 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
1944 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
1947 /* Compute (CODE X Y) and store the result in a new register
1948 of mode MODE. Return that new register. */
1950 static rtx
1951 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
1953 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
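/* Byte-swap the SImode value INST when the target is big-endian.
   RISC-V instruction words are stored little-endian regardless of the
   data endianness, so code that materializes instruction words (for
   instance, trampoline emission) must swap them on big-endian
   targets.  */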
1956 static rtx
1957 riscv_swap_instruction (rtx inst)
1959 gcc_assert (GET_MODE (inst) == SImode);
1960 if (BYTES_BIG_ENDIAN)
1961 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
1962 return inst;
1965 /* Copy VALUE to a register and return that register. If new pseudos
1966 are allowed, copy it into a new register, otherwise use DEST. */
1968 static rtx
1969 riscv_force_temporary (rtx dest, rtx value)
1971 if (can_create_pseudo_p ())
1972 return force_reg (Pmode, value);
1973 else
1975 riscv_emit_move (dest, value);
1976 return dest;
1980 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
1981 then add CONST_INT OFFSET to the result. */
1983 static rtx
1984 riscv_unspec_address_offset (rtx base, rtx offset,
1985 enum riscv_symbol_type symbol_type)
1987 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
1988 UNSPEC_ADDRESS_FIRST + symbol_type);
1989 if (offset != const0_rtx)
1990 base = gen_rtx_PLUS (Pmode, base, offset);
1991 return gen_rtx_CONST (Pmode, base);
1994 /* Return an UNSPEC address with underlying address ADDRESS and symbol
1995 type SYMBOL_TYPE. */
1998 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2000 rtx base, offset;
2002 split_const (address, &base, &offset);
2003 return riscv_unspec_address_offset (base, offset, symbol_type);
2006 /* If OP is an UNSPEC address, return the address to which it refers,
2007 otherwise return OP itself. */
2009 static rtx
2010 riscv_strip_unspec_address (rtx op)
2012 rtx base, offset;
2014 split_const (op, &base, &offset);
2015 if (UNSPEC_ADDRESS_P (base))
2016 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2017 return op;
2020 /* Wrap riscv_unspec_address (ADDR, SYMBOL_TYPE) in a HIGH and force
2021 it into a temporary register. TEMP is as for
2022 riscv_force_temporary.
2024 The returned expression can be used as the first operand to a LO_SUM. */
2026 static rtx
2027 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2029 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2030 return riscv_force_temporary (temp, addr);
2033 /* Load an entry from the GOT for a TLS GD access. */
2035 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2037 if (Pmode == DImode)
2038 return gen_got_load_tls_gddi (dest, sym);
2039 else
2040 return gen_got_load_tls_gdsi (dest, sym);
2043 /* Load an entry from the GOT for a TLS IE access. */
2045 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2047 if (Pmode == DImode)
2048 return gen_got_load_tls_iedi (dest, sym);
2049 else
2050 return gen_got_load_tls_iesi (dest, sym);
2053 /* Add in the thread pointer for a TLS LE access. */
2055 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2057 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2058 if (Pmode == DImode)
2059 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2060 else
2061 return gen_tls_add_tp_lesi (dest, base, tp, sym);
2064 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2065 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2066 constant in that context and can be split into high and low parts.
2067 If so, and if LOW_OUT is nonnull, emit the high part and store the
2068 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2070 TEMP is as for riscv_force_temporary and is used to load the high
2071 part into a register.
2073 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2074 a legitimate SET_SRC for an .md pattern, otherwise the low part
2075 is guaranteed to be a legitimate address for mode MODE. */
2077 bool
2078 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2080 enum riscv_symbol_type symbol_type;
2082 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2083 || !riscv_symbolic_constant_p (addr, &symbol_type)
2084 || riscv_symbol_insns (symbol_type) == 0
2085 || !riscv_split_symbol_type (symbol_type))
2086 return false;
2088 if (low_out)
2089 switch (symbol_type)
2091 case SYMBOL_FORCE_TO_MEM:
2092 return false;
2094 case SYMBOL_ABSOLUTE:
2096 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2097 high = riscv_force_temporary (temp, high);
2098 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2100 break;
2102 case SYMBOL_PCREL:
2104 static unsigned seqno;
2105 char buf[32];
2106 rtx label;
2108 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2109 gcc_assert ((size_t) bytes < sizeof (buf));
2111 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2112 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2113 /* ??? Ugly hack to make weak symbols work. May need to change the
2114 RTL for the auipc and/or low patterns to get a better fix for
2115 this. */
2116 if (! nonzero_address_p (addr))
2117 SYMBOL_REF_WEAK (label) = 1;
2119 if (temp == NULL)
2120 temp = gen_reg_rtx (Pmode);
2122 if (Pmode == DImode)
2123 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2124 else
2125 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2127 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2129 seqno++;
2131 break;
2133 default:
2134 gcc_unreachable ();
2137 return true;
2140 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2141 riscv_force_temporary; it is only needed when OFFSET is not a
2142 SMALL_OPERAND. */
2144 static rtx
2145 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2147 if (!SMALL_OPERAND (offset))
2149 rtx high;
2151 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2152 The addition inside the macro CONST_HIGH_PART may cause an
2153 overflow, so we need to force a sign-extension check. */
2154 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2155 offset = CONST_LOW_PART (offset);
2156 high = riscv_force_temporary (temp, high);
2157 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2159 return plus_constant (Pmode, reg, offset);
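/* Illustrative sketch (not from the original sources): for
   OFFSET == 0x12345678 the code above emits

	lui	TEMP, 0x12345		# CONST_HIGH_PART
	add	TEMP, TEMP, REG

   and returns (plus TEMP 0x678), whose offset now fits in the 12-bit
   immediate field.  */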
2162 /* The __tls_get_addr symbol. */
2163 static GTY(()) rtx riscv_tls_symbol;
2165 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2166 the TLS symbol we are referencing; it is accessed with a
2167 global-dynamic GOT load. RESULT is an RTX for the
2168 return value location. */
2170 static rtx_insn *
2171 riscv_call_tls_get_addr (rtx sym, rtx result)
2173 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2174 rtx_insn *insn;
2176 if (!riscv_tls_symbol)
2177 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2178 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2180 start_sequence ();
2182 emit_insn (riscv_got_load_tls_gd (a0, sym));
2183 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2184 gen_int_mode (RISCV_CC_BASE, SImode)));
2185 RTL_CONST_CALL_P (insn) = 1;
2186 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2187 insn = get_insns ();
2189 end_sequence ();
2191 return insn;
2194 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2195 its address. The return value will be both a valid address and a valid
2196 SET_SRC (either a REG or a LO_SUM). */
2198 static rtx
2199 riscv_legitimize_tls_address (rtx loc)
2201 rtx dest, tp, tmp;
2202 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2204 #if 0
2205 /* TLS copy relocs are now deprecated and should not be used. */
2206 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2207 if (!flag_pic)
2208 model = TLS_MODEL_LOCAL_EXEC;
2209 #endif
2211 switch (model)
2213 case TLS_MODEL_LOCAL_DYNAMIC:
2214 /* Rely on section anchors for the optimization that LDM TLS
2215 provides. The anchor's address is loaded with GD TLS. */
2216 case TLS_MODEL_GLOBAL_DYNAMIC:
2217 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2218 dest = gen_reg_rtx (Pmode);
2219 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, loc);
2220 break;
2222 case TLS_MODEL_INITIAL_EXEC:
2223 /* la.tls.ie; tp-relative add */
2224 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2225 tmp = gen_reg_rtx (Pmode);
2226 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2227 dest = gen_reg_rtx (Pmode);
2228 emit_insn (gen_add3_insn (dest, tmp, tp));
2229 break;
2231 case TLS_MODEL_LOCAL_EXEC:
2232 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2233 dest = gen_reg_rtx (Pmode);
2234 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2235 dest = gen_rtx_LO_SUM (Pmode, dest,
2236 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2237 break;
2239 default:
2240 gcc_unreachable ();
2242 return dest;
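/* For reference, an illustrative local-exec sequence for a symbol X:

	lui	a5, %tprel_hi(x)
	add	a5, a5, tp, %tprel_add(x)

   with %tprel_lo(x) folded into the LO_SUM built above.  */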
2245 /* If X is not a valid address for mode MODE, force it into a register. */
2247 static rtx
2248 riscv_force_address (rtx x, machine_mode mode)
2250 if (!riscv_legitimate_address_p (mode, x, false))
2252 if (can_create_pseudo_p ())
2253 return force_reg (Pmode, x);
2254 else
2256 /* This is only safe in a thunk function.
2257 Use ra as the temp register. */
2258 gcc_assert (riscv_in_thunk_func);
2259 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2260 riscv_emit_move (reg, x);
2261 return reg;
2265 return x;
2268 /* Modify base + offset so that offset fits within a compressed load/store insn
2269 and the excess is added to base. */
2271 static rtx
2272 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2274 rtx addr, high;
2275 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2276 into HIGH. */
2277 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2278 offset &= CSW_MAX_OFFSET;
2279 if (!SMALL_OPERAND (INTVAL (high)))
2280 high = force_reg (Pmode, high);
2281 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2282 addr = plus_constant (Pmode, base, offset);
2283 return addr;
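/* Illustrative example, assuming CSW_MAX_OFFSET is 124 (an unsigned
   5-bit offset scaled by 4, per the comment above): OFFSET == 200
   splits into HIGH == 128, which is added into BASE, and a residual
   offset of 72, which c.lw/c.sw can encode.  */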
2286 /* Helper for riscv_legitimize_address. Given X, return true if it
2287 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2289 These respectively represent canonical shift-add rtxs or scaled
2290 memory addresses. */
2291 static bool
2292 mem_shadd_or_shadd_rtx_p (rtx x)
2294 return ((GET_CODE (x) == ASHIFT
2295 || GET_CODE (x) == MULT)
2296 && CONST_INT_P (XEXP (x, 1))
2297 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2298 || (GET_CODE (x) == MULT
2299 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
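/* For example, both (ashift X 2) and (mult X 4) satisfy this
   predicate; with the Zba extension either form can become a single
   sh2add.  */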
2302 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2303 be legitimized in a way that the generic machinery might not expect,
2304 return a new address, otherwise return NULL. MODE is the mode of
2305 the memory being accessed. */
2307 static rtx
2308 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2309 machine_mode mode)
2311 rtx addr;
2313 if (riscv_tls_symbol_p (x))
2314 return riscv_legitimize_tls_address (x);
2316 /* See if the address can be split into a high part and a LO_SUM. */
2317 if (riscv_split_symbol (NULL, x, mode, &addr))
2318 return riscv_force_address (addr, mode);
2320 /* Handle BASE + OFFSET. */
2321 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2322 && INTVAL (XEXP (x, 1)) != 0)
2324 rtx base = XEXP (x, 0);
2325 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2327 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2328 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2329 && SMALL_OPERAND (offset))
2331 rtx index = XEXP (base, 0);
2332 rtx fp = XEXP (base, 1);
2333 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2336 /* If we were given a MULT, we must fix the constant
2337 as we're going to create the ASHIFT form. */
2338 int shift_val = INTVAL (XEXP (index, 1));
2339 if (GET_CODE (index) == MULT)
2340 shift_val = exact_log2 (shift_val);
2342 rtx reg1 = gen_reg_rtx (Pmode);
2343 rtx reg2 = gen_reg_rtx (Pmode);
2344 rtx reg3 = gen_reg_rtx (Pmode);
2345 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2346 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2347 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2349 return reg3;
2353 if (!riscv_valid_base_register_p (base, mode, false))
2354 base = copy_to_mode_reg (Pmode, base);
2355 if (optimize_function_for_size_p (cfun)
2356 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2357 && mode == SImode)
2358 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2359 possible compressed load/store. */
2360 addr = riscv_shorten_lw_offset (base, offset);
2361 else
2362 addr = riscv_add_offset (NULL, base, offset);
2363 return riscv_force_address (addr, mode);
2366 return x;
2369 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2370 is the original src mode before promotion. */
2372 void
2373 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2374 machine_mode orig_mode)
2376 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2377 machine_mode mode;
2378 int i, num_ops;
2379 rtx x;
2381 mode = GET_MODE (dest);
2382 /* We use the original mode for the riscv_build_integer call, because HImode
2383 values are given special treatment. */
2384 num_ops = riscv_build_integer (codes, value, orig_mode);
2386 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2387 && num_ops >= riscv_split_integer_cost (value))
2388 x = riscv_split_integer (value, mode);
2389 else
2391 codes[0].value = trunc_int_for_mode (codes[0].value, mode);
2392 /* Apply each binary operation to X. */
2393 x = GEN_INT (codes[0].value);
2395 for (i = 1; i < num_ops; i++)
2397 if (!can_create_pseudo_p ())
2398 x = riscv_emit_set (temp, x);
2399 else
2400 x = force_reg (mode, x);
2401 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2402 x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
2406 riscv_emit_set (dest, x);
2409 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2410 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2411 move_operand. */
2413 static void
2414 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2416 rtx base, offset;
2418 /* Split moves of big integers into smaller pieces. */
2419 if (splittable_const_int_operand (src, mode))
2421 riscv_move_integer (dest, dest, INTVAL (src), mode);
2422 return;
2425 if (satisfies_constraint_zfli (src))
2427 riscv_emit_set (dest, src);
2428 return;
2431 /* Split moves of symbolic constants into high/low pairs. */
2432 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2434 riscv_emit_set (dest, src);
2435 return;
2438 /* Generate the appropriate access sequences for TLS symbols. */
2439 if (riscv_tls_symbol_p (src))
2441 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2442 return;
2445 /* If we have (const (plus symbol offset)), and that expression cannot
2446 be forced into memory, load the symbol first and add in the offset. Also
2447 prefer to do this even if the constant _can_ be forced into memory, as it
2448 usually produces better code. */
2449 split_const (src, &base, &offset);
2450 if (offset != const0_rtx
2451 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2453 base = riscv_force_temporary (dest, base);
2454 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2455 return;
2458 /* Handle the format below.
2459 (const:DI
2460 (plus:DI
2461 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2462 (const_poly_int:DI [16, 16]) // <- op_1
2465 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2466 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2468 rtx dest_tmp = gen_reg_rtx (mode);
2469 rtx tmp = gen_reg_rtx (mode);
2471 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2472 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2474 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2475 return;
2478 src = force_const_mem (mode, src);
2480 /* When using explicit relocs, constant pool references are sometimes
2481 not legitimate addresses. */
2482 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2483 riscv_emit_move (dest, src);
2486 /* Report when we try to do something that requires vector when vector is
2487 disabled. This is an error of last resort and isn't very high-quality. It
2488 usually involves attempts to measure the vector length in some way. */
2490 static void
2491 riscv_report_v_required (void)
2493 static bool reported_p = false;
2495 /* Avoid reporting a slew of messages for a single oversight. */
2496 if (reported_p)
2497 return;
2499 error ("this operation requires the RVV ISA extension");
2500 inform (input_location, "you can enable RVV using the command-line"
2501 " option %<-march%>, or by using the %<target%>"
2502 " attribute or pragma");
2503 reported_p = true;
2506 /* Helper function to emit an operation for rtx_code CODE. */
2507 static void
2508 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2509 rtx op2)
2511 if (can_create_pseudo_p ())
2513 rtx result;
2514 if (GET_RTX_CLASS (code) == RTX_UNARY)
2515 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2516 else
2517 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2518 OPTAB_DIRECT);
2519 riscv_emit_move (op0, result);
2521 else
2523 rtx pat;
2524 /* The following implementation is for the prologue and epilogue.
2525 Because the prologue and epilogue cannot use pseudo registers,
2526 we can't use expand_simple_binop or expand_simple_unop. */
2527 if (GET_RTX_CLASS (code) == RTX_UNARY)
2528 pat = gen_rtx_fmt_e (code, mode, op1);
2529 else
2530 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2531 emit_insn (gen_rtx_SET (op0, pat));
2535 /* Expand a mult operation with a constant integer; the multiplicand is
2536 also used as a temporary register. */
2538 static void
2539 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2540 HOST_WIDE_INT multiplier)
2542 if (multiplier == 0)
2544 riscv_emit_move (dest, GEN_INT (0));
2545 return;
2548 bool neg_p = multiplier < 0;
2549 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier);
2551 if (multiplier_abs == 1)
2553 if (neg_p)
2554 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2555 else
2556 riscv_emit_move (dest, multiplicand);
2558 else
2560 if (pow2p_hwi (multiplier_abs))
2563 multiplicand = [BYTES_PER_RISCV_VECTOR].
2564 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2565 Sequence:
2566 csrr a5, vlenb
2567 slli a5, a5, 3
2568 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2569 Sequence:
2570 csrr a5, vlenb
2571 slli a5, a5, 3
2572 neg a5, a5
2574 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2575 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2576 if (neg_p)
2577 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2579 else if (pow2p_hwi (multiplier_abs + 1))
2582 multiplicand = [BYTES_PER_RISCV_VECTOR].
2583 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2584 Sequence:
2585 csrr a5, vlenb
2586 slli a4, a5, 3
2587 sub a5, a4, a5
2588 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2589 Sequence:
2590 csrr a5, vlenb
2591 slli a4, a5, 3
2592 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
2594 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2595 gen_int_mode (exact_log2 (multiplier_abs + 1),
2596 QImode));
2597 if (neg_p)
2598 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
2599 else
2600 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
2602 else if (pow2p_hwi (multiplier_abs - 1))
2605 multiplicand = [BYTES_PER_RISCV_VECTOR].
2606 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
2607 Sequence:
2608 csrr a5, vlenb
2609 slli a4, a5, 3
2610 add a5, a4, a5
2611 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
2612 Sequence:
2613 csrr a5, vlenb
2614 slli a4, a5, 3
2615 add a5, a4, a5
2616 neg a5, a5
2618 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2619 gen_int_mode (exact_log2 (multiplier_abs - 1),
2620 QImode));
2621 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
2622 if (neg_p)
2623 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2625 else
2627 /* We use multiplication for remaining cases. */
2628 gcc_assert (
2629 TARGET_MUL
2630 && "M-extension must be enabled to calculate the poly_int "
2631 "size/offset.");
2632 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
2633 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
2638 /* Analyze SRC and emit a const_poly_int move sequence. */
2640 void
2641 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
2643 poly_int64 value = rtx_to_poly_int64 (src);
2644 /* Use HOST_WIDE_INT instead of int, since a 32-bit type is not enough
2645 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
2646 HOST_WIDE_INT offset = value.coeffs[0];
2647 HOST_WIDE_INT factor = value.coeffs[1];
2648 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
2649 int div_factor = 0;
2650 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
2651 For any (const_poly_int:MODE [m, n]), the calculation formula is as
2652 follows.
2653 constant = m - n.
2654 When minimum VLEN = 32, poly of VLENB = (4, 4).
2655 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
2656 When minimum VLEN > 32, poly of VLENB = (8, 8).
2657 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
2658 magn = (n, n) / base.
2659 (m, n) = base * magn + constant.
2660 This calculation doesn't need a div operation. */
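/* Worked example (illustrative): for (const_poly_int:DI [16, 16]) with
   VLENB == (8, 8) we get factor == 16, div_factor == 1 and a
   multiplier of 16 / 8 == 2, so the emitted sequence is

	csrr	a5, vlenb
	slli	a5, a5, 1

   and constant == 16 - 16 == 0, so nothing further is added.  */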
2662 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
2663 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
2664 else
2666 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
2667 emit_move_insn (gen_lowpart (Pmode, tmp),
2668 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
2671 if (BYTES_PER_RISCV_VECTOR.is_constant ())
2673 gcc_assert (value.is_constant ());
2674 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
2675 return;
2677 else
2679 int max_power = exact_log2 (MAX_POLY_VARIANT);
2680 for (int i = 0; i <= max_power; i++)
2682 int possible_div_factor = 1 << i;
2683 if (factor % (vlenb / possible_div_factor) == 0)
2685 div_factor = possible_div_factor;
2686 break;
2689 gcc_assert (div_factor != 0);
2692 if (div_factor != 1)
2693 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
2694 gen_int_mode (exact_log2 (div_factor), QImode));
2696 riscv_expand_mult_with_const_int (mode, dest, tmp,
2697 factor / (vlenb / div_factor));
2698 HOST_WIDE_INT constant = offset - factor;
2700 if (constant == 0)
2701 return;
2702 else if (SMALL_OPERAND (constant))
2703 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2704 else
2706 /* Handle a constant value that is not a 12-bit value. */
2707 rtx high;
2709 /* Leave CONSTANT as a 12-bit value and put the excess in HIGH.
2710 The addition inside the macro CONST_HIGH_PART may cause an
2711 overflow, so we need to force a sign-extension check. */
2712 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
2713 constant = CONST_LOW_PART (constant);
2714 riscv_emit_move (tmp, high);
2715 riscv_expand_op (PLUS, mode, dest, tmp, dest);
2716 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2720 /* Adjust the scalable vector frame for the prologue and epilogue. */
2722 static void
2723 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
2725 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
2726 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
2727 rtx insn, dwarf, adjust_frame_rtx;
2729 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
2730 gen_int_mode (offset, Pmode));
2732 if (epilogue)
2733 insn = gen_add3_insn (target, target, adjust_size);
2734 else
2735 insn = gen_sub3_insn (target, target, adjust_size);
2737 insn = emit_insn (insn);
2739 RTX_FRAME_RELATED_P (insn) = 1;
2741 adjust_frame_rtx
2742 = gen_rtx_SET (target,
2743 plus_constant (Pmode, target, epilogue ? offset : -offset));
2745 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
2746 NULL_RTX);
2748 REG_NOTES (insn) = dwarf;
2751 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
2752 sequence that is valid. */
2754 bool
2755 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
2757 if (CONST_POLY_INT_P (src))
2760 Handle:
2761 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
2762 (const_int 96 [0x60])) [0 S1 A8])
2763 (const_poly_int:QI [8, 8]))
2764 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
2766 if (MEM_P (dest))
2768 emit_move_insn (dest, force_reg (mode, src));
2769 return true;
2771 poly_int64 value = rtx_to_poly_int64 (src);
2772 if (!value.is_constant () && !TARGET_VECTOR)
2774 riscv_report_v_required ();
2775 return false;
2778 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
2779 return false;
2781 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
2783 /* In RV32 system, handle (const_poly_int:QI [m, n])
2784 (const_poly_int:HI [m, n]).
2785 In RV64 system, handle (const_poly_int:QI [m, n])
2786 (const_poly_int:HI [m, n])
2787 (const_poly_int:SI [m, n]). */
2788 rtx tmp = gen_reg_rtx (Pmode);
2789 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
2790 src);
2792 else
2794 /* In RV32 system, handle (const_poly_int:SI [m, n])
2795 (const_poly_int:DI [m, n]).
2796 In RV64 system, handle (const_poly_int:DI [m, n]).
2797 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode,
2798 the offset should not exceed 4GiB in general. */
2799 rtx tmp = gen_reg_rtx (mode);
2800 riscv_legitimize_poly_move (mode, dest, tmp, src);
2802 return true;
2804 /* Expand
2805 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2806 Expand this data movement instead of simply forbidding it, since
2807 we can improve the code generation for this following scenario
2808 by RVV auto-vectorization:
2809 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
2810 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2811 Since RVV mode and scalar mode are in different REG_CLASS,
2812 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
2813 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
2815 machine_mode vmode = GET_MODE (SUBREG_REG (src));
2816 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
2817 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
2818 /* We should be able to handle both partial and paradoxical subreg. */
2819 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
2820 scalar_mode smode = as_a<scalar_mode> (mode);
2821 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
2822 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
2823 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
2824 bool need_int_reg_p = false;
2826 if (num == 2)
2828 /* If we want to extract 64bit value but ELEN < 64,
2829 we use RVV vector mode with EEW = 32 to extract
2830 the highpart and lowpart. */
2831 need_int_reg_p = smode == DFmode;
2832 smode = SImode;
2833 nunits = nunits * 2;
2836 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
2838 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
2839 rtx int_reg = dest;
2841 if (need_int_reg_p)
2843 int_reg = gen_reg_rtx (DImode);
2844 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
2847 for (unsigned int i = 0; i < num; i++)
2849 rtx result;
2850 if (num == 1)
2851 result = int_reg;
2852 else if (i == 0)
2853 result = gen_lowpart (smode, int_reg);
2854 else
2855 result = gen_reg_rtx (smode);
2857 riscv_vector::emit_vec_extract (result, v,
2858 gen_int_mode (index + i, Pmode));
2860 if (i == 1)
2862 if (UNITS_PER_WORD < mode_size)
2863 /* If Pmode = SImode and mode = DImode, we just need to
2864 extract element of index = 1 from the vector and move it
2865 into the highpart of the DEST since DEST consists of 2
2866 scalar registers. */
2867 emit_move_insn (gen_highpart (smode, int_reg), result);
2868 else
2870 rtx tmp = expand_binop (Pmode, ashl_optab,
2871 gen_lowpart (Pmode, result),
2872 gen_int_mode (32, Pmode),
2873 NULL_RTX, 0, OPTAB_DIRECT);
2874 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
2875 NULL_RTX, 0, OPTAB_DIRECT);
2876 emit_move_insn (int_reg, tmp2);
2881 if (need_int_reg_p)
2882 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
2883 else
2884 emit_move_insn (dest, int_reg);
2886 else
2887 gcc_unreachable ();
2889 return true;
2891 /* Expand
2892 (set (reg:QI target) (mem:QI (address)))
2894 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
2895 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
2896 with auto-sign/zero extend. */
2897 if (GET_MODE_CLASS (mode) == MODE_INT
2898 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
2899 && can_create_pseudo_p ()
2900 && MEM_P (src))
2902 rtx temp_reg;
2903 int zero_extend_p;
2905 temp_reg = gen_reg_rtx (word_mode);
2906 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
2907 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
2908 zero_extend_p));
2909 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
2910 return true;
2913 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
2915 rtx reg;
2917 if (GET_CODE (src) == CONST_INT)
2919 /* Apply the equivalent of PROMOTE_MODE here for constants to
2920 improve cse. */
2921 machine_mode promoted_mode = mode;
2922 if (GET_MODE_CLASS (mode) == MODE_INT
2923 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
2924 promoted_mode = word_mode;
2926 if (splittable_const_int_operand (src, mode))
2928 reg = gen_reg_rtx (promoted_mode);
2929 riscv_move_integer (reg, reg, INTVAL (src), mode);
2931 else
2932 reg = force_reg (promoted_mode, src);
2934 if (promoted_mode != mode)
2935 reg = gen_lowpart (mode, reg);
2937 else
2938 reg = force_reg (mode, src);
2939 riscv_emit_move (dest, reg);
2940 return true;
2943 /* In order to fit NaN boxing, expand
2944 (set FP_REG (reg:HF src))
2946 (set (reg:SI/DI mask) (const_int -65536)
2947 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0)))
2948 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
2949 (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X))
2952 if (TARGET_HARD_FLOAT
2953 && !TARGET_ZFHMIN && mode == HFmode
2954 && REG_P (dest) && FP_REG_P (REGNO (dest))
2955 && REG_P (src) && !FP_REG_P (REGNO (src))
2956 && can_create_pseudo_p ())
2958 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
2959 rtx temp = gen_reg_rtx (word_mode);
2960 emit_insn (gen_extend_insn (temp,
2961 simplify_gen_subreg (HImode, src, mode, 0),
2962 word_mode, HImode, 1));
2963 if (word_mode == SImode)
2964 emit_insn (gen_iorsi3 (temp, mask, temp));
2965 else
2966 emit_insn (gen_iordi3 (temp, mask, temp));
2968 riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp),
2969 UNSPEC_FMV_SFP16_X));
2971 return true;
2974 /* We need to deal with constants that would be legitimate
2975 immediate_operands but aren't legitimate move_operands. */
2976 if (CONSTANT_P (src) && !move_operand (src, mode))
2978 riscv_legitimize_const_move (mode, dest, src);
2979 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
2980 return true;
2983 /* RISC-V GCC may generate a non-legitimate address because we provide
2984 patterns that optimize access to PIC local symbols, and these can make
2985 GCC generate unrecognizable instructions while optimizing. */
2987 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
2988 reload_completed))
2990 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
2993 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
2994 reload_completed))
2996 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
2999 return false;
3002 /* Return true if there is an instruction that implements CODE and accepts
3003 X as an immediate operand. */
3005 static int
3006 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3008 switch (code)
3010 case ASHIFT:
3011 case ASHIFTRT:
3012 case LSHIFTRT:
3013 /* All shift counts are truncated to a valid constant. */
3014 return true;
3016 case AND:
3017 case IOR:
3018 case XOR:
3019 case PLUS:
3020 case LT:
3021 case LTU:
3022 /* These instructions take 12-bit signed immediates. */
3023 return SMALL_OPERAND (x);
3025 case LE:
3026 /* We add 1 to the immediate and use SLT. */
3027 return SMALL_OPERAND (x + 1);
3029 case LEU:
3030 /* Likewise SLTU, but reject the always-true case. */
3031 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3033 case GE:
3034 case GEU:
3035 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3036 return x == 1;
3038 default:
3039 /* By default assume that x0 can be used for 0. */
3040 return x == 0;
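/* Illustrative uses of the above: "x <= 15" can be encoded as
   "slti t0, x, 16" (the LE case adds 1 and uses SLT), and "x >= 1"
   is emulated as "x > 0", a comparison against x0 (the GE/GEU
   case).  */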
3044 /* Return the cost of binary operation X, given that the instruction
3045 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3046 instructions and that the sequence of a double-word operation takes
3047 DOUBLE_INSNS instructions. */
3049 static int
3050 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3052 if (!riscv_v_ext_mode_p (GET_MODE (x))
3053 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3054 return COSTS_N_INSNS (double_insns);
3055 return COSTS_N_INSNS (single_insns);
3058 /* Return the cost of sign- or zero-extending OP. */
3060 static int
3061 riscv_extend_cost (rtx op, bool unsigned_p)
3063 if (MEM_P (op))
3064 return 0;
3066 if (unsigned_p && GET_MODE (op) == QImode)
3067 /* We can use ANDI. */
3068 return COSTS_N_INSNS (1);
3070 /* ZBA provides zext.w. */
3071 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3072 return COSTS_N_INSNS (1);
3074 /* ZBB provides zext.h, sext.b and sext.h. */
3075 if (TARGET_ZBB)
3077 if (!unsigned_p && GET_MODE (op) == QImode)
3078 return COSTS_N_INSNS (1);
3080 if (GET_MODE (op) == HImode)
3081 return COSTS_N_INSNS (1);
3084 if (!unsigned_p && GET_MODE (op) == SImode)
3085 /* We can use SEXT.W. */
3086 return COSTS_N_INSNS (1);
3088 /* We need to use a shift left and a shift right. */
3089 return COSTS_N_INSNS (2);
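/* For example, without Zbb a QImode sign-extension on RV64 is the
   two-instruction pair "slli a0,a0,56; srai a0,a0,56", matching the
   final case above.  */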
3092 /* Implement TARGET_RTX_COSTS. */
3094 #define SINGLE_SHIFT_COST 1
3096 static bool
3097 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3098 int *total, bool speed)
3100 /* TODO: We set the RVV instruction cost to 1 by default.
3101 The cost model needs to be analyzed and supported properly in the future. */
3102 if (riscv_v_ext_mode_p (mode))
3104 *total = COSTS_N_INSNS (1);
3105 return true;
3108 bool float_mode_p = FLOAT_MODE_P (mode);
3109 int cost;
3111 switch (GET_CODE (x))
3113 case SET:
3114 /* If we are called for an INSN that's a simple set of a register,
3115 then cost based on the SET_SRC alone. */
3116 if (outer_code == INSN
3117 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3119 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
3120 return true;
3123 /* Otherwise return FALSE indicating we should recurse into both the
3124 SET_DEST and SET_SRC combining the cost of both. */
3125 return false;
3127 case CONST_INT:
3128 /* Trivial constants are checked using OUTER_CODE in case they are
3129 encodable in the insn itself without needing additional insn(s). */
3130 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3132 *total = 0;
3133 return true;
3135 /* Fall through. */
3137 case SYMBOL_REF:
3138 case LABEL_REF:
3139 case CONST_DOUBLE:
3140 /* With TARGET_SUPPORTS_WIDE_INT const int can't be in CONST_DOUBLE
3141 rtl object. Weird recheck due to switch-case fall through above. */
3142 if (GET_CODE (x) == CONST_DOUBLE)
3143 gcc_assert (GET_MODE (x) != VOIDmode);
3144 /* Fall through. */
3146 case CONST:
3147 /* Non-trivial CONST_INT fall-through: check whether multiple insns are needed. */
3148 if ((cost = riscv_const_insns (x)) > 0)
3150 /* 1. Hoist will GCSE constants only if the TOTAL returned is non-zero.
3151 2. For constants loaded more than once, the approach so far has
3152 been to duplicate the operation rather than to CSE the constant.
3153 3. TODO: make the cost more accurate, especially if riscv_const_insns
3154 returns > 1. */
3155 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3156 *total = COSTS_N_INSNS (1);
3158 else /* The instruction will be fetched from the constant pool. */
3159 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3160 return true;
3162 case MEM:
3163 /* If the address is legitimate, return the number of
3164 instructions it needs. */
3165 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3167 /* When optimizing for size, make uncompressible 32-bit addresses
3168 more expensive so that compressible 32-bit addresses are
3169 preferred. */
3170 if ((TARGET_RVC || TARGET_ZCA)
3171 && !speed && riscv_mshorten_memrefs && mode == SImode
3172 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3173 cost++;
3175 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3176 return true;
3178 /* Otherwise use the default handling. */
3179 return false;
3181 case IF_THEN_ELSE:
3182 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3183 && reg_or_0_operand (XEXP (x, 1), mode)
3184 && sfb_alu_operand (XEXP (x, 2), mode)
3185 && comparison_operator (XEXP (x, 0), VOIDmode))
3187 /* For predicated conditional-move operations we assume the cost
3188 of a single instruction even though there are actually two. */
3189 *total = COSTS_N_INSNS (1);
3190 return true;
3192 else if (TARGET_ZICOND_LIKE
3193 && outer_code == SET
3194 && ((GET_CODE (XEXP (x, 1)) == REG
3195 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3196 || (GET_CODE (XEXP (x, 2)) == REG
3197 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3198 || (GET_CODE (XEXP (x, 1)) == REG
3199 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3200 || (GET_CODE (XEXP (x, 1)) == REG
3201 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3203 *total = COSTS_N_INSNS (1);
3204 return true;
3206 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3208 if (equality_operator (XEXP (x, 0), mode)
3209 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3211 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3212 return true;
3214 if (ordered_comparison_operator (XEXP (x, 0), mode))
3216 *total = COSTS_N_INSNS (1);
3217 return true;
3220 return false;
3222 case NOT:
3223 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3224 return false;
3226 case AND:
3227 /* slli.uw pattern for zba. */
3228 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3229 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3231 rtx and_rhs = XEXP (x, 1);
3232 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3233 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3234 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3235 && CONST_INT_P (ashift_rhs)
3236 && CONST_INT_P (and_rhs)
3237 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3238 *total = COSTS_N_INSNS (1);
3239 return true;
3241 /* bclri pattern for zbs. */
3242 if (TARGET_ZBS
3243 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3245 *total = COSTS_N_INSNS (1);
3246 return true;
3248 /* bclr pattern for zbs. */
3249 if (TARGET_ZBS
3250 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3251 && GET_CODE (XEXP (x, 0)) == ROTATE
3252 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3253 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3255 *total = COSTS_N_INSNS (1);
3256 return true;
3259 gcc_fallthrough ();
3260 case IOR:
3261 case XOR:
3262 /* orn, andn and xorn pattern for zbb. */
3263 if (TARGET_ZBB
3264 && GET_CODE (XEXP (x, 0)) == NOT)
3266 *total = riscv_binary_cost (x, 1, 2);
3267 return true;
3270 /* bset[i] and binv[i] pattern for zbs. */
3271 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3272 && TARGET_ZBS
3273 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3274 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3275 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3277 *total = COSTS_N_INSNS (1);
3278 return true;
3281 /* Double-word operations use two single-word operations. */
3282 *total = riscv_binary_cost (x, 1, 2);
3283 return false;
3285 case ZERO_EXTRACT:
3286 /* This is an SImode shift. */
3287 if (outer_code == SET
3288 && CONST_INT_P (XEXP (x, 1))
3289 && CONST_INT_P (XEXP (x, 2))
3290 && (INTVAL (XEXP (x, 2)) > 0)
3291 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3293 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3294 return true;
3296 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3297 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3298 && GET_CODE (XEXP (x, 1)) == CONST_INT
3299 && INTVAL (XEXP (x, 1)) == 1)
3301 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3302 return true;
3304 gcc_fallthrough ();
3305 case SIGN_EXTRACT:
3306 if (TARGET_XTHEADBB && outer_code == SET
3307 && CONST_INT_P (XEXP (x, 1))
3308 && CONST_INT_P (XEXP (x, 2)))
3310 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3311 return true;
3313 return false;
3315 case ASHIFT:
3316 /* bset pattern for zbs. */
3317 if (TARGET_ZBS
3318 && CONST_INT_P (XEXP (x, 0))
3319 && INTVAL (XEXP (x, 0)) == 1)
3321 *total = COSTS_N_INSNS (1);
3322 return true;
3324 gcc_fallthrough ();
3325 case ASHIFTRT:
3326 case LSHIFTRT:
3327 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3328 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3329 return false;
3331 case ABS:
3332 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3333 return false;
3335 case LO_SUM:
3336 *total = set_src_cost (XEXP (x, 0), mode, speed);
3337 return true;
3339 case LT:
3340 /* This is an SImode shift. */
3341 if (outer_code == SET && GET_MODE (x) == DImode
3342 && GET_MODE (XEXP (x, 0)) == SImode)
3344 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3345 return true;
3347 /* Fall through. */
3348 case LTU:
3349 case LE:
3350 case LEU:
3351 case GT:
3352 case GTU:
3353 case GE:
3354 case GEU:
3355 case EQ:
3356 case NE:
3357 /* Branch comparisons have VOIDmode, so use the first operand's
3358 mode instead. */
3359 mode = GET_MODE (XEXP (x, 0));
3360 if (float_mode_p)
3361 *total = tune_param->fp_add[mode == DFmode];
3362 else
3363 *total = riscv_binary_cost (x, 1, 3);
3364 return false;
3366 case UNORDERED:
3367 case ORDERED:
3368 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3369 mode = GET_MODE (XEXP (x, 0));
3370 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3371 return false;
3373 case UNEQ:
3374 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3375 mode = GET_MODE (XEXP (x, 0));
3376 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3377 return false;
3379 case LTGT:
3380 /* (FLT(A, A) || FGT(B, B)). */
3381 mode = GET_MODE (XEXP (x, 0));
3382 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3383 return false;
3385 case UNGE:
3386 case UNGT:
3387 case UNLE:
3388 case UNLT:
3389 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3390 mode = GET_MODE (XEXP (x, 0));
3391 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3392 return false;
3394 case MINUS:
3395 if (float_mode_p)
3396 *total = tune_param->fp_add[mode == DFmode];
3397 else
3398 *total = riscv_binary_cost (x, 1, 4);
3399 return false;
3401 case PLUS:
3402 /* add.uw pattern for zba. */
3403 if (TARGET_ZBA
3404 && (TARGET_64BIT && (mode == DImode))
3405 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3406 && register_operand (XEXP (XEXP (x, 0), 0),
3407 GET_MODE (XEXP (XEXP (x, 0), 0)))
3408 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3410 *total = COSTS_N_INSNS (1);
3411 return true;
3413 /* shNadd pattern for zba. */
3414 if (TARGET_ZBA
3415 && ((!TARGET_64BIT && (mode == SImode)) ||
3416 (TARGET_64BIT && (mode == DImode)))
3417 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3418 && register_operand (XEXP (XEXP (x, 0), 0),
3419 GET_MODE (XEXP (XEXP (x, 0), 0)))
3420 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3421 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3423 *total = COSTS_N_INSNS (1);
3424 return true;
3426 /* Before strength-reduction, the shNadd can be expressed as the addition
3427 of a multiplication with a power-of-two. If this case is not handled,
3428 the strength-reduction in expmed.c will calculate an inflated cost. */
3429 if (TARGET_ZBA
3430 && mode == word_mode
3431 && GET_CODE (XEXP (x, 0)) == MULT
3432 && register_operand (XEXP (XEXP (x, 0), 0),
3433 GET_MODE (XEXP (XEXP (x, 0), 0)))
3434 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3435 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3436 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3438 *total = COSTS_N_INSNS (1);
3439 return true;
3441 /* shNadd.uw pattern for zba.
3442 [(set (match_operand:DI 0 "register_operand" "=r")
3443 (plus:DI
3444 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3445 (match_operand:QI 2 "immediate_operand" "I"))
3446 (match_operand 3 "immediate_operand" ""))
3447 (match_operand:DI 4 "register_operand" "r")))]
3448 "TARGET_64BIT && TARGET_ZBA
3449 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3450 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3452 if (TARGET_ZBA
3453 && (TARGET_64BIT && (mode == DImode))
3454 && (GET_CODE (XEXP (x, 0)) == AND)
3455 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
3457 do {
3458 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3459 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3460 if (GET_CODE (and_lhs) != ASHIFT)
3461 break;
3462 if (!CONST_INT_P (and_rhs))
3463 break;
3465 rtx ashift_rhs = XEXP (and_lhs, 1);
3467 if (!CONST_INT_P (ashift_rhs)
3468 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3469 break;
3471 if (CONST_INT_P (and_rhs)
3472 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3474 *total = COSTS_N_INSNS (1);
3475 return true;
3477 } while (false);
3480 if (float_mode_p)
3481 *total = tune_param->fp_add[mode == DFmode];
3482 else
3483 *total = riscv_binary_cost (x, 1, 4);
3484 return false;
3486 case NEG:
3488 rtx op = XEXP (x, 0);
3489 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3491 *total = (tune_param->fp_mul[mode == DFmode]
3492 + set_src_cost (XEXP (op, 0), mode, speed)
3493 + set_src_cost (XEXP (op, 1), mode, speed)
3494 + set_src_cost (XEXP (op, 2), mode, speed));
3495 return true;
3499 if (float_mode_p)
3500 *total = tune_param->fp_add[mode == DFmode];
3501 else
3502 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3503 return false;
3505 case MULT:
3506 if (float_mode_p)
3507 *total = tune_param->fp_mul[mode == DFmode];
3508 else if (!TARGET_MUL)
3509 /* Estimate the cost of a library call. */
3510 *total = COSTS_N_INSNS (speed ? 32 : 6);
3511 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3512 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3513 else if (!speed)
3514 *total = COSTS_N_INSNS (1);
3515 else
3516 *total = tune_param->int_mul[mode == DImode];
3517 return false;
3519 case DIV:
3520 case SQRT:
3521 case MOD:
3522 if (float_mode_p)
3524 *total = tune_param->fp_div[mode == DFmode];
3525 return false;
3527 /* Fall through. */
3529 case UDIV:
3530 case UMOD:
3531 if (!TARGET_DIV)
3532 /* Estimate the cost of a library call. */
3533 *total = COSTS_N_INSNS (speed ? 32 : 6);
3534 else if (speed)
3535 *total = tune_param->int_div[mode == DImode];
3536 else
3537 *total = COSTS_N_INSNS (1);
3538 return false;
3540 case ZERO_EXTEND:
3541 /* This is an SImode shift. */
3542 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3544 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3545 return true;
3547 /* Fall through. */
3548 case SIGN_EXTEND:
3549 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
3550 return false;
3552 case BSWAP:
3553 if (TARGET_ZBB)
3555 /* RISC-V only defines rev8 for XLEN, so we will need an extra
3556 shift-right instruction for smaller modes. */
3557 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
3558 return true;
3560 return false;
3562 case FLOAT:
3563 case UNSIGNED_FLOAT:
3564 case FIX:
3565 case FLOAT_EXTEND:
3566 case FLOAT_TRUNCATE:
3567 *total = tune_param->fp_add[mode == DFmode];
3568 return false;
3570 case FMA:
3571 *total = (tune_param->fp_mul[mode == DFmode]
3572 + set_src_cost (XEXP (x, 0), mode, speed)
3573 + set_src_cost (XEXP (x, 1), mode, speed)
3574 + set_src_cost (XEXP (x, 2), mode, speed));
3575 return true;
3577 case UNSPEC:
3578 if (XINT (x, 1) == UNSPEC_AUIPC)
3580 /* Make AUIPC cheap to avoid spilling its result to the stack. */
3581 *total = 1;
3582 return true;
3584 return false;
3586 default:
3587 return false;
3591 /* Implement TARGET_ADDRESS_COST. */
3593 static int
3594 riscv_address_cost (rtx addr, machine_mode mode,
3595 addr_space_t as ATTRIBUTE_UNUSED,
3596 bool speed ATTRIBUTE_UNUSED)
3598 /* When optimizing for size, make uncompressible 32-bit addresses more
3599 * expensive so that compressible 32-bit addresses are preferred. */
3600 if ((TARGET_RVC || TARGET_ZCA)
3601 && !speed && riscv_mshorten_memrefs && mode == SImode
3602 && !riscv_compressed_lw_address_p (addr))
3603 return riscv_address_insns (addr, mode, false) + 1;
3604 return riscv_address_insns (addr, mode, false);
3607 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
3608 calculation for conditional branches: one unit is considered the cost
3609 of microarchitecture-dependent actual branch execution and therefore
3610 multiplied by BRANCH_COST and any remaining units are considered fixed
3611 branch overhead. Branches on a floating-point condition incur an extra
3612 instruction cost as they will be split into an FCMP operation followed
3613 by a branch on an integer condition. */
3615 static int
3616 riscv_insn_cost (rtx_insn *insn, bool speed)
3618 rtx x = PATTERN (insn);
3619 int cost = pattern_cost (x, speed);
3621 if (JUMP_P (insn))
3623 if (GET_CODE (x) == PARALLEL)
3624 x = XVECEXP (x, 0, 0);
3625 if (GET_CODE (x) == SET
3626 && GET_CODE (SET_DEST (x)) == PC
3627 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
3629 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
3630 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
3631 cost += COSTS_N_INSNS (1);
3634 return cost;
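/* Illustrative: with BRANCH_COST == 3 and a pattern cost of one insn,
   an integer conditional branch costs 1 + (3 - 1) = 3 insn units, and
   a branch on a floating-point condition costs one unit more for the
   FCMP it will be split into.  */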
3637 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
3638 but we consider cost units of branch instructions equal to cost units of
3639 other instructions. */
3641 static unsigned int
3642 riscv_max_noce_ifcvt_seq_cost (edge e)
3644 bool predictable_p = predictable_edge_p (e);
3646 if (predictable_p)
3648 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
3649 return param_max_rtl_if_conversion_predictable_cost;
3651 else
3653 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
3654 return param_max_rtl_if_conversion_unpredictable_cost;
3657 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
3660 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
3661 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
3662 by our actual conditional branch cost, observing that our branches test
3663 conditions directly, so there is no preparatory extra condition-set
3664 instruction. */
3666 static bool
3667 riscv_noce_conversion_profitable_p (rtx_insn *seq,
3668 struct noce_if_info *if_info)
3670 struct noce_if_info riscv_if_info = *if_info;
3672 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
3673 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
3675 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
3676 to emit a conditional set operation on DImode output it comes up
3677 with a sequence such as:
3679 (insn 26 0 27 (set (reg:SI 140)
3680 (eq:SI (reg/v:DI 137 [ c ])
3681 (const_int 0 [0]))) 302 {*seq_zero_disi}
3682 (nil))
3683 (insn 27 26 28 (set (reg:DI 139)
3684 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
3685 (nil))
3687 because our `cstore<mode>4' pattern expands to an insn that gives
3688 a SImode output. The output of a conditional set is a 0-or-1
3689 boolean, so it is valid as input in any scalar integer mode, and therefore
3690 combine later folds the zero extend operation into an equivalent
3691 conditional set operation that produces a DImode output, however
3692 this redundant zero extend operation counts towards the cost of
3693 the replacement sequence. Compensate for that by incrementing the
3694 cost of the original sequence as well as the maximum sequence cost
3695 accordingly. Likewise for sign extension. */
3696 rtx last_dest = NULL_RTX;
3697 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
3699 if (!NONDEBUG_INSN_P (insn))
3700 continue;
3702 rtx x = PATTERN (insn);
3703 if (NONJUMP_INSN_P (insn)
3704 && GET_CODE (x) == SET)
3706 rtx src = SET_SRC (x);
3707 enum rtx_code code = GET_CODE (src);
3708 if (last_dest != NULL_RTX
3709 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
3710 && REG_P (XEXP (src, 0))
3711 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
3713 riscv_if_info.original_cost += COSTS_N_INSNS (1);
3714 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
3716 last_dest = NULL_RTX;
3717 rtx dest = SET_DEST (x);
3718 if (COMPARISON_P (src)
3719 && REG_P (dest)
3720 && GET_MODE (dest) == SImode)
3721 last_dest = dest;
3723 else
3724 last_dest = NULL_RTX;
3727 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
3730 /* Return one word of double-word value OP. HIGH_P is true to select the
3731 high part or false to select the low part. */
3734 riscv_subword (rtx op, bool high_p)
3736 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
3737 machine_mode mode = GET_MODE (op);
3739 if (mode == VOIDmode)
3740 mode = TARGET_64BIT ? TImode : DImode;
3742 if (MEM_P (op))
3743 return adjust_address (op, word_mode, byte);
3745 if (REG_P (op))
3746 gcc_assert (!FP_REG_RTX_P (op));
3748 return simplify_gen_subreg (word_mode, op, mode, byte);
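/* For instance, on little-endian RV32 a DImode value held in a
   register pair yields byte offset 0 (the low word) when !HIGH_P and
   byte offset 4 (the high word) when HIGH_P.  */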
3751 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
3753 bool
3754 riscv_split_64bit_move_p (rtx dest, rtx src)
3756 if (TARGET_64BIT)
3757 return false;
3759 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
3760 if (satisfies_constraint_zfli (src))
3761 return false;
3763 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
3764 of zeroing an FPR with FCVT.D.W. */
3765 if (TARGET_DOUBLE_FLOAT
3766 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
3767 || (FP_REG_RTX_P (dest) && MEM_P (src))
3768 || (FP_REG_RTX_P (src) && MEM_P (dest))
3769 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
3770 return false;
3772 return true;
3775 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
3776 this function handles 64-bit moves for which riscv_split_64bit_move_p
3777 holds. For 64-bit targets, this function handles 128-bit moves. */
3779 void
3780 riscv_split_doubleword_move (rtx dest, rtx src)
3782 /* ZFA and XTheadFmv have instructions for accessing the upper bits of a double. */
3783 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
3785 if (FP_REG_RTX_P (dest))
3787 rtx low_src = riscv_subword (src, false);
3788 rtx high_src = riscv_subword (src, true);
3790 if (TARGET_ZFA)
3791 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
3792 else
3793 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
3794 return;
3796 if (FP_REG_RTX_P (src))
3798 rtx low_dest = riscv_subword (dest, false);
3799 rtx high_dest = riscv_subword (dest, true);
3801 if (TARGET_ZFA)
3803 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
3804 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
3805 return;
3807 else
3809 emit_insn (gen_th_fmv_x_w (low_dest, src));
3810 emit_insn (gen_th_fmv_x_hw (high_dest, src));
3812 return;
3816 /* The operation can be split into two normal moves. Decide in
3817 which order to do them. */
3818 rtx low_dest = riscv_subword (dest, false);
3819 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
3821 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
3822 riscv_emit_move (low_dest, riscv_subword (src, false));
3824 else
3826 riscv_emit_move (low_dest, riscv_subword (src, false));
3827 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
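/* Worked example (illustrative; register choices are arbitrary): on
   RV32, moving a DImode value from the pair a0/a1 into the pair a1/a2
   must copy the high word first ("mv a2,a1" then "mv a1,a0"); copying
   the low word first would clobber a1 before its old value is read.
   The reg_overlap_mentioned_p test above selects that ordering.  */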
3831 /* Return the appropriate instructions to move SRC into DEST. Assume
3832 that SRC is operand 1 and DEST is operand 0. */
3834 const char *
3835 riscv_output_move (rtx dest, rtx src)
3837 enum rtx_code dest_code, src_code;
3838 machine_mode mode;
3839 bool dbl_p;
3840 unsigned width;
3841 const char *insn;
3843 if ((insn = th_output_move (dest, src)))
3844 return insn;
3846 dest_code = GET_CODE (dest);
3847 src_code = GET_CODE (src);
3848 mode = GET_MODE (dest);
3849 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
3850 width = GET_MODE_SIZE (mode).to_constant ();
3852 if (dbl_p && riscv_split_64bit_move_p (dest, src))
3853 return "#";
3855 if (dest_code == REG && GP_REG_P (REGNO (dest)))
3857 if (src_code == REG && FP_REG_P (REGNO (src)))
3858 switch (width)
3860 case 2:
3861 if (TARGET_ZFHMIN)
3862 return "fmv.x.h\t%0,%1";
3863 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
3864 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
3865 case 4:
3866 return "fmv.x.s\t%0,%1";
3867 case 8:
3868 return "fmv.x.d\t%0,%1";
3871 if (src_code == MEM)
3872 switch (width)
3874 case 1: return "lbu\t%0,%1";
3875 case 2: return "lhu\t%0,%1";
3876 case 4: return "lw\t%0,%1";
3877 case 8: return "ld\t%0,%1";
3880 if (src_code == CONST_INT)
3882 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
3883 return "li\t%0,%1";
3885 if (TARGET_ZBS
3886 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
3887 return "bseti\t%0,zero,%S1";
3889 /* Should never reach here. */
3890 abort ();
3893 if (src_code == HIGH)
3894 return "lui\t%0,%h1";
3896 if (symbolic_operand (src, VOIDmode))
3897 switch (riscv_classify_symbolic_expression (src))
3899 case SYMBOL_GOT_DISP: return "la\t%0,%1";
3900 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
3901 case SYMBOL_PCREL: return "lla\t%0,%1";
3902 default: gcc_unreachable ();
3905 if ((src_code == REG && GP_REG_P (REGNO (src)))
3906 || (src == CONST0_RTX (mode)))
3908 if (dest_code == REG)
3910 if (GP_REG_P (REGNO (dest)))
3911 return "mv\t%0,%z1";
3913 if (FP_REG_P (REGNO (dest)))
3914 switch (width)
3916 case 2:
3917 if (TARGET_ZFHMIN)
3918 return "fmv.h.x\t%0,%z1";
3919 /* The high 16 bits should be all ones; otherwise the hardware will
3920 treat the value as an n-bit canonical NaN, but that does not matter for soft-float. */
3921 return "fmv.s.x\t%0,%1";
3922 case 4:
3923 return "fmv.s.x\t%0,%z1";
3924 case 8:
3925 if (TARGET_64BIT)
3926 return "fmv.d.x\t%0,%z1";
3927 /* In RV32, we can emulate "fmv.d.x %0, x0" using fcvt.d.w. */
3928 gcc_assert (src == CONST0_RTX (mode));
3929 return "fcvt.d.w\t%0,x0";
3932 if (dest_code == MEM)
3933 switch (width)
3935 case 1: return "sb\t%z1,%0";
3936 case 2: return "sh\t%z1,%0";
3937 case 4: return "sw\t%z1,%0";
3938 case 8: return "sd\t%z1,%0";
3941 if (src_code == REG && FP_REG_P (REGNO (src)))
3943 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3944 switch (width)
3946 case 2:
3947 if (TARGET_ZFH)
3948 return "fmv.h\t%0,%1";
3949 return "fmv.s\t%0,%1";
3950 case 4:
3951 return "fmv.s\t%0,%1";
3952 case 8:
3953 return "fmv.d\t%0,%1";
3956 if (dest_code == MEM)
3957 switch (width)
3959 case 2:
3960 return "fsh\t%1,%0";
3961 case 4:
3962 return "fsw\t%1,%0";
3963 case 8:
3964 return "fsd\t%1,%0";
3967 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3969 if (src_code == MEM)
3970 switch (width)
3972 case 2:
3973 return "flh\t%0,%1";
3974 case 4:
3975 return "flw\t%0,%1";
3976 case 8:
3977 return "fld\t%0,%1";
3980 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
3981 switch (width)
3983 case 2:
3984 return "fli.h\t%0,%1";
3985 case 4:
3986 return "fli.s\t%0,%1";
3987 case 8:
3988 return "fli.d\t%0,%1";
3991 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
3993 /* We only want a single read of the full vector register length (vlenb) after reload. */
3994 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
3995 return "csrr\t%0,vlenb";
3997 gcc_unreachable ();
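/* Usage sketch (illustrative): for (set (reg:SI a0) (mem:SI (reg:SI a1)))
   the GPR-destination MEM case above selects width 4 and returns
   "lw\t%0,%1", which the output pass renders as "lw a0,0(a1)"; a
   64-bit GPR-to-GPR move on RV32 instead returns "#" and is split
   later by riscv_split_doubleword_move.  */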
4000 const char *
4001 riscv_output_return ()
4003 if (cfun->machine->naked_p)
4004 return "";
4006 return "ret";
4010 /* Return true if CMP1 is a suitable second operand for integer ordering
4011 test CODE. See also the *sCC patterns in riscv.md. */
4013 static bool
4014 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4016 switch (code)
4018 case GT:
4019 case GTU:
4020 return reg_or_0_operand (cmp1, VOIDmode);
4022 case GE:
4023 case GEU:
4024 return cmp1 == const1_rtx;
4026 case LT:
4027 case LTU:
4028 return arith_operand (cmp1, VOIDmode);
4030 case LE:
4031 return sle_operand (cmp1, VOIDmode);
4033 case LEU:
4034 return sleu_operand (cmp1, VOIDmode);
4036 default:
4037 gcc_unreachable ();
4041 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4042 integer ordering test *CODE, or if an equivalent combination can
4043 be formed by adjusting *CODE and *CMP1. When returning true, update
4044 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4045 them alone. */
4047 static bool
4048 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4049 machine_mode mode)
4051 HOST_WIDE_INT plus_one;
4053 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4054 return true;
4056 if (CONST_INT_P (*cmp1))
4057 switch (*code)
4059 case LE:
4060 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4061 if (INTVAL (*cmp1) < plus_one)
4063 *code = LT;
4064 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4065 return true;
4067 break;
4069 case LEU:
4070 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4071 if (plus_one != 0)
4073 *code = LTU;
4074 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4075 return true;
4077 break;
4079 default:
4080 break;
4082 return false;
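/* Worked example (illustrative): "x <= 4" is accepted directly by
   sle_operand and the *sCC patterns emit "slti t0,x,5".  For a
   constant where CMP1 + 1 no longer fits an immediate, the LE case
   above rewrites the test as LT against CMP1 + 1 loaded into a
   register: "li t0,C+1; slt t1,x,t0".  */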
4085 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4086 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4087 is nonnull, it's OK to set TARGET to the inverse of the result and
4088 flip *INVERT_PTR instead. */
4090 static void
4091 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4092 rtx target, rtx cmp0, rtx cmp1)
4094 machine_mode mode;
4096 /* First see if there is a RISC-V instruction that can do this operation.
4097 If not, try doing the same for the inverse operation. If that also
4098 fails, force CMP1 into a register and try again. */
4099 mode = GET_MODE (cmp0);
4100 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4101 riscv_emit_binary (code, target, cmp0, cmp1);
4102 else
4104 enum rtx_code inv_code = reverse_condition (code);
4105 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4107 cmp1 = force_reg (mode, cmp1);
4108 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4110 else if (invert_ptr == 0)
4112 rtx inv_target = riscv_force_binary (word_mode,
4113 inv_code, cmp0, cmp1);
4114 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4116 else
4118 *invert_ptr = !*invert_ptr;
4119 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4124 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4125 The register will have the same mode as CMP0. */
4127 static rtx
4128 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4130 if (cmp1 == const0_rtx)
4131 return cmp0;
4133 return expand_binop (GET_MODE (cmp0), sub_optab,
4134 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
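/* For instance (illustrative): riscv_zero_if_equal (a0, a1) emits
   "sub t0,a0,a1" and returns the new pseudo; the result is zero
   exactly when the operands are equal, so "x == y" reduces to a test
   of t0 against zero (e.g. "seqz" or "beq t0,zero,L").  */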
4137 /* Helper function for riscv_extend_comparands to sign-extend OP.
4138 However, if OP is an SImode subreg promoted from an inner DImode, such as
4139 (subreg/s/v:SI (reg/v:DI) 0)
4140 just peel off the SUBREG to get the DImode value, avoiding an extraneous extension. */
4142 static void
4143 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4145 if (GET_CODE (*op) == SUBREG
4146 && SUBREG_PROMOTED_VAR_P (*op)
4147 && SUBREG_PROMOTED_SIGNED_P (*op)
4148 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4149 == GET_MODE_SIZE (word_mode)))
4150 *op = XEXP (*op, 0);
4151 else
4152 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4155 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4157 static void
4158 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4160 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4161 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4163 /* It is more profitable to zero-extend QImode values. But not if the
4164 first operand has already been sign-extended, and the second one
4165 is a constant or has already been sign-extended also. */
4166 if (unsigned_condition (code) == code
4167 && (GET_MODE (*op0) == QImode
4168 && ! (GET_CODE (*op0) == SUBREG
4169 && SUBREG_PROMOTED_VAR_P (*op0)
4170 && SUBREG_PROMOTED_SIGNED_P (*op0)
4171 && (CONST_INT_P (*op1)
4172 || (GET_CODE (*op1) == SUBREG
4173 && SUBREG_PROMOTED_VAR_P (*op1)
4174 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4176 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4177 if (CONST_INT_P (*op1))
4178 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4179 else
4180 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4182 else
4184 riscv_sign_extend_if_not_subreg_prom (op0);
4186 if (*op1 != const0_rtx)
4187 riscv_sign_extend_if_not_subreg_prom (op1);
4192 /* Convert a comparison into something that can be used in a branch or
4193 conditional move. On entry, *OP0 and *OP1 are the values being
4194 compared and *CODE is the code used to compare them.
4196 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4197 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4198 emitted. */
4200 static void
4201 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4202 bool need_eq_ne_p = false)
4204 if (need_eq_ne_p)
4206 rtx cmp_op0 = *op0;
4207 rtx cmp_op1 = *op1;
4208 if (*code == EQ || *code == NE)
4210 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4211 *op1 = const0_rtx;
4212 return;
4214 gcc_unreachable ();
4217 if (splittable_const_int_operand (*op1, VOIDmode))
4219 HOST_WIDE_INT rhs = INTVAL (*op1);
4221 if (*code == EQ || *code == NE)
4223 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4224 if (SMALL_OPERAND (-rhs))
4226 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4227 GEN_INT (-rhs));
4228 *op1 = const0_rtx;
4231 else
4233 static const enum rtx_code mag_comparisons[][2] = {
4234 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4237 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4238 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4240 HOST_WIDE_INT new_rhs;
4241 bool increment = *code == mag_comparisons[i][0];
4242 bool decrement = *code == mag_comparisons[i][1];
4243 if (!increment && !decrement)
4244 continue;
4246 new_rhs = rhs + (increment ? 1 : -1);
4247 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4248 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4249 && (rhs < 0) == (new_rhs < 0))
4251 *op1 = GEN_INT (new_rhs);
4252 *code = mag_comparisons[i][increment];
4254 break;
4259 riscv_extend_comparands (*code, op0, op1);
4261 *op0 = force_reg (word_mode, *op0);
4262 if (*op1 != const0_rtx)
4263 *op1 = force_reg (word_mode, *op1);
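/* Illustrative sketch of the rewrites above: "x == 2048" becomes
   "(x - 2048) == 0" because -2048 fits an addi immediate while 2048
   takes two instructions to load, giving "addi t0,x,-2048;
   beq t0,zero,L".  Similarly "x <= 0xFFF" becomes "x < 0x1000" when
   riscv_integer_cost says the new constant is cheaper.  */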
4266 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4268 static void
4269 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4270 bool *invert_ptr = nullptr)
4272 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4273 enum rtx_code fp_code = *code;
4274 *code = NE;
4276 switch (fp_code)
4278 case UNORDERED:
4279 *code = EQ;
4280 /* Fall through. */
4282 case ORDERED:
4283 /* a == a && b == b */
4284 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4285 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4286 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4287 *op1 = const0_rtx;
4288 break;
4290 case UNEQ:
4291 /* unordered(a, b) || (a == b); computed as ordered(a, b) == (a == b). */
4292 *code = EQ;
4293 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4294 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4295 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4296 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4297 break;
4299 #define UNORDERED_COMPARISON(CODE, CMP) \
4300 case CODE: \
4301 *code = EQ; \
4302 *op0 = gen_reg_rtx (word_mode); \
4303 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4304 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4305 else if (GET_MODE (cmp_op0) == SFmode) \
4306 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4307 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4308 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4309 else if (GET_MODE (cmp_op0) == DFmode) \
4310 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4311 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4312 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4313 else if (GET_MODE (cmp_op0) == HFmode) \
4314 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4315 else \
4316 gcc_unreachable (); \
4317 *op1 = const0_rtx; \
4318 break;
4320 case UNLT:
4321 std::swap (cmp_op0, cmp_op1);
4322 gcc_fallthrough ();
4324 UNORDERED_COMPARISON(UNGT, le)
4326 case UNLE:
4327 std::swap (cmp_op0, cmp_op1);
4328 gcc_fallthrough ();
4330 UNORDERED_COMPARISON(UNGE, lt)
4331 #undef UNORDERED_COMPARISON
4333 case NE:
4334 fp_code = EQ;
4335 if (invert_ptr != nullptr)
4336 *invert_ptr = !*invert_ptr;
4337 else
4339 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4340 cmp_op1 = const0_rtx;
4342 gcc_fallthrough ();
4344 case EQ:
4345 case LE:
4346 case LT:
4347 case GE:
4348 case GT:
4349 /* We have instructions for these cases. */
4350 *code = fp_code;
4351 *op0 = cmp_op0;
4352 *op1 = cmp_op1;
4353 break;
4355 case LTGT:
4356 /* (a < b) | (a > b) */
4357 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4358 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4359 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4360 *op1 = const0_rtx;
4361 break;
4363 default:
4364 gcc_unreachable ();
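/* Example expansion (an illustrative sketch for DFmode operands):
   ORDERED (a, b) has no direct instruction, so the case above builds
   it from quiet equality tests:
     feq.d t0,fa0,fa0   # false iff a is NaN
     feq.d t1,fa1,fa1   # false iff b is NaN
     and   t0,t0,t1
   and the caller then tests the result with NE against zero.  */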
4368 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4370 void
4371 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4373 riscv_extend_comparands (code, &op0, &op1);
4374 op0 = force_reg (word_mode, op0);
4376 if (code == EQ || code == NE)
4378 rtx zie = riscv_zero_if_equal (op0, op1);
4379 riscv_emit_binary (code, target, zie, const0_rtx);
4381 else
4382 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4385 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4387 void
4388 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4389 bool *invert_ptr)
4391 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4393 machine_mode mode = GET_MODE (target);
4394 if (mode != word_mode)
4396 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4397 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4399 else
4400 riscv_emit_binary (code, target, op0, op1);
4403 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4405 void
4406 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4408 if (FLOAT_MODE_P (GET_MODE (op1)))
4409 riscv_emit_float_compare (&code, &op0, &op1);
4410 else
4411 riscv_emit_int_compare (&code, &op0, &op1);
4413 if (FLOAT_MODE_P (GET_MODE (op0)))
4415 op0 = riscv_force_binary (word_mode, code, op0, op1);
4416 op1 = const0_rtx;
4417 code = NE;
4420 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4421 emit_jump_insn (gen_condjump (condition, label));
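/* Usage sketch (illustrative): for "if (x < y) goto L" with word-mode
   integer operands, the comparison survives canonicalization as LT and
   the condjump emits a single "blt x,y,L"; sub-word operands are first
   widened by riscv_extend_comparands so all XLEN bits participate.  */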
4424 /* Emit a conditional move: if OP holds, move CONS to DEST; else move ALT
4425 to DEST. Return false if the expansion failed. */
4427 bool
4428 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4430 machine_mode mode = GET_MODE (dest);
4431 rtx_code code = GET_CODE (op);
4432 rtx op0 = XEXP (op, 0);
4433 rtx op1 = XEXP (op, 1);
4435 if (((TARGET_ZICOND_LIKE
4436 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4437 && (GET_MODE_CLASS (mode) == MODE_INT))
4438 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4440 machine_mode mode0 = GET_MODE (op0);
4441 machine_mode mode1 = GET_MODE (op1);
4443 /* An integer comparison must be comparing WORD_MODE objects. We
4444 must enforce that so that we don't strip away a sign_extension
4445 thinking it is unnecessary. We might consider using
4446 riscv_extend_operands if they are not already properly extended. */
4447 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4448 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4449 return false;
4451 /* In the fallback generic case use MODE rather than WORD_MODE for
4452 the output of the SCC instruction, to match the mode of the NEG
4453 operation below. The output of SCC is a 0/1 boolean, so it is
4454 valid as input in any scalar integer mode. */
4455 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4456 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4457 ? word_mode : mode);
4458 bool invert = false;
4460 /* Canonicalize the comparison. It must be an equality comparison
4461 of integer operands, or with SFB it can be any comparison of
4462 integer operands. If it isn't, then emit an SCC instruction
4463 so that we can then use an equality comparison against zero. */
4464 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4465 || !INTEGRAL_MODE_P (mode0))
4467 bool *invert_ptr = nullptr;
4469 /* If riscv_expand_int_scc inverts the condition, then it will
4470 flip the value of INVERT. We need to know if it does so that
4471 we can adjust the result for our needs. */
4472 if (code == LE || code == LEU || code == GE || code == GEU)
4473 invert_ptr = &invert;
4475 /* Emit an SCC-like instruction into a temporary so that we can
4476 use an EQ/NE comparison. We can support both FP and integer
4477 conditional moves. */
4478 if (INTEGRAL_MODE_P (mode0))
4479 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4480 else if (FLOAT_MODE_P (mode0)
4481 && fp_scc_comparison (op, GET_MODE (op)))
4482 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4483 else
4484 return false;
4486 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4488 /* We've generated a new comparison. Update the local variables. */
4489 code = GET_CODE (op);
4490 op0 = XEXP (op, 0);
4491 op1 = XEXP (op, 1);
4493 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4494 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4496 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4498 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4499 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4501 /* The expander is a bit loose in its specification of the true
4502 arm of the conditional move. That allows us to support more
4503 cases for extensions which are more general than SFB. But that
4504 does mean we need to force CONS into a register at this point. */
4505 cons = force_reg (mode, cons);
4506 /* With XTheadCondMov we need to force ALT into a register too. */
4507 alt = force_reg (mode, alt);
4508 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4509 cons, alt)));
4510 return true;
4512 else if (!TARGET_ZICOND_LIKE)
4514 if (invert)
4515 std::swap (cons, alt);
4517 rtx reg1 = gen_reg_rtx (mode);
4518 rtx reg2 = gen_reg_rtx (mode);
4519 rtx reg3 = gen_reg_rtx (mode);
4520 rtx reg4 = gen_reg_rtx (mode);
4522 riscv_emit_unary (NEG, reg1, tmp);
4523 riscv_emit_binary (AND, reg2, reg1, cons);
4524 riscv_emit_unary (NOT, reg3, reg1);
4525 riscv_emit_binary (AND, reg4, reg3, alt);
4526 riscv_emit_binary (IOR, dest, reg2, reg4);
4527 return true;
4529 /* 0, reg or 0, imm */
4530 else if (cons == CONST0_RTX (mode)
4531 && (REG_P (alt)
4532 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
4534 riscv_emit_int_compare (&code, &op0, &op1, true);
4535 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4536 alt = force_reg (mode, alt);
4537 emit_insn (gen_rtx_SET (dest,
4538 gen_rtx_IF_THEN_ELSE (mode, cond,
4539 cons, alt)));
4540 return true;
4542 /* imm, imm */
4543 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
4544 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4546 riscv_emit_int_compare (&code, &op0, &op1, true);
4547 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4548 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
4549 alt = force_reg (mode, gen_int_mode (t, mode));
4550 emit_insn (gen_rtx_SET (dest,
4551 gen_rtx_IF_THEN_ELSE (mode, cond,
4552 CONST0_RTX (mode),
4553 alt)));
4554 /* CONS might not fit into a signed 12-bit immediate suitable
4555 for an addi instruction. If that's the case, force it
4556 into a register. */
4557 if (!SMALL_OPERAND (INTVAL (cons)))
4558 cons = force_reg (mode, cons);
4559 riscv_emit_binary (PLUS, dest, dest, cons);
4560 return true;
4562 /* imm, reg */
4563 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
4565 /* Optimize for register value of 0. */
4566 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
4568 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4569 cons = force_reg (mode, cons);
4570 emit_insn (gen_rtx_SET (dest,
4571 gen_rtx_IF_THEN_ELSE (mode, cond,
4572 cons, alt)));
4573 return true;
4576 riscv_emit_int_compare (&code, &op0, &op1, true);
4577 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4579 rtx temp1 = gen_reg_rtx (mode);
4580 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
4582 /* TEMP2 and/or CONS might not fit into a signed 12-bit immediate
4583 suitable for an addi instruction. If that's the case, force it
4584 into a register. */
4585 if (!SMALL_OPERAND (INTVAL (temp2)))
4586 temp2 = force_reg (mode, temp2);
4587 if (!SMALL_OPERAND (INTVAL (cons)))
4588 cons = force_reg (mode, cons);
4590 riscv_emit_binary (PLUS, temp1, alt, temp2);
4591 emit_insn (gen_rtx_SET (dest,
4592 gen_rtx_IF_THEN_ELSE (mode, cond,
4593 CONST0_RTX (mode),
4594 temp1)));
4595 riscv_emit_binary (PLUS, dest, dest, cons);
4596 return true;
4598 /* reg, 0 or imm, 0 */
4599 else if ((REG_P (cons)
4600 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
4601 && alt == CONST0_RTX (mode))
4603 riscv_emit_int_compare (&code, &op0, &op1, true);
4604 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4605 cons = force_reg (mode, cons);
4606 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4607 cons, alt)));
4608 return true;
4610 /* reg, imm */
4611 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4613 /* Optimize for register value of 0. */
4614 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
4616 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4617 alt = force_reg (mode, alt);
4618 emit_insn (gen_rtx_SET (dest,
4619 gen_rtx_IF_THEN_ELSE (mode, cond,
4620 cons, alt)));
4621 return true;
4624 riscv_emit_int_compare (&code, &op0, &op1, true);
4625 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4627 rtx temp1 = gen_reg_rtx (mode);
4628 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
4630 /* TEMP2 and/or ALT might not fit into a signed 12-bit immediate
4631 suitable for an addi instruction. If that's the case, force it
4632 into a register. */
4633 if (!SMALL_OPERAND (INTVAL (temp2)))
4634 temp2 = force_reg (mode, temp2);
4635 if (!SMALL_OPERAND (INTVAL (alt)))
4636 alt = force_reg (mode, alt);
4638 riscv_emit_binary (PLUS, temp1, cons, temp2);
4639 emit_insn (gen_rtx_SET (dest,
4640 gen_rtx_IF_THEN_ELSE (mode, cond,
4641 temp1,
4642 CONST0_RTX (mode))));
4643 riscv_emit_binary (PLUS, dest, dest, alt);
4644 return true;
4646 /* reg, reg */
4647 else if (REG_P (cons) && REG_P (alt))
4649 if ((code == EQ && rtx_equal_p (cons, op0))
4650 || (code == NE && rtx_equal_p (alt, op0)))
4652 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4653 alt = force_reg (mode, alt);
4654 emit_insn (gen_rtx_SET (dest,
4655 gen_rtx_IF_THEN_ELSE (mode, cond,
4656 cons, alt)));
4657 return true;
4660 rtx reg1 = gen_reg_rtx (mode);
4661 rtx reg2 = gen_reg_rtx (mode);
4662 riscv_emit_int_compare (&code, &op0, &op1, true);
4663 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4664 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
4665 GET_MODE (op0), op0, op1);
4666 emit_insn (gen_rtx_SET (reg2,
4667 gen_rtx_IF_THEN_ELSE (mode, cond2,
4668 CONST0_RTX (mode),
4669 cons)));
4670 emit_insn (gen_rtx_SET (reg1,
4671 gen_rtx_IF_THEN_ELSE (mode, cond1,
4672 CONST0_RTX (mode),
4673 alt)));
4674 riscv_emit_binary (IOR, dest, reg1, reg2);
4675 return true;
4679 return false;
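/* Illustrative Zicond sketch (not lifted from riscv.md): once the
   condition is reduced to an EQ/NE test of a register against zero,
   the "0, reg" arm above maps to a single conditional-zero operation,
   e.g. "czero.eqz a0,a1,a2" computes a0 = (a2 != 0) ? a1 : 0, while
   the !TARGET_ZICOND_LIKE arm builds the same selection from
   neg/and/not/and/ior.  */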
4682 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
4683 least PARM_BOUNDARY bits of alignment, but will be given anything up
4684 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
4686 static unsigned int
4687 riscv_function_arg_boundary (machine_mode mode, const_tree type)
4689 unsigned int alignment;
4691 /* Use natural alignment if the type is not an aggregate. */
4692 if (type && !AGGREGATE_TYPE_P (type))
4693 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
4694 else
4695 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
4697 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
4700 /* If MODE represents an argument that can be passed or returned in
4701 floating-point registers, return the number of registers, else 0. */
4703 static unsigned
4704 riscv_pass_mode_in_fpr_p (machine_mode mode)
4706 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
4708 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4709 return 1;
4711 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4712 return 2;
4715 return 0;
4718 typedef struct {
4719 const_tree type;
4720 HOST_WIDE_INT offset;
4721 } riscv_aggregate_field;
4723 /* Identify subfields of aggregates that are candidates for passing in
4724 floating-point registers. */
4726 static int
4727 riscv_flatten_aggregate_field (const_tree type,
4728 riscv_aggregate_field fields[2],
4729 int n, HOST_WIDE_INT offset,
4730 bool ignore_zero_width_bit_field_p)
4732 switch (TREE_CODE (type))
4734 case RECORD_TYPE:
4735 /* Can't handle incomplete types nor sizes that are not fixed. */
4736 if (!COMPLETE_TYPE_P (type)
4737 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4738 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
4739 return -1;
4741 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
4742 if (TREE_CODE (f) == FIELD_DECL)
4744 if (!TYPE_P (TREE_TYPE (f)))
4745 return -1;
4747 /* The C++ front end strips zero-length bit-fields from structs.
4748 So we need to ignore them in the C front end to make C code
4749 compatible with C++ code. */
4750 if (ignore_zero_width_bit_field_p
4751 && DECL_BIT_FIELD (f)
4752 && (DECL_SIZE (f) == NULL_TREE
4753 || integer_zerop (DECL_SIZE (f))))
4755 else
4757 HOST_WIDE_INT pos = offset + int_byte_position (f);
4758 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
4759 fields, n, pos,
4760 ignore_zero_width_bit_field_p);
4762 if (n < 0)
4763 return -1;
4765 return n;
4767 case ARRAY_TYPE:
4769 HOST_WIDE_INT n_elts;
4770 riscv_aggregate_field subfields[2];
4771 tree index = TYPE_DOMAIN (type);
4772 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
4773 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
4774 subfields, 0, offset,
4775 ignore_zero_width_bit_field_p);
4777 /* Can't handle incomplete types nor sizes that are not fixed. */
4778 if (n_subfields <= 0
4779 || !COMPLETE_TYPE_P (type)
4780 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4781 || !index
4782 || !TYPE_MAX_VALUE (index)
4783 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4784 || !TYPE_MIN_VALUE (index)
4785 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4786 || !tree_fits_uhwi_p (elt_size))
4787 return -1;
4789 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4790 - tree_to_uhwi (TYPE_MIN_VALUE (index));
4791 gcc_assert (n_elts >= 0);
4793 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
4794 for (int j = 0; j < n_subfields; j++)
4796 if (n >= 2)
4797 return -1;
4799 fields[n] = subfields[j];
4800 fields[n++].offset += i * tree_to_uhwi (elt_size);
4803 return n;
4806 case COMPLEX_TYPE:
4808 /* A complex type consumes two fields, so N must be 0. */
4809 if (n != 0)
4810 return -1;
4812 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
4814 if (elt_size <= UNITS_PER_FP_ARG)
4816 fields[0].type = TREE_TYPE (type);
4817 fields[0].offset = offset;
4818 fields[1].type = TREE_TYPE (type);
4819 fields[1].offset = offset + elt_size;
4821 return 2;
4824 return -1;
4827 default:
4828 if (n < 2
4829 && ((SCALAR_FLOAT_TYPE_P (type)
4830 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
4831 || (INTEGRAL_TYPE_P (type)
4832 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
4834 fields[n].type = type;
4835 fields[n].offset = offset;
4836 return n + 1;
4838 else
4839 return -1;
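/* Illustrative examples of the flattening rules above:
   "struct { float x; float y; }" yields two SFmode fields at offsets
   0 and 4 and so is an FPR-pair candidate, while "struct { float a[3]; }"
   produces a third field, exceeds the two-field limit, and returns -1,
   falling back to the integer calling convention.  */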
4843 /* Identify candidate aggregates for passing in floating-point registers.
4844 Candidates have at most two fields after flattening. */
4846 static int
4847 riscv_flatten_aggregate_argument (const_tree type,
4848 riscv_aggregate_field fields[2],
4849 bool ignore_zero_width_bit_field_p)
4851 if (!type || TREE_CODE (type) != RECORD_TYPE)
4852 return -1;
4854 return riscv_flatten_aggregate_field (type, fields, 0, 0,
4855 ignore_zero_width_bit_field_p);
4858 /* See whether TYPE is a record whose fields should be returned in one or
4859 two floating-point registers. If so, populate FIELDS accordingly. */
4861 static unsigned
4862 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
4863 riscv_aggregate_field fields[2])
4865 static int warned = 0;
4867 /* This is the old ABI, which differs for C++ and C. */
4868 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4869 for (int i = 0; i < n_old; i++)
4870 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4872 n_old = -1;
4873 break;
4876 /* This is the new ABI, which is the same for C++ and C. */
4877 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4878 for (int i = 0; i < n_new; i++)
4879 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4881 n_new = -1;
4882 break;
4885 if ((n_old != n_new) && (warned == 0))
4887 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4888 "bit-fields changed in GCC 10");
4889 warned = 1;
4892 return n_new > 0 ? n_new : 0;
4895 /* See whether TYPE is a record whose fields should be returned in one
4896 floating-point register and one integer register. If so, populate
4897 FIELDS accordingly. */
4899 static bool
4900 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
4901 riscv_aggregate_field fields[2])
4903 static int warned = 0;
4905 /* This is the old ABI, which differs for C++ and C. */
4906 unsigned num_int_old = 0, num_float_old = 0;
4907 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4908 for (int i = 0; i < n_old; i++)
4910 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
4911 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
4914 /* This is the new ABI, which is the same for C++ and C. */
4915 unsigned num_int_new = 0, num_float_new = 0;
4916 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4917 for (int i = 0; i < n_new; i++)
4919 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
4920 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
4923 if (((num_int_old == 1 && num_float_old == 1
4924 && (num_int_old != num_int_new || num_float_old != num_float_new))
4925 || (num_int_new == 1 && num_float_new == 1
4926 && (num_int_old != num_int_new || num_float_old != num_float_new)))
4927 && (warned == 0))
4929 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4930 "bit-fields changed in GCC 10");
4931 warned = 1;
4934 return num_int_new == 1 && num_float_new == 1;
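/* For example (illustrative): "struct { float f; int i; }" flattens to
   one float and one integer field, so when registers are available it
   travels in fa0 + a0.  A zero-width bit-field between the members can
   flip the old-ABI answer, which is what the -Wpsabi warning above
   reports.  */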
4937 /* Return the representation of an argument passed or returned in an FPR
4938 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
4939 two modes may be different for structures like:
4941 struct __attribute__((packed)) foo { float f; }
4943 where the SFmode value "f" is passed in REGNO but the struct itself
4944 has mode BLKmode. */
4946 static rtx
4947 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
4948 machine_mode value_mode,
4949 HOST_WIDE_INT offset)
4951 rtx x = gen_rtx_REG (value_mode, regno);
4953 if (type_mode != value_mode)
4955 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
4956 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
4958 return x;
4961 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
4962 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
4963 byte offset for the first value, likewise MODE2 and OFFSET2 for the
4964 second value. */
4966 static rtx
4967 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
4968 machine_mode mode1, HOST_WIDE_INT offset1,
4969 unsigned regno2, machine_mode mode2,
4970 HOST_WIDE_INT offset2)
4972 return gen_rtx_PARALLEL
4973 (mode,
4974 gen_rtvec (2,
4975 gen_rtx_EXPR_LIST (VOIDmode,
4976 gen_rtx_REG (mode1, regno1),
4977 GEN_INT (offset1)),
4978 gen_rtx_EXPR_LIST (VOIDmode,
4979 gen_rtx_REG (mode2, regno2),
4980 GEN_INT (offset2))));
4983 static rtx
4984 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
4985 unsigned gpr_base)
4987 gcc_assert (riscv_v_ext_vls_mode_p (mode));
4989 unsigned count = 0;
4990 unsigned regnum = 0;
4991 machine_mode gpr_mode = VOIDmode;
4992 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
4993 unsigned gpr_size = GET_MODE_SIZE (Xmode);
4995 if (IN_RANGE (vls_size, 0, gpr_size * 2))
4997 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
4999 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5001 regnum = gpr_base + info->gpr_offset;
5002 info->num_gprs = count;
5003 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5007 if (!regnum)
5008 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5010 gcc_assert (gpr_mode != VOIDmode);
5012 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5013 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5015 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
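/* Illustrative example, assuming XLEN == 64: a 16-byte VLS vector fits
   in two GPRs, so with two argument registers still free it is wrapped
   in a PARALLEL around an a-register pair; a VLS value larger than
   2 * XLEN bytes leaves REGNUM at 0 and NULL_RTX is returned.  */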
5018 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5019 for a call to a function whose data type is FNTYPE.
5020 For a library call, FNTYPE is 0. */
5022 void
5023 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5025 memset (cum, 0, sizeof (*cum));
5027 if (fntype)
5028 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5029 else
5030 cum->variant_cc = RISCV_CC_BASE;
5033 /* Return true if TYPE is a vector type that can be passed in vector registers.
5036 static bool
5037 riscv_vector_type_p (const_tree type)
5039 /* Currently, only built-in scalable vector types are allowed; in the
5040 future, more vector types may be allowed, such as GNU vector types. */
5041 return riscv_vector::builtin_type_p (type);
5044 static unsigned int
5045 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5047 /* Subroutine of riscv_get_arg_info. */
5049 static rtx
5050 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5051 machine_mode mode, bool return_p)
5053 gcc_assert (riscv_v_ext_mode_p (mode));
5055 info->mr_offset = cum->num_mrs;
5056 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5058 /* For scalable mask return value. */
5059 if (return_p)
5060 return gen_rtx_REG (mode, V_REG_FIRST);
5062 /* For the first scalable mask argument. */
5063 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5065 info->num_mrs = 1;
5066 return gen_rtx_REG (mode, V_REG_FIRST);
5068 else
5070 /* The remaining scalable mask arguments are treated as scalable
5071 data arguments. */
5075 /* The number and alignment of vector registers needed for this scalable
5076 vector argument. When the mode size is less than a full vector, we use
5077 one vector register to pass it. Just call TARGET_HARD_REGNO_NREGS for
5078 the number information. */
5079 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5080 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5081 ? nregs / riscv_vector::get_nf (mode)
5082 : nregs;
5083 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5084 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5085 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5087 /* For scalable data and scalable tuple return value. */
5088 if (return_p)
5089 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5091 /* Iterate through the USED_VRS array to find vector register groups that
5092 have not been allocated and whose first register is aligned to LMUL. */
5093 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5095 /* The index in USED_VRS array. */
5096 int idx = i - arg_reg_start;
5097 /* Find the first register unused. */
5098 if (!cum->used_vrs[idx])
5100 bool find_set = true;
5101 /* Ensure there are NREGS continuous unused registers. */
5102 for (int j = 1; j < nregs; j++)
5103 if (cum->used_vrs[idx + j])
5105 find_set = false;
5106 /* Update I to the last aligned register that
5107 cannot be used; the next iteration will then
5108 advance I by LMUL. */
5109 i += (j / LMUL) * LMUL;
5110 break;
5113 if (find_set)
5115 info->num_vrs = nregs;
5116 info->vr_offset = idx;
5117 return gen_rtx_REG (mode, i + V_REG_FIRST);
5122 return NULL_RTX;
5125 /* Fill INFO with information about a single argument, and return an RTL
5126 pattern to pass or return the argument. Return NULL_RTX if the argument
5127 cannot be passed or returned in registers; in that case it may be passed
5128 by reference or on the stack. CUM is the cumulative state for earlier
5129 arguments. MODE is the mode of this argument and TYPE is its type (if
5130 known). NAMED is true if this is a named (fixed) argument rather than a
5131 variable one. RETURN_P is true if returning, false if passing. */
5133 static rtx
5134 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5135 machine_mode mode, const_tree type, bool named,
5136 bool return_p)
5138 unsigned num_bytes, num_words;
5139 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5140 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5141 unsigned alignment = riscv_function_arg_boundary (mode, type);
5143 memset (info, 0, sizeof (*info));
5144 info->gpr_offset = cum->num_gprs;
5145 info->fpr_offset = cum->num_fprs;
5147 /* Passed by reference when the scalable vector argument is anonymous. */
5148 if (riscv_v_ext_mode_p (mode) && !named)
5149 return NULL_RTX;
5151 if (named)
5153 riscv_aggregate_field fields[2];
5154 unsigned fregno = fpr_base + info->fpr_offset;
5155 unsigned gregno = gpr_base + info->gpr_offset;
5157 /* Pass one- or two-element floating-point aggregates in FPRs. */
5158 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5159 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5160 switch (info->num_fprs)
5162 case 1:
5163 return riscv_pass_fpr_single (mode, fregno,
5164 TYPE_MODE (fields[0].type),
5165 fields[0].offset);
5167 case 2:
5168 return riscv_pass_fpr_pair (mode, fregno,
5169 TYPE_MODE (fields[0].type),
5170 fields[0].offset,
5171 fregno + 1,
5172 TYPE_MODE (fields[1].type),
5173 fields[1].offset);
5175 default:
5176 gcc_unreachable ();
5179 /* Pass real and complex floating-point numbers in FPRs. */
5180 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5181 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5182 switch (GET_MODE_CLASS (mode))
5184 case MODE_FLOAT:
5185 return gen_rtx_REG (mode, fregno);
5187 case MODE_COMPLEX_FLOAT:
5188 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5189 fregno + 1, GET_MODE_INNER (mode),
5190 GET_MODE_UNIT_SIZE (mode));
5192 default:
5193 gcc_unreachable ();
5196 /* Pass structs with one float and one integer in an FPR and a GPR. */
5197 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5198 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5199 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5201 info->num_gprs = 1;
5202 info->num_fprs = 1;
5204 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5205 std::swap (fregno, gregno);
5207 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5208 fields[0].offset,
5209 gregno, TYPE_MODE (fields[1].type),
5210 fields[1].offset);
5213 /* For a scalable vector argument. */
5214 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5215 return riscv_get_vector_arg (info, cum, mode, return_p);
5217 /* For VLS modes aggregated in GPRs. */
5218 if (riscv_v_ext_vls_mode_p (mode))
5219 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5222 /* Work out the size of the argument. */
5223 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5224 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5226 /* Doubleword-aligned varargs start on an even register boundary. */
5227 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5228 info->gpr_offset += info->gpr_offset & 1;
5230 /* Partition the argument between registers and stack. */
5231 info->num_fprs = 0;
5232 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5233 info->stack_p = (num_words - info->num_gprs) != 0;
5235 if (info->num_gprs || return_p)
5236 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5238 return NULL_RTX;
5241 /* Implement TARGET_FUNCTION_ARG. */
5243 static rtx
5244 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5246 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5247 struct riscv_arg_info info;
5249 if (arg.end_marker_p ())
5250 /* Return the calling convention used by the current function. */
5251 return gen_int_mode (cum->variant_cc, SImode);
5253 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5256 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5258 static void
5259 riscv_function_arg_advance (cumulative_args_t cum_v,
5260 const function_arg_info &arg)
5262 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5263 struct riscv_arg_info info;
5265 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5267 /* Set the corresponding register in USED_VRS to used status. */
5268 for (unsigned int i = 0; i < info.num_vrs; i++)
5270 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5271 cum->used_vrs[info.vr_offset + i] = true;
5274 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5276 error ("RVV type %qT cannot be passed to an unprototyped function",
5277 arg.type);
5278 /* Avoid repeating the message */
5279 cum->variant_cc = RISCV_CC_V;
5282 /* Advance the register count. This has the effect of setting
5283 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5284 argument required us to skip the final GPR and pass the whole
5285 argument on the stack. */
5286 cum->num_fprs = info.fpr_offset + info.num_fprs;
5287 cum->num_gprs = info.gpr_offset + info.num_gprs;
5288 cum->num_mrs = info.mr_offset + info.num_mrs;
5291 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5293 static int
5294 riscv_arg_partial_bytes (cumulative_args_t cum,
5295 const function_arg_info &generic_arg)
5297 struct riscv_arg_info arg;
5299 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5300 generic_arg.type, generic_arg.named, false);
5301 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
5304 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5305 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5306 VALTYPE is null and MODE is the mode of the return value. */
5309 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5311 struct riscv_arg_info info;
5312 CUMULATIVE_ARGS args;
5314 if (type)
5316 int unsigned_p = TYPE_UNSIGNED (type);
5318 mode = TYPE_MODE (type);
5320 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5321 return values, promote the mode here too. */
5322 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5325 memset (&args, 0, sizeof args);
5327 return riscv_get_arg_info (&info, &args, mode, type, true, true);
5330 /* Implement TARGET_PASS_BY_REFERENCE. */
5332 static bool
5333 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5335 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5336 struct riscv_arg_info info;
5337 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5339 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5340 never pass variadic arguments in floating-point and vector registers,
5341 so we can avoid the call to riscv_get_arg_info in this case. */
5342 if (cum != NULL)
5344 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5346 /* Don't pass by reference if we can use a floating-point register. */
5347 if (info.num_fprs)
5348 return false;
5350 /* Don't pass by reference if we can use general register(s) for vls. */
5351 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
5352 return false;
5354 /* Don't pass by reference if we can use vector register groups. */
5355 if (info.num_vrs > 0 || info.num_mrs > 0)
5356 return false;
5359 /* Passed by reference when:
5360 1. The scalable vector argument is anonymous.
5361 2. Args cannot be passed through vector registers. */
5362 if (riscv_v_ext_mode_p (arg.mode))
5363 return true;
5365 /* Pass by reference if the data do not fit in two integer registers. */
5366 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
5369 /* Implement TARGET_RETURN_IN_MEMORY. */
5371 static bool
5372 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5374 CUMULATIVE_ARGS args;
5375 cumulative_args_t cum = pack_cumulative_args (&args);
5377 /* The rules for returning in memory are the same as for passing the
5378 first named argument by reference. */
5379 memset (&args, 0, sizeof args);
5380 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5381 return riscv_pass_by_reference (cum, arg);
5384 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5386 static void
5387 riscv_setup_incoming_varargs (cumulative_args_t cum,
5388 const function_arg_info &arg,
5389 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5391 CUMULATIVE_ARGS local_cum;
5392 int gp_saved;
5394 /* The caller has advanced CUM up to, but not beyond, the last named
5395 argument. Advance a local copy of CUM past the last "real" named
5396 argument, to find out how many registers are left over. */
5397 local_cum = *get_cumulative_args (cum);
5398 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
5399 || arg.type != NULL_TREE)
5400 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5402 /* Find out how many registers we need to save. */
5403 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5405 if (!no_rtl && gp_saved > 0)
5407 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5408 REG_PARM_STACK_SPACE (cfun->decl)
5409 - gp_saved * UNITS_PER_WORD);
5410 rtx mem = gen_frame_mem (BLKmode, ptr);
5411 set_mem_alias_set (mem, get_varargs_alias_set ());
5413 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5414 mem, gp_saved);
5416 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5417 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
5420 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5422 static const predefined_function_abi &
5423 riscv_v_abi ()
5425 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5426 if (!v_abi.initialized_p ())
5428 HARD_REG_SET full_reg_clobbers
5429 = default_function_abi.full_reg_clobbers ();
5430 /* Callee-saved vector registers: v1-v7, v24-v31. */
5431 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5432 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5433 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5434 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5435 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5437 return v_abi;
5440 /* Return true if a function with type FNTYPE returns its value in
5441 RISC-V V registers. */
5443 static bool
5444 riscv_return_value_is_vector_type_p (const_tree fntype)
5446 tree return_type = TREE_TYPE (fntype);
5448 return riscv_vector_type_p (return_type);
5451 /* Return true if a function with type FNTYPE takes arguments in
5452 RISC-V V registers. */
5454 static bool
5455 riscv_arguments_is_vector_type_p (const_tree fntype)
5457 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
5458 chain = TREE_CHAIN (chain))
5460 tree arg_type = TREE_VALUE (chain);
5461 if (riscv_vector_type_p (arg_type))
5462 return true;
5465 return false;
5468 /* Return true if FUNC is a riscv_vector_cc function.
5469 For more details please reference the below link.
5470 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
5471 static bool
5472 riscv_vector_cc_function_p (const_tree fntype)
5474 return lookup_attribute ("vector_cc", TYPE_ATTRIBUTES (fntype)) != NULL_TREE
5475 || lookup_attribute ("riscv_vector_cc", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
5478 /* Implement TARGET_FNTYPE_ABI. */
5480 static const predefined_function_abi &
5481 riscv_fntype_abi (const_tree fntype)
5483 /* Implement the vector calling convention. For more details please
5484 reference the below link.
5485 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
5486 if (riscv_return_value_is_vector_type_p (fntype)
5487 || riscv_arguments_is_vector_type_p (fntype)
5488 || riscv_vector_cc_function_p (fntype))
5489 return riscv_v_abi ();
5491 return default_function_abi;
5494 /* Return the RISC-V calling convention of a call insn. */
5495 riscv_cc
5496 get_riscv_cc (const rtx use)
5498 gcc_assert (GET_CODE (use) == USE);
5499 rtx unspec = XEXP (use, 0);
5500 gcc_assert (GET_CODE (unspec) == UNSPEC
5501 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
5502 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
5503 gcc_assert (cc < RISCV_CC_UNKNOWN);
5504 return cc;
5507 /* Implement TARGET_INSN_CALLEE_ABI. */
5509 const predefined_function_abi &
5510 riscv_insn_callee_abi (const rtx_insn *insn)
5512 rtx pat = PATTERN (insn);
5513 gcc_assert (GET_CODE (pat) == PARALLEL);
5514 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
5515 return function_abis[cc];
5518 /* Handle an attribute requiring a FUNCTION_DECL;
5519 arguments as in struct attribute_spec.handler. */
5520 static tree
5521 riscv_handle_fndecl_attribute (tree *node, tree name,
5522 tree args ATTRIBUTE_UNUSED,
5523 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5525 if (TREE_CODE (*node) != FUNCTION_DECL)
5527 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5528 name);
5529 *no_add_attrs = true;
5532 return NULL_TREE;
5535 /* Verify type-based attributes. NODE is what the attribute is being
5536 applied to. NAME is the attribute name. ARGS are the attribute args.
5537 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
5538 the attribute should be ignored. */
5540 static tree
5541 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5542 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5544 /* Check for an argument. */
5545 if (is_attribute_p ("interrupt", name))
5547 if (args)
5549 tree cst = TREE_VALUE (args);
5550 const char *string;
5552 if (TREE_CODE (cst) != STRING_CST)
5554 warning (OPT_Wattributes,
5555 "%qE attribute requires a string argument",
5556 name);
5557 *no_add_attrs = true;
5558 return NULL_TREE;
5561 string = TREE_STRING_POINTER (cst);
5562 if (strcmp (string, "user") && strcmp (string, "supervisor")
5563 && strcmp (string, "machine"))
5565 warning (OPT_Wattributes,
5566 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
5567 "or %<\"machine\"%>", name);
5568 *no_add_attrs = true;
5573 return NULL_TREE;
5576 /* Return true if function TYPE is an interrupt function. */
5577 static bool
5578 riscv_interrupt_type_p (tree type)
5580 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
5583 /* Return true if FUNC is a naked function. */
5584 static bool
5585 riscv_naked_function_p (tree func)
5587 tree func_decl = func;
5588 if (func == NULL_TREE)
5589 func_decl = current_function_decl;
5590 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
5593 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
5594 static bool
5595 riscv_allocate_stack_slots_for_args ()
5597 /* Naked functions should not allocate stack slots for arguments. */
5598 return !riscv_naked_function_p (current_function_decl);
5601 /* Implement TARGET_WARN_FUNC_RETURN. */
5602 static bool
5603 riscv_warn_func_return (tree decl)
5605 /* Naked functions are implemented entirely in assembly, including the
5606 return sequence, so suppress warnings about this. */
5607 return !riscv_naked_function_p (decl);
5610 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5612 static void
5613 riscv_va_start (tree valist, rtx nextarg)
5615 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5616 std_expand_builtin_va_start (valist, nextarg);
5619 /* Make ADDR suitable for use as a call or sibcall target. */
5622 riscv_legitimize_call_address (rtx addr)
5624 if (!call_insn_operand (addr, VOIDmode))
5626 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
5627 riscv_emit_move (reg, addr);
5628 return reg;
5630 return addr;
5633 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
5634 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
5636 static void
5637 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
5639 const char *reloc;
5641 switch (riscv_classify_symbolic_expression (op))
5643 case SYMBOL_ABSOLUTE:
5644 reloc = hi_reloc ? "%hi" : "%lo";
5645 break;
5647 case SYMBOL_PCREL:
5648 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
5649 break;
5651 case SYMBOL_TLS_LE:
5652 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
5653 break;
5655 default:
5656 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
5657 return;
5660 fprintf (file, "%s(", reloc);
5661 output_addr_const (file, riscv_strip_unspec_address (op));
5662 fputc (')', file);
5665 /* Return the memory model that encapsulates both given models. */
5667 enum memmodel
5668 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
5670 model1 = memmodel_base (model1);
5671 model2 = memmodel_base (model2);
5673 enum memmodel weaker = model1 <= model2 ? model1 : model2;
5674 enum memmodel stronger = model1 > model2 ? model1 : model2;
5676 switch (stronger)
5678 case MEMMODEL_SEQ_CST:
5679 case MEMMODEL_ACQ_REL:
5680 return stronger;
5681 case MEMMODEL_RELEASE:
5682 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
5683 return MEMMODEL_ACQ_REL;
5684 else
5685 return stronger;
5686 case MEMMODEL_ACQUIRE:
5687 case MEMMODEL_CONSUME:
5688 case MEMMODEL_RELAXED:
5689 return stronger;
5690 default:
5691 gcc_unreachable ();
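/* Illustrative use: the union of MEMMODEL_ACQUIRE and MEMMODEL_RELEASE
   is MEMMODEL_ACQ_REL, per the RELEASE case above; combining
   MEMMODEL_RELAXED with anything simply yields the stronger model,
   e.g. when merging the success and failure models of a
   compare-and-swap.  */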
5695 /* Return true if the .AQ suffix should be added to an AMO to implement the
5696 acquire portion of memory model MODEL. */
5698 static bool
5699 riscv_memmodel_needs_amo_acquire (enum memmodel model)
5701 /* ZTSO amo mappings require no annotations. */
5702 if (TARGET_ZTSO)
5703 return false;
5705 switch (model)
5707 case MEMMODEL_ACQ_REL:
5708 case MEMMODEL_SEQ_CST:
5709 case MEMMODEL_ACQUIRE:
5710 case MEMMODEL_CONSUME:
5711 return true;
5713 case MEMMODEL_RELEASE:
5714 case MEMMODEL_RELAXED:
5715 return false;
5717 default:
5718 gcc_unreachable ();
5722 /* Return true if the .RL suffix should be added to an AMO to implement the
5723 release portion of memory model MODEL. */
5725 static bool
5726 riscv_memmodel_needs_amo_release (enum memmodel model)
5728 /* ZTSO amo mappings require no annotations. */
5729 if (TARGET_ZTSO)
5730 return false;
5732 switch (model)
5734 case MEMMODEL_ACQ_REL:
5735 case MEMMODEL_SEQ_CST:
5736 case MEMMODEL_RELEASE:
5737 return true;
5739 case MEMMODEL_ACQUIRE:
5740 case MEMMODEL_CONSUME:
5741 case MEMMODEL_RELAXED:
5742 return false;
5744 default:
5745 gcc_unreachable ();
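/* Taken together, these two predicates drive the '%A' operand modifier:
   e.g. a SEQ_CST fetch-and-add needs both suffixes and is emitted as
   "amoadd.w.aqrl", while a RELAXED one needs neither and is emitted as
   plain "amoadd.w".  Under Ztso the AMO mapping is strong enough on its
   own, so no suffix is ever added. */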
/* Get the REGNO alignment of a vector mode.
   The alignment equals LMUL when LMUL >= 1.
   Otherwise, the alignment is 1. */
int
riscv_get_v_regno_alignment (machine_mode mode)
/* 3.3.2. When LMUL = 2,4,8, register numbers must be a multiple of 2,4,8,
   but for mask vector registers, register numbers can be any number. */
5757 int lmul = 1;
5758 machine_mode rvv_mode = mode;
5759 if (riscv_v_ext_vls_mode_p (rvv_mode))
5761 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
5762 if (size < TARGET_MIN_VLEN)
5763 return 1;
5764 else
5765 return size / TARGET_MIN_VLEN;
5767 if (riscv_v_ext_tuple_mode_p (rvv_mode))
5768 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
5769 poly_int64 size = GET_MODE_SIZE (rvv_mode);
5770 if (known_gt (size, UNITS_PER_V_REG))
5771 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
5772 return lmul;
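/* For example, with TARGET_MIN_VLEN = 128, an RVVM4SImode value occupies
   four vector registers (LMUL = 4), so its register number must be a
   multiple of 4; a 256-bit VLS mode likewise gets alignment 256/128 = 2. */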
5775 /* Define ASM_OUTPUT_OPCODE to do anything special before
5776 emitting an opcode. */
5777 const char *
5778 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
5780 if (TARGET_XTHEADVECTOR)
5781 return th_asm_output_opcode (asm_out_file, p);
5783 return p;
5786 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
5788 'h' Print the high-part relocation associated with OP, after stripping
5789 any outermost HIGH.
5790 'R' Print the low-part relocation associated with OP.
5791 'C' Print the integer branch condition for comparison OP.
5792 'N' Print the inverse of the integer branch condition for comparison OP.
5793 'A' Print the atomic operation suffix for memory model OP.
5794 'I' Print the LR suffix for memory model OP.
5795 'J' Print the SC suffix for memory model OP.
5796 'z' Print x0 if OP is zero, otherwise print OP normally.
5797 'i' Print i if the operand is not a register.
5798 'S' Print shift-index of single-bit mask OP.
5799 'T' Print shift-index of inverted single-bit mask OP.
'~' Print w if TARGET_64BIT is true; otherwise print nothing.
5802 Note please keep this list and the list in riscv.md in sync. */
5804 static void
5805 riscv_print_operand (FILE *file, rtx op, int letter)
/* `~` does not take an operand, so OP will be null.
   Check for that before accessing OP. */
5810 if (letter == '~')
5812 if (TARGET_64BIT)
5813 fputc('w', file);
5814 return;
5816 machine_mode mode = GET_MODE (op);
5817 enum rtx_code code = GET_CODE (op);
5819 switch (letter)
5821 case 'o': {
5822 /* Print 'OP' variant for RVV instructions.
5823 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
5824 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
5825 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
5826 if (riscv_v_ext_mode_p (mode))
5828 if (REG_P (op))
5829 asm_fprintf (file, "v");
5830 else if (CONST_VECTOR_P (op))
5831 asm_fprintf (file, "i");
5832 else
5833 output_operand_lossage ("invalid vector operand");
5835 else
5837 if (CONST_INT_P (op))
5838 asm_fprintf (file, "i");
5839 else
5840 asm_fprintf (file, "x");
5842 break;
5844 case 'v': {
5845 rtx elt;
5847 if (REG_P (op))
5848 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
5849 else
5851 if (!const_vec_duplicate_p (op, &elt))
5852 output_operand_lossage ("invalid vector constant");
5853 else if (satisfies_constraint_Wc0 (op))
5854 asm_fprintf (file, "0");
5855 else if (satisfies_constraint_vi (op)
5856 || satisfies_constraint_vj (op)
5857 || satisfies_constraint_vk (op))
5858 asm_fprintf (file, "%wd", INTVAL (elt));
5859 else
5860 output_operand_lossage ("invalid vector constant");
5862 break;
5864 case 'V': {
5865 rtx elt;
5866 if (!const_vec_duplicate_p (op, &elt))
5867 output_operand_lossage ("invalid vector constant");
5868 else if (satisfies_constraint_vj (op))
5869 asm_fprintf (file, "%wd", -INTVAL (elt));
5870 else
5871 output_operand_lossage ("invalid vector constant");
5872 break;
5874 case 'm': {
5875 if (riscv_v_ext_mode_p (mode))
5877 /* Calculate lmul according to mode and print the value. */
5878 int lmul = riscv_get_v_regno_alignment (mode);
5879 asm_fprintf (file, "%d", lmul);
5881 else if (code == CONST_INT)
5883 /* If it is a const_int value, it denotes the VLMUL field enum. */
5884 unsigned int vlmul = UINTVAL (op);
5885 switch (vlmul)
5887 case riscv_vector::LMUL_1:
5888 asm_fprintf (file, "%s", "m1");
5889 break;
5890 case riscv_vector::LMUL_2:
5891 asm_fprintf (file, "%s", "m2");
5892 break;
5893 case riscv_vector::LMUL_4:
5894 asm_fprintf (file, "%s", "m4");
5895 break;
5896 case riscv_vector::LMUL_8:
5897 asm_fprintf (file, "%s", "m8");
5898 break;
5899 case riscv_vector::LMUL_F8:
5900 asm_fprintf (file, "%s", "mf8");
5901 break;
5902 case riscv_vector::LMUL_F4:
5903 asm_fprintf (file, "%s", "mf4");
5904 break;
5905 case riscv_vector::LMUL_F2:
5906 asm_fprintf (file, "%s", "mf2");
5907 break;
5908 default:
5909 gcc_unreachable ();
5912 else
5913 output_operand_lossage ("invalid vector constant");
5914 break;
5916 case 'p': {
5917 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5919 /* Print for RVV mask operand.
5920 If op is reg, print ",v0.t".
5921 Otherwise, don't print anything. */
5922 if (code == REG)
5923 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
5925 else if (code == CONST_INT)
5927 /* Tail && Mask policy. */
5928 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
5930 else
5931 output_operand_lossage ("invalid vector constant");
5932 break;
5934 case 'h':
5935 if (code == HIGH)
5936 op = XEXP (op, 0);
5937 riscv_print_operand_reloc (file, op, true);
5938 break;
5940 case 'R':
5941 riscv_print_operand_reloc (file, op, false);
5942 break;
5944 case 'C':
5945 /* The RTL names match the instruction names. */
5946 fputs (GET_RTX_NAME (code), file);
5947 break;
5949 case 'N':
5950 /* The RTL names match the instruction names. */
5951 fputs (GET_RTX_NAME (reverse_condition (code)), file);
5952 break;
5954 case 'A': {
5955 const enum memmodel model = memmodel_base (INTVAL (op));
5956 if (riscv_memmodel_needs_amo_acquire (model)
5957 && riscv_memmodel_needs_amo_release (model))
5958 fputs (".aqrl", file);
5959 else if (riscv_memmodel_needs_amo_acquire (model))
5960 fputs (".aq", file);
5961 else if (riscv_memmodel_needs_amo_release (model))
5962 fputs (".rl", file);
5963 break;
5966 case 'I': {
5967 const enum memmodel model = memmodel_base (INTVAL (op));
5968 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
5969 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
5970 break;
5971 else if (model == MEMMODEL_SEQ_CST)
5972 fputs (".aqrl", file);
5973 else if (riscv_memmodel_needs_amo_acquire (model))
5974 fputs (".aq", file);
5975 break;
5978 case 'J': {
5979 const enum memmodel model = memmodel_base (INTVAL (op));
5980 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
5981 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
5982 fputs (".rl", file);
5983 else if (TARGET_ZTSO)
5984 break;
5985 else if (riscv_memmodel_needs_amo_release (model))
5986 fputs (".rl", file);
5987 break;
5990 case 'i':
5991 if (code != REG)
5992 fputs ("i", file);
5993 break;
5995 case 'B':
5996 fputs (GET_RTX_NAME (code), file);
5997 break;
5999 case 'S':
6001 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6002 output_addr_const (file, newop);
6003 break;
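      /* E.g. for 'S' a single-bit mask operand of 0x100 prints 8, its
	 corresponding shift index. */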
6005 case 'T':
6007 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6008 output_addr_const (file, newop);
6009 break;
6011 case 'X':
6013 int ival = INTVAL (op) + 1;
6014 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6015 output_addr_const (file, newop);
6016 break;
6018 case 'Y':
6020 unsigned int imm = (UINTVAL (op) & 63);
6021 gcc_assert (imm <= 63);
6022 rtx newop = GEN_INT (imm);
6023 output_addr_const (file, newop);
6024 break;
6026 default:
6027 switch (code)
6029 case REG:
6030 if (letter && letter != 'z')
6031 output_operand_lossage ("invalid use of '%%%c'", letter);
6032 fprintf (file, "%s", reg_names[REGNO (op)]);
6033 break;
6035 case MEM:
6036 if (letter && letter != 'z')
6037 output_operand_lossage ("invalid use of '%%%c'", letter);
6038 else
6039 output_address (mode, XEXP (op, 0));
6040 break;
6042 case CONST_DOUBLE:
6044 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6046 fputs (reg_names[GP_REG_FIRST], file);
6047 break;
6050 int fli_index = riscv_float_const_rtx_index_for_fli (op);
6051 if (fli_index == -1 || fli_index > 31)
6053 output_operand_lossage ("invalid use of '%%%c'", letter);
6054 break;
6056 asm_fprintf (file, "%s", fli_value_print[fli_index]);
6057 break;
6060 default:
6061 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6062 fputs (reg_names[GP_REG_FIRST], file);
6063 else if (letter && letter != 'z')
6064 output_operand_lossage ("invalid use of '%%%c'", letter);
6065 else
6066 output_addr_const (file, riscv_strip_unspec_address (op));
6067 break;
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
6073 static bool
6074 riscv_print_operand_punct_valid_p (unsigned char code)
6076 return (code == '~');
6079 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6081 static void
6082 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6084 struct riscv_address_info addr;
6086 if (th_print_operand_address (file, mode, x))
6087 return;
6089 if (riscv_classify_address (&addr, x, word_mode, true))
6090 switch (addr.type)
6092 case ADDRESS_REG:
6093 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
6094 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6095 return;
6097 case ADDRESS_LO_SUM:
6098 riscv_print_operand_reloc (file, addr.offset, false);
6099 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6100 return;
6102 case ADDRESS_CONST_INT:
6103 output_addr_const (file, x);
6104 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
6105 return;
6107 case ADDRESS_SYMBOLIC:
6108 output_addr_const (file, riscv_strip_unspec_address (x));
6109 return;
6111 default:
6112 gcc_unreachable ();
6115 gcc_unreachable ();
6118 static bool
6119 riscv_size_ok_for_small_data_p (int size)
6121 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
6124 /* Return true if EXP should be placed in the small data section. */
6126 static bool
6127 riscv_in_small_data_p (const_tree x)
/* default_use_anchors_for_symbol_p doesn't gather small data to use
   the anchor symbol to address nearby objects.  In the large code model,
   the anchor optimization gives better results, so keep such objects
   out of the small data section. */
6132 if (riscv_cmodel == CM_LARGE)
6133 return false;
6135 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
6136 return false;
6138 if (VAR_P (x) && DECL_SECTION_NAME (x))
6140 const char *sec = DECL_SECTION_NAME (x);
6141 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
6144 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
6147 /* Switch to the appropriate section for output of DECL. */
6149 static section *
6150 riscv_select_section (tree decl, int reloc,
6151 unsigned HOST_WIDE_INT align)
6153 switch (categorize_decl_for_section (decl, reloc))
6155 case SECCAT_SRODATA:
6156 return get_named_section (decl, ".srodata", reloc);
6158 default:
6159 return default_elf_select_section (decl, reloc, align);
6163 /* Switch to the appropriate section for output of DECL. */
6165 static void
6166 riscv_unique_section (tree decl, int reloc)
6168 const char *prefix = NULL;
6169 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6171 switch (categorize_decl_for_section (decl, reloc))
6173 case SECCAT_SRODATA:
6174 prefix = one_only ? ".sr" : ".srodata";
6175 break;
6177 default:
6178 break;
6180 if (prefix)
6182 const char *name, *linkonce;
6183 char *string;
6185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6186 name = targetm.strip_name_encoding (name);
6188 /* If we're using one_only, then there needs to be a .gnu.linkonce
6189 prefix to the section name. */
6190 linkonce = one_only ? ".gnu.linkonce" : "";
6192 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6194 set_decl_section_name (decl, string);
6195 return;
6197 default_unique_section (decl, reloc);
6200 /* Constant pools are per-function when in large code model. */
6202 static inline bool
6203 riscv_can_use_per_function_literal_pools_p (void)
6205 return riscv_cmodel == CM_LARGE;
6208 static bool
6209 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6211 /* We can't use blocks for constants when we're using a per-function
6212 constant pool. */
6213 return !riscv_can_use_per_function_literal_pools_p ();
6216 /* Return a section for X, handling small data. */
6218 static section *
6219 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6220 unsigned HOST_WIDE_INT align)
6222 /* The literal pool stays with the function. */
6223 if (riscv_can_use_per_function_literal_pools_p ())
6224 return function_section (current_function_decl);
6226 section *s = default_elf_select_rtx_section (mode, x, align);
6228 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6230 if (startswith (s->named.name, ".rodata.cst"))
6232 /* Rename .rodata.cst* to .srodata.cst*. */
6233 char *name = (char *) alloca (strlen (s->named.name) + 2);
6234 sprintf (name, ".s%s", s->named.name + 1);
6235 return get_section (name, s->named.common.flags, NULL);
6238 if (s == data_section)
6239 return sdata_section;
6242 return s;
6245 /* Make the last instruction frame-related and note that it performs
6246 the operation described by FRAME_PATTERN. */
6248 static void
6249 riscv_set_frame_expr (rtx frame_pattern)
6251 rtx insn;
6253 insn = get_last_insn ();
6254 RTX_FRAME_RELATED_P (insn) = 1;
6255 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6256 frame_pattern,
6257 REG_NOTES (insn));
6260 /* Return a frame-related rtx that stores REG at MEM.
6261 REG must be a single register. */
6263 static rtx
6264 riscv_frame_set (rtx mem, rtx reg)
6266 rtx set = gen_rtx_SET (mem, reg);
6267 RTX_FRAME_RELATED_P (set) = 1;
6268 return set;
6271 /* Returns true if the current function might contain a far jump. */
6273 static bool
6274 riscv_far_jump_used_p ()
6276 size_t func_size = 0;
6278 if (cfun->machine->far_jump_used)
6279 return true;
6281 /* We can't change far_jump_used during or after reload, as there is
6282 no chance to change stack frame layout. So we must rely on the
6283 conservative heuristic below having done the right thing. */
6284 if (reload_in_progress || reload_completed)
6285 return false;
6287 /* Estimate the function length. */
6288 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6289 func_size += get_attr_length (insn);
6291 /* Conservatively determine whether some jump might exceed 1 MiB
6292 displacement. */
6293 if (func_size * 2 >= 0x100000)
6294 cfun->machine->far_jump_used = true;
6296 return cfun->machine->far_jump_used;
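/* The threshold above mirrors the +-1 MiB range of the jal instruction:
   if twice the length estimate could reach 1 MiB, we conservatively
   assume that some jump in the function may need to go through $ra. */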
6299 /* Return true, if the current function must save the incoming return
6300 address. */
6302 static bool
6303 riscv_save_return_addr_reg_p (void)
6305 /* The $ra register is call-clobbered: if this is not a leaf function,
6306 save it. */
6307 if (!crtl->is_leaf)
6308 return true;
6310 /* We need to save the incoming return address if __builtin_eh_return
6311 is being used to set a different return address. */
6312 if (crtl->calls_eh_return)
6313 return true;
6315 /* Far jumps/branches use $ra as a temporary to set up the target jump
6316 location (clobbering the incoming return address). */
6317 if (riscv_far_jump_used_p ())
6318 return true;
/* We need to save the return address if it has been used anywhere
   in this function. */
6321 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
6322 return true;
/* In a leaf function, ra must be saved when a frame pointer is
   required and leaf frame pointers are not omitted by option. */
6326 if (frame_pointer_needed && crtl->is_leaf
6327 && !TARGET_OMIT_LEAF_FRAME_POINTER)
6328 return true;
6330 return false;
6333 /* Return true if the current function must save register REGNO. */
6335 static bool
6336 riscv_save_reg_p (unsigned int regno)
6338 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
6339 bool might_clobber = crtl->saves_all_registers
6340 || df_regs_ever_live_p (regno);
6342 if (call_saved && might_clobber)
6343 return true;
6345 /* Save callee-saved V registers. */
6346 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
6347 && might_clobber)
6348 return true;
6350 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
6351 return true;
6353 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
6354 return true;
/* If this is an interrupt handler, then we must save extra registers. */
6357 if (cfun->machine->interrupt_handler_p)
6359 /* zero register is always zero. */
6360 if (regno == GP_REG_FIRST)
6361 return false;
6363 /* The function will return the stack pointer to its original value. */
6364 if (regno == STACK_POINTER_REGNUM)
6365 return false;
6367 /* By convention, we assume that gp and tp are safe. */
6368 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
6369 return false;
6371 /* We must save every register used in this function. If this is not a
6372 leaf function, then we must save all temporary registers. */
6373 if (df_regs_ever_live_p (regno)
6374 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
6375 return true;
6378 return false;
/* Return TRUE if Zcmp push and pop insns should be
   avoided, FALSE otherwise.
   Only use multi push & pop if all masked GPRs can be covered,
   stack access is SP based,
   GPRs are at the top of the stack frame,
   and there are no conflicts in stack allocation with other features. */
6387 static bool
6388 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
6390 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
6391 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
6392 || crtl->args.pretend_args_size != 0
6393 || (use_shrink_wrapping_separate ()
6394 && !riscv_avoid_shrink_wrapping_separate ())
6395 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
6396 return true;
6398 return false;
6401 /* Determine whether to use multi push insn. */
6402 static bool
6403 riscv_use_multi_push (const struct riscv_frame_info *frame)
6405 if (riscv_avoid_multi_push (frame))
6406 return false;
6408 return (frame->multi_push_adj_base != 0);
6411 /* Return TRUE if a libcall to save/restore GPRs should be
6412 avoided. FALSE otherwise. */
6413 static bool
6414 riscv_avoid_save_libcall (void)
6416 if (!TARGET_SAVE_RESTORE
6417 || crtl->calls_eh_return
6418 || frame_pointer_needed
6419 || cfun->machine->interrupt_handler_p
6420 || cfun->machine->varargs_size != 0
6421 || crtl->args.pretend_args_size != 0)
6422 return true;
6424 return false;
6427 /* Determine whether to call GPR save/restore routines. */
6428 static bool
6429 riscv_use_save_libcall (const struct riscv_frame_info *frame)
6431 if (riscv_avoid_save_libcall ())
6432 return false;
6434 return frame->save_libcall_adjustment != 0;
6437 /* Determine which GPR save/restore routine to call. */
6439 static unsigned
6440 riscv_save_libcall_count (unsigned mask)
6442 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
6443 if (BITSET_P (mask, n))
6444 return CALLEE_SAVED_REG_NUMBER (n) + 1;
6445 abort ();
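/* For instance, if the highest callee-saved GPR in MASK is s2, this
   returns CALLEE_SAVED_REG_NUMBER (s2) + 1 == 3, selecting the libcall
   variant that handles s0-s2; the ra slot is counted separately by the
   callers. */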
/* Calculate the number of s-registers in a multi push and pop.
   Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
6450 static unsigned
6451 riscv_multi_push_sregs_count (unsigned mask)
6453 unsigned num = riscv_save_libcall_count (mask);
6454 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
6455 : num;
/* Calculate the number of registers (ra, s0-sx) in a multi push and pop. */
6459 static unsigned
6460 riscv_multi_push_regs_count (unsigned mask)
6462 /* 1 is for ra */
6463 return riscv_multi_push_sregs_count (mask) + 1;
/* Round VALUE up to 16-byte alignment, for poly_int. */
6467 static poly_int64
6468 riscv_16bytes_align (poly_int64 value)
6470 return aligned_upper_bound (value, 16);
6473 static HOST_WIDE_INT
6474 riscv_16bytes_align (HOST_WIDE_INT value)
6476 return ROUND_UP (value, 16);
6479 /* Handle stack align for poly_int. */
6480 static poly_int64
6481 riscv_stack_align (poly_int64 value)
6483 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
6486 static HOST_WIDE_INT
6487 riscv_stack_align (HOST_WIDE_INT value)
6489 return RISCV_STACK_ALIGN (value);
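/* E.g. with the usual 16-byte PREFERRED_STACK_BOUNDARY,
   riscv_stack_align (24) yields 32. */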
6492 /* Populate the current function's riscv_frame_info structure.
RISC-V stack frames grow downward.  High addresses are at the top.
6496 +-------------------------------+
6498 | incoming stack arguments |
6500 +-------------------------------+ <-- incoming stack pointer
6502 | callee-allocated save area |
6503 | for arguments that are |
6504 | split between registers and |
6505 | the stack |
6507 +-------------------------------+ <-- arg_pointer_rtx
6509 | callee-allocated save area |
6510 | for register varargs |
6512 +-------------------------------+ <-- hard_frame_pointer_rtx;
6513 | | stack_pointer_rtx + gp_sp_offset
6514 | GPR save area | + UNITS_PER_WORD
6516 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
6517 | | + UNITS_PER_FP_REG
6518 | FPR save area |
6520 +-------------------------------+ <-- stack_pointer_rtx
6521 | | + v_sp_offset_top
6522 | Vector Registers save area |
6524 | ----------------------------- | <-- stack_pointer_rtx
6525 | padding | + v_sp_offset_bottom
6526 +-------------------------------+ <-- frame_pointer_rtx (virtual)
6528 | local variables |
6530 P +-------------------------------+
6532 | outgoing stack arguments |
6534 +-------------------------------+ <-- stack_pointer_rtx
6536 Dynamic stack allocations such as alloca insert data at point P.
6537 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
6538 hard_frame_pointer_rtx unchanged. */
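/* As a concrete example (assuming neither -msave-restore nor Zcmp push/pop
   is in effect): an RV64 function with 16 bytes of locals that saves only
   ra and s0 gets outgoing args 0, frame_pointer_offset = 16, a 16-byte
   GPR save area, gp_sp_offset = 24, hard_frame_pointer_offset = 32 and
   total_size = 32. */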
6540 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
6542 static void
6543 riscv_compute_frame_info (void)
6545 struct riscv_frame_info *frame;
6546 poly_int64 offset;
6547 bool interrupt_save_prologue_temp = false;
6548 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
6549 unsigned int num_v_saved = 0;
6551 frame = &cfun->machine->frame;
/* In an interrupt function, there are two cases in which t0 needs to be used:
   1.  If we have a large frame, then we need to save/restore t0.  We check
       for this before clearing the frame struct.
   2.  We need to save and restore some CSRs in the frame. */
6557 if (cfun->machine->interrupt_handler_p)
6559 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
6560 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
6561 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
6562 interrupt_save_prologue_temp = true;
6565 frame->reset();
6567 if (!cfun->machine->naked_p)
6569 /* Find out which GPRs we need to save. */
6570 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
6571 if (riscv_save_reg_p (regno)
6572 || (interrupt_save_prologue_temp
6573 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
6574 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6576 /* If this function calls eh_return, we must also save and restore the
6577 EH data registers. */
6578 if (crtl->calls_eh_return)
6579 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6580 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6582 /* Find out which FPRs we need to save. This loop must iterate over
6583 the same space as its companion in riscv_for_each_saved_reg. */
6584 if (TARGET_HARD_FLOAT)
6585 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6586 if (riscv_save_reg_p (regno))
6587 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
6589 /* Find out which V registers we need to save. */
6590 if (TARGET_VECTOR)
6591 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6592 if (riscv_save_reg_p (regno))
6594 frame->vmask |= 1 << (regno - V_REG_FIRST);
6595 num_v_saved++;
6599 if (frame->mask)
6601 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
6603 /* 1 is for ra */
6604 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
6605 /* Only use save/restore routines if they don't alter the stack size. */
6606 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
6607 && !riscv_avoid_save_libcall ())
/* The libcall saves/restores 3 registers at once, so on RVE we need to
   allocate 12 bytes for the callee-saved registers. */
6611 if (TARGET_RVE)
6612 x_save_size = 3 * UNITS_PER_WORD;
6614 frame->save_libcall_adjustment = x_save_size;
6617 if (!riscv_avoid_multi_push (frame))
6619 /* num(ra, s0-sx) */
6620 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
6621 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
6622 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
6626 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
6627 if (cfun->machine->interrupt_handler_p
6628 && ((TARGET_HARD_FLOAT && frame->fmask)
6629 || (TARGET_ZFINX
6630 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
6631 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6632 /* Save and restore FCSR. */
6633 /* TODO: When P or V extensions support interrupts, some of their CSRs
6634 may also need to be saved and restored. */
6635 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
6637 /* At the bottom of the frame are any outgoing stack arguments. */
6638 offset = riscv_stack_align (crtl->outgoing_args_size);
6639 /* Next are local stack variables. */
6640 offset += riscv_stack_align (get_frame_size ());
6641 /* The virtual frame pointer points above the local variables. */
6642 frame->frame_pointer_offset = offset;
6643 /* Next are the callee-saved VRs. */
6644 if (frame->vmask)
6645 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
6646 frame->v_sp_offset_top = offset;
6647 frame->v_sp_offset_bottom
6648 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
6649 /* Next are the callee-saved FPRs. */
6650 if (frame->fmask)
6651 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
6652 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
6653 /* Next are the callee-saved GPRs. */
6654 if (frame->mask)
6656 offset += x_save_size;
/* Align to 16 bytes and add padding to the GPR part to honor
   both stack alignment and Zcmp push/pop size alignment. */
6659 if (riscv_use_multi_push (frame)
6660 && known_lt (offset, frame->multi_push_adj_base
6661 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
6662 offset = riscv_16bytes_align (offset);
6664 frame->gp_sp_offset = offset - UNITS_PER_WORD;
6665 /* The hard frame pointer points above the callee-saved GPRs. */
6666 frame->hard_frame_pointer_offset = offset;
/* Above the hard frame pointer is the callee-allocated varargs save area. */
6668 offset += riscv_stack_align (cfun->machine->varargs_size);
6669 /* Next is the callee-allocated area for pretend stack arguments. */
6670 offset += riscv_stack_align (crtl->args.pretend_args_size);
6671 /* Arg pointer must be below pretend args, but must be above alignment
6672 padding. */
6673 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
6674 frame->total_size = offset;
/* Above that come the incoming stack pointer and any incoming arguments. */
6679 /* Make sure that we're not trying to eliminate to the wrong hard frame
6680 pointer. */
6682 static bool
6683 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6685 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6688 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
6689 or argument pointer. TO is either the stack pointer or hard frame
6690 pointer. */
6692 poly_int64
6693 riscv_initial_elimination_offset (int from, int to)
6695 poly_int64 src, dest;
6697 riscv_compute_frame_info ();
6699 if (to == HARD_FRAME_POINTER_REGNUM)
6700 dest = cfun->machine->frame.hard_frame_pointer_offset;
6701 else if (to == STACK_POINTER_REGNUM)
6702 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
6703 else
6704 gcc_unreachable ();
6706 if (from == FRAME_POINTER_REGNUM)
6707 src = cfun->machine->frame.frame_pointer_offset;
6708 else if (from == ARG_POINTER_REGNUM)
6709 src = cfun->machine->frame.arg_pointer_offset;
6710 else
6711 gcc_unreachable ();
6713 return src - dest;
6716 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6717 previous frame. */
rtx
riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6722 if (count != 0)
6723 return const0_rtx;
6725 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6728 /* Emit code to change the current function's return address to
6729 ADDRESS. SCRATCH is available as a scratch register, if needed.
6730 ADDRESS and SCRATCH are both word-mode GPRs. */
6732 void
6733 riscv_set_return_address (rtx address, rtx scratch)
6735 rtx slot_address;
6737 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
6738 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
6739 cfun->machine->frame.gp_sp_offset.to_constant());
6740 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
6743 /* Save register REG to MEM. Make the instruction frame-related. */
6745 static void
6746 riscv_save_reg (rtx reg, rtx mem)
6748 riscv_emit_move (mem, reg);
6749 riscv_set_frame_expr (riscv_frame_set (mem, reg));
6752 /* Restore register REG from MEM. */
6754 static void
6755 riscv_restore_reg (rtx reg, rtx mem)
6757 rtx insn = riscv_emit_move (reg, mem);
6758 rtx dwarf = NULL_RTX;
6759 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
6761 if (known_gt (epilogue_cfa_sp_offset, 0)
6762 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
6764 rtx cfa_adjust_rtx
6765 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
6766 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
6767 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
6770 REG_NOTES (insn) = dwarf;
6771 RTX_FRAME_RELATED_P (insn) = 1;
6774 /* A function to save or store a register. The first argument is the
6775 register and the second is the stack slot. */
6776 typedef void (*riscv_save_restore_fn) (rtx, rtx);
6778 /* Use FN to save or restore register REGNO. MODE is the register's
6779 mode and OFFSET is the offset of its save slot from the current
6780 stack pointer. */
6782 static void
6783 riscv_save_restore_reg (machine_mode mode, int regno,
6784 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
6786 rtx mem;
6788 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
6789 fn (gen_rtx_REG (mode, regno), mem);
6792 /* Return the next register up from REGNO up to LIMIT for the callee
6793 to save or restore. OFFSET will be adjusted accordingly.
6794 If INC is set, then REGNO will be incremented first.
6795 Returns INVALID_REGNUM if there is no such next register. */
6797 static unsigned int
6798 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
6799 HOST_WIDE_INT *offset, bool inc = true)
6801 if (inc)
6802 regno++;
6804 while (regno <= limit)
6806 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
6808 *offset = *offset - UNITS_PER_WORD;
6809 return regno;
6812 regno++;
6814 return INVALID_REGNUM;
6817 /* Return TRUE if provided REGNO is eh return data register. */
6819 static bool
6820 riscv_is_eh_return_data_register (unsigned int regno)
6822 unsigned int i, regnum;
6824 if (!crtl->calls_eh_return)
6825 return false;
6827 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6828 if (regno == regnum)
6830 return true;
6833 return false;
6836 /* Call FN for each register that is saved by the current function.
6837 SP_OFFSET is the offset of the current stack pointer from the start
6838 of the frame. */
6840 static void
6841 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
6842 bool epilogue, bool maybe_eh_return)
6844 HOST_WIDE_INT offset, first_fp_offset;
6845 unsigned int regno, num_masked_fp = 0;
6846 unsigned int start = GP_REG_FIRST;
6847 unsigned int limit = GP_REG_LAST;
6849 /* Save the link register and s-registers. */
6850 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
6851 + UNITS_PER_WORD;
6852 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
6853 regno != INVALID_REGNUM;
6854 regno = riscv_next_saved_reg (regno, limit, &offset))
6856 if (cfun->machine->reg_is_wrapped_separately[regno])
6857 continue;
6859 /* If this is a normal return in a function that calls the eh_return
6860 builtin, then do not restore the eh return data registers as that
6861 would clobber the return value. But we do still need to save them
6862 in the prologue, and restore them for an exception return, so we
6863 need special handling here. */
6864 if (epilogue && !maybe_eh_return
6865 && riscv_is_eh_return_data_register (regno))
6866 continue;
/* In an interrupt function, save and restore on the stack the CSRs that
   would otherwise be clobbered. */
6870 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
6871 && cfun->machine->interrupt_handler_p
6872 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
6873 || (TARGET_ZFINX
6874 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
/* Always assume FCSR occupies UNITS_PER_WORD, to prevent stack
   offsets from becoming misaligned later. */
6878 unsigned int fcsr_size = UNITS_PER_WORD;
6879 if (!epilogue)
6881 riscv_save_restore_reg (word_mode, regno, offset, fn);
6882 offset -= fcsr_size;
6883 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
6884 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6885 offset, riscv_save_reg);
6887 else
6889 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6890 offset - fcsr_size, riscv_restore_reg);
6891 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
6892 riscv_save_restore_reg (word_mode, regno, offset, fn);
6893 offset -= fcsr_size;
6895 continue;
6898 if (TARGET_XTHEADMEMPAIR)
6900 /* Get the next reg/offset pair. */
6901 HOST_WIDE_INT offset2 = offset;
6902 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
6904 /* Validate everything before emitting a mempair instruction. */
6905 if (regno2 != INVALID_REGNUM
6906 && !cfun->machine->reg_is_wrapped_separately[regno2]
6907 && !(epilogue && !maybe_eh_return
6908 && riscv_is_eh_return_data_register (regno2)))
6910 bool load_p = (fn == riscv_restore_reg);
6911 rtx operands[4];
6912 th_mempair_prepare_save_restore_operands (operands,
6913 load_p, word_mode,
6914 regno, offset,
6915 regno2, offset2);
6917 /* If the operands fit into a mempair insn, then emit one. */
6918 if (th_mempair_operands_p (operands, load_p, word_mode))
6920 th_mempair_save_restore_regs (operands, load_p, word_mode);
6921 offset = offset2;
6922 regno = regno2;
6923 continue;
6928 riscv_save_restore_reg (word_mode, regno, offset, fn);
6931 /* This loop must iterate over the same space as its companion in
6932 riscv_compute_frame_info. */
6933 first_fp_offset
6934 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
6935 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6936 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
6938 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
6939 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
6940 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
6941 ? CALLEE_SAVED_FREG_NUMBER (regno)
6942 : num_masked_fp;
6943 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
6944 if (handle_reg)
6945 riscv_save_restore_reg (mode, regno, offset, fn);
6946 num_masked_fp++;
6950 /* Call FN for each V register that is saved by the current function. */
6952 static void
6953 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
6954 riscv_save_restore_fn fn, bool prologue)
6956 rtx vlen = NULL_RTX;
6957 if (cfun->machine->frame.vmask != 0)
6959 if (UNITS_PER_V_REG.is_constant ()
6960 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
6961 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
6962 else
6964 vlen = RISCV_PROLOGUE_TEMP (Pmode);
6965 rtx insn
6966 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
6967 RTX_FRAME_RELATED_P (insn) = 1;
6971 /* Select the mode where LMUL is 1 and SEW is largest. */
6972 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
6974 if (prologue)
6976 /* This loop must iterate over the same space as its companion in
6977 riscv_compute_frame_info. */
6978 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6979 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
6981 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
6982 if (handle_reg)
6984 rtx insn = NULL_RTX;
6985 if (CONST_INT_P (vlen))
6987 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
6988 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
6989 stack_pointer_rtx,
6990 GEN_INT (-INTVAL (vlen))));
6992 else
6993 insn = emit_insn (
6994 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
6995 gcc_assert (insn != NULL_RTX);
6996 RTX_FRAME_RELATED_P (insn) = 1;
6997 riscv_save_restore_reg (m1_mode, regno, 0, fn);
6998 remaining_size -= UNITS_PER_V_REG;
7002 else
7004 /* This loop must iterate over the same space as its companion in
7005 riscv_compute_frame_info. */
7006 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
7007 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7009 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7010 if (handle_reg)
7012 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7013 rtx insn = emit_insn (
7014 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7015 gcc_assert (insn != NULL_RTX);
7016 RTX_FRAME_RELATED_P (insn) = 1;
7017 remaining_size -= UNITS_PER_V_REG;
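/* In effect each vector register gets its own sp bump: the prologue
   repeatedly decrements sp by the (possibly run-time) register size and
   stores at offset 0, and the epilogue loads at offset 0 then increments
   sp, since VLA register sizes rule out fixed save-slot offsets. */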
7023 /* For stack frames that can't be allocated with a single ADDI instruction,
7024 compute the best value to initially allocate. It must at a minimum
7025 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
7026 try to pick a value that will allow compression of the register saves
7027 without adding extra instructions. */
7029 static HOST_WIDE_INT
7030 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
7032 HOST_WIDE_INT remaining_const_size;
7033 if (!remaining_size.is_constant ())
7034 remaining_const_size
7035 = riscv_stack_align (remaining_size.coeffs[0])
7036 - riscv_stack_align (remaining_size.coeffs[1]);
7037 else
7038 remaining_const_size = remaining_size.to_constant ();
/* The first step must reach the top of the vector register save area
   if any vector registers need to be preserved. */
7042 if (frame->vmask != 0)
7043 return (remaining_size - frame->v_sp_offset_top).to_constant ();
7045 if (SMALL_OPERAND (remaining_const_size))
7046 return remaining_const_size;
7048 poly_int64 callee_saved_first_step =
7049 remaining_size - frame->frame_pointer_offset;
7050 gcc_assert(callee_saved_first_step.is_constant ());
7051 HOST_WIDE_INT min_first_step =
7052 riscv_stack_align (callee_saved_first_step.to_constant ());
7053 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
7054 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
7055 gcc_assert (min_first_step <= max_first_step);
7057 /* As an optimization, use the least-significant bits of the total frame
7058 size, so that the second adjustment step is just LUI + ADD. */
7059 if (!SMALL_OPERAND (min_second_step)
7060 && remaining_const_size % IMM_REACH <= max_first_step
7061 && remaining_const_size % IMM_REACH >= min_first_step)
7062 return remaining_const_size % IMM_REACH;
7064 if (TARGET_RVC || TARGET_ZCA)
7066 /* If we need two subtracts, and one is small enough to allow compressed
7067 loads and stores, then put that one first. */
7068 if (IN_RANGE (min_second_step, 0,
7069 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
7070 return MAX (min_second_step, min_first_step);
7072 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
7073 with the minimum first step, so that we can get compressed loads and
7074 stores. */
7075 else if (!SMALL_OPERAND (min_second_step))
7076 return min_first_step;
7079 return max_first_step;
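/* A sketch of the optimization above: for a constant frame of 4128 bytes
   whose callee-save area fits in the low bits, 4128 % IMM_REACH == 32, so
   the first step is 32 and the remaining 4096-byte adjustment is just
   LUI + ADD rather than LUI + ADDI + ADD. */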
7082 static rtx
7083 riscv_adjust_libcall_cfi_prologue ()
7085 rtx dwarf = NULL_RTX;
7086 rtx adjust_sp_rtx, reg, mem, insn;
7087 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7088 int offset;
7090 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7091 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7093 /* The save order is ra, s0, s1, s2 to s11. */
7094 if (regno == RETURN_ADDR_REGNUM)
7095 offset = saved_size - UNITS_PER_WORD;
7096 else if (regno == S0_REGNUM)
7097 offset = saved_size - UNITS_PER_WORD * 2;
7098 else if (regno == S1_REGNUM)
7099 offset = saved_size - UNITS_PER_WORD * 3;
7100 else
7101 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
7103 reg = gen_rtx_REG (Pmode, regno);
7104 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
7105 stack_pointer_rtx,
7106 offset));
7108 insn = gen_rtx_SET (mem, reg);
7109 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7112 /* Debug info for adjust sp. */
7113 adjust_sp_rtx =
7114 gen_rtx_SET (stack_pointer_rtx,
		 gen_rtx_PLUS (GET_MODE (stack_pointer_rtx), stack_pointer_rtx,
			       GEN_INT (-saved_size)));
7116 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7117 dwarf);
7118 return dwarf;
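/* E.g. with only ra and s0 saved, saved_size is 16 and the notes record
   ra at sp+8 and s0 at sp+0, matching the save-restore routine's layout. */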
7121 static rtx
7122 riscv_adjust_multi_push_cfi_prologue (int saved_size)
7124 rtx dwarf = NULL_RTX;
7125 rtx adjust_sp_rtx, reg, mem, insn;
7126 unsigned int mask = cfun->machine->frame.mask;
7127 int offset;
7128 int saved_cnt = 0;
7130 if (mask & S10_MASK)
7131 mask |= S11_MASK;
7133 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
7134 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
7136 /* The save order is s11-s0, ra
7137 from high to low addr. */
7138 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
7140 reg = gen_rtx_REG (Pmode, regno);
7141 mem = gen_frame_mem (Pmode,
7142 plus_constant (Pmode, stack_pointer_rtx, offset));
7144 insn = gen_rtx_SET (mem, reg);
7145 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7148 /* Debug info for adjust sp. */
7149 adjust_sp_rtx
7150 = gen_rtx_SET (stack_pointer_rtx,
7151 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
7152 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7153 return dwarf;
7156 static void
7157 riscv_emit_stack_tie (void)
7159 if (Pmode == SImode)
7160 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7161 else
7162 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
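/* The tie is a blockage-style insn that pretends to store to a wildcard
   memory location while using both sp and the hard frame pointer, so the
   scheduler and alias analysis cannot move frame accesses across the
   stack adjustments around it. */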
/* Zcmp multi push and pop: code_for_push_pop function pointer array. */
7166 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7167 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7168 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7169 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7170 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7171 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7172 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7173 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7174 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7175 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7176 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7177 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7178 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7179 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7180 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7181 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7182 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7183 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7184 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7185 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7186 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7187 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7188 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7189 {nullptr, nullptr, nullptr, nullptr},
7190 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7191 code_for_gpr_multi_popret_up_to_s11,
7192 code_for_gpr_multi_popretz_up_to_s11}};
7194 static rtx
7195 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7196 unsigned int regs_num)
7198 gcc_assert (op < ZCMP_OP_NUM);
7199 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
	      && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra. */
7201 rtx stack_adj = GEN_INT (adj_size);
7202 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
7205 static unsigned
7206 get_multi_push_fpr_mask (unsigned max_fprs_push)
7208 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7209 for (unsigned regno = FP_REG_FIRST;
7210 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7211 if (riscv_save_reg_p (regno))
7212 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7213 return mask_fprs_push;
7216 /* Expand the "prologue" pattern. */
7218 void
7219 riscv_expand_prologue (void)
7221 struct riscv_frame_info *frame = &cfun->machine->frame;
7222 poly_int64 remaining_size = frame->total_size;
7223 unsigned mask = frame->mask;
7224 unsigned fmask = frame->fmask;
7225 int spimm, multi_push_additional, stack_adj;
7226 rtx insn, dwarf = NULL_RTX;
7227 unsigned th_int_mask = 0;
7229 if (flag_stack_usage_info)
7230 current_function_static_stack_size = constant_lower_bound (remaining_size);
7232 if (cfun->machine->naked_p)
7233 return;
/* Prefer multi-push to the save-restore libcall. */
7236 if (riscv_use_multi_push (frame))
7238 remaining_size -= frame->multi_push_adj_base;
7239 /* If there are vector registers that need to be saved, then it can only
7240 be reduced to the frame->v_sp_offset_top position at most, since the
7241 vector registers will need to be saved one by one by decreasing the SP
7242 later. */
7243 poly_int64 remaining_size_above_varea
7244 = frame->vmask != 0
7245 ? remaining_size - frame->v_sp_offset_top
7246 : remaining_size;
7248 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7249 spimm = 3;
7250 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7251 spimm = 2;
7252 else if (known_gt (remaining_size_above_varea, 0))
7253 spimm = 1;
7254 else
7255 spimm = 0;
7256 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7257 frame->multi_push_adj_addi = multi_push_additional;
7258 remaining_size -= multi_push_additional;
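      /* cm.push can grow the adjustment in ZCMP_SP_INC_STEP (16-byte) steps,
	 with spimm in [0, 3]; e.g. 40 bytes still needed above the base
	 adjustment selects spimm = 3, i.e. 48 additional bytes. */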
/* Emit the multi-push insn along with its dwarf info. */
7261 stack_adj = frame->multi_push_adj_base + multi_push_additional;
7262 insn = emit_insn (riscv_gen_multi_push_pop_insn (
7263 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
7264 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
7265 RTX_FRAME_RELATED_P (insn) = 1;
7266 REG_NOTES (insn) = dwarf;
7268 /* Temporarily fib that we need not save GPRs. */
7269 frame->mask = 0;
/* Push FPRs into the additional space reserved by cm.push. */
7272 if (fmask)
7274 unsigned mask_fprs_push
7275 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
7276 frame->fmask &= mask_fprs_push;
7277 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
7278 false);
frame->fmask = fmask & ~mask_fprs_push; /* Mask of the remaining FPRs. */
7282 /* When optimizing for size, call a subroutine to save the registers. */
7283 else if (riscv_use_save_libcall (frame))
7285 rtx dwarf = NULL_RTX;
7286 dwarf = riscv_adjust_libcall_cfi_prologue ();
7288 remaining_size -= frame->save_libcall_adjustment;
7289 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
7290 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
7292 RTX_FRAME_RELATED_P (insn) = 1;
7293 REG_NOTES (insn) = dwarf;
7296 th_int_mask = th_int_get_mask (frame->mask);
7297 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7299 frame->mask &= ~th_int_mask;
7301 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for
7302 interrupts, such as fcsr. */
7303 if ((TARGET_HARD_FLOAT && frame->fmask)
7304 || (TARGET_ZFINX && frame->mask))
7305 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7307 unsigned save_adjustment = th_int_get_save_adjustment ();
7308 frame->gp_sp_offset -= save_adjustment;
7309 remaining_size -= save_adjustment;
7311 insn = emit_insn (gen_th_int_push ());
7313 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
7314 RTX_FRAME_RELATED_P (insn) = 1;
7315 REG_NOTES (insn) = dwarf;
7318 /* Save the GP, FP registers. */
7319 if ((frame->mask | frame->fmask) != 0)
7321 if (known_gt (remaining_size, frame->frame_pointer_offset))
7323 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
7324 remaining_size -= step1;
7325 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7326 GEN_INT (-step1));
7327 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7329 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
7332 /* Undo the above fib. */
7333 frame->mask = mask;
7334 frame->fmask = fmask;
7336 /* Set up the frame pointer, if we're using one. */
7337 if (frame_pointer_needed)
7339 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
7340 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
7341 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7343 riscv_emit_stack_tie ();
7346 /* Save the V registers. */
7347 if (frame->vmask != 0)
7348 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
7350 /* Allocate the rest of the frame. */
7351 if (known_gt (remaining_size, 0))
/* Two-step adjustment:
   1. scalable frame; 2. constant frame. */
7355 poly_int64 scalable_frame (0, 0);
7356 if (!remaining_size.is_constant ())
7358 /* First for scalable frame. */
7359 poly_int64 scalable_frame = remaining_size;
7360 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
7361 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
7362 remaining_size -= scalable_frame;
7365 /* Second step for constant frame. */
7366 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
7367 if (constant_frame == 0)
7369 /* We must have allocated stack space for the scalable frame.
7370 Emit a stack tie if we have a frame pointer so that the
7371 allocation is ordered WRT fp setup and subsequent writes
7372 into the frame. */
7373 if (frame_pointer_needed)
7374 riscv_emit_stack_tie ();
7375 return;
7378 if (SMALL_OPERAND (-constant_frame))
7380 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7381 GEN_INT (-constant_frame));
7382 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7384 else
7386 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
7387 emit_insn (gen_add3_insn (stack_pointer_rtx,
7388 stack_pointer_rtx,
7389 RISCV_PROLOGUE_TEMP (Pmode)));
7391 /* Describe the effect of the previous instructions. */
7392 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
7393 insn = gen_rtx_SET (stack_pointer_rtx, insn);
7394 riscv_set_frame_expr (insn);
7397 /* We must have allocated the remainder of the stack frame.
7398 Emit a stack tie if we have a frame pointer so that the
7399 allocation is ordered WRT fp setup and subsequent writes
7400 into the frame. */
7401 if (frame_pointer_needed)
7402 riscv_emit_stack_tie ();
7406 static rtx
7407 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
7409 rtx dwarf = NULL_RTX;
7410 rtx adjust_sp_rtx, reg;
7411 unsigned int mask = cfun->machine->frame.mask;
7413 if (mask & S10_MASK)
7414 mask |= S11_MASK;
7416 /* Debug info for adjust sp. */
7417 adjust_sp_rtx
7418 = gen_rtx_SET (stack_pointer_rtx,
7419 plus_constant (Pmode, stack_pointer_rtx, saved_size));
7420 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7422 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7423 if (BITSET_P (mask, regno - GP_REG_FIRST))
7425 reg = gen_rtx_REG (Pmode, regno);
7426 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7429 return dwarf;
7432 static rtx
7433 riscv_adjust_libcall_cfi_epilogue ()
7435 rtx dwarf = NULL_RTX;
7436 rtx adjust_sp_rtx, reg;
7437 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7439 /* Debug info for adjust sp. */
7440 adjust_sp_rtx =
7441 gen_rtx_SET (stack_pointer_rtx,
7442 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
7443 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7444 dwarf);
7446 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7447 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7449 reg = gen_rtx_REG (Pmode, regno);
7450 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7453 return dwarf;
/* Return true if the popretz pattern can be matched:
     set (reg 10 a0) (const_int 0)
     use (reg 10 a0)
     NOTE_INSN_EPILOGUE_BEG */
7460 static rtx_insn *
7461 riscv_zcmp_can_use_popretz (void)
7463 rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
/* Sequence stack for NOTE_INSN_EPILOGUE_BEG. */
7466 struct sequence_stack *outer_seq = get_current_sequence ()->next;
7467 if (!outer_seq)
7468 return NULL;
7469 insn = outer_seq->first;
7470 if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
7471 return NULL;
/* Sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG. */
7474 outer_seq = outer_seq->next;
7475 if (outer_seq)
7476 insn = outer_seq->last;
7478 /* skip notes */
7479 while (insn && NOTE_P (insn))
7481 insn = PREV_INSN (insn);
7483 use = insn;
7485 /* match use (reg 10 a0) */
7486 if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
7487 || !REG_P (XEXP (PATTERN (use), 0))
7488 || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
7489 return NULL;
7491 /* match set (reg 10 a0) (const_int 0 [0]) */
7492 clear = PREV_INSN (use);
7493 if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
7494 && REG_P (SET_DEST (PATTERN (clear)))
7495 && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
7496 && SET_SRC (PATTERN (clear)) == const0_rtx)
7497 return clear;
7499 return NULL;
7502 static void
7503 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
7504 unsigned multipop_size)
7506 rtx insn;
7507 unsigned regs_count = riscv_multi_push_regs_count (mask);
7509 if (!use_multi_pop_normal)
7510 insn = emit_insn (
7511 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
7512 else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
7514 delete_insn (NEXT_INSN (clear_a0_insn));
7515 delete_insn (clear_a0_insn);
7516 insn = emit_jump_insn (
7517 riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
7519 else
7520 insn = emit_jump_insn (
7521 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
7523 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
7524 RTX_FRAME_RELATED_P (insn) = 1;
7525 REG_NOTES (insn) = dwarf;
7528 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
7529 style says which. */
7531 void
7532 riscv_expand_epilogue (int style)
7534 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
7535 deallocate before restoring the registers. STEP2 is the amount we
7536 should deallocate afterwards including the callee saved regs. STEP3
is the amount deallocated by the save-restore libcall.
7539 Start off by assuming that no registers need to be restored. */
7540 struct riscv_frame_info *frame = &cfun->machine->frame;
7541 unsigned mask = frame->mask;
7542 unsigned fmask = frame->fmask;
7543 unsigned mask_fprs_push = 0;
7544 poly_int64 step2 = 0;
7545 bool use_multi_pop_normal
7546 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
7547 bool use_multi_pop_sibcall
7548 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
7549 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
7551 bool use_restore_libcall
7552 = !use_multi_pop
7553 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
7554 unsigned libcall_size = use_restore_libcall && !use_multi_pop
7555 ? frame->save_libcall_adjustment
7556 : 0;
7557 unsigned multipop_size
7558 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
7559 : 0;
7560 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7561 unsigned th_int_mask = 0;
7562 rtx insn;
/* We need to add a memory barrier to prevent reads from the deallocated
   stack. */
7565 bool need_barrier_p = known_ne (get_frame_size ()
7566 + cfun->machine->frame.arg_pointer_offset, 0);
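/* The stack tie emitted by riscv_emit_stack_tie below is an artificial
   dependence, not a real instruction. Roughly (illustrative insns), it
   stops the scheduler from moving a restore such as
       ld s0,8(sp)
   past the
       addi sp,sp,16
   that deallocates the slot; without it the load could read memory below
   SP, which an interrupt or signal handler may clobber at any time. */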
7568 if (cfun->machine->naked_p)
7570 gcc_assert (style == NORMAL_RETURN);
7572 emit_jump_insn (gen_return ());
7574 return;
7577 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
7579 emit_jump_insn (gen_return ());
7580 return;
7583 /* Reset the epilogue cfa info before starting to emit the epilogue. */
7584 epilogue_cfa_sp_offset = 0;
7586 /* Move past any dynamic stack allocations. */
7587 if (cfun->calls_alloca)
7589 /* Emit a barrier to prevent loads from a deallocated stack. */
7590 riscv_emit_stack_tie ();
7591 need_barrier_p = false;
7593 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
7594 rtx adjust = NULL_RTX;
7596 if (!adjust_offset.is_constant ())
7598 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
7599 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
7600 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
7601 gen_int_mode (adjust_offset, Pmode));
7602 adjust = tmp1;
7604 else
7606 if (!SMALL_OPERAND (adjust_offset.to_constant ()))
7608 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
7609 GEN_INT (adjust_offset.to_constant ()));
7610 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7612 else
7613 adjust = GEN_INT (adjust_offset.to_constant ());
7616 insn = emit_insn (
7617 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
7618 adjust));
7620 rtx dwarf = NULL_RTX;
7621 rtx cfa_adjust_value = gen_rtx_PLUS (
7622 Pmode, hard_frame_pointer_rtx,
7623 gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
7624 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
7625 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
7626 RTX_FRAME_RELATED_P (insn) = 1;
7628 REG_NOTES (insn) = dwarf;
7631 if (use_restore_libcall || use_multi_pop)
7632 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7634 /* If we need to restore registers, deallocate as much stack as
7635 possible in the second step without going out of range. */
7636 if (use_multi_pop)
7638 if (frame->fmask
7639 && known_gt (frame->total_size - multipop_size,
7640 frame->frame_pointer_offset))
7641 step2
7642 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
7644 else if ((frame->mask | frame->fmask) != 0)
7645 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
7647 if (use_restore_libcall || use_multi_pop)
7648 frame->mask = mask; /* Undo the above fib. */
7650 poly_int64 step1;
7651 /* STEP1 must be set to the bottom of the vector register save area if any
7652 vector registers need to be preserved. */
7653 if (frame->vmask != 0)
7655 step1 = frame->v_sp_offset_bottom;
7656 step2 = frame->total_size - step1 - libcall_size - multipop_size;
7658 else
7659 step1 = frame->total_size - step2 - libcall_size - multipop_size;
7661 /* Deallocate STEP1 bytes of the frame: SP = SP + STEP1. */
7662 if (known_gt (step1, 0))
7664 /* Emit a barrier to prevent loads from a deallocated stack. */
7665 riscv_emit_stack_tie ();
7666 need_barrier_p = false;
7668 /* Restore the scalable part of the frame allocated in the prologue. */
7669 if (!step1.is_constant ())
7671 poly_int64 scalable_frame = step1;
7672 scalable_frame.coeffs[0] = step1.coeffs[1];
7673 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
7674 true);
7675 step1 -= scalable_frame;
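/* Illustrative poly_int64 arithmetic with invented coefficients: if
   STEP1 = [48, 32], i.e. 48 + 32 * x1 bytes, the scalable part above is
   [32, 32] and the constant remainder handled below is 16. */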
7678 /* Get an rtx for STEP1 that we can add to BASE.
7679 Skip if the adjustment is zero. */
7680 if (step1.to_constant () != 0)
7682 rtx adjust = GEN_INT (step1.to_constant ());
7683 if (!SMALL_OPERAND (step1.to_constant ()))
7685 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
7686 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7689 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7690 stack_pointer_rtx,
7691 adjust));
7692 rtx dwarf = NULL_RTX;
7693 rtx cfa_adjust_rtx
7694 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7695 gen_int_mode (step2 + libcall_size + multipop_size,
7696 Pmode));
7698 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7699 RTX_FRAME_RELATED_P (insn) = 1;
7701 REG_NOTES (insn) = dwarf;
7704 else if (frame_pointer_needed)
7706 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
7707 the old value of the FP. */
7708 epilogue_cfa_sp_offset = step2;
7711 if (use_multi_pop)
7713 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7714 if (fmask)
7716 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
7717 / UNITS_PER_WORD);
7718 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push */
7721 else if (use_restore_libcall)
7722 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7724 th_int_mask = th_int_get_mask (frame->mask);
7725 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7727 frame->mask &= ~th_int_mask;
7729 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
7730 interrupts, such as fcsr. */
7731 if ((TARGET_HARD_FLOAT && frame->fmask)
7732 || (TARGET_ZFINX && frame->mask))
7733 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7736 /* Restore the registers. */
7737 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
7738 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
7739 - multipop_size,
7740 riscv_restore_reg, true, style == EXCEPTION_RETURN);
7742 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7744 frame->mask = mask; /* Undo the above fib. */
7745 unsigned save_adjustment = th_int_get_save_adjustment ();
7746 gcc_assert (step2.to_constant () >= save_adjustment);
7747 step2 -= save_adjustment;
7750 if (use_restore_libcall)
7751 frame->mask = mask; /* Undo the above fib. */
7753 if (need_barrier_p)
7754 riscv_emit_stack_tie ();
7756 /* Deallocate the final bit of the frame. */
7757 if (step2.to_constant () > 0)
7759 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7760 GEN_INT (step2.to_constant ())));
7762 rtx dwarf = NULL_RTX;
7763 rtx cfa_adjust_rtx
7764 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7765 GEN_INT (libcall_size + multipop_size));
7766 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7767 RTX_FRAME_RELATED_P (insn) = 1;
7769 REG_NOTES (insn) = dwarf;
7772 if (use_multi_pop)
7774 /* Restore the FPRs pushed by cm.push. */
7775 frame->fmask = fmask & mask_fprs_push;
7776 if (frame->fmask)
7777 riscv_for_each_saved_reg (frame->total_size - libcall_size
7778 - multipop_size,
7779 riscv_restore_reg, true,
7780 style == EXCEPTION_RETURN);
7781 /* Undo the above fib. */
7782 frame->mask = mask;
7783 frame->fmask = fmask;
7784 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
7785 multipop_size);
7786 if (use_multi_pop_normal)
7787 return;
7789 else if (use_restore_libcall)
7791 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
7792 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
7793 RTX_FRAME_RELATED_P (insn) = 1;
7794 REG_NOTES (insn) = dwarf;
7796 emit_jump_insn (gen_gpr_restore_return (ra));
7797 return;
7800 /* Add in the __builtin_eh_return stack adjustment. */
7801 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
7802 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7803 EH_RETURN_STACKADJ_RTX));
7805 /* Return from interrupt. */
7806 if (cfun->machine->interrupt_handler_p)
7808 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
7810 gcc_assert (mode != UNKNOWN_MODE);
7812 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7813 emit_jump_insn (gen_th_int_pop ());
7814 else if (mode == MACHINE_MODE)
7815 emit_jump_insn (gen_riscv_mret ());
7816 else if (mode == SUPERVISOR_MODE)
7817 emit_jump_insn (gen_riscv_sret ());
7818 else
7819 emit_jump_insn (gen_riscv_uret ());
7821 else if (style != SIBCALL_RETURN)
7822 emit_jump_insn (gen_simple_return_internal (ra));
7825 /* Implement EPILOGUE_USES. */
7827 bool
7828 riscv_epilogue_uses (unsigned int regno)
7830 if (regno == RETURN_ADDR_REGNUM)
7831 return true;
7833 if (epilogue_completed && cfun->machine->interrupt_handler_p)
7835 /* An interrupt function restores temp regs, so we must indicate that
7836 they are live at function end. */
7837 if (df_regs_ever_live_p (regno)
7838 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7839 return true;
7842 return false;
7845 static bool
7846 riscv_avoid_shrink_wrapping_separate ()
7848 if (riscv_use_save_libcall (&cfun->machine->frame)
7849 || cfun->machine->interrupt_handler_p
7850 || !cfun->machine->frame.gp_sp_offset.is_constant ())
7851 return true;
7853 return false;
7856 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
7858 static sbitmap
7859 riscv_get_separate_components (void)
7861 HOST_WIDE_INT offset;
7862 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7863 bitmap_clear (components);
7865 if (riscv_avoid_shrink_wrapping_separate ())
7866 return components;
7868 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
7869 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7870 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7872 /* We can only wrap registers that have small operand offsets.
7873 For large offsets a pseudo register might be needed which
7874 cannot be created during the shrink wrapping pass. */
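/* For example, a register saved at offset 2040 fits the signed 12-bit
   immediate of ld/sd and can be wrapped separately, while one at offset
   2048 cannot and is left to the normal prologue/epilogue. */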
7875 if (SMALL_OPERAND (offset))
7876 bitmap_set_bit (components, regno);
7878 offset -= UNITS_PER_WORD;
7881 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
7882 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7883 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7885 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7887 /* We can only wrap registers that have small operand offsets.
7888 For large offsets a pseudo register might be needed which
7889 cannot be created during the shrink wrapping pass. */
7890 if (SMALL_OPERAND (offset))
7891 bitmap_set_bit (components, regno);
7893 offset -= GET_MODE_SIZE (mode).to_constant ();
7896 /* Don't mess with the hard frame pointer. */
7897 if (frame_pointer_needed)
7898 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
7900 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
7902 return components;
7905 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
7907 static sbitmap
7908 riscv_components_for_bb (basic_block bb)
7910 bitmap in = DF_LIVE_IN (bb);
7911 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
7912 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
7914 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7915 bitmap_clear (components);
7917 function_abi_aggregator callee_abis;
7918 rtx_insn *insn;
7919 FOR_BB_INSNS (bb, insn)
7920 if (CALL_P (insn))
7921 callee_abis.note_callee_abi (insn_callee_abi (insn));
7922 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
7924 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
7925 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7926 if (!fixed_regs[regno]
7927 && !crtl->abi->clobbers_full_reg_p (regno)
7928 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
7929 || bitmap_bit_p (in, regno)
7930 || bitmap_bit_p (gen, regno)
7931 || bitmap_bit_p (kill, regno)))
7932 bitmap_set_bit (components, regno);
7934 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7935 if (!fixed_regs[regno]
7936 && !crtl->abi->clobbers_full_reg_p (regno)
7937 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
7938 || bitmap_bit_p (in, regno)
7939 || bitmap_bit_p (gen, regno)
7940 || bitmap_bit_p (kill, regno)))
7941 bitmap_set_bit (components, regno);
7943 return components;
7946 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
7948 static void
7949 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
7951 /* Nothing to do for riscv. */
7954 static void
7955 riscv_process_components (sbitmap components, bool prologue_p)
7957 HOST_WIDE_INT offset;
7958 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
7960 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
7961 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7962 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7964 if (bitmap_bit_p (components, regno))
7965 riscv_save_restore_reg (word_mode, regno, offset, fn);
7967 offset -= UNITS_PER_WORD;
7970 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
7971 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7972 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7974 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7976 if (bitmap_bit_p (components, regno))
7977 riscv_save_restore_reg (mode, regno, offset, fn);
7979 offset -= GET_MODE_SIZE (mode).to_constant ();
7983 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
7985 static void
7986 riscv_emit_prologue_components (sbitmap components)
7988 riscv_process_components (components, true);
7991 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
7993 static void
7994 riscv_emit_epilogue_components (sbitmap components)
7996 riscv_process_components (components, false);
7999 static void
8000 riscv_set_handled_components (sbitmap components)
8002 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8003 if (bitmap_bit_p (components, regno))
8004 cfun->machine->reg_is_wrapped_separately[regno] = true;
8006 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8007 if (bitmap_bit_p (components, regno))
8008 cfun->machine->reg_is_wrapped_separately[regno] = true;
8011 /* Return nonzero if this function is known to have a null epilogue.
8012 This allows the optimizer to omit jumps to jumps if no stack
8013 was created. */
8015 bool
8016 riscv_can_use_return_insn (void)
8018 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
8019 && ! cfun->machine->interrupt_handler_p);
8022 /* Given that there exists at least one variable that is set (produced)
8023 by OUT_INSN and read (consumed) by IN_INSN, return true iff
8024 IN_INSN represents one or more memory store operations and none of
8025 the variables set by OUT_INSN is used by IN_INSN as the address of a
8026 store operation. If either IN_INSN or OUT_INSN does not represent
8027 a "single" RTL SET expression (as loosely defined by the
8028 implementation of the single_set function) or a PARALLEL with only
8029 SETs, CLOBBERs, and USEs inside, this function returns false.
8031 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
8032 conditions that result in assertion failures in the generic
8033 store_data_bypass_p function and returns FALSE in such cases.
8035 This is required to make -msave-restore work with the sifive-7
8036 pipeline description. */
8038 bool
8039 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
8041 rtx out_set, in_set;
8042 rtx out_pat, in_pat;
8043 rtx out_exp, in_exp;
8044 int i, j;
8046 in_set = single_set (in_insn);
8047 if (in_set)
8049 if (MEM_P (SET_DEST (in_set)))
8051 out_set = single_set (out_insn);
8052 if (!out_set)
8054 out_pat = PATTERN (out_insn);
8055 if (GET_CODE (out_pat) == PARALLEL)
8057 for (i = 0; i < XVECLEN (out_pat, 0); i++)
8059 out_exp = XVECEXP (out_pat, 0, i);
8060 if ((GET_CODE (out_exp) == CLOBBER)
8061 || (GET_CODE (out_exp) == USE))
8062 continue;
8063 else if (GET_CODE (out_exp) != SET)
8064 return false;
8070 else
8072 in_pat = PATTERN (in_insn);
8073 if (GET_CODE (in_pat) != PARALLEL)
8074 return false;
8076 for (i = 0; i < XVECLEN (in_pat, 0); i++)
8078 in_exp = XVECEXP (in_pat, 0, i);
8079 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
8080 continue;
8081 else if (GET_CODE (in_exp) != SET)
8082 return false;
8084 if (MEM_P (SET_DEST (in_exp)))
8086 out_set = single_set (out_insn);
8087 if (!out_set)
8089 out_pat = PATTERN (out_insn);
8090 if (GET_CODE (out_pat) != PARALLEL)
8091 return false;
8092 for (j = 0; j < XVECLEN (out_pat, 0); j++)
8094 out_exp = XVECEXP (out_pat, 0, j);
8095 if ((GET_CODE (out_exp) == CLOBBER)
8096 || (GET_CODE (out_exp) == USE))
8097 continue;
8098 else if (GET_CODE (out_exp) != SET)
8099 return false;
8106 return store_data_bypass_p (out_insn, in_insn);
8109 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
8111 When floating-point registers are wider than integer ones, moves between
8112 them must go through memory. */
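/* E.g. (illustrative) on RV32D: a DFmode move between a pair of 32-bit
   GPRs and a 64-bit FPR has no single instruction and bounces through a
   stack slot; XTheadFmv and Zfa provide direct moves, hence the checks
   below. */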
8114 static bool
8115 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
8116 reg_class_t class2)
8118 return (!riscv_v_ext_mode_p (mode)
8119 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
8120 && (class1 == FP_REGS) != (class2 == FP_REGS)
8121 && !TARGET_XTHEADFMV
8122 && !TARGET_ZFA);
8125 /* Implement TARGET_REGISTER_MOVE_COST. */
8127 static int
8128 riscv_register_move_cost (machine_mode mode,
8129 reg_class_t from, reg_class_t to)
8131 if ((from == FP_REGS && to == GR_REGS) ||
8132 (from == GR_REGS && to == FP_REGS))
8133 return tune_param->fmv_cost;
8135 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
8138 /* Implement TARGET_HARD_REGNO_NREGS. */
8140 static unsigned int
8141 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
8143 if (riscv_v_ext_vector_mode_p (mode))
8145 /* Handle fractional LMUL: it occupies only part of a vector register but
8146 still needs one whole vector register to hold it. */
8147 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
8148 return 1;
8150 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
8153 /* For tuple modes, the number of registers = NF * LMUL. */
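/* E.g. (illustrative): a tuple mode with NF = 3 whose subparts each
   occupy LMUL = 2 registers needs 3 * 2 = 6 vector registers. */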
8154 if (riscv_v_ext_tuple_mode_p (mode))
8156 unsigned int nf = riscv_vector::get_nf (mode);
8157 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
8158 poly_int64 size = GET_MODE_SIZE (subpart_mode);
8159 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
8160 if (maybe_lt (size, UNITS_PER_V_REG))
8161 return nf;
8162 else
8164 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
8165 return nf * lmul;
8169 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
8170 if (riscv_v_ext_vls_mode_p (mode))
8172 int size = GET_MODE_BITSIZE (mode).to_constant ();
8173 if (size < TARGET_MIN_VLEN)
8174 return 1;
8175 else
8176 return size / TARGET_MIN_VLEN;
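/* E.g. (illustrative): with TARGET_MIN_VLEN = 128, a 512-bit VLS mode
   occupies 512 / 128 = 4 registers, while a 64-bit VLS mode still
   occupies one whole register. */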
8179 /* Modes for VL or VTYPE are just markers and do not hold values,
8180 so each always consumes one register. */
8181 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8182 || FRM_REG_P (regno))
8183 return 1;
8185 /* Assume every valid non-vector mode fits in one vector register. */
8186 if (V_REG_P (regno))
8187 return 1;
8189 if (FP_REG_P (regno))
8190 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
8192 /* All other registers are word-sized. */
8193 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8196 /* Implement TARGET_HARD_REGNO_MODE_OK. */
8198 static bool
8199 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8201 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
8203 if (GP_REG_P (regno))
8205 if (riscv_v_ext_mode_p (mode))
8206 return false;
8208 if (!GP_REG_P (regno + nregs - 1))
8209 return false;
8211 else if (FP_REG_P (regno))
8213 if (riscv_v_ext_mode_p (mode))
8214 return false;
8216 if (!FP_REG_P (regno + nregs - 1))
8217 return false;
8219 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8220 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8221 return false;
8223 /* Only use callee-saved registers if a potential callee is guaranteed
8224 to spill the requisite width. */
8225 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8226 || (!call_used_or_fixed_reg_p (regno)
8227 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8228 return false;
8230 else if (V_REG_P (regno))
8232 if (!riscv_v_ext_mode_p (mode))
8233 return false;
8235 if (!V_REG_P (regno + nregs - 1))
8236 return false;
8238 int regno_alignment = riscv_get_v_regno_alignment (mode);
8239 if (regno_alignment != 1)
8240 return ((regno % regno_alignment) == 0);
8242 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8243 || FRM_REG_P (regno))
8244 return true;
8245 else
8246 return false;
8248 /* Require same callee-savedness for all registers. */
8249 for (unsigned i = 1; i < nregs; i++)
8250 if (call_used_or_fixed_reg_p (regno)
8251 != call_used_or_fixed_reg_p (regno + i))
8252 return false;
8254 /* Only use even registers in RV32 ZDINX. */
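/* E.g. (illustrative): on RV32 with Zdinx a DFmode value lives in an
   even/odd GPR pair such as x10/x11, so odd-numbered base registers are
   rejected below. */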
8255 if (!TARGET_64BIT && TARGET_ZDINX){
8256 if (GET_MODE_CLASS (mode) == MODE_FLOAT &&
8257 GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
8258 return !(regno & 1);
8261 return true;
8264 /* Implement TARGET_MODES_TIEABLE_P.
8266 Don't allow floating-point modes to be tied, since type punning of
8267 single-precision and double-precision is implementation defined. */
8269 static bool
8270 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
8272 /* We don't allow modes from different register classes to be tied,
8273 since that causes an ICE in register allocation (RA).
8274 E.g. V2SI and DI are not tieable. */
8275 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
8276 return false;
8277 return (mode1 == mode2
8278 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
8279 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
8282 /* Implement CLASS_MAX_NREGS. */
8284 static unsigned char
8285 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
8287 if (reg_class_subset_p (rclass, FP_REGS))
8288 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
8290 if (reg_class_subset_p (rclass, GR_REGS))
8291 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
8293 if (reg_class_subset_p (rclass, V_REGS))
8294 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
8296 return 0;
8299 /* Implement TARGET_MEMORY_MOVE_COST. */
8301 static int
8302 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
8304 return (tune_param->memory_cost
8305 + memory_move_secondary_cost (mode, rclass, in));
8308 /* Return the number of instructions that can be issued per cycle. */
8310 static int
8311 riscv_issue_rate (void)
8313 return tune_param->issue_rate;
8316 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
8317 static int
8318 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
8320 if (DEBUG_INSN_P (insn))
8321 return more;
8323 rtx_code code = GET_CODE (PATTERN (insn));
8324 if (code == USE || code == CLOBBER)
8325 return more;
8327 /* GHOST insns are used for blockage and similar cases which
8328 effectively end a cycle. */
8329 if (get_attr_type (insn) == TYPE_GHOST)
8330 return 0;
8332 /* If we ever encounter an insn with an unknown type, trip
8333 an assert so we can find and fix this problem. */
8334 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
8336 /* If we ever encounter an insn without an insn reservation, trip
8337 an assert so we can find and fix this problem. */
8338 gcc_assert (insn_has_dfa_reservation_p (insn));
8340 return more - 1;
8343 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
8344 instruction fusion of some sort. */
8346 static bool
8347 riscv_macro_fusion_p (void)
8349 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
8352 /* Return true iff the instruction fusion described by OP is enabled. */
8354 static bool
8355 riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
8357 return tune_param->fusible_ops & op;
8360 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
8361 should be kept together during scheduling. */
8363 static bool
8364 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
8366 rtx prev_set = single_set (prev);
8367 rtx curr_set = single_set (curr);
8368 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
8369 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
8371 if (!riscv_macro_fusion_p ())
8372 return false;
8374 if (simple_sets_p
8375 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
8376 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
8378 /* We are trying to match the following:
8379 prev (slli) == (set (reg:DI rD)
8380 (ashift:DI (reg:DI rS) (const_int 32)))
8381 curr (srli) == (set (reg:DI rD)
8382 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
8383 with <shift> being either 32 for FUSE_ZEXTW, or
8384 less than 32 for FUSE_ZEXTWS. */
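/* In assembly (illustrative) the FUSE_ZEXTW case is
       slli rD,rS,32
       srli rD,rD,32
   which leaves rD equal to the zero-extended low 32 bits of rS. */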
8386 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8387 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8388 && REG_P (SET_DEST (prev_set))
8389 && REG_P (SET_DEST (curr_set))
8390 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8391 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8392 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8393 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8394 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
8395 && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
8396 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
8397 || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
8398 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
8399 return true;
8402 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
8404 /* We are trying to match the following:
8405 prev (slli) == (set (reg:DI rD)
8406 (ashift:DI (reg:DI rS) (const_int 48)))
8407 curr (srli) == (set (reg:DI rD)
8408 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
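/* In assembly (illustrative):
       slli rD,rS,48
       srli rD,rD,48
   which leaves rD equal to the zero-extended low 16 bits of rS. */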
8410 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8411 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8412 && REG_P (SET_DEST (prev_set))
8413 && REG_P (SET_DEST (curr_set))
8414 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8415 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8416 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8417 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8418 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
8419 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
8420 return true;
8423 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
8425 /* We are trying to match the following:
8426 prev (add) == (set (reg:DI rD)
8427 (plus:DI (reg:DI rS1) (reg:DI rS2)))
8428 curr (ld) == (set (reg:DI rD)
8429 (mem:DI (reg:DI rD))) */
8431 if (MEM_P (SET_SRC (curr_set))
8432 && REG_P (XEXP (SET_SRC (curr_set), 0))
8433 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8434 && GET_CODE (SET_SRC (prev_set)) == PLUS
8435 && REG_P (XEXP (SET_SRC (prev_set), 0))
8436 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8437 return true;
8439 /* We are trying to match the following:
8440 prev (add) == (set (reg:DI rD)
8441 (plus:DI (reg:DI rS1) (reg:DI rS2)))
8442 curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
8444 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8445 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
8446 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8447 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
8448 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
8449 && GET_CODE (SET_SRC (prev_set)) == PLUS
8450 && REG_P (XEXP (SET_SRC (prev_set), 0))
8451 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8452 return true;
8455 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
8457 /* We are trying to match the following:
8458 prev (add) == (set (reg:DI rS)
8459 (plus:DI (reg:DI rS) (const_int)))
8460 curr (ld) == (set (reg:DI rD)
8461 (mem:DI (reg:DI rS))) */
8463 if (MEM_P (SET_SRC (curr_set))
8464 && REG_P (XEXP (SET_SRC (curr_set), 0))
8465 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8466 && GET_CODE (SET_SRC (prev_set)) == PLUS
8467 && REG_P (XEXP (SET_SRC (prev_set), 0))
8468 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
8469 return true;
8472 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
8474 /* We are trying to match the following:
8475 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8476 curr (addi) == (set (reg:DI rD)
8477 (plus:DI (reg:DI rD) (const_int IMM12))) */
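/* In assembly (illustrative) this is the usual two-instruction
   materialization of a 32-bit constant or address:
       lui  rD,%hi(sym)
       addi rD,rD,%lo(sym)  */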
8479 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
8480 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8481 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8482 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
8483 && (GET_CODE (SET_SRC (prev_set)) == HIGH
8484 || (CONST_INT_P (SET_SRC (prev_set))
8485 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
8486 return true;
8489 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
8491 /* We are trying to match the following:
8492 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8493 curr (addi) == (set (reg:DI rD)
8494 (plus:DI (reg:DI rD) (const_int IMM12)))
8496 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8497 curr (addi) == (set (reg:DI rD)
8498 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
8500 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8501 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8502 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
8503 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8504 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
8506 return true;
8509 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
8511 /* We are trying to match the following:
8512 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8513 curr (ld) == (set (reg:DI rD)
8514 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
8516 if (CONST_INT_P (SET_SRC (prev_set))
8517 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
8518 && MEM_P (SET_SRC (curr_set))
8519 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8520 return true;
8522 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8523 && MEM_P (SET_SRC (curr_set))
8524 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
8525 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
8526 return true;
8528 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8529 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8530 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
8531 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8532 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
8533 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
8534 return true;
8537 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
8539 /* We are trying to match the following:
8540 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8541 curr (ld) == (set (reg:DI rD)
8542 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
8544 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8545 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8546 && MEM_P (SET_SRC (curr_set))
8547 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8548 return true;
8551 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
8553 /* We are trying to match the following:
8554 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8555 (reg rS1))
8556 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8557 (reg rS2)) */
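/* In assembly (illustrative), two stores covering the two halves of one
   16-byte-aligned block:
       sd s1,16(sp)
       sd s2,24(sp)  */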
8559 if (MEM_P (SET_DEST (prev_set))
8560 && MEM_P (SET_DEST (curr_set))
8561 /* We can probably relax this condition. The documentation is a bit
8562 unclear about sub-word cases. So we just model DImode for now. */
8563 && GET_MODE (SET_DEST (curr_set)) == DImode
8564 && GET_MODE (SET_DEST (prev_set)) == DImode)
8566 rtx base_prev, base_curr, offset_prev, offset_curr;
8568 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
8569 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
8571 /* The two stores must be contained within opposite halves of the same
8572 16 byte aligned block of memory. We know that the stack pointer and
8573 the frame pointer have suitable alignment. So we just need to check
8574 the offsets of the two stores for suitable alignment.
8576 Originally the thought was to check MEM_ALIGN, but that was reporting
8577 incorrect alignments, even for SP/FP accesses, so we gave up on that
8578 approach. */
8579 if (base_prev != NULL_RTX
8580 && base_curr != NULL_RTX
8581 && REG_P (base_prev)
8582 && REG_P (base_curr)
8583 && REGNO (base_prev) == REGNO (base_curr)
8584 && (REGNO (base_prev) == STACK_POINTER_REGNUM
8585 || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
8586 && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
8587 && (INTVAL (offset_prev) % 16) == 0)
8588 || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
8589 && (INTVAL (offset_curr) % 16) == 0)))
8590 return true;
8594 return false;
8597 /* Adjust the cost/latency of instructions for scheduling.
8598 For now this is just used to change the latency of vector instructions
8599 according to their LMUL. We assume that an insn with LMUL == 8 requires
8600 eight times more execution cycles than the same insn with LMUL == 1.
8601 As this may cause very high latencies which lead to scheduling artifacts
8602 we currently only perform the adjustment when -madjust-lmul-cost is given. */
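/* E.g. (illustrative): with -madjust-lmul-cost an LMUL = 4 insn with a
   base latency of 3 is modeled with latency 3 * 4 = 12; fractional
   LMULs scale the latency down, but never below 1 for a nonzero base. */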
8604 static int
8605 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
8606 unsigned int)
8608 /* Only do adjustments for the generic out-of-order scheduling model. */
8609 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
8610 return cost;
8612 if (recog_memoized (insn) < 0)
8613 return cost;
8615 enum attr_type type = get_attr_type (insn);
8617 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
8619 /* TODO: For ordered reductions scale the base cost relative to the
8620 number of units. */
8624 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
8625 if (!TARGET_ADJUST_LMUL_COST)
8626 return cost;
8628 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
8629 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
8630 return cost;
8632 enum riscv_vector::vlmul_type lmul =
8633 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
8635 double factor = 1;
8636 switch (lmul)
8638 case riscv_vector::LMUL_2:
8639 factor = 2;
8640 break;
8641 case riscv_vector::LMUL_4:
8642 factor = 4;
8643 break;
8644 case riscv_vector::LMUL_8:
8645 factor = 8;
8646 break;
8647 case riscv_vector::LMUL_F2:
8648 factor = 0.5;
8649 break;
8650 case riscv_vector::LMUL_F4:
8651 factor = 0.25;
8652 break;
8653 case riscv_vector::LMUL_F8:
8654 factor = 0.125;
8655 break;
8656 default:
8657 factor = 1;
8660 /* If the latency was nonzero, keep it that way. */
8661 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
8663 return new_cost;
8666 /* Auxiliary function to emit RISC-V ELF attribute. */
8667 static void
8668 riscv_emit_attribute ()
8670 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
8671 riscv_arch_str ().c_str ());
8673 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
8674 TARGET_STRICT_ALIGN ? 0 : 1);
8676 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
8677 riscv_stack_boundary / 8);
8680 /* Output .variant_cc for function symbol which follows vector calling
8681 convention. */
8683 static void
8684 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
8686 if (TREE_CODE (decl) == FUNCTION_DECL)
8688 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
8689 if (cc == RISCV_CC_V)
8691 fprintf (stream, "\t.variant_cc\t");
8692 assemble_name (stream, name);
8693 fprintf (stream, "\n");
8698 /* Implement ASM_DECLARE_FUNCTION_NAME. */
8700 void
8701 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
8703 riscv_asm_output_variant_cc (stream, fndecl, name);
8704 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
8705 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
8706 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8708 fprintf (stream, "\t.option push\n");
8710 std::string *target_name = riscv_func_target_get (fndecl);
8711 std::string isa = target_name != NULL
8712 ? *target_name
8713 : riscv_cmdline_subset_list ()->to_string (true);
8714 fprintf (stream, "\t.option arch, %s\n", isa.c_str ());
8715 riscv_func_target_remove_and_destory (fndecl);
8717 struct cl_target_option *local_cl_target =
8718 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
8719 struct cl_target_option *global_cl_target =
8720 TREE_TARGET_OPTION (target_option_default_node);
8721 const char *local_tune_str = get_tune_str (local_cl_target);
8722 const char *global_tune_str = get_tune_str (global_cl_target);
8723 if (strcmp (local_tune_str, global_tune_str) != 0)
8724 fprintf (stream, "\t# tune = %s\n", local_tune_str);
8728 void
8729 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
8731 if (!flag_inhibit_size_directive)
8732 ASM_OUTPUT_MEASURED_SIZE (stream, name);
8734 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8736 fprintf (stream, "\t.option pop\n");
8740 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
8742 void
8743 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
8745 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8746 const char *value = IDENTIFIER_POINTER (target);
8747 riscv_asm_output_variant_cc (stream, decl, name);
8748 ASM_OUTPUT_DEF (stream, name, value);
8751 /* Implement ASM_OUTPUT_EXTERNAL. */
8753 void
8754 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
8756 default_elf_asm_output_external (stream, decl, name);
8757 riscv_asm_output_variant_cc (stream, decl, name);
8760 /* Implement TARGET_ASM_FILE_START. */
8762 static void
8763 riscv_file_start (void)
8765 default_file_start ();
8767 /* Instruct GAS to generate position-[in]dependent code. */
8768 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
8770 /* If the user specifies "-mno-relax" on the command line then disable linker
8771 relaxation in the assembler. */
8772 if (! riscv_mrelax)
8773 fprintf (asm_out_file, "\t.option norelax\n");
8775 /* If the user specifies "-mcsr-check" on the command line then enable csr
8776 check in the assembler. */
8777 if (riscv_mcsr_check)
8778 fprintf (asm_out_file, "\t.option csr-check\n");
8780 if (riscv_emit_attribute_p)
8781 riscv_emit_attribute ();
8784 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
8785 in order to avoid duplicating too much logic from elsewhere. */
8787 static void
8788 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8789 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8790 tree function)
8792 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8793 rtx this_rtx, temp1, temp2, fnaddr;
8794 rtx_insn *insn;
8796 riscv_in_thunk_func = true;
8798 /* Pretend to be a post-reload pass while generating rtl. */
8799 reload_completed = 1;
8801 /* Mark the end of the (empty) prologue. */
8802 emit_note (NOTE_INSN_PROLOGUE_END);
8804 /* Determine if we can use a sibcall to call FUNCTION directly. */
8805 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
8807 /* We need two temporary registers in some cases. */
8808 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
8809 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
8811 /* Find out which register contains the "this" pointer. */
8812 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8813 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
8814 else
8815 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
8817 /* Add DELTA to THIS_RTX. */
8818 if (delta != 0)
8820 rtx offset = GEN_INT (delta);
8821 if (!SMALL_OPERAND (delta))
8823 riscv_emit_move (temp1, offset);
8824 offset = temp1;
8826 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
8829 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
8830 if (vcall_offset != 0)
8832 rtx addr;
8834 /* Set TEMP1 to *THIS_RTX. */
8835 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
8837 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
8838 addr = riscv_add_offset (temp2, temp1, vcall_offset);
8840 /* Load the offset and add it to THIS_RTX. */
8841 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
8842 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
8845 /* Jump to the target function. */
8846 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
8847 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
8848 SIBLING_CALL_P (insn) = 1;
8850 /* Run just enough of rest_of_compilation. This sequence was
8851 "borrowed" from alpha.cc. */
8852 insn = get_insns ();
8853 split_all_insns_noflow ();
8854 shorten_branches (insn);
8855 assemble_start_function (thunk_fndecl, fnname);
8856 final_start_function (insn, file, 1);
8857 final (insn, file, 1);
8858 final_end_function ();
8859 assemble_end_function (thunk_fndecl, fnname);
8861 /* Clean up the vars set above. Note that final_end_function resets
8862 the global pointer for us. */
8863 reload_completed = 0;
8864 riscv_in_thunk_func = false;
8867 /* Allocate a chunk of memory for per-function machine-dependent data. */
8869 static struct machine_function *
8870 riscv_init_machine_status (void)
8872 return ggc_cleared_alloc<machine_function> ();
8875 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
8876 TODO: So far we only support the length-agnostic value. */
8877 static poly_uint16
8878 riscv_convert_vector_chunks (struct gcc_options *opts)
8880 int chunk_num;
8881 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
8882 if (min_vlen > 32)
8884 /* When targeting a minimum VLEN > 32, we should use a 64-bit chunk size.
8885 Otherwise we cannot include SEW = 64 bits.
8886 Runtime invariant: the single indeterminate represents the
8887 number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
8888 Thus the number of bytes in a vector is 8 + 8 * x1, which is
8889 riscv_vector_chunks * 8 = poly_int (8, 8). */
8890 riscv_bytes_per_vector_chunk = 8;
8891 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
8892 - TARGET_MIN_VLEN = 64bit: [8,8]
8893 - TARGET_MIN_VLEN = 128bit: [16,16]
8894 - TARGET_MIN_VLEN = 256bit: [32,32]
8895 - TARGET_MIN_VLEN = 512bit: [64,64]
8896 - TARGET_MIN_VLEN = 1024bit: [128,128]
8897 - TARGET_MIN_VLEN = 2048bit: [256,256]
8898 - TARGET_MIN_VLEN = 4096bit: [512,512]
8899 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
8900 chunk_num = min_vlen / 64;
8902 else
8904 /* When targeting a minimum VLEN = 32, we should use a 32-bit
8905 chunk size. Runtime invariant: the single indeterminate represents the
8906 number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
8907 Thus the number of bytes in a vector is 4 + 4 * x1, which is
8908 riscv_vector_chunks * 4 = poly_int (4, 4). */
8909 riscv_bytes_per_vector_chunk = 4;
8910 chunk_num = 1;
8913 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR
8914 is enabled. Set riscv_vector_chunks as 1 compile-time constant if
8915 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
8916 to set RVV mode size. The RVV machine modes size are run-time constant if
8917 TARGET_VECTOR is enabled. The RVV machine modes size remains default
8918 compile-time constant if TARGET_VECTOR is disabled. */
8919 if (TARGET_VECTOR_OPTS_P (opts))
8921 switch (opts->x_rvv_vector_bits)
8923 case RVV_VECTOR_BITS_SCALABLE:
8924 return poly_uint16 (chunk_num, chunk_num);
8925 case RVV_VECTOR_BITS_ZVL:
8926 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
8927 default:
8928 gcc_unreachable ();
8931 else
8932 return 1;
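/* Worked example for riscv_convert_vector_chunks (illustrative):
   min_vlen = 256 gives chunk_num = 256 / 64 = 4, so a scalable build
   returns poly_uint16 (4, 4) chunks of 8 bytes each, i.e.
   BYTES_PER_RISCV_VECTOR = [32, 32]; with -mrvv-vector-bits=zvl the
   result is the fixed value 256 / (8 * 8) = 4 chunks. */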
8935 /* 'Unpack' the internal tuning structs and update the options
8936 in OPTS. The caller must have set up selected_tune and selected_arch
8937 as all the other target-specific codegen decisions are
8938 derived from them. */
8939 void
8940 riscv_override_options_internal (struct gcc_options *opts)
8942 const struct riscv_tune_info *cpu;
8944 /* The presence of the M extension implies that division instructions
8945 are present, so include them unless explicitly disabled. */
8946 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
8947 opts->x_target_flags |= MASK_DIV;
8948 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
8949 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
8951 /* Likewise floating-point division and square root. */
8952 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
8953 && ((target_flags_explicit & MASK_FDIV) == 0))
8954 opts->x_target_flags |= MASK_FDIV;
8956 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
8957 if both -mtune and -mcpu are not given. */
8958 const char *tune_string = get_tune_str (opts);
8959 cpu = riscv_parse_tune (tune_string, false);
8960 riscv_microarchitecture = cpu->microarchitecture;
8961 tune_param = opts->x_optimize_size
8962 ? &optimize_size_tune_info
8963 : cpu->tune_param;
8965 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
8966 for size. For architectures that trap and emulate unaligned accesses,
8967 the performance cost is too great, even for -Os. Similarly, if
8968 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
8969 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
8970 || TARGET_STRICT_ALIGN);
8972 /* Make a note if the user explicitly passed -mstrict-align for later
8973 builtin macro generation. Can't use target_flags_explicit since
8974 it is set even for -mno-strict-align. */
8975 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
8977 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
8978 && cpu->tune_param->slow_unaligned_access)
8979 opts->x_target_flags |= MASK_STRICT_ALIGN;
8981 /* If the user hasn't specified a branch cost, use the processor's
8982 default. */
8983 if (opts->x_riscv_branch_cost == 0)
8984 opts->x_riscv_branch_cost = tune_param->branch_cost;
8986 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
8987 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
8989 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
8990 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
8991 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
8992 "'V' Extension");
8994 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
8995 big-endian after finishing full coverage testing. */
8996 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
8997 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
8999 /* Convert -march and -mrvv-vector-bits to a chunks count. */
9000 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
9003 /* Implement TARGET_OPTION_OVERRIDE. */
9005 static void
9006 riscv_option_override (void)
9008 #ifdef SUBTARGET_OVERRIDE_OPTIONS
9009 SUBTARGET_OVERRIDE_OPTIONS;
9010 #endif
9012 flag_pcc_struct_return = 0;
9014 if (flag_pic)
9015 g_switch_value = 0;
9017 /* Always prefer medlow over medany for RV32, since medlow can access
9018 the full 32-bit address space. */
9019 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
9020 riscv_cmodel = CM_MEDLOW;
9022 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
9023 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
9025 if (riscv_cmodel == CM_LARGE && flag_pic)
9026 sorry ("code model %qs with %qs", "large",
9027 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
9029 if (flag_pic)
9030 riscv_cmodel = CM_PIC;
9032 /* With -fno-omit-frame-pointer combined with -momit-leaf-frame-pointer we
9033 need to save fp and ra for non-leaf functions, and neither for leaf
9034 functions. x_flag_omit_frame_pointer takes priority in deciding
9035 whether the frame pointer is needed, so if we do not override it,
9036 fp and ra would also be stored for leaf functions, which is not
9037 what we want. */
9038 riscv_save_frame_pointer = false;
9039 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
9041 if (!global_options.x_flag_omit_frame_pointer)
9042 riscv_save_frame_pointer = true;
9044 global_options.x_flag_omit_frame_pointer = 1;
9047 /* We get better code with explicit relocs for CM_MEDLOW, but
9048 worse code for the others (for now). Pick the best default. */
9049 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
9050 if (riscv_cmodel == CM_MEDLOW)
9051 target_flags |= MASK_EXPLICIT_RELOCS;
9053 /* Require that the ISA supports the requested floating-point ABI. */
9054 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
9055 error ("requested ABI requires %<-march%> to subsume the %qc extension",
9056 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
9058 /* RVE requires a specific ABI. */
9059 if (TARGET_RVE)
9061 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
9062 error ("rv32e requires ilp32e ABI");
9063 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
9064 error ("rv64e requires lp64e ABI");
9067 /* Zfinx requires the ilp32, ilp32e, lp64 or lp64e ABI. */
9068 if (TARGET_ZFINX
9069 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
9070 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
9071 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
9073 /* We do not yet support ILP32 on RV64. */
9074 if (BITS_PER_WORD != POINTER_SIZE)
9075 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
9077 /* Validate -mpreferred-stack-boundary= value. */
9078 riscv_stack_boundary = ABI_STACK_BOUNDARY;
9079 if (riscv_preferred_stack_boundary_arg)
9081 int min = ctz_hwi (STACK_BOUNDARY / 8);
9082 int max = 8;
9084 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
9085 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
9086 riscv_preferred_stack_boundary_arg, min, max);
9088 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
9091 if (riscv_emit_attribute_p < 0)
9092 #ifdef HAVE_AS_RISCV_ATTRIBUTE
9093 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
9094 #else
9095 riscv_emit_attribute_p = 0;
9097 if (riscv_emit_attribute_p)
9098 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
9099 " [%<-mriscv-attribute%>]");
9100 #endif
9102 if (riscv_stack_protector_guard == SSP_GLOBAL
9103 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9105 error ("incompatible options %<-mstack-protector-guard=global%> and "
9106 "%<-mstack-protector-guard-offset=%s%>",
9107 riscv_stack_protector_guard_offset_str);
9110 if (riscv_stack_protector_guard == SSP_TLS
9111 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
9112 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
9114 error ("both %<-mstack-protector-guard-offset%> and "
9115 "%<-mstack-protector-guard-reg%> must be used "
9116 "with %<-mstack-protector-guard=sysreg%>");
9119 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
9121 const char *str = riscv_stack_protector_guard_reg_str;
9122 int reg = decode_reg_name (str);
9124 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
9125 error ("%qs is not a valid base register in %qs", str,
9126 "-mstack-protector-guard-reg=");
9128 riscv_stack_protector_guard_reg = reg;
9131 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9133 char *end;
9134 const char *str = riscv_stack_protector_guard_offset_str;
9135 errno = 0;
9136 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
9138 if (!*str || *end || errno)
9139 error ("%qs is not a valid number in %qs", str,
9140 "-mstack-protector-guard-offset=");
9142 if (!SMALL_OPERAND (offs))
9143 error ("%qs is not a valid offset in %qs", str,
9144 "-mstack-protector-guard-offset=");
9146 riscv_stack_protector_guard_offset = offs;
9149 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
9150 param_sched_pressure_algorithm,
9151 SCHED_PRESSURE_MODEL);
9153 /* Function to allocate machine-dependent function status. */
9154 init_machine_status = &riscv_init_machine_status;
9156 riscv_override_options_internal (&global_options);
9158 /* Save these options as the default ones in case we push and pop them later
9159 while processing functions with potential target attributes. */
9160 target_option_default_node = target_option_current_node
9161 = build_target_option_node (&global_options, &global_options_set);
9164 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9165 Used by riscv_set_current_function to
9166 make sure optab availability predicates are recomputed when necessary. */
9168 void
9169 riscv_save_restore_target_globals (tree new_tree)
9171 if (TREE_TARGET_GLOBALS (new_tree))
9172 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9173 else if (new_tree == target_option_default_node)
9174 restore_target_globals (&default_target_globals);
9175 else
9176 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9179 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9180 using the information saved in PTR. */
9182 static void
9183 riscv_option_restore (struct gcc_options *opts,
9184 struct gcc_options * /* opts_set */,
9185 struct cl_target_option * /* ptr */)
9187 riscv_override_options_internal (opts);
9190 static GTY (()) tree riscv_previous_fndecl;
9192 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9194 static void
9195 riscv_conditional_register_usage (void)
9197 /* We have only x0~x15 on RV32E/RV64E. */
9198 if (TARGET_RVE)
9200 for (int r = 16; r <= 31; r++)
9201 fixed_regs[r] = 1;
9204 if (riscv_abi == ABI_ILP32E)
9206 for (int r = 16; r <= 31; r++)
9207 call_used_regs[r] = 1;
9210 if (!TARGET_HARD_FLOAT)
9212 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9213 fixed_regs[regno] = call_used_regs[regno] = 1;
9216 /* In the soft-float ABI, there are no callee-saved FP registers. */
9217 if (UNITS_PER_FP_ARG == 0)
9219 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9220 call_used_regs[regno] = 1;
9223 if (!TARGET_VECTOR)
9225 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
9226 fixed_regs[regno] = call_used_regs[regno] = 1;
9228 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
9229 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
9230 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9231 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
9235 /* Return a register priority for hard reg REGNO. */
9237 static int
9238 riscv_register_priority (int regno)
9240 /* Favor compressed registers to improve the odds of RVC instruction
9241 selection. */
9242 if (riscv_compressed_reg_p (regno))
9243 return 1;
9245 return 0;
9248 /* Implement TARGET_TRAMPOLINE_INIT. */
9250 static void
9251 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9253 rtx addr, end_addr, mem;
9254 uint32_t trampoline[4];
9255 unsigned int i;
9256 HOST_WIDE_INT static_chain_offset, target_function_offset;
9258 /* Work out the offsets of the pointers from the start of the
9259 trampoline code. */
9260 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
9262 /* Get pointers to the beginning and end of the code block. */
9263 addr = force_reg (Pmode, XEXP (m_tramp, 0));
9264 end_addr = riscv_force_binary (Pmode, PLUS, addr,
9265 GEN_INT (TRAMPOLINE_CODE_SIZE));
9268 if (Pmode == SImode)
9270 chain_value = force_reg (Pmode, chain_value);
9272 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9273 /* lui t2, hi(chain)
9274 lui t0, hi(func)
9275 addi t2, t2, lo(chain)
9276 jr t0, lo(func) */
9278 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
9279 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
9281 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
9283 /* 0xfff. */
9284 rtx imm12_mask = gen_reg_rtx (SImode);
9285 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
9287 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
9289 /* Gen lui t2, hi(chain). */
9290 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
9291 fixup_value);
9292 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
9293 uimm_mask);
9294 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9295 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
9296 gen_int_mode (lui_hi_chain_code, SImode));
9298 mem = adjust_address (m_tramp, SImode, 0);
9299 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
9301 /* Gen lui t0, hi(func). */
9302 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
9303 fixup_value);
9304 hi_func = riscv_force_binary (SImode, AND, hi_func,
9305 uimm_mask);
9306 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
9307 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
9308 gen_int_mode (lui_hi_func_code, SImode));
9310 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
9311 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
9313 /* Gen addi t2, t2, lo(chain). */
9314 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
9315 imm12_mask);
9316 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
9318 lo_chain_code = OPCODE_ADDI
9319 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9320 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
9322 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
9323 force_reg (SImode, GEN_INT (lo_chain_code)));
9325 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
9326 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
9328 /* Gen jr t0, lo(func). */
9329 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
9330 imm12_mask);
9331 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
9333 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9335 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
9336 force_reg (SImode, GEN_INT (lo_func_code)));
9338 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
9339 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
9341 else
9343 static_chain_offset = TRAMPOLINE_CODE_SIZE;
9344 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
9346 /* auipc t2, 0
9347 l[wd] t0, target_function_offset(t2)
9348 l[wd] t2, static_chain_offset(t2)
jr t0  */
9351 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9352 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9353 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
9354 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9355 | (target_function_offset << SHIFT_IMM);
9356 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9357 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9358 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9359 | (static_chain_offset << SHIFT_IMM);
9360 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9362 /* Copy the trampoline code. */
9363 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
9365 if (BYTES_BIG_ENDIAN)
9366 trampoline[i] = __builtin_bswap32(trampoline[i]);
9367 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
9368 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
9371 /* Set up the static chain pointer field. */
9372 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
9373 riscv_emit_move (mem, chain_value);
9375 /* Set up the target function field. */
9376 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
9377 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
9380 /* Flush the code part of the trampoline. */
9381 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
9382 emit_insn (gen_clear_cache (addr, end_addr));
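/* Illustrative sketch (editor's note, not part of the generated code):
   a trampoline is materialized when a nested function that refers to
   its parent's locals has its address taken, e.g.

     int outer (int x)
     {
       int inner (int y) { return x + y; }   // needs the static chain
       int (*fp) (int) = inner;              // address escapes
       return fp (1);
     }

   Assuming RV64 (8-byte pointers, so static_chain_offset == 16 and
   target_function_offset == 24), the four words stored above decode
   roughly as

     auipc  t2, 0          # t2 <- address of the trampoline
     ld     t0, 24(t2)     # load the target function address
     ld     t2, 16(t2)     # load the static chain value
     jr     t0

   with the two pointers placed immediately after the code.  */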
9385 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
9387 static bool
9388 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
9389 tree exp ATTRIBUTE_UNUSED)
/* Don't use sibcalls when using the save-restore routines.  */
9392 if (TARGET_SAVE_RESTORE)
9393 return false;
9395 /* Don't use sibcall for naked functions. */
9396 if (cfun->machine->naked_p)
9397 return false;
9399 /* Don't use sibcall for interrupt functions. */
9400 if (cfun->machine->interrupt_handler_p)
9401 return false;
/* Don't use sibcalls in the large code model, because sibcall expansion
and epilogue expansion both use the RISCV_PROLOGUE_TEMP register.  */
9406 if (riscv_cmodel == CM_LARGE)
9407 return false;
9409 return true;
/* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
interrupt function.  */
9414 static enum riscv_privilege_levels
9415 riscv_get_interrupt_type (tree decl)
9417 gcc_assert (decl != NULL_TREE);
9419 if ((TREE_CODE(decl) != FUNCTION_DECL)
9420 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
9421 return UNKNOWN_MODE;
9423 tree attr_args
9424 = TREE_VALUE (lookup_attribute ("interrupt",
9425 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
9427 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
9429 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
9431 if (!strcmp (string, "user"))
9432 return USER_MODE;
9433 else if (!strcmp (string, "supervisor"))
9434 return SUPERVISOR_MODE;
9435 else /* Must be "machine". */
9436 return MACHINE_MODE;
9438 else
9439 /* Interrupt attributes are machine mode by default. */
9440 return MACHINE_MODE;
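/* For illustration, the attribute argument parsed above comes from a
   declaration such as

     void __attribute__ ((interrupt ("supervisor"))) isr (void);

   where the optional string selects the privilege level and defaults
   to "machine" when omitted.  */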
/* Implement `TARGET_SET_CURRENT_FUNCTION'.  Unpack the codegen decisions
   like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
   of the function, if such exists.  This function may be called multiple
   times on a single function so use riscv_previous_fndecl to avoid
   setting up identical state.

   Also perform sanity checking for the above function attributes.  */
9450 static void
9451 riscv_set_current_function (tree decl)
9453 if (decl == NULL_TREE
9454 || current_function_decl == NULL_TREE
9455 || current_function_decl == error_mark_node
9456 || ! cfun->machine)
9457 return;
9459 if (!cfun->machine->attributes_checked_p)
9461 cfun->machine->naked_p = riscv_naked_function_p (decl);
9462 cfun->machine->interrupt_handler_p
9463 = riscv_interrupt_type_p (TREE_TYPE (decl));
9465 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
9466 error ("function attributes %qs and %qs are mutually exclusive",
9467 "interrupt", "naked");
9469 if (cfun->machine->interrupt_handler_p)
9471 tree ret = TREE_TYPE (TREE_TYPE (decl));
9472 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
9474 if (TREE_CODE (ret) != VOID_TYPE)
9475 error ("%qs function cannot return a value", "interrupt");
9477 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
9478 error ("%qs function cannot have arguments", "interrupt");
9480 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
9482 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
9485 /* Don't print the above diagnostics more than once. */
9486 cfun->machine->attributes_checked_p = 1;
9489 if (!decl || decl == riscv_previous_fndecl)
9490 return;
9492 tree old_tree = (riscv_previous_fndecl
9493 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
9494 : NULL_TREE);
9496 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
9498 /* If current function has no attributes but the previous one did,
9499 use the default node. */
9500 if (!new_tree && old_tree)
9501 new_tree = target_option_default_node;
/* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
the default have been handled by riscv_save_restore_target_globals when
the target pragma was parsed.  */
9506 if (old_tree == new_tree)
9507 return;
9509 riscv_previous_fndecl = decl;
9511 /* First set the target options. */
9512 cl_target_option_restore (&global_options, &global_options_set,
9513 TREE_TARGET_OPTION (new_tree));
/* The ISA extensions can vary from function to function via the target
attribute.  Thus, make sure that the machine modes are reflected
correctly here.  */
9517 init_adjust_machine_modes ();
9519 riscv_save_restore_target_globals (new_tree);
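/* For illustration (assuming the RISC-V target-attribute syntax), the
   retargeting above is what allows

     __attribute__ ((target ("arch=+zbb"))) int f (int x);

   to be compiled with Zbb enabled while the rest of the translation
   unit keeps the command-line ISA.  */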
9522 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
9523 static tree
9524 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
9526 tree combined_attrs;
9528 enum riscv_privilege_levels old_interrupt_type
9529 = riscv_get_interrupt_type (olddecl);
9530 enum riscv_privilege_levels new_interrupt_type
9531 = riscv_get_interrupt_type (newdecl);
/* Check that the old and new declarations have the same interrupt
type.  */
9534 if ((old_interrupt_type != UNKNOWN_MODE)
9535 && (new_interrupt_type != UNKNOWN_MODE)
9536 && (old_interrupt_type != new_interrupt_type))
9537 error ("%qs function cannot have different interrupt type", "interrupt");
9539 /* Create combined attributes. */
9540 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
9541 DECL_ATTRIBUTES (newdecl));
9543 return combined_attrs;
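/* For example, a redeclaration pair such as

     void __attribute__ ((interrupt ("user"))) f (void);
     void __attribute__ ((interrupt ("machine"))) f (void) { }

   is diagnosed above because the two privilege levels differ.  */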
9546 /* Implement TARGET_CANNOT_COPY_INSN_P. */
9548 static bool
9549 riscv_cannot_copy_insn_p (rtx_insn *insn)
9551 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
9554 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
9556 static bool
9557 riscv_slow_unaligned_access (machine_mode, unsigned int)
9559 return riscv_slow_unaligned_access_p;
9562 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9564 static bool
9565 riscv_can_change_mode_class (machine_mode from, machine_mode to,
9566 reg_class_t rclass)
/* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
In the 'cprop_hardreg' pass we will try to do hard reg copy propagation
between a wider mode (FROM) and a narrower mode (TO).

E.g. we should not allow copy propagation
- RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
since their sizes cannot be ordered, which would cause an ICE in
regcprop.

TODO: Even though they have different sizes, they always change the
whole register.  We may enhance such cases in regcprop in the
future.  */
9579 if (reg_classes_intersect_p (V_REGS, rclass)
9580 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
9581 return false;
9582 return !reg_classes_intersect_p (FP_REGS, rclass);
9585 /* Implement TARGET_CONSTANT_ALIGNMENT. */
9587 static HOST_WIDE_INT
9588 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
9590 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
9591 && (riscv_align_data_type == riscv_align_data_type_xlen))
9592 return MAX (align, BITS_PER_WORD);
9593 return align;
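/* E.g. with -malign-data=xlen on RV64, a string constant such as "abc"
   is raised from its natural byte alignment to BITS_PER_WORD (64 bits),
   which helps word-sized copies of the constant.  */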
9596 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
9598 /* This function is equivalent to default_promote_function_mode_always_promote
9599 except that it returns a promoted mode even if type is NULL_TREE. This is
9600 needed by libcalls which have no type (only a mode) such as fixed conversion
9601 routines that take a signed or unsigned char/short/int argument and convert
9602 it to a fixed type. */
9604 static machine_mode
9605 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9606 machine_mode mode,
9607 int *punsignedp ATTRIBUTE_UNUSED,
9608 const_tree fntype ATTRIBUTE_UNUSED,
9609 int for_return ATTRIBUTE_UNUSED)
9611 int unsignedp;
9613 if (type != NULL_TREE)
9614 return promote_mode (type, mode, punsignedp);
9616 unsignedp = *punsignedp;
9617 scalar_mode smode = as_a <scalar_mode> (mode);
9618 PROMOTE_MODE (smode, unsignedp, type);
9619 *punsignedp = unsignedp;
9620 return smode;
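/* Sketch of the effect (editor's example): a libcall argument that has
   a QImode value but no tree type, as in the fixed-point conversion
   routines mentioned above, is still widened here via PROMOTE_MODE,
   just as promote_mode would widen a typed char argument.  */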
9623 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
9625 static void
9626 riscv_reorg (void)
/* Do nothing unless we have -msave-restore.  */
9629 if (TARGET_SAVE_RESTORE)
9630 riscv_remove_unneeded_save_restore_calls ();
9633 /* Return nonzero if register FROM_REGNO can be renamed to register
9634 TO_REGNO. */
9636 bool
9637 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
9638 unsigned to_regno)
9640 /* Interrupt functions can only use registers that have already been
9641 saved by the prologue, even if they would normally be
9642 call-clobbered. */
9643 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
9646 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
9648 bool
9649 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
9651 /* Prefer old address if it is less expensive. */
9652 addr_space_t as = MEM_ADDR_SPACE (memref);
9653 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
9654 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
9655 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
9656 return new_cost <= old_cost;
/* Helper function for generating the gpr_save pattern.  */

rtx
riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
9664 unsigned count = riscv_save_libcall_count (frame->mask);
/* 1 for the unspec, 2 for the t0/t1 clobbers, and 1 for ra.  */
9666 unsigned veclen = 1 + 2 + 1 + count;
9667 rtvec vec = rtvec_alloc (veclen);
9669 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
9671 RTVEC_ELT (vec, 0) =
9672 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
9673 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
9675 for (unsigned i = 1; i < veclen; ++i)
9677 unsigned regno = gpr_save_reg_order[i];
9678 rtx reg = gen_rtx_REG (Pmode, regno);
9679 rtx elt;
9681 /* t0 and t1 are CLOBBERs, others are USEs. */
9682 if (i < 3)
9683 elt = gen_rtx_CLOBBER (Pmode, reg);
9684 else
9685 elt = gen_rtx_USE (Pmode, reg);
9687 RTVEC_ELT (vec, i) = elt;
/* The largest caller-saved register in the save order must be set in
MASK if we are not using __riscv_save_0.  */
9692 gcc_assert ((count == 0) ||
9693 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
9695 return gen_rtx_PARALLEL (VOIDmode, vec);
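/* Illustrative shape of the parallel built above, assuming count == 1
   and the usual gpr_save_reg_order of t0, t1, ra, s0, ...:

     (parallel [(unspec_volatile [(const_int 1)] UNSPECV_GPR_SAVE)
                (clobber (reg:DI t0))
                (clobber (reg:DI t1))
                (use (reg:DI ra))
                (use (reg:DI s0))])

   matching a call to one of the __riscv_save_<N> millicode routines.  */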
9698 static HOST_WIDE_INT
9699 zcmp_base_adj (int regs_num)
return riscv_16bytes_align (regs_num * GET_MODE_SIZE (word_mode));
9704 static HOST_WIDE_INT
9705 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
9707 return total - zcmp_base_adj (regs_num);
9710 bool
9711 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
|| additional_bytes == 2 * ZCMP_SP_INC_STEP
|| additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
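/* Worked example (assuming ZCMP_SP_INC_STEP == 16 and
   ZCMP_MAX_SPIMM == 3 on RV64): pushing 3 registers gives
   zcmp_base_adj (3) == riscv_16bytes_align (24) == 32, so the valid
   total adjustments are 32, 48, 64 and 80 bytes; a total of 96 would
   leave 64 additional bytes, which cm.push cannot encode.  */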
/* Return true if OP is a valid gpr_save pattern.  */
9721 bool
9722 riscv_gpr_save_operation_p (rtx op)
9724 unsigned len = XVECLEN (op, 0);
9726 if (len > ARRAY_SIZE (gpr_save_reg_order))
9727 return false;
9729 for (unsigned i = 0; i < len; i++)
9731 rtx elt = XVECEXP (op, 0, i);
9732 if (i == 0)
9734 /* First element in parallel is unspec. */
9735 if (GET_CODE (elt) != UNSPEC_VOLATILE
9736 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
9737 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
9738 return false;
9740 else
/* Two CLOBBERs followed by USEs; must check the order.  */
9743 unsigned expect_code = i < 3 ? CLOBBER : USE;
9744 if (GET_CODE (elt) != expect_code
9745 || !REG_P (XEXP (elt, 1))
9746 || (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i]))
9747 return false;
9751 return true;
9754 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
9756 static unsigned HOST_WIDE_INT
9757 riscv_asan_shadow_offset (void)
9759 /* We only have libsanitizer support for RV64 at present.
9761 This number must match ASAN_SHADOW_OFFSET_CONST in the file
9762 libsanitizer/asan/asan_mapping.h. */
9763 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
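/* Sketch of the resulting mapping, assuming the usual 3-bit ASan shadow
   scale:  shadow_address = (address >> 3) + 0xd55550000.  */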
9766 /* Implement TARGET_MANGLE_TYPE. */
9768 static const char *
9769 riscv_mangle_type (const_tree type)
/* Half-precision float, _Float16, is mangled as "DF16_".  */
9772 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
9773 return "DF16_";
/* Mangle all vector types for the vector extension.  The mangled name
follows the RVV LLVM rule, i.e. "u" + the length of ABI_NAME +
ABI_NAME itself.  */
9778 if (TYPE_NAME (type) != NULL)
9780 const char *res = riscv_vector::mangle_builtin_type (type);
9781 if (res)
9782 return res;
9785 /* Use the default mangling. */
9786 return NULL;
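/* For instance, `void f (_Float16);' mangles as _Z1fDF16_ under the
   rule above, and an RVV type such as vint32m1_t is expected to mangle
   as u15__rvv_int32m1_t ("u", the length 15, then the ABI name); the
   exact ABI names come from riscv_vector::mangle_builtin_type.  */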
9789 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
9791 static bool
9792 riscv_scalar_mode_supported_p (scalar_mode mode)
9794 if (mode == HFmode)
9795 return true;
9796 else
9797 return default_scalar_mode_supported_p (mode);
9800 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
9801 if MODE is HFmode, and punt to the generic implementation otherwise. */
9803 static bool
9804 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
9806 if (mode == HFmode)
9807 return true;
9808 else
9809 return default_libgcc_floating_mode_supported_p (mode);
9812 /* Set the value of FLT_EVAL_METHOD.
9813 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
9815 0: evaluate all operations and constants, whose semantic type has at
9816 most the range and precision of type float, to the range and
9817 precision of float; evaluate all other operations and constants to
9818 the range and precision of the semantic type;
9820 N, where _FloatN is a supported interchange floating type
9821 evaluate all operations and constants, whose semantic type has at
9822 most the range and precision of _FloatN type, to the range and
9823 precision of the _FloatN type; evaluate all other operations and
9824 constants to the range and precision of the semantic type;
9826 If we have the zfh/zhinx/zvfh extensions then we support _Float16
9827 in native precision, so we should set this to 16. */
9828 static enum flt_eval_method
9829 riscv_excess_precision (enum excess_precision_type type)
9831 switch (type)
9833 case EXCESS_PRECISION_TYPE_FAST:
9834 case EXCESS_PRECISION_TYPE_STANDARD:
9835 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
9836 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
9837 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
9838 case EXCESS_PRECISION_TYPE_IMPLICIT:
9839 case EXCESS_PRECISION_TYPE_FLOAT16:
9840 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
9841 default:
9842 gcc_unreachable ();
9844 return FLT_EVAL_METHOD_UNPREDICTABLE;
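/* Example of the effect: given `_Float16 a, b, c;', the expression
   a + b * c is evaluated directly in _Float16 when zfh/zhinx/zvfh
   provide native half-precision (FLT_EVAL_METHOD == 16); otherwise the
   operands are promoted and the computation happens in float, with a
   single final truncation back to _Float16.  */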
9847 /* Implement TARGET_FLOATN_MODE. */
9848 static opt_scalar_float_mode
9849 riscv_floatn_mode (int n, bool extended)
9851 if (!extended && n == 16)
9852 return HFmode;
9854 return default_floatn_mode (n, extended);
9857 static void
9858 riscv_init_libfuncs (void)
9860 /* Half-precision float operations. The compiler handles all operations
9861 with NULL libfuncs by converting to SFmode. */
9863 /* Arithmetic. */
9864 set_optab_libfunc (add_optab, HFmode, NULL);
9865 set_optab_libfunc (sdiv_optab, HFmode, NULL);
9866 set_optab_libfunc (smul_optab, HFmode, NULL);
9867 set_optab_libfunc (neg_optab, HFmode, NULL);
9868 set_optab_libfunc (sub_optab, HFmode, NULL);
9870 /* Comparisons. */
9871 set_optab_libfunc (eq_optab, HFmode, NULL);
9872 set_optab_libfunc (ne_optab, HFmode, NULL);
9873 set_optab_libfunc (lt_optab, HFmode, NULL);
9874 set_optab_libfunc (le_optab, HFmode, NULL);
9875 set_optab_libfunc (ge_optab, HFmode, NULL);
9876 set_optab_libfunc (gt_optab, HFmode, NULL);
9877 set_optab_libfunc (unord_optab, HFmode, NULL);
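/* As a consequence, an HFmode operation never calls an HFmode libfunc;
   it is expanded along the lines of (editor's sketch)

     a + b  ==>  __truncsfhf2 (__extendhfsf2 (a) + __extendhfsf2 (b))

   where the SFmode addition itself may be an instruction or a libcall
   depending on the float ABI.  */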
9880 #if CHECKING_P
9881 void
9882 riscv_reinit (void)
9884 riscv_option_override ();
9885 init_adjust_machine_modes ();
9886 init_derived_machine_modes ();
9887 reinit_regs ();
9888 init_optabs ();
9890 #endif
9892 #if CHECKING_P
9893 #undef TARGET_RUN_TARGET_SELFTESTS
9894 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
9895 #endif /* #if CHECKING_P */
9897 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
9899 static bool
9900 riscv_vector_mode_supported_p (machine_mode mode)
9902 if (TARGET_VECTOR)
9903 return riscv_v_ext_mode_p (mode);
9905 return false;
9908 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
9910 static bool
9911 riscv_verify_type_context (location_t loc, type_context_kind context,
9912 const_tree type, bool silent_p)
9914 return riscv_vector::verify_type_context (loc, context, type, silent_p);
9917 /* Implement TARGET_VECTOR_ALIGNMENT. */
9919 static HOST_WIDE_INT
9920 riscv_vector_alignment (const_tree type)
9922 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
9923 be set for non-predicate vectors of booleans. Modes are the most
9924 direct way we have of identifying real RVV predicate types. */
/* FIXME: The RVV spec does not specify the alignment of bool vectors;
we use one-byte alignment.  */
9927 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
9928 return 8;
9930 widest_int min_size
9931 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
9932 return wi::umin (min_size, 128).to_uhwi ();
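/* E.g. a predicate type such as vbool32_t gets 8-bit (one byte)
   alignment via the MODE_VECTOR_BOOL check, while a fixed 256-bit GNU
   vector type is capped at MIN (256, 128) == 128-bit alignment by the
   code above.  */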
9935 /* Implement REGMODE_NATURAL_SIZE. */
9937 poly_uint64
9938 riscv_regmode_natural_size (machine_mode mode)
9940 /* The natural size for RVV data modes is one RVV data vector,
9941 and similarly for predicates. We can't independently modify
9942 anything smaller than that. */
9943 /* ??? For now, only do this for variable-width RVV registers.
9944 Doing it for constant-sized registers breaks lower-subreg.c. */
9946 if (riscv_v_ext_mode_p (mode))
9948 poly_uint64 size = GET_MODE_SIZE (mode);
9949 if (riscv_v_ext_tuple_mode_p (mode))
9951 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
9952 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
9953 return size;
9955 else if (riscv_v_ext_vector_mode_p (mode))
9957 /* RVV mask modes always consume a single register. */
9958 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
9959 return BYTES_PER_RISCV_VECTOR;
9961 if (!size.is_constant ())
9962 return BYTES_PER_RISCV_VECTOR;
9963 else if (!riscv_v_ext_vls_mode_p (mode))
/* For -march=rv64gc_zve32f, the natural vector register size is
32 bits, which is smaller than the scalar register size, so we return
the minimum of the vector register size and the scalar register
size.  */
9968 return MIN (size.to_constant (), UNITS_PER_WORD);
9970 return UNITS_PER_WORD;
9973 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
9975 static unsigned int
9976 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
9977 int *offset)
9979 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
9980 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.  */
9983 gcc_assert (i == 1);
9984 *factor = riscv_bytes_per_vector_chunk;
9985 *offset = 1;
9986 return RISCV_DWARF_VLENB;
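/* Worked example: with TARGET_MIN_VLEN > 32 the chunk size is 8 bytes,
   so a poly_int value such as [8, 8] is described in DWARF as
   8 + 8 * ((VLENB / 8) - 1), which simplifies to exactly VLENB.  */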
9989 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
9991 static HOST_WIDE_INT
9992 riscv_estimated_poly_value (poly_int64 val,
9993 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
9995 if (TARGET_VECTOR)
9996 return riscv_vector::estimated_poly_value (val, kind);
9997 return default_estimated_poly_value (val, kind);
10000 /* Return true if the vector misalignment factor is supported by the
10001 target. */
10002 bool
10003 riscv_support_vector_misalignment (machine_mode mode,
10004 const_tree type ATTRIBUTE_UNUSED,
10005 int misalignment,
10006 bool is_packed ATTRIBUTE_UNUSED)
/* This depends on the movmisalign pattern.  */
10009 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10010 is_packed);
10013 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
10015 static opt_machine_mode
10016 riscv_get_mask_mode (machine_mode mode)
10018 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
10019 return riscv_vector::get_mask_mode (mode);
10021 return default_get_mask_mode (mode);
10024 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
10025 it isn't worth branching around empty masked ops (including masked
10026 stores). */
10028 static bool
10029 riscv_empty_mask_is_expensive (unsigned)
10031 return false;
10034 /* Return true if a shift-amount matches the trailing cleared bits on
10035 a bitmask. */
10037 bool
10038 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
10040 return shamt == ctz_hwi (mask);
10043 static HARD_REG_SET
10044 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10046 HARD_REG_SET zeroed_hardregs;
10047 CLEAR_HARD_REG_SET (zeroed_hardregs);
10049 /* Find a register to hold vl. */
10050 unsigned vl_regno = INVALID_REGNUM;
/* Skip the first GPR (x0): a vsetvl whose vl output and avl input are
both x0 keeps the existing vl instead of setting VLMAX.  */
10053 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
10055 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10057 vl_regno = regno;
10058 break;
10062 if (vl_regno > GP_REG_LAST)
10063 sorry ("cannot allocate vl register for %qs on this target",
10064 "-fzero-call-used-regs");
/* Vector configurations need not be saved and restored here.  The
-fzero-call-used-regs=* option zeroes all the vector registers and
returns, so there are no vector operations between them.  */
10070 bool emitted_vlmax_vsetvl = false;
10071 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
10072 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
10074 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10076 rtx target = regno_reg_rtx[regno];
10077 machine_mode mode = GET_MODE (target);
10079 if (!emitted_vlmax_vsetvl)
10081 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
10082 emitted_vlmax_vsetvl = true;
10085 rtx ops[] = {target, CONST0_RTX (mode)};
10086 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
10087 riscv_vector::UNARY_OP, ops, vl);
10089 SET_HARD_REG_BIT (zeroed_hardregs, regno);
10093 return zeroed_hardregs;
10096 /* Generate a sequence of instructions that zero registers specified by
10097 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
10098 zeroed. */
10099 HARD_REG_SET
10100 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10102 HARD_REG_SET zeroed_hardregs;
10103 CLEAR_HARD_REG_SET (zeroed_hardregs);
10105 if (TARGET_VECTOR)
10106 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
10108 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
10109 & ~zeroed_hardregs);
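/* For illustration, compiling with -fzero-call-used-regs=all makes the
   epilogue emit a single VLMAX vsetvl followed by a move of zero
   (e.g. a vmv.v.i) into each vector register that needs clearing, while
   the scalar registers are handled by default_zero_call_used_regs on
   the remaining set.  */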
10112 /* Implement target hook TARGET_ARRAY_MODE. */
10114 static opt_machine_mode
10115 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
10117 machine_mode vmode;
10118 if (TARGET_VECTOR
10119 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
10120 return vmode;
10122 return opt_machine_mode ();
10125 /* Given memory reference MEM, expand code to compute the aligned
10126 memory address, shift and mask values and store them into
10127 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
10129 void
10130 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
10131 rtx *not_mask)
10133 /* Align the memory address to a word. */
10134 rtx addr = force_reg (Pmode, XEXP (mem, 0));
10136 rtx addr_mask = gen_int_mode (-4, Pmode);
10138 rtx aligned_addr = gen_reg_rtx (Pmode);
10139 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
10141 *aligned_mem = change_address (mem, SImode, aligned_addr);
10143 /* Calculate the shift amount. */
10144 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
10145 gen_int_mode (3, SImode)));
10146 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
10147 gen_int_mode (3, SImode)));
10149 /* Calculate the mask. */
10150 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
10152 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
10154 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
10155 gen_lowpart (QImode, *shift)));
10157 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
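/* Worked example (little endian, QImode access at address 0x1003):
   the aligned address is 0x1000, *SHIFT becomes (0x1003 & 3) << 3 == 24
   bits, *MASK becomes 0xff << 24 and *NOT_MASK its complement, so the
   byte can be updated inside the aligned SImode word, as done by the
   subword atomic expansions.  */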
10160 /* Leftshift a subword within an SImode register. */
10162 void
10163 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
10164 rtx *shifted_value)
10166 rtx value_reg = gen_reg_rtx (SImode);
10167 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
10168 mode, 0));
10170 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
10171 gen_lowpart (QImode, shift)));
10174 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
10175 allows the behavior to be tuned for specific implementations as well as
10176 when optimizing for size. */
10178 bool
10179 riscv_use_divmod_expander (void)
10181 return tune_param->use_divmod_expansion;
10184 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
10186 static machine_mode
10187 riscv_preferred_simd_mode (scalar_mode mode)
10189 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10190 return riscv_vector::preferred_simd_mode (mode);
10192 return word_mode;
10195 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
10197 static poly_uint64
10198 riscv_vectorize_preferred_vector_alignment (const_tree type)
10200 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
10201 return TYPE_ALIGN (TREE_TYPE (type));
10202 return TYPE_ALIGN (type);
/* Return true if MODE is a static FRM rounding mode.  */
10207 static bool
10208 riscv_static_frm_mode_p (int mode)
10210 switch (mode)
10212 case riscv_vector::FRM_RDN:
10213 case riscv_vector::FRM_RUP:
10214 case riscv_vector::FRM_RTZ:
10215 case riscv_vector::FRM_RMM:
10216 case riscv_vector::FRM_RNE:
10217 return true;
10218 default:
10219 return false;
10222 gcc_unreachable ();
/* Emit the frm mode set for floating-point mode switching.  */
10227 static void
10228 riscv_emit_frm_mode_set (int mode, int prev_mode)
10230 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
10232 if (prev_mode == riscv_vector::FRM_DYN_CALL)
10233 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
10235 if (mode != prev_mode)
10237 rtx frm = gen_int_mode (mode, SImode);
10239 if (mode == riscv_vector::FRM_DYN_CALL
10240 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
10241 /* No need to emit when prev mode is DYN already. */
10242 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10243 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
10244 && prev_mode != riscv_vector::FRM_DYN
10245 && prev_mode != riscv_vector::FRM_DYN_CALL)
10246 /* No need to emit when prev mode is DYN or DYN_CALL already. */
10247 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10248 else if (mode == riscv_vector::FRM_DYN
10249 && prev_mode != riscv_vector::FRM_DYN_CALL)
10250 /* Restore frm value from backup when switch to DYN mode. */
10251 emit_insn (gen_fsrmsi_restore (backup_reg));
10252 else if (riscv_static_frm_mode_p (mode))
10253 /* Set frm value when switch to static mode. */
10254 emit_insn (gen_fsrmsi_restore (frm));
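/* Example transition (editor's sketch): an insn that uses a static
   rounding mode, say FRM_RTZ, after dynamic code causes the static mode
   to be written to the frm CSR here; switching back to FRM_DYN
   afterwards restores the application's rounding mode from the backup
   register captured at function entry.  */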
/* Implement TARGET_MODE_EMIT.  */
10260 static void
10261 riscv_emit_mode_set (int entity, int mode, int prev_mode,
10262 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
10264 switch (entity)
10266 case RISCV_VXRM:
10267 if (mode != VXRM_MODE_NONE && mode != prev_mode)
10268 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
10269 break;
10270 case RISCV_FRM:
10271 riscv_emit_frm_mode_set (mode, prev_mode);
10272 break;
10273 default:
10274 gcc_unreachable ();
/* Adjust the mode from FRM_NONE to FRM_DYN for an insn that follows a
call, so that the underlying emit restores frm.  */
10281 static int
10282 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
10284 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
10286 if (insn && CALL_P (insn))
10287 return riscv_vector::FRM_DYN;
10289 return mode;
/* Insert the frm backup insn at the end of the bb if and only if the
call is the last insn of this bb.  */
10295 static void
10296 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
10298 edge eg;
10299 bool abnormal_edge_p = false;
10300 edge_iterator eg_iterator;
10301 basic_block bb = BLOCK_FOR_INSN (cur_insn);
10303 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
10305 if (eg->flags & EDGE_ABNORMAL)
10306 abnormal_edge_p = true;
10307 else
10309 start_sequence ();
10310 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10311 rtx_insn *backup_insn = get_insns ();
10312 end_sequence ();
10314 insert_insn_on_edge (backup_insn, eg);
10318 if (abnormal_edge_p)
10320 start_sequence ();
10321 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10322 rtx_insn *backup_insn = get_insns ();
10323 end_sequence ();
10325 insert_insn_end_basic_block (backup_insn, bb);
10328 commit_edge_insertions ();
/* Return the mode that frm must be switched into
prior to the execution of CUR_INSN.  */
10334 static int
10335 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
10337 if (!DYNAMIC_FRM_RTL(cfun))
/* The dynamic frm will be initialized only once per function.  */
10340 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
10341 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10344 if (CALL_P (cur_insn))
10346 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
10348 if (!insn)
10349 riscv_frm_emit_after_bb_end (cur_insn);
10351 return riscv_vector::FRM_DYN_CALL;
10354 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
10356 if (mode == riscv_vector::FRM_NONE)
/* After meeting a call we need to back up the frm, because it may be
updated during the call.  Here, for each insn, we check whether the
previous insn is a call.  When the previous insn is a call, there are
two cases for the emitted mode set.

1.  The current insn is not MODE_NONE; then the mode-switching
framework will perform the switch from MODE_CALL to MODE_NONE natively.
2.  The current insn is MODE_NONE; then we need to adjust MODE_NONE to
MODE_DYN and leave the mode switching itself to perform the emitted
mode set.  */
10368 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
10370 return mode;
/* Return the mode that ENTITY must be switched into
prior to the execution of INSN.  */
10376 static int
10377 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
10379 int code = recog_memoized (insn);
10381 switch (entity)
10383 case RISCV_VXRM:
10384 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
10385 case RISCV_FRM:
10386 return riscv_frm_mode_needed (insn, code);
10387 default:
10388 gcc_unreachable ();
/* Return TRUE if INSN is an asm statement.  */
10394 static bool
10395 asm_insn_p (rtx_insn *insn)
10397 extract_insn (insn);
10399 return recog_data.is_asm;
/* Return TRUE if INSN makes the VXRM mode unknown.  */
10404 static bool
10405 vxrm_unknown_p (rtx_insn *insn)
10407 /* Return true if there is a definition of VXRM. */
10408 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
10409 return true;
/* A called function may contain an instruction that modifies VXRM;
return true in this situation.  */
10413 if (CALL_P (insn))
10414 return true;
/* Return true for all inline assembly, since users may hardcode an
assembly statement like this: asm volatile ("csrwi vxrm, 0").  */
10418 if (asm_insn_p (insn))
10419 return true;
10421 return false;
/* Return TRUE if INSN leaves FRM in an unknown dynamic state.  */
10426 static bool
10427 frm_unknown_dynamic_p (rtx_insn *insn)
10429 /* Return true if there is a definition of FRM. */
10430 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
10431 return true;
10433 return false;
10436 /* Return the mode that an insn results in for VXRM. */
10438 static int
10439 riscv_vxrm_mode_after (rtx_insn *insn, int mode)
10441 if (vxrm_unknown_p (insn))
10442 return VXRM_MODE_NONE;
10444 if (recog_memoized (insn) < 0)
10445 return mode;
10447 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
10448 return get_attr_vxrm_mode (insn);
10449 else
10450 return mode;
10453 /* Return the mode that an insn results in for FRM. */
10455 static int
10456 riscv_frm_mode_after (rtx_insn *insn, int mode)
10458 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);
10460 if (CALL_P (insn))
10461 return mode;
10463 if (frm_unknown_dynamic_p (insn))
10464 return riscv_vector::FRM_DYN;
10466 if (recog_memoized (insn) < 0)
10467 return mode;
10469 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
10470 return get_attr_frm_mode (insn);
10471 else
10472 return mode;
10475 /* Return the mode that an insn results in. */
10477 static int
10478 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
10480 switch (entity)
10482 case RISCV_VXRM:
10483 return riscv_vxrm_mode_after (insn, mode);
10484 case RISCV_FRM:
10485 return riscv_frm_mode_after (insn, mode);
10486 default:
10487 gcc_unreachable ();
10491 /* Return a mode that ENTITY is assumed to be
10492 switched to at function entry. */
10494 static int
10495 riscv_mode_entry (int entity)
10497 switch (entity)
10499 case RISCV_VXRM:
10500 return VXRM_MODE_NONE;
10501 case RISCV_FRM:
10503 /* According to RVV 1.0 spec, all vector floating-point operations use
10504 the dynamic rounding mode in the frm register. Likewise in other
10505 similar places. */
10506 return riscv_vector::FRM_DYN;
10508 default:
10509 gcc_unreachable ();
10513 /* Return a mode that ENTITY is assumed to be
10514 switched to at function exit. */
10516 static int
10517 riscv_mode_exit (int entity)
10519 switch (entity)
10521 case RISCV_VXRM:
10522 return VXRM_MODE_NONE;
10523 case RISCV_FRM:
10524 return riscv_vector::FRM_DYN_EXIT;
10525 default:
10526 gcc_unreachable ();
10530 static int
10531 riscv_mode_priority (int, int n)
10533 return n;
10536 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
10537 unsigned int
10538 riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
10540 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10541 return riscv_vector::autovectorize_vector_modes (modes, all);
10543 return default_autovectorize_vector_modes (modes, all);
10546 /* Implement TARGET_VECTORIZE_RELATED_MODE. */
10547 opt_machine_mode
10548 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
10549 poly_uint64 nunits)
10551 if (TARGET_VECTOR)
10552 return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
10553 nunits);
10554 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
10557 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
10559 static bool
10560 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
10561 rtx target, rtx op0, rtx op1,
10562 const vec_perm_indices &sel)
10564 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
10565 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
10566 op1, sel);
10568 return false;
10571 static bool
10572 riscv_frame_pointer_required (void)
10574 return riscv_save_frame_pointer && !crtl->is_leaf;
10577 /* Return the appropriate common costs according to VECTYPE from COSTS. */
10578 static const common_vector_cost *
10579 get_common_costs (const cpu_vector_cost *costs, tree vectype)
10581 gcc_assert (costs);
10583 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
10584 return costs->vls;
10585 return costs->vla;
/* Return the CPU vector costs according to -mtune if the tune info has
a non-NULL vector cost.  Otherwise, return the default generic vector
costs.  */
10590 const cpu_vector_cost *
10591 get_vector_costs ()
10593 const cpu_vector_cost *costs = tune_param->vec_costs;
10594 if (!costs)
10595 return &generic_vector_cost;
10596 return costs;
10599 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10601 static int
10602 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10603 tree vectype, int misalign ATTRIBUTE_UNUSED)
10605 const cpu_vector_cost *costs = get_vector_costs ();
10606 bool fp = false;
10608 if (vectype != NULL)
10609 fp = FLOAT_TYPE_P (vectype);
10611 const common_vector_cost *common_costs = get_common_costs (costs, vectype);
10612 gcc_assert (common_costs != NULL);
10613 switch (type_of_cost)
10615 case scalar_stmt:
10616 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
10618 case scalar_load:
10619 return costs->scalar_load_cost;
10621 case scalar_store:
10622 return costs->scalar_store_cost;
10624 case vector_stmt:
10625 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10627 case vector_load:
10628 return common_costs->align_load_cost;
10630 case vector_store:
10631 return common_costs->align_store_cost;
10633 case vec_to_scalar:
10634 return common_costs->vec_to_scalar_cost;
10636 case scalar_to_vec:
10637 return common_costs->scalar_to_vec_cost;
10639 case unaligned_load:
10640 return common_costs->unalign_load_cost;
10641 case vector_gather_load:
10642 return common_costs->gather_load_cost;
10644 case unaligned_store:
10645 return common_costs->unalign_store_cost;
10646 case vector_scatter_store:
10647 return common_costs->scatter_store_cost;
10649 case cond_branch_taken:
10650 return costs->cond_taken_branch_cost;
10652 case cond_branch_not_taken:
10653 return costs->cond_not_taken_branch_cost;
10655 case vec_perm:
10656 return common_costs->permute_cost;
10658 case vec_promote_demote:
10659 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10661 case vec_construct:
10662 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
10664 default:
10665 gcc_unreachable ();
10668 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
10671 /* Implement targetm.vectorize.create_costs. */
10673 static vector_costs *
10674 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
10676 if (TARGET_VECTOR)
10677 return new riscv_vector::costs (vinfo, costing_for_scalar);
10678 /* Default vector costs. */
10679 return new vector_costs (vinfo, costing_for_scalar);
10682 /* Implement TARGET_PREFERRED_ELSE_VALUE. */
10684 static tree
10685 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
10686 tree *ops)
10688 if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
10689 return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
10691 return default_preferred_else_value (ifn, vectype, nops, ops);
/* If MEM is in the form "base+offset", extract the two parts of the
address and store them in BASE and OFFSET; otherwise return false
after clearing BASE and OFFSET.  */
10698 bool
10699 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10701 rtx addr;
10703 gcc_assert (MEM_P (mem));
10705 addr = XEXP (mem, 0);
10707 if (REG_P (addr))
10709 *base = addr;
10710 *offset = const0_rtx;
10711 return true;
10714 if (GET_CODE (addr) == PLUS
10715 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10717 *base = XEXP (addr, 0);
10718 *offset = XEXP (addr, 1);
10719 return true;
10722 *base = NULL_RTX;
10723 *offset = NULL_RTX;
10725 return false;
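/* E.g. a MEM whose address is (plus (reg a0) (const_int 16)) yields
   BASE == (reg a0) and OFFSET == (const_int 16); a bare (reg a0) yields
   OFFSET == const0_rtx.  Any other address form returns false.  */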
/* Implement TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P.  */
10730 static bool
10731 riscv_vector_mode_supported_any_target_p (machine_mode)
10733 if (TARGET_XTHEADVECTOR)
10734 return false;
10735 return true;
10738 /* Initialize the GCC target structure. */
10739 #undef TARGET_ASM_ALIGNED_HI_OP
10740 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
10741 #undef TARGET_ASM_ALIGNED_SI_OP
10742 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10743 #undef TARGET_ASM_ALIGNED_DI_OP
10744 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
10746 #undef TARGET_OPTION_OVERRIDE
10747 #define TARGET_OPTION_OVERRIDE riscv_option_override
10749 #undef TARGET_OPTION_RESTORE
10750 #define TARGET_OPTION_RESTORE riscv_option_restore
10752 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
10753 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
10755 #undef TARGET_LEGITIMIZE_ADDRESS
10756 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
10758 #undef TARGET_SCHED_ISSUE_RATE
10759 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
10760 #undef TARGET_SCHED_MACRO_FUSION_P
10761 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
10762 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
10763 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
10765 #undef TARGET_SCHED_VARIABLE_ISSUE
10766 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
10768 #undef TARGET_SCHED_ADJUST_COST
10769 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
10771 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10772 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
10774 #undef TARGET_SET_CURRENT_FUNCTION
10775 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function
10777 #undef TARGET_REGISTER_MOVE_COST
10778 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
10779 #undef TARGET_MEMORY_MOVE_COST
10780 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
10781 #undef TARGET_RTX_COSTS
10782 #define TARGET_RTX_COSTS riscv_rtx_costs
10783 #undef TARGET_ADDRESS_COST
10784 #define TARGET_ADDRESS_COST riscv_address_cost
10785 #undef TARGET_INSN_COST
10786 #define TARGET_INSN_COST riscv_insn_cost
10788 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
10789 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
10790 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
10791 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p
10793 #undef TARGET_ASM_FILE_START
10794 #define TARGET_ASM_FILE_START riscv_file_start
10795 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
10796 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
10797 #undef TARGET_ASM_FILE_END
10798 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
10800 #undef TARGET_EXPAND_BUILTIN_VA_START
10801 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
10803 #undef TARGET_PROMOTE_FUNCTION_MODE
10804 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode
10806 #undef TARGET_RETURN_IN_MEMORY
10807 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
10809 #undef TARGET_ASM_OUTPUT_MI_THUNK
10810 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
10811 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10812 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10814 #undef TARGET_PRINT_OPERAND
10815 #define TARGET_PRINT_OPERAND riscv_print_operand
10816 #undef TARGET_PRINT_OPERAND_ADDRESS
10817 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
10818 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10819 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
10821 #undef TARGET_SETUP_INCOMING_VARARGS
10822 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
10823 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
10824 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
10825 #undef TARGET_STRICT_ARGUMENT_NAMING
10826 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10827 #undef TARGET_MUST_PASS_IN_STACK
10828 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10829 #undef TARGET_PASS_BY_REFERENCE
10830 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
10831 #undef TARGET_ARG_PARTIAL_BYTES
10832 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
10833 #undef TARGET_FUNCTION_ARG
10834 #define TARGET_FUNCTION_ARG riscv_function_arg
10835 #undef TARGET_FUNCTION_ARG_ADVANCE
10836 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
10837 #undef TARGET_FUNCTION_ARG_BOUNDARY
10838 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
10839 #undef TARGET_FNTYPE_ABI
10840 #define TARGET_FNTYPE_ABI riscv_fntype_abi
10841 #undef TARGET_INSN_CALLEE_ABI
10842 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi
10844 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
10845 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
10846 riscv_get_separate_components
10848 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
10849 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
10850 riscv_components_for_bb
10852 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
10853 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
10854 riscv_disqualify_components
10856 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
10857 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
10858 riscv_emit_prologue_components
10860 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
10861 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
10862 riscv_emit_epilogue_components
10864 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
10865 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
10866 riscv_set_handled_components
10868 /* The generic ELF target does not always have TLS support. */
10869 #ifdef HAVE_AS_TLS
10870 #undef TARGET_HAVE_TLS
10871 #define TARGET_HAVE_TLS true
10872 #endif
10874 #undef TARGET_CANNOT_FORCE_CONST_MEM
10875 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem
10877 #undef TARGET_LEGITIMATE_CONSTANT_P
10878 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p
10880 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10881 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p
10883 #undef TARGET_LEGITIMATE_ADDRESS_P
10884 #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p
10886 #undef TARGET_CAN_ELIMINATE
10887 #define TARGET_CAN_ELIMINATE riscv_can_eliminate
10889 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10890 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage
10892 #undef TARGET_CLASS_MAX_NREGS
10893 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs
10895 #undef TARGET_TRAMPOLINE_INIT
10896 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init
10898 #undef TARGET_IN_SMALL_DATA_P
10899 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
10901 #undef TARGET_HAVE_SRODATA_SECTION
10902 #define TARGET_HAVE_SRODATA_SECTION true
10904 #undef TARGET_ASM_SELECT_SECTION
10905 #define TARGET_ASM_SELECT_SECTION riscv_select_section
10907 #undef TARGET_ASM_UNIQUE_SECTION
10908 #define TARGET_ASM_UNIQUE_SECTION riscv_unique_section
10910 #undef TARGET_ASM_SELECT_RTX_SECTION
10911 #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section
10913 #undef TARGET_MIN_ANCHOR_OFFSET
10914 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
10916 #undef TARGET_MAX_ANCHOR_OFFSET
10917 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
10919 #undef TARGET_REGISTER_PRIORITY
10920 #define TARGET_REGISTER_PRIORITY riscv_register_priority
10922 #undef TARGET_CANNOT_COPY_INSN_P
10923 #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p
10925 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10926 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv
10928 #undef TARGET_INIT_BUILTINS
10929 #define TARGET_INIT_BUILTINS riscv_init_builtins
10931 #undef TARGET_BUILTIN_DECL
10932 #define TARGET_BUILTIN_DECL riscv_builtin_decl
10934 #undef TARGET_GIMPLE_FOLD_BUILTIN
10935 #define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin
10937 #undef TARGET_EXPAND_BUILTIN
10938 #define TARGET_EXPAND_BUILTIN riscv_expand_builtin
10940 #undef TARGET_HARD_REGNO_NREGS
10941 #define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
10942 #undef TARGET_HARD_REGNO_MODE_OK
10943 #define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok
10945 #undef TARGET_MODES_TIEABLE_P
10946 #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p
10948 #undef TARGET_SLOW_UNALIGNED_ACCESS
10949 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
10951 #undef TARGET_SECONDARY_MEMORY_NEEDED
10952 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
10954 #undef TARGET_CAN_CHANGE_MODE_CLASS
10955 #define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class
10957 #undef TARGET_CONSTANT_ALIGNMENT
10958 #define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment
10960 #undef TARGET_MERGE_DECL_ATTRIBUTES
10961 #define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes
10963 #undef TARGET_ATTRIBUTE_TABLE
10964 #define TARGET_ATTRIBUTE_TABLE riscv_attribute_table
10966 #undef TARGET_WARN_FUNC_RETURN
10967 #define TARGET_WARN_FUNC_RETURN riscv_warn_func_return
10969 /* The low bit is ignored by jump instructions so is safe to use. */
10970 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
10971 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
10973 #undef TARGET_MACHINE_DEPENDENT_REORG
10974 #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg
10976 #undef TARGET_NEW_ADDRESS_PROFITABLE_P
10977 #define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p
10979 #undef TARGET_MANGLE_TYPE
10980 #define TARGET_MANGLE_TYPE riscv_mangle_type
10982 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10983 #define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p
10985 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
10986 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
10987 riscv_libgcc_floating_mode_supported_p
10989 #undef TARGET_INIT_LIBFUNCS
10990 #define TARGET_INIT_LIBFUNCS riscv_init_libfuncs
10992 #undef TARGET_C_EXCESS_PRECISION
10993 #define TARGET_C_EXCESS_PRECISION riscv_excess_precision
10995 #undef TARGET_FLOATN_MODE
10996 #define TARGET_FLOATN_MODE riscv_floatn_mode
10998 #undef TARGET_ASAN_SHADOW_OFFSET
10999 #define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset
11001 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11002 #undef TARGET_DEFAULT_TARGET_FLAGS
11003 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
11004 #endif
11006 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11007 #define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p
11009 #undef TARGET_VERIFY_TYPE_CONTEXT
11010 #define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context
11012 #undef TARGET_ESTIMATED_POLY_VALUE
11013 #define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value
11015 #undef TARGET_VECTORIZE_GET_MASK_MODE
11016 #define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode
11018 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
11019 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive
11021 #undef TARGET_VECTOR_ALIGNMENT
11022 #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
11024 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
11025 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment
11027 #undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
11028 #define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
11030 #undef TARGET_ZERO_CALL_USED_REGS
11031 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs
11033 #undef TARGET_ARRAY_MODE
11034 #define TARGET_ARRAY_MODE riscv_array_mode
11036 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11037 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
11039 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
11040 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
11041 riscv_vectorize_preferred_vector_alignment
11043 /* Mode switching hooks. */
11045 #undef TARGET_MODE_EMIT
11046 #define TARGET_MODE_EMIT riscv_emit_mode_set
11047 #undef TARGET_MODE_NEEDED
11048 #define TARGET_MODE_NEEDED riscv_mode_needed
11049 #undef TARGET_MODE_AFTER
11050 #define TARGET_MODE_AFTER riscv_mode_after
11051 #undef TARGET_MODE_ENTRY
11052 #define TARGET_MODE_ENTRY riscv_mode_entry
11053 #undef TARGET_MODE_EXIT
11054 #define TARGET_MODE_EXIT riscv_mode_exit
11055 #undef TARGET_MODE_PRIORITY
11056 #define TARGET_MODE_PRIORITY riscv_mode_priority
11058 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
11059 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
11060 riscv_autovectorize_vector_modes
11062 #undef TARGET_VECTORIZE_RELATED_MODE
11063 #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode
11065 #undef TARGET_VECTORIZE_VEC_PERM_CONST
11066 #define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const
11068 #undef TARGET_FRAME_POINTER_REQUIRED
11069 #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
11071 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11072 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11073 riscv_builtin_vectorization_cost
11075 #undef TARGET_VECTORIZE_CREATE_COSTS
11076 #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
11078 #undef TARGET_PREFERRED_ELSE_VALUE
11079 #define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value
11081 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
11082 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
11084 struct gcc_target targetm = TARGET_INITIALIZER;
11086 #include "gt-riscv.h"