RISC-V: Implement TLS Descriptors.
[official-gcc.git] / gcc / config / riscv / riscv.cc
blob 00defa69fd8e83fbc45858f94b29d11c04f0f169
/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"

#include "riscv-vector-costs.h"
#include "riscv-subset.h"
84 /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */
85 #define UNSPEC_ADDRESS_P(X) \
86 (GET_CODE (X) == UNSPEC \
87 && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
88 && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
90 /* Extract the symbol or label from UNSPEC wrapper X. */
91 #define UNSPEC_ADDRESS(X) \
92 XVECEXP (X, 0, 0)
94 /* Extract the symbol type from UNSPEC wrapper X. */
95 #define UNSPEC_ADDRESS_TYPE(X) \
96 ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
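
/* For example (illustrative): per the macros above, the low part of a
   pc-relative reference to SYM would be wrapped as

     (unspec [(symbol_ref "sym")] UNSPEC_ADDRESS_FIRST + SYMBOL_PCREL)

   so the symbol type survives until the relocation is printed.  */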

/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the function has a static frm (rounding mode), false
   otherwise.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)

/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)

/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, clearing all fields to zero.  */
  void reset(void);
};

enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding-mode instruction in
     the function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};

struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};
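
/* For example (illustrative): the value 0x12345 would be represented
   as the two-op sequence

     codes[0] = { UNKNOWN, 0x12000 }   -- lui  a0, 0x12
     codes[1] = { PLUS,    0x345   }   -- addi a0, a0, 0x345

   as produced by riscv_build_integer_1 below.  */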

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8

enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};

/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool use_divmod_expansion;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};

/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
  GR_REGS,	GR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  JALR_REGS,	JALR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FRAME_REGS,	FRAME_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  VM_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
};
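
/* Reading the table above: entries 0-31 are the GPR classes, 32-63 the
   FPRs, 64-65 the fake argument/frame pointer registers, and (after the
   unused NO_REGS block) the final 32 entries are the vector registers,
   with v0 in VM_REGS since it is the only register usable as a mask.  */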

/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};

/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};

/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};

/* Generic costs for vector insn classes.  It is supposed to be the vector
   cost models used by default if no other cost model was specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1,			    /* scalar_int_stmt_cost */
  1,			    /* scalar_fp_stmt_cost */
  1,			    /* scalar_load_cost */
  1,			    /* scalar_store_cost */
  3,			    /* cond_taken_branch_cost */
  1,			    /* cond_not_taken_branch_cost */
  &rvv_vls_vector_cost,	    /* vls */
  &rvv_vla_vector_cost,	    /* vla */
  &rvv_regmove_vector_cost, /* regmove */
};

/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  2,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  3,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  4,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  6,						/* issue_rate */
  3,						/* branch_cost */
  3,						/* memory_cost */
  3,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,		/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)},	/* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)},	/* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  4,						/* memory_cost */
  4,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);

/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};

static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};

/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};

/* Global variable recording whether we should save and restore s0/fp for
   the current function.  */
static bool riscv_save_frame_pointer;

typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

typedef insn_code (*code_for_push_pop_t) (machine_mode);

void riscv_frame_info::reset(void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}

/* Return the riscv_tune_info entry for the given name string.  If no entry
   is found, return nullptr when NULL_P is true; otherwise report an error
   and return a placeholder.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}

/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
	shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
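
  /* For example (illustrative): with Zbb, value 0xffffffff800fffff
     (11 zero bits at positions 20..30) rotated right by 20 becomes
     the small negative constant -2048, so it can be synthesized as
     li a0,-2048; rori a0,a0,44.  */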
  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  cost = 2;
	}
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else
	{
	  int upper_trailing_ones = ctz_hwi (~value >> 32);
	  int lower_leading_ones = clz_hwi (~value << 32);

	  if (upper_trailing_ones < 32 && lower_leading_ones < 32
	      && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	    {
	      codes[0].code = UNKNOWN;
	      /* The sign-bit might be zero, so just rotate to be safe.  */
	      codes[0].value = ((value << (32 - upper_trailing_ones))
				| ((unsigned HOST_WIDE_INT) value
				   >> (32 + upper_trailing_ones)));
	      codes[1].code = ROTATERT;
	      codes[1].value = 32 - upper_trailing_ones;
	      cost = 2;
	    }
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}

/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  return cost;
}

/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}

/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}
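
/* For example (illustrative): val == 0x5555555555555555 has
   loval == hival == 0x55555555, so the loval == hival shortcut above
   reuses the low-part register: build 0x55555555 once, shift a copy
   left by 32, and add the two halves.  */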

/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_TLSDESC: return 6; /* 4-instruction call + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}
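
/* For reference (a sketch of the psABI convention, not emitted verbatim
   here): the four-instruction SYMBOL_TLSDESC call counted above is

     .Ltd: auipc  a0, %tlsdesc_hi(sym)
	   ld     t0, %tlsdesc_load_lo(.Ltd)(a0)
	   addi   a0, a0, %tlsdesc_add_lo(.Ltd)
	   jalr   t0, t0, %tlsdesc_call(.Ltd)

   after which a0 holds the offset to add to TP.  */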

/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Entry 29 is only a filler: it duplicates entry 28, since 65536.0 is
     not representable in HFmode.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};
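
/* Each entry above is the high part of the value's IEEE-754 bit pattern,
   written as a hexadecimal floating constant; e.g. fli_value_sf[16] is
   0x3f8p20 == 0x3f800000, the single-precision encoding of 1.0.  */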

/* The spellings used for these values at the assembly level, kept
   consistent with LLVM's Zfa support:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};

/* Return the index into the FLI value table if rtx X is an immediate
   constant that can be loaded with a single FLI instruction from the Zfa
   extension.  Return -1 if not found.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P(x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* When the lower 32 bits are not all 0, it is impossible to be in
	 the table.  */
      if (ival & (unsigned HOST_WIDE_INT)0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;
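
  /* Entries 0 (-1.0) and 1 ("min") are checked directly above because
     they need not follow the ascending order of the rest of the table
     (in HFmode, entry 1 is larger than entry 2); entries 2..31 are
     sorted, so a binary search over that range suffices.  */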
  /* Perform a binary search to find target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;
  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m + 1;
      else
	r = m - 1;
    }

  return -1;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
	return false;

      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Get valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}

/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
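
/* For example (illustrative): splitting a SYMBOL_ABSOLUTE reference
   yields a HIGH/LO_SUM pair that is emitted as

     lui  t0, %hi(sym)
     lw   a0, %lo(sym)(t0)

   instead of a single unsplit symbolic load.  */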

/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
		      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting
     a decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
	 so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
	return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
	       ? DECL_ALIGN (SYMBOL_REF_DECL (x))
	       : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
	      ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      : 2*BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}

/* Return true if MODE is an enabled RVV mode.
   For example: 'RVVMF2SI' mode is disabled, whereas 'RVVM1SI' mode is
   enabled if MIN_VLEN == 32.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...)	\
  case MODE##mode:			\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is the RVV enabled tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...)	\
  case MODE##mode:				\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is the RVV enabled vls mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT)	\
  case MODE##mode:			\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if it is either of below modes.
   1. RVV vector mode.
   2. RVV tuple mode.
   3. RVV vls mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
	 || riscv_v_ext_vls_mode_p (mode);
}

static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
				      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size < scalar_unit_size)
    return 1;

  /* Ensure the vls mode is exact_div by scalar_unit_size.  */
  gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

  return vls_unit_size / scalar_unit_size;
}

static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  switch (vls_mode_size)
    {
    case 16:
      return TImode;
    case 8:
      return DImode;
    case 4:
      return SImode;
    case 2:
      return HImode;
    case 1:
      return QImode;
    default:
      gcc_unreachable ();
    }
}

/* Call from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, int scale)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
  if (riscv_v_ext_mode_p (mode))
    {
      if (TARGET_MIN_VLEN == 32)
	scale = scale / 2;
      return riscv_vector_chunks * scale;
    }
  return scale;
}

/* Call from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
{
  if (riscv_v_ext_mode_p (mode))
    {
      scalar_mode smode = GET_MODE_INNER (mode);
      int size = GET_MODE_SIZE (smode);
      int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
      if (fractional_p)
	return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
      else
	return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
    }
  /* Set the disabled RVV modes size as 1 by default.  */
  return 1;
}

/* Call from ADJUST_BYTESIZE in riscv-modes.def.  Return the correct
   BYTE size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_bytesize (machine_mode mode, int scale)
{
  if (riscv_v_ext_vector_mode_p (mode))
    {
      if (TARGET_XTHEADVECTOR)
	return BYTES_PER_RISCV_VECTOR;

      poly_int64 nunits = GET_MODE_NUNITS (mode);

      if (nunits.coeffs[0] > 8)
	return exact_div (nunits, 8);
      else if (nunits.is_constant ())
	return 1;
      else
	return poly_int64 (1, 1);
    }

  return scale;
}

/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
   PRECISION size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_precision (machine_mode mode, int scale)
{
  return riscv_v_adjust_nunits (mode, scale);
}

/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  if (th_classify_address (info, x, mode, strict_p))
    return true;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      /* RVV load/store disallow any offset.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      /* RVV load/store disallow LO_SUM.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
	 sane.  Target-independent code that creates a LO_SUM should also
	 create and verify the matching HIGH.  Target-independent code that
	 adds an offset to a LO_SUM must prove that the offset will not
	 induce a carry.  Failure to do either of these things would be
	 a bug, and we are not required to check for it here.  The RISC-V
	 backend itself should only create LO_SUMs for valid symbolic
	 constants, with the high part being either a HIGH or a copy
	 of _gp.  */
      info->symbol_type
	= riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));

    case CONST_INT:
      /* We only allow the const0_rtx for the RVV load/store.  For example:
	 +----------------------------------------------------------+
	 | li a5,0                                                  |
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(a5)  <- propagate the const 0 to a5 here.  |
	 | vs1r.v v24,0(a0)                                         |
	 +----------------------------------------------------------+
	 It can be folded to:
	 +----------------------------------------------------------+
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(zero)                                      |
	 | vs1r.v v24,0(a0)                                         |
	 +----------------------------------------------------------+
	 This behavior will benefit the underlying RVV auto vectorization.  */
      if (riscv_v_ext_mode_p (mode))
	return x == const0_rtx;

      /* Small-integer addresses don't occur very often, but they
	 are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
			    code_helper = ERROR_MARK)
{
  /* Disallow RVV modes base address.
     E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0)).  */
  if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
    return false;
  struct riscv_address_info addr;

  return riscv_classify_address (&addr, x, mode, strict_p);
}

/* Return true if hard reg REGNO can be used in compressed instructions.  */

static bool
riscv_compressed_reg_p (int regno)
{
  /* x8-x15/f8-f15 are compressible registers.  */
  return ((TARGET_RVC || TARGET_ZCA)
	  && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
	      || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
}

/* Return true if x is an unsigned 5-bit immediate scaled by 4.  */

static bool
riscv_compressed_lw_offset_p (rtx x)
{
  return (CONST_INT_P (x)
	  && (INTVAL (x) & 3) == 0
	  && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
}
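
/* That is, one of the offsets 0, 4, 8, ..., CSW_MAX_OFFSET (a 5-bit
   count scaled by 4 gives at most 31 * 4 == 124), matching the offset
   field of the compressed c.lw/c.sw encodings.  */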

/* Return true if load/store from/to address x can be compressed.  */

static bool
riscv_compressed_lw_address_p (rtx x)
{
  struct riscv_address_info addr;
  bool result = riscv_classify_address (&addr, x, GET_MODE (x),
					reload_completed);

  /* Return false if address is not compressed_reg + small_offset.  */
  if (!result
      || addr.type != ADDRESS_REG
      /* Before reload, assume all registers are OK.  */
      || (reload_completed
	  && !riscv_compressed_reg_p (REGNO (addr.reg))
	  && addr.reg != stack_pointer_rtx)
      || !riscv_compressed_lw_offset_p (addr.offset))
    return false;

  return result;
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr = {};
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    {
      /* This could be a pattern from the pic.md file.  In which case we want
	 this address to always have a cost of 3 to make it as expensive as
	 the most expensive symbol.  This prevents constant propagation from
	 preferring symbols over register plus offset.  */
      return 3;
    }

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}
1762 /* Return the number of instructions needed to load constant X.
1763 Return 0 if X isn't a valid constant. */
1766 riscv_const_insns (rtx x)
1768 enum riscv_symbol_type symbol_type;
1769 rtx offset;
1771 switch (GET_CODE (x))
1773 case HIGH:
1774 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
1775 || !riscv_split_symbol_type (symbol_type))
1776 return 0;
1778 /* This is simply an LUI. */
1779 return 1;
1781 case CONST_INT:
1783 int cost = riscv_integer_cost (INTVAL (x));
1784 /* Force complicated constants to memory. */
1785 return cost < 4 ? cost : 0;
1788 case CONST_DOUBLE:
1789 /* See if we can use FMV directly. */
1790 if (satisfies_constraint_zfli (x))
1791 return 1;
1793 /* We can use x0 to load floating-point zero. */
1794 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
1795 case CONST_VECTOR:
1797 /* TODO: This is not accurate, we will need to
1798 adapt the COST of CONST_VECTOR in the future
1799 for the following cases:
1801 - 1. const duplicate vector with element value
1802 in range of [-16, 15].
1803 - 2. const duplicate vector with element value
1804 out range of [-16, 15].
1805 - 3. const series vector.
1806 ...etc. */
1807 if (riscv_v_ext_mode_p (GET_MODE (x)))
1809 /* const series vector. */
1810 rtx base, step;
1811 if (const_vec_series_p (x, &base, &step))
1813 /* This is not accurate, we will need to adapt the COST
1814 * accurately according to BASE && STEP. */
1815 return 1;
1818 rtx elt;
1819 if (const_vec_duplicate_p (x, &elt))
1821 /* We don't allow CONST_VECTOR for DI vector on RV32
1822 system since the ELT constant value can not held
1823 within a single register to disable reload a DI
1824 register vec_duplicate into vmv.v.x. */
1825 scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
1826 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
1827 && !immediate_operand (elt, Pmode))
1828 return 0;
1829 /* Constants from -16 to 15 can be loaded with vmv.v.i.
1830 The Wc0, Wc1 constraints are already covered by the
1831 vi constraint so we do not need to check them here
1832 separately. */
1833 if (satisfies_constraint_vi (x))
1834 return 1;
1836 /* Any int/FP constants can always be broadcast from a
1837 scalar register. Loading of a floating-point
1838 constant incurs a literal-pool access. Allow this in
1839 order to increase vectorization possibilities. */
1840 int n = riscv_const_insns (elt);
1841 if (CONST_DOUBLE_P (elt))
1842 return 1 + 4; /* vfmv.v.f + memory access. */
1843 else
1845 /* We need as many insns as it takes to load the constant
1846 into a GPR and one vmv.v.x. */
1847 if (n != 0)
1848 return 1 + n;
1849 else
1850 return 1 + 4; /*vmv.v.x + memory access. */
1855 /* TODO: We may support more const vector in the future. */
1856 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
1859 case CONST:
1860 /* See if we can refer to X directly. */
1861 if (riscv_symbolic_constant_p (x, &symbol_type))
1862 return riscv_symbol_insns (symbol_type);
1864 /* Otherwise try splitting the constant into a base and offset. */
1865 split_const (x, &x, &offset);
1866 if (offset != 0)
1868 int n = riscv_const_insns (x);
1869 if (n != 0)
1870 return n + riscv_integer_cost (INTVAL (offset));
1872 return 0;
1874 case SYMBOL_REF:
1875 case LABEL_REF:
1876 return riscv_symbol_insns (riscv_classify_symbol (x));
1878 /* TODO: In RVV, a CONST_POLY_INT is computed using a csrr vlenb
1879 instruction and several scalar shift or mult instructions; the
1880 exact cost is so far unknown. We set it to 4 temporarily. */
1881 case CONST_POLY_INT:
1882 return 4;
1884 default:
1885 return 0;
1889 /* X is a doubleword constant that can be handled by splitting it into
1890 two words and loading each word separately. Return the number of
1891 instructions required to do this. */
1894 riscv_split_const_insns (rtx x)
1896 unsigned int low, high;
1898 low = riscv_const_insns (riscv_subword (x, false));
1899 high = riscv_const_insns (riscv_subword (x, true));
1900 gcc_assert (low > 0 && high > 0);
1901 return low + high;
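/* For illustration: on RV32 the DImode constant 0x100000001 splits into
   a low word of 1 and a high word of 1, each loadable with a single li,
   so the function returns 2.  */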
1904 /* Return the number of instructions needed to implement INSN,
1905 given that it loads from or stores to MEM. */
1908 riscv_load_store_insns (rtx mem, rtx_insn *insn)
1910 machine_mode mode;
1911 bool might_split_p;
1912 rtx set;
1914 gcc_assert (MEM_P (mem));
1915 mode = GET_MODE (mem);
1917 /* Try to prove that INSN does not need to be split. */
1918 might_split_p = true;
1919 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
1920 might_split_p = false;
1921 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
1923 set = single_set (insn);
1924 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
1925 might_split_p = false;
1928 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
1931 /* Emit a move from SRC to DEST. Assume that the move expanders can
1932 handle all moves if !can_create_pseudo_p (). The distinction is
1933 important because, unlike emit_move_insn, the move expanders know
1934 how to force Pmode objects into the constant pool even when the
1935 constant pool address is not itself legitimate. */
1938 riscv_emit_move (rtx dest, rtx src)
1940 return (can_create_pseudo_p ()
1941 ? emit_move_insn (dest, src)
1942 : emit_move_insn_1 (dest, src));
1945 /* Emit an instruction of the form (set TARGET SRC). */
1947 static rtx
1948 riscv_emit_set (rtx target, rtx src)
1950 emit_insn (gen_rtx_SET (target, src));
1951 return target;
1954 /* Emit an instruction of the form (set DEST (CODE X)). */
1957 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
1959 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
1962 /* Emit an instruction of the form (set DEST (CODE X Y)). */
1965 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
1967 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
1970 /* Compute (CODE X Y) and store the result in a new register
1971 of mode MODE. Return that new register. */
1973 static rtx
1974 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
1976 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
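/* Byte-swap the SImode instruction word INST if the target is
   big-endian, since RISC-V instruction encodings are always
   little-endian.  */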
1979 static rtx
1980 riscv_swap_instruction (rtx inst)
1982 gcc_assert (GET_MODE (inst) == SImode);
1983 if (BYTES_BIG_ENDIAN)
1984 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
1985 return inst;
1988 /* Copy VALUE to a register and return that register. If new pseudos
1989 are allowed, copy it into a new register, otherwise use DEST. */
1991 static rtx
1992 riscv_force_temporary (rtx dest, rtx value)
1994 if (can_create_pseudo_p ())
1995 return force_reg (Pmode, value);
1996 else
1998 riscv_emit_move (dest, value);
1999 return dest;
2003 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2004 then add CONST_INT OFFSET to the result. */
2006 static rtx
2007 riscv_unspec_address_offset (rtx base, rtx offset,
2008 enum riscv_symbol_type symbol_type)
2010 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2011 UNSPEC_ADDRESS_FIRST + symbol_type);
2012 if (offset != const0_rtx)
2013 base = gen_rtx_PLUS (Pmode, base, offset);
2014 return gen_rtx_CONST (Pmode, base);
2017 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2018 type SYMBOL_TYPE. */
2021 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2023 rtx base, offset;
2025 split_const (address, &base, &offset);
2026 return riscv_unspec_address_offset (base, offset, symbol_type);
2029 /* If OP is an UNSPEC address, return the address to which it refers,
2030 otherwise return OP itself. */
2032 static rtx
2033 riscv_strip_unspec_address (rtx op)
2035 rtx base, offset;
2037 split_const (op, &base, &offset);
2038 if (UNSPEC_ADDRESS_P (base))
2039 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2040 return op;
2043 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
2044 high part to BASE and return the result. Just return BASE otherwise.
2045 TEMP is as for riscv_force_temporary.
2047 The returned expression can be used as the first operand to a LO_SUM. */
2049 static rtx
2050 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2052 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2053 return riscv_force_temporary (temp, addr);
2056 /* Load an entry from the GOT for a TLS GD access. */
2058 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2060 if (Pmode == DImode)
2061 return gen_got_load_tls_gddi (dest, sym);
2062 else
2063 return gen_got_load_tls_gdsi (dest, sym);
2066 /* Load an entry from the GOT for a TLS IE access. */
2068 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2070 if (Pmode == DImode)
2071 return gen_got_load_tls_iedi (dest, sym);
2072 else
2073 return gen_got_load_tls_iesi (dest, sym);
2076 /* Add in the thread pointer for a TLS LE access. */
2078 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2080 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2081 if (Pmode == DImode)
2082 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2083 else
2084 return gen_tls_add_tp_lesi (dest, base, tp, sym);
2087 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2088 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2089 constant in that context and can be split into high and low parts.
2090 If so, and if LOW_OUT is nonnull, emit the high part and store the
2091 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2093 TEMP is as for riscv_force_temporary and is used to load the high
2094 part into a register.
2096 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2097 a legitimate SET_SRC for an .md pattern, otherwise the low part
2098 is guaranteed to be a legitimate address for mode MODE. */
2100 bool
2101 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2103 enum riscv_symbol_type symbol_type;
2105 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2106 || !riscv_symbolic_constant_p (addr, &symbol_type)
2107 || riscv_symbol_insns (symbol_type) == 0
2108 || !riscv_split_symbol_type (symbol_type))
2109 return false;
2111 if (low_out)
2112 switch (symbol_type)
2114 case SYMBOL_FORCE_TO_MEM:
2115 return false;
2117 case SYMBOL_ABSOLUTE:
2119 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2120 high = riscv_force_temporary (temp, high);
2121 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2123 break;
2125 case SYMBOL_PCREL:
2127 static unsigned seqno;
2128 char buf[32];
2129 rtx label;
2131 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2132 gcc_assert ((size_t) bytes < sizeof (buf));
2134 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2135 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2136 /* ??? Ugly hack to make weak symbols work. May need to change the
2137 RTL for the auipc and/or low patterns to get a better fix for
2138 this. */
2139 if (! nonzero_address_p (addr))
2140 SYMBOL_REF_WEAK (label) = 1;
2142 if (temp == NULL)
2143 temp = gen_reg_rtx (Pmode);
2145 if (Pmode == DImode)
2146 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2147 else
2148 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2150 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2152 seqno++;
2154 break;
2156 default:
2157 gcc_unreachable ();
2160 return true;
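/* For illustration, the two split forms above correspond roughly to
     SYMBOL_ABSOLUTE:  lui   tmp, %hi(sym)
                       with %lo(sym) folded into the returned LO_SUM
     SYMBOL_PCREL:     .LA0: auipc tmp, %pcrel_hi(sym)
                       with %pcrel_lo(.LA0) folded into the LO_SUM
   where .LA0 is the local label generated from SEQNO.  */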
2163 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2164 riscv_force_temporary; it is only needed when OFFSET is not a
2165 SMALL_OPERAND. */
2167 static rtx
2168 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2170 if (!SMALL_OPERAND (offset))
2172 rtx high;
2174 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2175 The addition inside the macro CONST_HIGH_PART may cause an
2176 overflow, so we need to force a sign-extension check. */
2177 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2178 offset = CONST_LOW_PART (offset);
2179 high = riscv_force_temporary (temp, high);
2180 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2182 return plus_constant (Pmode, reg, offset);
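/* For illustration, assuming OFFSET = 0x1234 (not a SMALL_OPERAND):
   CONST_HIGH_PART gives 0x1000 and CONST_LOW_PART gives 0x234, so we
   emit roughly
       lui  tmp, 0x1
       add  tmp, tmp, reg
   and return (plus tmp 0x234), whose offset fits a 12-bit immediate.  */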
2185 /* The __tls_get_addr symbol. */
2186 static GTY(()) rtx riscv_tls_symbol;
2188 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2189 the TLS symbol we are referencing, accessed through its
2190 global-dynamic GOT entry. RESULT is an RTX for the
2191 return value location. */
2193 static rtx_insn *
2194 riscv_call_tls_get_addr (rtx sym, rtx result)
2196 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2197 rtx_insn *insn;
2199 if (!riscv_tls_symbol)
2200 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2201 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2203 start_sequence ();
2205 emit_insn (riscv_got_load_tls_gd (a0, sym));
2206 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2207 gen_int_mode (RISCV_CC_BASE, SImode)));
2208 RTL_CONST_CALL_P (insn) = 1;
2209 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2210 insn = get_insns ();
2212 end_sequence ();
2214 return insn;
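/* Schematically, the sequence built above is
       la.tls.gd a0, sym
       call      __tls_get_addr
   with the returned address left in RESULT.  */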
2217 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2218 its address. The return value will be both a valid address and a valid
2219 SET_SRC (either a REG or a LO_SUM). */
2221 static rtx
2222 riscv_legitimize_tls_address (rtx loc)
2224 rtx dest, tp, tmp, a0;
2225 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2227 #if 0
2228 /* TLS copy relocs are now deprecated and should not be used. */
2229 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2230 if (!flag_pic)
2231 model = TLS_MODEL_LOCAL_EXEC;
2232 #endif
2234 switch (model)
2236 case TLS_MODEL_LOCAL_DYNAMIC:
2237 /* Rely on section anchors for the optimization that LDM TLS
2238 provides. The anchor's address is loaded with GD TLS. */
2239 case TLS_MODEL_GLOBAL_DYNAMIC:
2240 if (TARGET_TLSDESC)
2242 static unsigned seqno;
2243 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2244 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2245 dest = gen_reg_rtx (Pmode);
2247 emit_insn (gen_tlsdesc (Pmode, loc, GEN_INT (seqno)));
2248 emit_insn (gen_add3_insn (dest, a0, tp));
2249 seqno++;
2251 else
2253 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2254 dest = gen_reg_rtx (Pmode);
2255 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp,
2256 loc);
2258 break;
2260 case TLS_MODEL_INITIAL_EXEC:
2261 /* la.tls.ie; tp-relative add */
2262 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2263 tmp = gen_reg_rtx (Pmode);
2264 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2265 dest = gen_reg_rtx (Pmode);
2266 emit_insn (gen_add3_insn (dest, tmp, tp));
2267 break;
2269 case TLS_MODEL_LOCAL_EXEC:
2270 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2271 dest = gen_reg_rtx (Pmode);
2272 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2273 dest = gen_rtx_LO_SUM (Pmode, dest,
2274 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2275 break;
2277 default:
2278 gcc_unreachable ();
2280 return dest;
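/* For illustration, the access sequences generated above are,
   schematically:
     GD/LD: la.tls.gd a0, sym; call __tls_get_addr   (or, with TLS
            descriptors, the tlsdesc sequence followed by
            add dest, a0, tp)
     IE:    la.tls.ie tmp, sym
            add dest, tmp, tp
     LE:    lui dest, %tprel_hi(sym)
            add dest, dest, tp, %tprel_add(sym)
            with %tprel_lo(sym) folded into the returned LO_SUM.  */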
2283 /* If X is not a valid address for mode MODE, force it into a register. */
2285 static rtx
2286 riscv_force_address (rtx x, machine_mode mode)
2288 if (!riscv_legitimate_address_p (mode, x, false))
2290 if (can_create_pseudo_p ())
2291 return force_reg (Pmode, x);
2292 else
2294 /* It's only safe for the thunk function.
2295 Use ra as the temp register. */
2296 gcc_assert (riscv_in_thunk_func);
2297 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2298 riscv_emit_move (reg, x);
2299 return reg;
2303 return x;
2306 /* Modify base + offset so that offset fits within a compressed load/store insn
2307 and the excess is added to base. */
2309 static rtx
2310 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2312 rtx addr, high;
2313 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2314 into HIGH. */
2315 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2316 offset &= CSW_MAX_OFFSET;
2317 if (!SMALL_OPERAND (INTVAL (high)))
2318 high = force_reg (Pmode, high);
2319 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2320 addr = plus_constant (Pmode, base, offset);
2321 return addr;
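/* For illustration, assuming CSW_MAX_OFFSET is 124 (the largest
   c.lw/c.sw offset): BASE + 200 becomes NEW_BASE = BASE + 128 plus an
   offset of 72, since 72 fits the compressed encoding while 200 does
   not.  */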
2324 /* Helper for riscv_legitimize_address. Given X, return true if it
2325 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2327 These respectively represent canonical shift-add rtxs and scaled
2328 memory addresses. */
2329 static bool
2330 mem_shadd_or_shadd_rtx_p (rtx x)
2332 return ((GET_CODE (x) == ASHIFT
2333 || GET_CODE (x) == MULT)
2334 && CONST_INT_P (XEXP (x, 1))
2335 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2336 || (GET_CODE (x) == MULT
2337 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
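/* E.g. both (ashift (reg) (const_int 2)) and (mult (reg) (const_int 4))
   are accepted here; they describe the same scaled index in its
   shift-add and scaled-address canonical forms respectively.  */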
2340 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2341 be legitimized in a way that the generic machinery might not expect,
2342 return a new address, otherwise return NULL. MODE is the mode of
2343 the memory being accessed. */
2345 static rtx
2346 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2347 machine_mode mode)
2349 rtx addr;
2351 if (riscv_tls_symbol_p (x))
2352 return riscv_legitimize_tls_address (x);
2354 /* See if the address can be split into a high part and a LO_SUM. */
2355 if (riscv_split_symbol (NULL, x, mode, &addr))
2356 return riscv_force_address (addr, mode);
2358 /* Handle BASE + OFFSET. */
2359 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2360 && INTVAL (XEXP (x, 1)) != 0)
2362 rtx base = XEXP (x, 0);
2363 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2365 /* Handle the (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2366 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2367 && SMALL_OPERAND (offset))
2369 rtx index = XEXP (base, 0);
2370 rtx fp = XEXP (base, 1);
2371 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2374 /* If we were given a MULT, we must fix the constant
2375 as we're going to create the ASHIFT form. */
2376 int shift_val = INTVAL (XEXP (index, 1));
2377 if (GET_CODE (index) == MULT)
2378 shift_val = exact_log2 (shift_val);
2380 rtx reg1 = gen_reg_rtx (Pmode);
2381 rtx reg2 = gen_reg_rtx (Pmode);
2382 rtx reg3 = gen_reg_rtx (Pmode);
2383 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2384 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2385 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2387 return reg3;
2391 if (!riscv_valid_base_register_p (base, mode, false))
2392 base = copy_to_mode_reg (Pmode, base);
2393 if (optimize_function_for_size_p (cfun)
2394 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2395 && mode == SImode)
2396 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2397 possible compressed load/store. */
2398 addr = riscv_shorten_lw_offset (base, offset);
2399 else
2400 addr = riscv_add_offset (NULL, base, offset);
2401 return riscv_force_address (addr, mode);
2404 return x;
2407 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2408 is the original src mode before promotion. */
2410 void
2411 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2412 machine_mode orig_mode)
2414 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2415 machine_mode mode;
2416 int i, num_ops;
2417 rtx x;
2419 mode = GET_MODE (dest);
2420 /* We use the original mode for the riscv_build_integer call, because HImode
2421 values are given special treatment. */
2422 num_ops = riscv_build_integer (codes, value, orig_mode);
2424 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2425 && num_ops >= riscv_split_integer_cost (value))
2426 x = riscv_split_integer (value, mode);
2427 else
2429 codes[0].value = trunc_int_for_mode (codes[0].value, mode);
2430 /* Apply each binary operation to X. */
2431 x = GEN_INT (codes[0].value);
2433 for (i = 1; i < num_ops; i++)
2435 if (!can_create_pseudo_p ())
2436 x = riscv_emit_set (temp, x);
2437 else
2438 x = force_reg (mode, x);
2439 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2440 x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
2444 riscv_emit_set (dest, x);
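/* For illustration, moving the constant 0x12345678 yields the two-entry
   codes array { 0x12345000, PLUS 0x678 } and therefore emits roughly
       lui   dest, 0x12345
       addi  dest, dest, 0x678
   whereas constants that are cheaper to handle as two separate words go
   through riscv_split_integer above.  */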
2447 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2448 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2449 move_operand. */
2451 static void
2452 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2454 rtx base, offset;
2456 /* Split moves of big integers into smaller pieces. */
2457 if (splittable_const_int_operand (src, mode))
2459 riscv_move_integer (dest, dest, INTVAL (src), mode);
2460 return;
2463 if (satisfies_constraint_zfli (src))
2465 riscv_emit_set (dest, src);
2466 return;
2469 /* Split moves of symbolic constants into high/low pairs. */
2470 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2472 riscv_emit_set (dest, src);
2473 return;
2476 /* Generate the appropriate access sequences for TLS symbols. */
2477 if (riscv_tls_symbol_p (src))
2479 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2480 return;
2483 /* If we have (const (plus symbol offset)), and that expression cannot
2484 be forced into memory, load the symbol first and add in the offset. Also
2485 prefer to do this even if the constant _can_ be forced into memory, as it
2486 usually produces better code. */
2487 split_const (src, &base, &offset);
2488 if (offset != const0_rtx
2489 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2491 base = riscv_force_temporary (dest, base);
2492 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2493 return;
2496 /* Handle the format below.
2497 (const:DI
2498 (plus:DI
2499 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2500 (const_poly_int:DI [16, 16]) // <- op_1
2503 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2504 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2506 rtx dest_tmp = gen_reg_rtx (mode);
2507 rtx tmp = gen_reg_rtx (mode);
2509 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2510 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2512 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2513 return;
2516 src = force_const_mem (mode, src);
2518 /* When using explicit relocs, constant pool references are sometimes
2519 not legitimate addresses. */
2520 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2521 riscv_emit_move (dest, src);
2524 /* Report when we try to do something that requires vector when vector is
2525 disabled. This is an error of last resort and isn't very high-quality. It
2526 usually involves attempts to measure the vector length in some way. */
2528 static void
2529 riscv_report_v_required (void)
2531 static bool reported_p = false;
2533 /* Avoid reporting a slew of messages for a single oversight. */
2534 if (reported_p)
2535 return;
2537 error ("this operation requires the RVV ISA extension");
2538 inform (input_location, "you can enable RVV using the command-line"
2539 " option %<-march%>, or by using the %<target%>"
2540 " attribute or pragma");
2541 reported_p = true;
2544 /* Helper function to emit an operation for rtx_code CODE. */
2545 static void
2546 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2547 rtx op2)
2549 if (can_create_pseudo_p ())
2551 rtx result;
2552 if (GET_RTX_CLASS (code) == RTX_UNARY)
2553 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2554 else
2555 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2556 OPTAB_DIRECT);
2557 riscv_emit_move (op0, result);
2559 else
2561 rtx pat;
2562 /* The following implementation is for the prologue and epilogue.
2563 Because the prologue and epilogue cannot use pseudo registers,
2564 we can't use expand_simple_binop or expand_simple_unop. */
2565 if (GET_RTX_CLASS (code) == RTX_UNARY)
2566 pat = gen_rtx_fmt_e (code, mode, op1);
2567 else
2568 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2569 emit_insn (gen_rtx_SET (op0, pat));
2573 /* Expand a mult operation with a constant integer; the multiplicand is
2574 also used as a temporary register. */
2576 static void
2577 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2578 HOST_WIDE_INT multiplier)
2580 if (multiplier == 0)
2582 riscv_emit_move (dest, GEN_INT (0));
2583 return;
2586 bool neg_p = multiplier < 0;
2587 unsigned HOST_WIDE_INT multiplier_abs = absu_hwi (multiplier);
2589 if (multiplier_abs == 1)
2591 if (neg_p)
2592 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2593 else
2594 riscv_emit_move (dest, multiplicand);
2596 else
2598 if (pow2p_hwi (multiplier_abs))
2601 multiplicand = [BYTES_PER_RISCV_VECTOR].
2602 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2603 Sequence:
2604 csrr a5, vlenb
2605 slli a5, a5, 3
2606 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2607 Sequence:
2608 csrr a5, vlenb
2609 slli a5, a5, 3
2610 neg a5, a5
2612 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2613 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2614 if (neg_p)
2615 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2617 else if (pow2p_hwi (multiplier_abs + 1))
2620 multiplicand = [BYTES_PER_RISCV_VECTOR].
2621 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2622 Sequence:
2623 csrr a5, vlenb
2624 slli a4, a5, 3
2625 sub a5, a4, a5
2626 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2627 Sequence:
2628 csrr a5, vlenb
2629 slli a4, a5, 3
2630 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
2632 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2633 gen_int_mode (exact_log2 (multiplier_abs + 1),
2634 QImode));
2635 if (neg_p)
2636 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
2637 else
2638 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
2640 else if (pow2p_hwi (multiplier_abs - 1))
2643 multiplicand = [BYTES_PER_RISCV_VECTOR].
2644 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
2645 Sequence:
2646 csrr a5, vlenb
2647 slli a4, a5, 3
2648 add a5, a4, a5
2649 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
2650 Sequence:
2651 csrr a5, vlenb
2652 slli a4, a5, 3
2653 add a5, a4, a5
2654 neg a5, a5
2656 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2657 gen_int_mode (exact_log2 (multiplier_abs - 1),
2658 QImode));
2659 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
2660 if (neg_p)
2661 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2663 else
2665 /* We use multiplication for remaining cases. */
2666 gcc_assert (
2667 TARGET_MUL
2668 && "M-extension must be enabled to calculate the poly_int "
2669 "size/offset.");
2670 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
2671 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
2676 /* Analyze SRC and emit a const_poly_int move sequence. */
2678 void
2679 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
2681 poly_int64 value = rtx_to_poly_int64 (src);
2682 /* Use HOST_WIDE_INT instead of int, since a 32-bit type is not enough
2683 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
2684 HOST_WIDE_INT offset = value.coeffs[0];
2685 HOST_WIDE_INT factor = value.coeffs[1];
2686 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
2687 int div_factor = 0;
2688 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
2689 For any (const_poly_int:MODE [m, n]), the calculation formula is as
2690 follows.
2691 constant = m - n.
2692 When minimum VLEN = 32, poly of VLENB = (4, 4).
2693 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
2694 When minimum VLEN > 32, poly of VLENB = (8, 8).
2695 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
2696 magn = (n, n) / base.
2697 (m, n) = base * magn + constant.
2698 This calculation doesn't need div operation. */
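/* A worked example, assuming VLENB = (8, 8) and Pmode = DImode: for
   (const_poly_int:DI [20, 16]) we have base = vlenb, magn = 16 / 8 = 2
   and constant = 20 - 16 = 4, so the sequence is roughly
       csrr  tmp, vlenb
       slli  dest, tmp, 1
       addi  dest, dest, 4
   i.e. (8, 8) * 2 + 4 = (20, 16).  */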
2700 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
2701 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
2702 else
2704 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
2705 emit_move_insn (gen_lowpart (Pmode, tmp),
2706 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
2709 if (BYTES_PER_RISCV_VECTOR.is_constant ())
2711 gcc_assert (value.is_constant ());
2712 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
2713 return;
2715 else
2717 int max_power = exact_log2 (MAX_POLY_VARIANT);
2718 for (int i = 0; i <= max_power; i++)
2720 int possible_div_factor = 1 << i;
2721 if (factor % (vlenb / possible_div_factor) == 0)
2723 div_factor = possible_div_factor;
2724 break;
2727 gcc_assert (div_factor != 0);
2730 if (div_factor != 1)
2731 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
2732 gen_int_mode (exact_log2 (div_factor), QImode));
2734 riscv_expand_mult_with_const_int (mode, dest, tmp,
2735 factor / (vlenb / div_factor));
2736 HOST_WIDE_INT constant = offset - factor;
2738 if (constant == 0)
2739 return;
2740 else if (SMALL_OPERAND (constant))
2741 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2742 else
2744 /* Handle a constant value that is not a 12-bit value. */
2745 rtx high;
2747 /* Leave CONSTANT as a 12-bit value and put the excess in HIGH.
2748 The addition inside the macro CONST_HIGH_PART may cause an
2749 overflow, so we need to force a sign-extension check. */
2750 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
2751 constant = CONST_LOW_PART (constant);
2752 riscv_emit_move (tmp, high);
2753 riscv_expand_op (PLUS, mode, dest, tmp, dest);
2754 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2758 /* Adjust the scalable vector frame for the prologue and epilogue. */
2760 static void
2761 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
2763 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
2764 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
2765 rtx insn, dwarf, adjust_frame_rtx;
2767 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
2768 gen_int_mode (offset, Pmode));
2770 if (epilogue)
2771 insn = gen_add3_insn (target, target, adjust_size);
2772 else
2773 insn = gen_sub3_insn (target, target, adjust_size);
2775 insn = emit_insn (insn);
2777 RTX_FRAME_RELATED_P (insn) = 1;
2779 adjust_frame_rtx
2780 = gen_rtx_SET (target,
2781 plus_constant (Pmode, target, epilogue ? offset : -offset));
2783 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
2784 NULL_RTX);
2786 REG_NOTES (insn) = dwarf;
2789 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
2790 sequence that is valid. */
2792 bool
2793 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
2795 if (CONST_POLY_INT_P (src))
2798 Handle:
2799 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
2800 (const_int 96 [0x60])) [0 S1 A8])
2801 (const_poly_int:QI [8, 8]))
2802 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
2804 if (MEM_P (dest))
2806 emit_move_insn (dest, force_reg (mode, src));
2807 return true;
2809 poly_int64 value = rtx_to_poly_int64 (src);
2810 if (!value.is_constant () && !TARGET_VECTOR)
2812 riscv_report_v_required ();
2813 return false;
2816 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
2817 return false;
2819 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
2821 /* In RV32 system, handle (const_poly_int:QI [m, n])
2822 (const_poly_int:HI [m, n]).
2823 In RV64 system, handle (const_poly_int:QI [m, n])
2824 (const_poly_int:HI [m, n])
2825 (const_poly_int:SI [m, n]). */
2826 rtx tmp = gen_reg_rtx (Pmode);
2827 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
2828 src);
2830 else
2832 /* In RV32 system, handle (const_poly_int:SI [m, n])
2833 (const_poly_int:DI [m, n]).
2834 In RV64 system, handle (const_poly_int:DI [m, n]).
2835 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode;
2836 the offset should not exceed 4 GiB in general. */
2837 rtx tmp = gen_reg_rtx (mode);
2838 riscv_legitimize_poly_move (mode, dest, tmp, src);
2840 return true;
2842 /* Expand
2843 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2844 Expand this data movement instead of simply forbidding it, since
2845 we can improve the code generation for the following scenario
2846 with RVV auto-vectorization:
2847 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
2848 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2849 Since RVV mode and scalar mode are in different REG_CLASS,
2850 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
2851 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
2853 machine_mode vmode = GET_MODE (SUBREG_REG (src));
2854 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
2855 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
2856 /* We should be able to handle both partial and paradoxical subreg. */
2857 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
2858 scalar_mode smode = as_a<scalar_mode> (mode);
2859 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
2860 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
2861 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
2862 bool need_int_reg_p = false;
2864 if (num == 2)
2866 /* If we want to extract a 64-bit value but ELEN < 64,
2867 we use an RVV vector mode with EEW = 32 to extract
2868 the highpart and lowpart. */
2869 need_int_reg_p = smode == DFmode;
2870 smode = SImode;
2871 nunits = nunits * 2;
2874 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
2876 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
2877 rtx int_reg = dest;
2879 if (need_int_reg_p)
2881 int_reg = gen_reg_rtx (DImode);
2882 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
2885 for (unsigned int i = 0; i < num; i++)
2887 rtx result;
2888 if (num == 1)
2889 result = int_reg;
2890 else if (i == 0)
2891 result = gen_lowpart (smode, int_reg);
2892 else
2893 result = gen_reg_rtx (smode);
2895 riscv_vector::emit_vec_extract (result, v,
2896 gen_int_mode (index + i, Pmode));
2898 if (i == 1)
2900 if (UNITS_PER_WORD < mode_size)
2901 /* If Pmode = SImode and mode = DImode, we just need to
2902 extract element of index = 1 from the vector and move it
2903 into the highpart of the DEST since DEST consists of 2
2904 scalar registers. */
2905 emit_move_insn (gen_highpart (smode, int_reg), result);
2906 else
2908 rtx tmp = expand_binop (Pmode, ashl_optab,
2909 gen_lowpart (Pmode, result),
2910 gen_int_mode (32, Pmode),
2911 NULL_RTX, 0, OPTAB_DIRECT);
2912 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
2913 NULL_RTX, 0, OPTAB_DIRECT);
2914 emit_move_insn (int_reg, tmp2);
2919 if (need_int_reg_p)
2920 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
2921 else
2922 emit_move_insn (dest, int_reg);
2924 else
2925 gcc_unreachable ();
2927 return true;
2929 /* Expand
2930 (set (reg:QI target) (mem:QI (address)))
2932 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
2933 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
2934 with auto-sign/zero extend. */
2935 if (GET_MODE_CLASS (mode) == MODE_INT
2936 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
2937 && can_create_pseudo_p ()
2938 && MEM_P (src))
2940 rtx temp_reg;
2941 int zero_extend_p;
2943 temp_reg = gen_reg_rtx (word_mode);
2944 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
2945 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
2946 zero_extend_p));
2947 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
2948 return true;
2951 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
2953 rtx reg;
2955 if (GET_CODE (src) == CONST_INT)
2957 /* Apply the equivalent of PROMOTE_MODE here for constants to
2958 improve CSE. */
2959 machine_mode promoted_mode = mode;
2960 if (GET_MODE_CLASS (mode) == MODE_INT
2961 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
2962 promoted_mode = word_mode;
2964 if (splittable_const_int_operand (src, mode))
2966 reg = gen_reg_rtx (promoted_mode);
2967 riscv_move_integer (reg, reg, INTVAL (src), mode);
2969 else
2970 reg = force_reg (promoted_mode, src);
2972 if (promoted_mode != mode)
2973 reg = gen_lowpart (mode, reg);
2975 else
2976 reg = force_reg (mode, src);
2977 riscv_emit_move (dest, reg);
2978 return true;
2981 /* In order to fit NaN boxing, expand
2982 (set FP_REG (reg:HF src))
2984 (set (reg:SI/DI mask) (const_int -65536)
2985 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0)))
2986 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
2987 (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X))
2990 if (TARGET_HARD_FLOAT
2991 && !TARGET_ZFHMIN && mode == HFmode
2992 && REG_P (dest) && FP_REG_P (REGNO (dest))
2993 && REG_P (src) && !FP_REG_P (REGNO (src))
2994 && can_create_pseudo_p ())
2996 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
2997 rtx temp = gen_reg_rtx (word_mode);
2998 emit_insn (gen_extend_insn (temp,
2999 simplify_gen_subreg (HImode, src, mode, 0),
3000 word_mode, HImode, 1));
3001 if (word_mode == SImode)
3002 emit_insn (gen_iorsi3 (temp, mask, temp));
3003 else
3004 emit_insn (gen_iordi3 (temp, mask, temp));
3006 riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp),
3007 UNSPEC_FMV_SFP16_X));
3009 return true;
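/* For illustration, on RV64 the expansion above becomes roughly
       li       mask, -65536
       zext.h   temp, src        (or an equivalent shift pair)
       or       temp, mask, temp
       fmv.w.x  dest, temp
   so the upper bits of the FP register are all-ones, as NaN boxing
   of an HFmode value requires.  */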
3012 /* We need to deal with constants that would be legitimate
3013 immediate_operands but aren't legitimate move_operands. */
3014 if (CONSTANT_P (src) && !move_operand (src, mode))
3016 riscv_legitimize_const_move (mode, dest, src);
3017 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3018 return true;
3021 /* RISC-V GCC may generate a non-legitimate address because we provide
3022 patterns for optimizing access to PIC local symbols, which can make GCC
3023 generate unrecognizable instructions while optimizing. */
3025 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
3026 reload_completed))
3028 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
3031 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
3032 reload_completed))
3034 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
3037 return false;
3040 /* Return true if there is an instruction that implements CODE and accepts
3041 X as an immediate operand. */
3043 static int
3044 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3046 switch (code)
3048 case ASHIFT:
3049 case ASHIFTRT:
3050 case LSHIFTRT:
3051 /* All shift counts are truncated to a valid constant. */
3052 return true;
3054 case AND:
3055 case IOR:
3056 case XOR:
3057 case PLUS:
3058 case LT:
3059 case LTU:
3060 /* These instructions take 12-bit signed immediates. */
3061 return SMALL_OPERAND (x);
3063 case LE:
3064 /* We add 1 to the immediate and use SLT. */
3065 return SMALL_OPERAND (x + 1);
3067 case LEU:
3068 /* Likewise SLTU, but reject the always-true case. */
3069 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3071 case GE:
3072 case GEU:
3073 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3074 return x == 1;
3076 default:
3077 /* By default assume that x0 can be used for 0. */
3078 return x == 0;
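/* E.g. (le x 5) can use slti with immediate 6, so 5 is accepted for LE
   above, and (ge x 1) is handled as x > 0 against x0, so GE accepts an
   immediate of 1.  */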
3082 /* Return the cost of binary operation X, given that the instruction
3083 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3084 instructions and that the sequence for a double-word operation takes
3085 DOUBLE_INSNS instructions. */
3087 static int
3088 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3090 if (!riscv_v_ext_mode_p (GET_MODE (x))
3091 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3092 return COSTS_N_INSNS (double_insns);
3093 return COSTS_N_INSNS (single_insns);
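/* For illustration: for PLUS the callers below pass (1, 4), so on RV32
   a DImode addition is costed as four instructions (roughly
   add/sltu/add/add for the carry propagation) while an SImode addition
   is costed as one.  */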
3096 /* Return the cost of sign- or zero-extending OP. */
3098 static int
3099 riscv_extend_cost (rtx op, bool unsigned_p)
3101 if (MEM_P (op))
3102 return 0;
3104 if (unsigned_p && GET_MODE (op) == QImode)
3105 /* We can use ANDI. */
3106 return COSTS_N_INSNS (1);
3108 /* ZBA provides zext.w. */
3109 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3110 return COSTS_N_INSNS (1);
3112 /* ZBB provides zext.h, sext.b and sext.h. */
3113 if (TARGET_ZBB)
3115 if (!unsigned_p && GET_MODE (op) == QImode)
3116 return COSTS_N_INSNS (1);
3118 if (GET_MODE (op) == HImode)
3119 return COSTS_N_INSNS (1);
3122 if (!unsigned_p && GET_MODE (op) == SImode)
3123 /* We can use SEXT.W. */
3124 return COSTS_N_INSNS (1);
3126 /* We need to use a shift left and a shift right. */
3127 return COSTS_N_INSNS (2);
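/* E.g. without ZBB, sign-extending an HImode value on RV64 takes
       slli  rd, rs, 48
       srai  rd, rd, 48
   which is the two-instruction fallback above.  */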
3130 /* Implement TARGET_RTX_COSTS. */
3132 #define SINGLE_SHIFT_COST 1
3134 static bool
3135 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3136 int *total, bool speed)
3138 /* TODO: We set the RVV instruction cost to 1 by default.
3139 The cost model needs to be well analyzed and supported in the future. */
3140 if (riscv_v_ext_mode_p (mode))
3142 *total = COSTS_N_INSNS (1);
3143 return true;
3146 bool float_mode_p = FLOAT_MODE_P (mode);
3147 int cost;
3149 switch (GET_CODE (x))
3151 case SET:
3152 /* If we are called for an INSN that's a simple set of a register,
3153 then base the cost on the SET_SRC alone. */
3154 if (outer_code == INSN
3155 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3157 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
3158 return true;
3161 /* Otherwise return FALSE indicating we should recurse into both the
3162 SET_DEST and SET_SRC combining the cost of both. */
3163 return false;
3165 case CONST_INT:
3166 /* Trivial constants are checked using OUTER_CODE in case they are
3167 encodable in the insn itself without need for additional insn(s). */
3168 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3170 *total = 0;
3171 return true;
3173 /* Fall through. */
3175 case SYMBOL_REF:
3176 case LABEL_REF:
3177 case CONST_DOUBLE:
3178 /* With TARGET_SUPPORTS_WIDE_INT, a const int can't be in a CONST_DOUBLE
3179 rtl object. This odd recheck is due to the switch-case fall-through above. */
3180 if (GET_CODE (x) == CONST_DOUBLE)
3181 gcc_assert (GET_MODE (x) != VOIDmode);
3182 /* Fall through. */
3184 case CONST:
3185 /* Non-trivial CONST_INT fall through: check if it needs multiple insns. */
3186 if ((cost = riscv_const_insns (x)) > 0)
3188 /* 1. Hoisting will GCSE constants only if the TOTAL returned is non-zero.
3189 2. For constants loaded more than once, the approach so far has
3190 been to duplicate the operation rather than to CSE the constant.
3191 3. TODO: make the cost more accurate, especially if riscv_const_insns
3192 returns > 1. */
3193 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3194 *total = COSTS_N_INSNS (1);
3196 else /* The instruction will be fetched from the constant pool. */
3197 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3198 return true;
3200 case MEM:
3201 /* If the address is legitimate, return the number of
3202 instructions it needs. */
3203 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3205 /* When optimizing for size, make uncompressible 32-bit addresses
3206 more expensive so that compressible 32-bit addresses are
3207 preferred. */
3208 if ((TARGET_RVC || TARGET_ZCA)
3209 && !speed && riscv_mshorten_memrefs && mode == SImode
3210 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3211 cost++;
3213 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3214 return true;
3216 /* Otherwise use the default handling. */
3217 return false;
3219 case IF_THEN_ELSE:
3220 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3221 && reg_or_0_operand (XEXP (x, 1), mode)
3222 && sfb_alu_operand (XEXP (x, 2), mode)
3223 && comparison_operator (XEXP (x, 0), VOIDmode))
3225 /* For predicated conditional-move operations we assume the cost
3226 of a single instruction even though there are actually two. */
3227 *total = COSTS_N_INSNS (1);
3228 return true;
3230 else if (TARGET_ZICOND_LIKE
3231 && outer_code == SET
3232 && ((GET_CODE (XEXP (x, 1)) == REG
3233 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3234 || (GET_CODE (XEXP (x, 2)) == REG
3235 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3236 || (GET_CODE (XEXP (x, 1)) == REG
3237 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3238 || (GET_CODE (XEXP (x, 1)) == REG
3239 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3241 *total = COSTS_N_INSNS (1);
3242 return true;
3244 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3246 if (equality_operator (XEXP (x, 0), mode)
3247 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3249 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3250 return true;
3252 if (ordered_comparison_operator (XEXP (x, 0), mode))
3254 *total = COSTS_N_INSNS (1);
3255 return true;
3258 return false;
3260 case NOT:
3261 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3262 return false;
3264 case AND:
3265 /* slli.uw pattern for zba. */
3266 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3267 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3269 rtx and_rhs = XEXP (x, 1);
3270 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3271 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3272 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3273 && CONST_INT_P (ashift_rhs)
3274 && CONST_INT_P (and_rhs)
3275 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3276 *total = COSTS_N_INSNS (1);
3277 return true;
3279 /* bclri pattern for zbs. */
3280 if (TARGET_ZBS
3281 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3283 *total = COSTS_N_INSNS (1);
3284 return true;
3286 /* bclr pattern for zbs. */
3287 if (TARGET_ZBS
3288 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3289 && GET_CODE (XEXP (x, 0)) == ROTATE
3290 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3291 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3293 *total = COSTS_N_INSNS (1);
3294 return true;
3297 gcc_fallthrough ();
3298 case IOR:
3299 case XOR:
3300 /* andn, orn and xnor patterns for zbb. */
3301 if (TARGET_ZBB
3302 && GET_CODE (XEXP (x, 0)) == NOT)
3304 *total = riscv_binary_cost (x, 1, 2);
3305 return true;
3308 /* bset[i] and binv[i] pattern for zbs. */
3309 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3310 && TARGET_ZBS
3311 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3312 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3313 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3315 *total = COSTS_N_INSNS (1);
3316 return true;
3319 /* Double-word operations use two single-word operations. */
3320 *total = riscv_binary_cost (x, 1, 2);
3321 return false;
3323 case ZERO_EXTRACT:
3324 /* This is an SImode shift. */
3325 if (outer_code == SET
3326 && CONST_INT_P (XEXP (x, 1))
3327 && CONST_INT_P (XEXP (x, 2))
3328 && (INTVAL (XEXP (x, 2)) > 0)
3329 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3331 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3332 return true;
3334 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3335 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3336 && GET_CODE (XEXP (x, 1)) == CONST_INT
3337 && INTVAL (XEXP (x, 1)) == 1)
3339 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3340 return true;
3342 gcc_fallthrough ();
3343 case SIGN_EXTRACT:
3344 if (TARGET_XTHEADBB && outer_code == SET
3345 && CONST_INT_P (XEXP (x, 1))
3346 && CONST_INT_P (XEXP (x, 2)))
3348 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3349 return true;
3351 return false;
3353 case ASHIFT:
3354 /* bset pattern for zbs. */
3355 if (TARGET_ZBS
3356 && CONST_INT_P (XEXP (x, 0))
3357 && INTVAL (XEXP (x, 0)) == 1)
3359 *total = COSTS_N_INSNS (1);
3360 return true;
3362 gcc_fallthrough ();
3363 case ASHIFTRT:
3364 case LSHIFTRT:
3365 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3366 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3367 return false;
3369 case ABS:
3370 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3371 return false;
3373 case LO_SUM:
3374 *total = set_src_cost (XEXP (x, 0), mode, speed);
3375 return true;
3377 case LT:
3378 /* This is an SImode shift. */
3379 if (outer_code == SET && GET_MODE (x) == DImode
3380 && GET_MODE (XEXP (x, 0)) == SImode)
3382 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3383 return true;
3385 /* Fall through. */
3386 case LTU:
3387 case LE:
3388 case LEU:
3389 case GT:
3390 case GTU:
3391 case GE:
3392 case GEU:
3393 case EQ:
3394 case NE:
3395 /* Branch comparisons have VOIDmode, so use the first operand's
3396 mode instead. */
3397 mode = GET_MODE (XEXP (x, 0));
3398 if (float_mode_p)
3399 *total = tune_param->fp_add[mode == DFmode];
3400 else
3401 *total = riscv_binary_cost (x, 1, 3);
3402 return false;
3404 case UNORDERED:
3405 case ORDERED:
3406 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3407 mode = GET_MODE (XEXP (x, 0));
3408 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3409 return false;
3411 case UNEQ:
3412 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3413 mode = GET_MODE (XEXP (x, 0));
3414 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3415 return false;
3417 case LTGT:
3418 /* (FLT(A, B) || FGT(A, B)). */
3419 mode = GET_MODE (XEXP (x, 0));
3420 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3421 return false;
3423 case UNGE:
3424 case UNGT:
3425 case UNLE:
3426 case UNLT:
3427 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3428 mode = GET_MODE (XEXP (x, 0));
3429 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3430 return false;
3432 case MINUS:
3433 if (float_mode_p)
3434 *total = tune_param->fp_add[mode == DFmode];
3435 else
3436 *total = riscv_binary_cost (x, 1, 4);
3437 return false;
3439 case PLUS:
3440 /* add.uw pattern for zba. */
3441 if (TARGET_ZBA
3442 && (TARGET_64BIT && (mode == DImode))
3443 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3444 && register_operand (XEXP (XEXP (x, 0), 0),
3445 GET_MODE (XEXP (XEXP (x, 0), 0)))
3446 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3448 *total = COSTS_N_INSNS (1);
3449 return true;
3451 /* shNadd pattern for zba. */
3452 if (TARGET_ZBA
3453 && ((!TARGET_64BIT && (mode == SImode)) ||
3454 (TARGET_64BIT && (mode == DImode)))
3455 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3456 && register_operand (XEXP (XEXP (x, 0), 0),
3457 GET_MODE (XEXP (XEXP (x, 0), 0)))
3458 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3459 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3461 *total = COSTS_N_INSNS (1);
3462 return true;
3464 /* Before strength-reduction, the shNadd can be expressed as the addition
3465 of a multiplication with a power-of-two. If this case is not handled,
3466 the strength-reduction in expmed.c will calculate an inflated cost. */
3467 if (TARGET_ZBA
3468 && mode == word_mode
3469 && GET_CODE (XEXP (x, 0)) == MULT
3470 && register_operand (XEXP (XEXP (x, 0), 0),
3471 GET_MODE (XEXP (XEXP (x, 0), 0)))
3472 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3473 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3474 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3476 *total = COSTS_N_INSNS (1);
3477 return true;
3479 /* shNadd.uw pattern for zba.
3480 [(set (match_operand:DI 0 "register_operand" "=r")
3481 (plus:DI
3482 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3483 (match_operand:QI 2 "immediate_operand" "I"))
3484 (match_operand 3 "immediate_operand" ""))
3485 (match_operand:DI 4 "register_operand" "r")))]
3486 "TARGET_64BIT && TARGET_ZBA
3487 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3488 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3490 if (TARGET_ZBA
3491 && (TARGET_64BIT && (mode == DImode))
3492 && (GET_CODE (XEXP (x, 0)) == AND)
3493 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
3495 do {
3496 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3497 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3498 if (GET_CODE (and_lhs) != ASHIFT)
3499 break;
3500 if (!CONST_INT_P (and_rhs))
3501 break;
3503 rtx ashift_rhs = XEXP (and_lhs, 1);
3505 if (!CONST_INT_P (ashift_rhs)
3506 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3507 break;
3509 if (CONST_INT_P (and_rhs)
3510 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3512 *total = COSTS_N_INSNS (1);
3513 return true;
3515 } while (false);
3518 if (float_mode_p)
3519 *total = tune_param->fp_add[mode == DFmode];
3520 else
3521 *total = riscv_binary_cost (x, 1, 4);
3522 return false;
3524 case NEG:
3526 rtx op = XEXP (x, 0);
3527 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3529 *total = (tune_param->fp_mul[mode == DFmode]
3530 + set_src_cost (XEXP (op, 0), mode, speed)
3531 + set_src_cost (XEXP (op, 1), mode, speed)
3532 + set_src_cost (XEXP (op, 2), mode, speed));
3533 return true;
3537 if (float_mode_p)
3538 *total = tune_param->fp_add[mode == DFmode];
3539 else
3540 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3541 return false;
3543 case MULT:
3544 if (float_mode_p)
3545 *total = tune_param->fp_mul[mode == DFmode];
3546 else if (!TARGET_MUL)
3547 /* Estimate the cost of a library call. */
3548 *total = COSTS_N_INSNS (speed ? 32 : 6);
3549 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3550 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3551 else if (!speed)
3552 *total = COSTS_N_INSNS (1);
3553 else
3554 *total = tune_param->int_mul[mode == DImode];
3555 return false;
3557 case DIV:
3558 case SQRT:
3559 case MOD:
3560 if (float_mode_p)
3562 *total = tune_param->fp_div[mode == DFmode];
3563 return false;
3565 /* Fall through. */
3567 case UDIV:
3568 case UMOD:
3569 if (!TARGET_DIV)
3570 /* Estimate the cost of a library call. */
3571 *total = COSTS_N_INSNS (speed ? 32 : 6);
3572 else if (speed)
3573 *total = tune_param->int_div[mode == DImode];
3574 else
3575 *total = COSTS_N_INSNS (1);
3576 return false;
3578 case ZERO_EXTEND:
3579 /* This is an SImode shift. */
3580 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3582 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3583 return true;
3585 /* Fall through. */
3586 case SIGN_EXTEND:
3587 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
3588 return false;
3590 case BSWAP:
3591 if (TARGET_ZBB)
3593 /* RISC-V only defines rev8 for XLEN, so we will need an extra
3594 shift-right instruction for smaller modes. */
3595 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
3596 return true;
3598 return false;
3600 case FLOAT:
3601 case UNSIGNED_FLOAT:
3602 case FIX:
3603 case FLOAT_EXTEND:
3604 case FLOAT_TRUNCATE:
3605 *total = tune_param->fp_add[mode == DFmode];
3606 return false;
3608 case FMA:
3609 *total = (tune_param->fp_mul[mode == DFmode]
3610 + set_src_cost (XEXP (x, 0), mode, speed)
3611 + set_src_cost (XEXP (x, 1), mode, speed)
3612 + set_src_cost (XEXP (x, 2), mode, speed));
3613 return true;
3615 case UNSPEC:
3616 if (XINT (x, 1) == UNSPEC_AUIPC)
3618 /* Make AUIPC cheap to avoid spilling its result to the stack. */
3619 *total = 1;
3620 return true;
3622 return false;
3624 default:
3625 return false;
3629 /* Implement TARGET_ADDRESS_COST. */
3631 static int
3632 riscv_address_cost (rtx addr, machine_mode mode,
3633 addr_space_t as ATTRIBUTE_UNUSED,
3634 bool speed ATTRIBUTE_UNUSED)
3636 /* When optimizing for size, make uncompressible 32-bit addresses more
3637 expensive so that compressible 32-bit addresses are preferred. */
3638 if ((TARGET_RVC || TARGET_ZCA)
3639 && !speed && riscv_mshorten_memrefs && mode == SImode
3640 && !riscv_compressed_lw_address_p (addr))
3641 return riscv_address_insns (addr, mode, false) + 1;
3642 return riscv_address_insns (addr, mode, false);
3645 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
3646 calculation for conditional branches: one unit is considered the cost
3647 of microarchitecture-dependent actual branch execution, and is therefore
3648 multiplied by BRANCH_COST; any remaining units are considered fixed
3649 branch overhead. Branches on a floating-point condition incur an extra
3650 instruction cost as they will be split into an FCMP operation followed
3651 by a branch on an integer condition. */
3653 static int
3654 riscv_insn_cost (rtx_insn *insn, bool speed)
3656 rtx x = PATTERN (insn);
3657 int cost = pattern_cost (x, speed);
3659 if (JUMP_P (insn))
3661 if (GET_CODE (x) == PARALLEL)
3662 x = XVECEXP (x, 0, 0);
3663 if (GET_CODE (x) == SET
3664 && GET_CODE (SET_DEST (x)) == PC
3665 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
3667 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
3668 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
3669 cost += COSTS_N_INSNS (1);
3672 return cost;
3675 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
3676 but we consider cost units of branch instructions equal to cost units of
3677 other instructions. */
3679 static unsigned int
3680 riscv_max_noce_ifcvt_seq_cost (edge e)
3682 bool predictable_p = predictable_edge_p (e);
3684 if (predictable_p)
3686 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
3687 return param_max_rtl_if_conversion_predictable_cost;
3689 else
3691 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
3692 return param_max_rtl_if_conversion_unpredictable_cost;
3695 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
3698 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
3699 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
3700 by our actual conditional branch cost, observing that our branches test
3701 conditions directly, so there is no preparatory extra condition-set
3702 instruction. */
3704 static bool
3705 riscv_noce_conversion_profitable_p (rtx_insn *seq,
3706 struct noce_if_info *if_info)
3708 struct noce_if_info riscv_if_info = *if_info;
3710 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
3711 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
3713 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
3714 to emit a conditional set operation on DImode output it comes up
3715 with a sequence such as:
3717 (insn 26 0 27 (set (reg:SI 140)
3718 (eq:SI (reg/v:DI 137 [ c ])
3719 (const_int 0 [0]))) 302 {*seq_zero_disi}
3720 (nil))
3721 (insn 27 26 28 (set (reg:DI 139)
3722 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
3723 (nil))
3725 because our `cstore<mode>4' pattern expands to an insn that gives
3726 an SImode output. The output of a conditional set is a 0 or 1 boolean,
3727 so it is valid as input in any scalar integer mode, and therefore
3728 combine later folds the zero extend operation into an equivalent
3729 conditional set operation that produces a DImode output, however
3730 this redundant zero extend operation counts towards the cost of
3731 the replacement sequence. Compensate for that by incrementing the
3732 cost of the original sequence as well as the maximum sequence cost
3733 accordingly. Likewise for sign extension. */
3734 rtx last_dest = NULL_RTX;
3735 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
3737 if (!NONDEBUG_INSN_P (insn))
3738 continue;
3740 rtx x = PATTERN (insn);
3741 if (NONJUMP_INSN_P (insn)
3742 && GET_CODE (x) == SET)
3744 rtx src = SET_SRC (x);
3745 enum rtx_code code = GET_CODE (src);
3746 if (last_dest != NULL_RTX
3747 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
3748 && REG_P (XEXP (src, 0))
3749 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
3751 riscv_if_info.original_cost += COSTS_N_INSNS (1);
3752 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
3754 last_dest = NULL_RTX;
3755 rtx dest = SET_DEST (x);
3756 if (COMPARISON_P (src)
3757 && REG_P (dest)
3758 && GET_MODE (dest) == SImode)
3759 last_dest = dest;
3761 else
3762 last_dest = NULL_RTX;
3765 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
3768 /* Return one word of double-word value OP. HIGH_P is true to select the
3769 high part or false to select the low part. */
3772 riscv_subword (rtx op, bool high_p)
3774 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
3775 machine_mode mode = GET_MODE (op);
3777 if (mode == VOIDmode)
3778 mode = TARGET_64BIT ? TImode : DImode;
3780 if (MEM_P (op))
3781 return adjust_address (op, word_mode, byte);
3783 if (REG_P (op))
3784 gcc_assert (!FP_REG_RTX_P (op));
3786 return simplify_gen_subreg (word_mode, op, mode, byte);
3789 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
3791 bool
3792 riscv_split_64bit_move_p (rtx dest, rtx src)
3794 if (TARGET_64BIT)
3795 return false;
3797 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
3798 if (satisfies_constraint_zfli (src))
3799 return false;
3801 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
3802 of zeroing an FPR with FCVT.D.W. */
3803 if (TARGET_DOUBLE_FLOAT
3804 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
3805 || (FP_REG_RTX_P (dest) && MEM_P (src))
3806 || (FP_REG_RTX_P (src) && MEM_P (dest))
3807 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
3808 return false;
3810 return true;
3813 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
3814 this function handles 64-bit moves for which riscv_split_64bit_move_p
3815 holds. For 64-bit targets, this function handles 128-bit moves. */
3817 void
3818 riscv_split_doubleword_move (rtx dest, rtx src)
3820 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
3821 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
3823 if (FP_REG_RTX_P (dest))
3825 rtx low_src = riscv_subword (src, false);
3826 rtx high_src = riscv_subword (src, true);
3828 if (TARGET_ZFA)
3829 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
3830 else
3831 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
3832 return;
3834 if (FP_REG_RTX_P (src))
3836 rtx low_dest = riscv_subword (dest, false);
3837 rtx high_dest = riscv_subword (dest, true);
3839 if (TARGET_ZFA)
3841 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
3842 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
3843 return;
3845 else
3847 emit_insn (gen_th_fmv_x_w (low_dest, src));
3848 emit_insn (gen_th_fmv_x_hw (high_dest, src));
3850 return;
3854 /* The operation can be split into two normal moves. Decide in
3855 which order to do them. */
3856 rtx low_dest = riscv_subword (dest, false);
3857 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
3859 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
3860 riscv_emit_move (low_dest, riscv_subword (src, false));
3862 else
3864 riscv_emit_move (low_dest, riscv_subword (src, false));
3865 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
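/* For illustration (a sketch, assuming RV32 and a DImode move from the
   register pair a0/a1 to a2/a3), the fallback above emits

	mv	a2,a0
	mv	a3,a1

   and moves the high word first whenever the low destination overlaps
   SRC, so no input word is clobbered before it has been read.  */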
3869 /* Return the appropriate instructions to move SRC into DEST. Assume
3870 that SRC is operand 1 and DEST is operand 0. */
3872 const char *
3873 riscv_output_move (rtx dest, rtx src)
3875 enum rtx_code dest_code, src_code;
3876 machine_mode mode;
3877 bool dbl_p;
3878 unsigned width;
3879 const char *insn;
3881 if ((insn = th_output_move (dest, src)))
3882 return insn;
3884 dest_code = GET_CODE (dest);
3885 src_code = GET_CODE (src);
3886 mode = GET_MODE (dest);
3887 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
3888 width = GET_MODE_SIZE (mode).to_constant ();
3890 if (dbl_p && riscv_split_64bit_move_p (dest, src))
3891 return "#";
3893 if (dest_code == REG && GP_REG_P (REGNO (dest)))
3895 if (src_code == REG && FP_REG_P (REGNO (src)))
3896 switch (width)
3898 case 2:
3899 if (TARGET_ZFHMIN)
3900 return "fmv.x.h\t%0,%1";
3901 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
3902 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
3903 case 4:
3904 return "fmv.x.s\t%0,%1";
3905 case 8:
3906 return "fmv.x.d\t%0,%1";
3909 if (src_code == MEM)
3910 switch (width)
3912 case 1: return "lbu\t%0,%1";
3913 case 2: return "lhu\t%0,%1";
3914 case 4: return "lw\t%0,%1";
3915 case 8: return "ld\t%0,%1";
3918 if (src_code == CONST_INT)
3920 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
3921 return "li\t%0,%1";
3923 if (TARGET_ZBS
3924 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
3925 return "bseti\t%0,zero,%S1";
3927 /* Should never reach here. */
3928 abort ();
3931 if (src_code == HIGH)
3932 return "lui\t%0,%h1";
3934 if (symbolic_operand (src, VOIDmode))
3935 switch (riscv_classify_symbolic_expression (src))
3937 case SYMBOL_GOT_DISP: return "la\t%0,%1";
3938 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
3939 case SYMBOL_PCREL: return "lla\t%0,%1";
3940 default: gcc_unreachable ();
3943 if ((src_code == REG && GP_REG_P (REGNO (src)))
3944 || (src == CONST0_RTX (mode)))
3946 if (dest_code == REG)
3948 if (GP_REG_P (REGNO (dest)))
3949 return "mv\t%0,%z1";
3951 if (FP_REG_P (REGNO (dest)))
3952 switch (width)
3954 case 2:
3955 if (TARGET_ZFHMIN)
3956 return "fmv.h.x\t%0,%z1";
3957 /* The high 16 bits should be all ones; otherwise the hardware will treat
3958    the value as an n-bit canonical NaN, but that does not matter for soft-float. */
3959 return "fmv.s.x\t%0,%1";
3960 case 4:
3961 return "fmv.s.x\t%0,%z1";
3962 case 8:
3963 if (TARGET_64BIT)
3964 return "fmv.d.x\t%0,%z1";
3965 /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w. */
3966 gcc_assert (src == CONST0_RTX (mode));
3967 return "fcvt.d.w\t%0,x0";
3970 if (dest_code == MEM)
3971 switch (width)
3973 case 1: return "sb\t%z1,%0";
3974 case 2: return "sh\t%z1,%0";
3975 case 4: return "sw\t%z1,%0";
3976 case 8: return "sd\t%z1,%0";
3979 if (src_code == REG && FP_REG_P (REGNO (src)))
3981 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3982 switch (width)
3984 case 2:
3985 if (TARGET_ZFH)
3986 return "fmv.h\t%0,%1";
3987 return "fmv.s\t%0,%1";
3988 case 4:
3989 return "fmv.s\t%0,%1";
3990 case 8:
3991 return "fmv.d\t%0,%1";
3994 if (dest_code == MEM)
3995 switch (width)
3997 case 2:
3998 return "fsh\t%1,%0";
3999 case 4:
4000 return "fsw\t%1,%0";
4001 case 8:
4002 return "fsd\t%1,%0";
4005 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4007 if (src_code == MEM)
4008 switch (width)
4010 case 2:
4011 return "flh\t%0,%1";
4012 case 4:
4013 return "flw\t%0,%1";
4014 case 8:
4015 return "fld\t%0,%1";
4018 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
4019 switch (width)
4021 case 2:
4022 return "fli.h\t%0,%1";
4023 case 4:
4024 return "fli.s\t%0,%1";
4025 case 8:
4026 return "fli.d\t%0,%1";
4029 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
4031 /* We only want a single read of the full vector register size (VLENB) after reload. */
4032 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
4033 return "csrr\t%0,vlenb";
4035 gcc_unreachable ();
4038 const char *
4039 riscv_output_return ()
4041 if (cfun->machine->naked_p)
4042 return "";
4044 return "ret";
4048 /* Return true if CMP1 is a suitable second operand for integer ordering
4049 test CODE. See also the *sCC patterns in riscv.md. */
4051 static bool
4052 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4054 switch (code)
4056 case GT:
4057 case GTU:
4058 return reg_or_0_operand (cmp1, VOIDmode);
4060 case GE:
4061 case GEU:
4062 return cmp1 == const1_rtx;
4064 case LT:
4065 case LTU:
4066 return arith_operand (cmp1, VOIDmode);
4068 case LE:
4069 return sle_operand (cmp1, VOIDmode);
4071 case LEU:
4072 return sleu_operand (cmp1, VOIDmode);
4074 default:
4075 gcc_unreachable ();
4079 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4080 integer ordering test *CODE, or if an equivalent combination can
4081 be formed by adjusting *CODE and *CMP1. When returning true, update
4082 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4083 them alone. */
4085 static bool
4086 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4087 machine_mode mode)
4089 HOST_WIDE_INT plus_one;
4091 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4092 return true;
4094 if (CONST_INT_P (*cmp1))
4095 switch (*code)
4097 case LE:
4098 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4099 if (INTVAL (*cmp1) < plus_one)
4101 *code = LT;
4102 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4103 return true;
4105 break;
4107 case LEU:
4108 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4109 if (plus_one != 0)
4111 *code = LTU;
4112 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4113 return true;
4115 break;
4117 default:
4118 break;
4120 return false;
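/* For example (a sketch): (x <= 15) has no direct set-on-condition
   instruction, so the LE case above rewrites it as (x < 16), with 16
   forced into a register so the LT patterns apply.  */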
4123 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4124 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4125 is nonnull, it's OK to set TARGET to the inverse of the result and
4126 flip *INVERT_PTR instead. */
4128 static void
4129 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4130 rtx target, rtx cmp0, rtx cmp1)
4132 machine_mode mode;
4134 /* First see if there is a RISC-V instruction that can do this operation.
4135 If not, try doing the same for the inverse operation. If that also
4136 fails, force CMP1 into a register and try again. */
4137 mode = GET_MODE (cmp0);
4138 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4139 riscv_emit_binary (code, target, cmp0, cmp1);
4140 else
4142 enum rtx_code inv_code = reverse_condition (code);
4143 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4145 cmp1 = force_reg (mode, cmp1);
4146 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4148 else if (invert_ptr == 0)
4150 rtx inv_target = riscv_force_binary (word_mode,
4151 inv_code, cmp0, cmp1);
4152 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4154 else
4156 *invert_ptr = !*invert_ptr;
4157 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4162 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4163 The register will have the same mode as CMP0. */
4165 static rtx
4166 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4168 if (cmp1 == const0_rtx)
4169 return cmp0;
4171 return expand_binop (GET_MODE (cmp0), sub_optab,
4172 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
4175 /* Helper function for riscv_extend_comparands to sign-extend OP.
4176    However, if OP is an SImode subreg promoted from an inner DImode, such as
4177 (subreg/s/v:SI (reg/v:DI) 0)
4178 just peel off the SUBREG to get DI, avoiding extraneous extension. */
4180 static void
4181 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4183 if (GET_CODE (*op) == SUBREG
4184 && SUBREG_PROMOTED_VAR_P (*op)
4185 && SUBREG_PROMOTED_SIGNED_P (*op)
4186 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4187 == GET_MODE_SIZE (word_mode)))
4188 *op = XEXP (*op, 0);
4189 else
4190 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4193 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4195 static void
4196 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4198 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4199 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4201 /* It is more profitable to zero-extend QImode values. But not if the
4202    first operand has already been sign-extended, and the second one is
4203    a constant or has already been sign-extended also. */
4204 if (unsigned_condition (code) == code
4205 && (GET_MODE (*op0) == QImode
4206 && ! (GET_CODE (*op0) == SUBREG
4207 && SUBREG_PROMOTED_VAR_P (*op0)
4208 && SUBREG_PROMOTED_SIGNED_P (*op0)
4209 && (CONST_INT_P (*op1)
4210 || (GET_CODE (*op1) == SUBREG
4211 && SUBREG_PROMOTED_VAR_P (*op1)
4212 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4214 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4215 if (CONST_INT_P (*op1))
4216 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4217 else
4218 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4220 else
4222 riscv_sign_extend_if_not_subreg_prom (op0);
4224 if (*op1 != const0_rtx)
4225 riscv_sign_extend_if_not_subreg_prom (op1);
4230 /* Convert a comparison into something that can be used in a branch or
4231 conditional move. On entry, *OP0 and *OP1 are the values being
4232 compared and *CODE is the code used to compare them.
4234 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4235 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4236 emitted. */
4238 static void
4239 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4240 bool need_eq_ne_p = false)
4242 if (need_eq_ne_p)
4244 rtx cmp_op0 = *op0;
4245 rtx cmp_op1 = *op1;
4246 if (*code == EQ || *code == NE)
4248 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4249 *op1 = const0_rtx;
4250 return;
4252 gcc_unreachable ();
4255 if (splittable_const_int_operand (*op1, VOIDmode))
4257 HOST_WIDE_INT rhs = INTVAL (*op1);
4259 if (*code == EQ || *code == NE)
4261 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4262 if (SMALL_OPERAND (-rhs))
4264 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4265 GEN_INT (-rhs));
4266 *op1 = const0_rtx;
4269 else
4271 static const enum rtx_code mag_comparisons[][2] = {
4272 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4275 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4276 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4278 HOST_WIDE_INT new_rhs;
4279 bool increment = *code == mag_comparisons[i][0];
4280 bool decrement = *code == mag_comparisons[i][1];
4281 if (!increment && !decrement)
4282 continue;
4284 new_rhs = rhs + (increment ? 1 : -1);
4285 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4286 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4287 && (rhs < 0) == (new_rhs < 0))
4289 *op1 = GEN_INT (new_rhs);
4290 *code = mag_comparisons[i][increment];
4292 break;
4297 riscv_extend_comparands (*code, op0, op1);
4299 *op0 = force_reg (word_mode, *op0);
4300 if (*op1 != const0_rtx)
4301 *op1 = force_reg (word_mode, *op1);
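/* A worked example of the transform above (a sketch): "a0 == 2048" has
   an out-of-range immediate, so it is rewritten as "a0 - 2048 == 0",
   which the branch expander can emit as

	addi	t0,a0,-2048
	beq	t0,zero,.L1  */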
4304 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4306 static void
4307 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4308 bool *invert_ptr = nullptr)
4310 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4311 enum rtx_code fp_code = *code;
4312 *code = NE;
4314 switch (fp_code)
4316 case UNORDERED:
4317 *code = EQ;
4318 /* Fall through. */
4320 case ORDERED:
4321 /* a == a && b == b */
4322 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4323 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4324 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4325 *op1 = const0_rtx;
4326 break;
4328 case UNEQ:
4329 /* ordered(a, b) > (a == b) */
4330 *code = EQ;
4331 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4332 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4333 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4334 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4335 break;
4337 #define UNORDERED_COMPARISON(CODE, CMP) \
4338 case CODE: \
4339 *code = EQ; \
4340 *op0 = gen_reg_rtx (word_mode); \
4341 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4342 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4343 else if (GET_MODE (cmp_op0) == SFmode) \
4344 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4345 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4346 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4347 else if (GET_MODE (cmp_op0) == DFmode) \
4348 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4349 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4350 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4351 else if (GET_MODE (cmp_op0) == HFmode) \
4352 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4353 else \
4354 gcc_unreachable (); \
4355 *op1 = const0_rtx; \
4356 break;
4358 case UNLT:
4359 std::swap (cmp_op0, cmp_op1);
4360 gcc_fallthrough ();
4362 UNORDERED_COMPARISON(UNGT, le)
4364 case UNLE:
4365 std::swap (cmp_op0, cmp_op1);
4366 gcc_fallthrough ();
4368 UNORDERED_COMPARISON(UNGE, lt)
4369 #undef UNORDERED_COMPARISON
4371 case NE:
4372 fp_code = EQ;
4373 if (invert_ptr != nullptr)
4374 *invert_ptr = !*invert_ptr;
4375 else
4377 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4378 cmp_op1 = const0_rtx;
4380 gcc_fallthrough ();
4382 case EQ:
4383 case LE:
4384 case LT:
4385 case GE:
4386 case GT:
4387 /* We have instructions for these cases. */
4388 *code = fp_code;
4389 *op0 = cmp_op0;
4390 *op1 = cmp_op1;
4391 break;
4393 case LTGT:
4394 /* (a < b) | (a > b) */
4395 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4396 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4397 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4398 *op1 = const0_rtx;
4399 break;
4401 default:
4402 gcc_unreachable ();
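/* For illustration (a sketch, assuming SFmode operands in fa0/fa1):
   the LTGT case above typically expands to

	flt.s	t0,fa0,fa1
	flt.s	t1,fa1,fa0
	or	t2,t0,t1

   with the final NE test done against zero.  */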
4406 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4408 void
4409 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4411 riscv_extend_comparands (code, &op0, &op1);
4412 op0 = force_reg (word_mode, op0);
4414 if (code == EQ || code == NE)
4416 rtx zie = riscv_zero_if_equal (op0, op1);
4417 riscv_emit_binary (code, target, zie, const0_rtx);
4419 else
4420 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4423 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4425 void
4426 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4427 bool *invert_ptr)
4429 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4431 machine_mode mode = GET_MODE (target);
4432 if (mode != word_mode)
4434 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4435 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4437 else
4438 riscv_emit_binary (code, target, op0, op1);
4441 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4443 void
4444 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4446 if (FLOAT_MODE_P (GET_MODE (op1)))
4447 riscv_emit_float_compare (&code, &op0, &op1);
4448 else
4449 riscv_emit_int_compare (&code, &op0, &op1);
4451 if (FLOAT_MODE_P (GET_MODE (op0)))
4453 op0 = riscv_force_binary (word_mode, code, op0, op1);
4454 op1 = const0_rtx;
4455 code = NE;
4458 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4459 emit_jump_insn (gen_condjump (condition, label));
4462 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
4463    Return false if expansion fails. */
4465 bool
4466 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4468 machine_mode mode = GET_MODE (dest);
4469 rtx_code code = GET_CODE (op);
4470 rtx op0 = XEXP (op, 0);
4471 rtx op1 = XEXP (op, 1);
4473 if (((TARGET_ZICOND_LIKE
4474 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4475 && (GET_MODE_CLASS (mode) == MODE_INT))
4476 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4478 machine_mode mode0 = GET_MODE (op0);
4479 machine_mode mode1 = GET_MODE (op1);
4481 /* An integer comparison must be comparing WORD_MODE objects. We
4482 must enforce that so that we don't strip away a sign_extension
4483 thinking it is unnecessary. We might consider using
4484 riscv_extend_operands if they are not already properly extended. */
4485 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4486 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4487 return false;
4489 /* In the fallback generic case use MODE rather than WORD_MODE for
4490 the output of the SCC instruction, to match the mode of the NEG
4491 operation below. The output of SCC is a 0-or-1 boolean, so it is
4492 valid as input in any scalar integer mode. */
4493 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4494 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4495 ? word_mode : mode);
4496 bool invert = false;
4498 /* Canonicalize the comparison. It must be an equality comparison
4499 of integer operands, or with SFB it can be any comparison of
4500 integer operands. If it isn't, then emit an SCC instruction
4501 so that we can then use an equality comparison against zero. */
4502 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4503 || !INTEGRAL_MODE_P (mode0))
4505 bool *invert_ptr = nullptr;
4507 /* If riscv_expand_int_scc inverts the condition, then it will
4508 flip the value of INVERT. We need to know where so that
4509 we can adjust it for our needs. */
4510 if (code == LE || code == LEU || code == GE || code == GEU)
4511 invert_ptr = &invert;
4513 /* Emit an SCC-like instruction into a temporary so that we can
4514 use an EQ/NE comparison. We can support both FP and integer
4515 conditional moves. */
4516 if (INTEGRAL_MODE_P (mode0))
4517 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4518 else if (FLOAT_MODE_P (mode0)
4519 && fp_scc_comparison (op, GET_MODE (op)))
4520 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4521 else
4522 return false;
4524 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4526 /* We've generated a new comparison. Update the local variables. */
4527 code = GET_CODE (op);
4528 op0 = XEXP (op, 0);
4529 op1 = XEXP (op, 1);
4531 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4532 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4534 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4536 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4537 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4539 /* The expander is a bit loose in its specification of the true
4540 arm of the conditional move. That allows us to support more
4541 cases for extensions which are more general than SFB. But it
4542 does mean we need to force CONS into a register at this point. */
4543 cons = force_reg (mode, cons);
4544 /* With XTheadCondMov we need to force ALT into a register too. */
4545 alt = force_reg (mode, alt);
4546 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4547 cons, alt)));
4548 return true;
4550 else if (!TARGET_ZICOND_LIKE)
4552 if (invert)
4553 std::swap (cons, alt);
4555 rtx reg1 = gen_reg_rtx (mode);
4556 rtx reg2 = gen_reg_rtx (mode);
4557 rtx reg3 = gen_reg_rtx (mode);
4558 rtx reg4 = gen_reg_rtx (mode);
4560 riscv_emit_unary (NEG, reg1, tmp);
4561 riscv_emit_binary (AND, reg2, reg1, cons);
4562 riscv_emit_unary (NOT, reg3, reg1);
4563 riscv_emit_binary (AND, reg4, reg3, alt);
4564 riscv_emit_binary (IOR, dest, reg2, reg4);
4565 return true;
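/* The fallback above is the usual branchless select: TMP is a 0/1
   condition value, so NEG turns it into an all-zeros or all-ones mask
   and (mask & CONS) | (~mask & ALT) yields CONS when the condition
   holds and ALT otherwise.  */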
4567 /* 0, reg or 0, imm */
4568 else if (cons == CONST0_RTX (mode)
4569 && (REG_P (alt)
4570 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
4572 riscv_emit_int_compare (&code, &op0, &op1, true);
4573 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4574 alt = force_reg (mode, alt);
4575 emit_insn (gen_rtx_SET (dest,
4576 gen_rtx_IF_THEN_ELSE (mode, cond,
4577 cons, alt)));
4578 return true;
4580 /* imm, imm */
4581 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
4582 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4584 riscv_emit_int_compare (&code, &op0, &op1, true);
4585 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4586 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
4587 alt = force_reg (mode, gen_int_mode (t, mode));
4588 emit_insn (gen_rtx_SET (dest,
4589 gen_rtx_IF_THEN_ELSE (mode, cond,
4590 CONST0_RTX (mode),
4591 alt)));
4592 /* CONS might not fit into a signed 12 bit immediate suitable
4593 for an addi instruction. If that's the case, force it
4594 into a register. */
4595 if (!SMALL_OPERAND (INTVAL (cons)))
4596 cons = force_reg (mode, cons);
4597 riscv_emit_binary (PLUS, dest, dest, cons);
4598 return true;
4600 /* imm, reg */
4601 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
4603 /* Optimize for register value of 0. */
4604 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
4606 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4607 cons = force_reg (mode, cons);
4608 emit_insn (gen_rtx_SET (dest,
4609 gen_rtx_IF_THEN_ELSE (mode, cond,
4610 cons, alt)));
4611 return true;
4614 riscv_emit_int_compare (&code, &op0, &op1, true);
4615 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4617 rtx temp1 = gen_reg_rtx (mode);
4618 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
4620 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
4621 suitable for an addi instruction. If that's the case, force it
4622 into a register. */
4623 if (!SMALL_OPERAND (INTVAL (temp2)))
4624 temp2 = force_reg (mode, temp2);
4625 if (!SMALL_OPERAND (INTVAL (cons)))
4626 cons = force_reg (mode, cons);
4628 riscv_emit_binary (PLUS, temp1, alt, temp2);
4629 emit_insn (gen_rtx_SET (dest,
4630 gen_rtx_IF_THEN_ELSE (mode, cond,
4631 CONST0_RTX (mode),
4632 temp1)));
4633 riscv_emit_binary (PLUS, dest, dest, cons);
4634 return true;
4636 /* reg, 0 or imm, 0 */
4637 else if ((REG_P (cons)
4638 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
4639 && alt == CONST0_RTX (mode))
4641 riscv_emit_int_compare (&code, &op0, &op1, true);
4642 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4643 cons = force_reg (mode, cons);
4644 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4645 cons, alt)));
4646 return true;
4648 /* reg, imm */
4649 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4651 /* Optimize for register value of 0. */
4652 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
4654 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4655 alt = force_reg (mode, alt);
4656 emit_insn (gen_rtx_SET (dest,
4657 gen_rtx_IF_THEN_ELSE (mode, cond,
4658 cons, alt)));
4659 return true;
4662 riscv_emit_int_compare (&code, &op0, &op1, true);
4663 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4665 rtx temp1 = gen_reg_rtx (mode);
4666 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
4668 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
4669 suitable for an addi instruction. If that's the case, force it
4670 into a register. */
4671 if (!SMALL_OPERAND (INTVAL (temp2)))
4672 temp2 = force_reg (mode, temp2);
4673 if (!SMALL_OPERAND (INTVAL (alt)))
4674 alt = force_reg (mode, alt);
4676 riscv_emit_binary (PLUS, temp1, cons, temp2);
4677 emit_insn (gen_rtx_SET (dest,
4678 gen_rtx_IF_THEN_ELSE (mode, cond,
4679 temp1,
4680 CONST0_RTX (mode))));
4681 riscv_emit_binary (PLUS, dest, dest, alt);
4682 return true;
4684 /* reg, reg */
4685 else if (REG_P (cons) && REG_P (alt))
4687 if ((code == EQ && rtx_equal_p (cons, op0))
4688 || (code == NE && rtx_equal_p (alt, op0)))
4690 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4691 alt = force_reg (mode, alt);
4692 emit_insn (gen_rtx_SET (dest,
4693 gen_rtx_IF_THEN_ELSE (mode, cond,
4694 cons, alt)));
4695 return true;
4698 rtx reg1 = gen_reg_rtx (mode);
4699 rtx reg2 = gen_reg_rtx (mode);
4700 riscv_emit_int_compare (&code, &op0, &op1, true);
4701 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4702 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
4703 GET_MODE (op0), op0, op1);
4704 emit_insn (gen_rtx_SET (reg2,
4705 gen_rtx_IF_THEN_ELSE (mode, cond2,
4706 CONST0_RTX (mode),
4707 cons)));
4708 emit_insn (gen_rtx_SET (reg1,
4709 gen_rtx_IF_THEN_ELSE (mode, cond1,
4710 CONST0_RTX (mode),
4711 alt)));
4712 riscv_emit_binary (IOR, dest, reg1, reg2);
4713 return true;
4717 return false;
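/* For illustration (a sketch, assuming Zicond, the condition value in
   t0 and CONS/ALT in a1/a2), the equality forms used above map onto

	czero.eqz	t1,a1,t0	# t1 = (t0 == 0) ? 0 : a1
	czero.nez	t2,a2,t0	# t2 = (t0 != 0) ? 0 : a2
	or		a0,t1,t2	# a0 = t0 ? a1 : a2  */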
4720 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
4721 least PARM_BOUNDARY bits of alignment, but will be given anything up
4722 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
4724 static unsigned int
4725 riscv_function_arg_boundary (machine_mode mode, const_tree type)
4727 unsigned int alignment;
4729 /* Use natural alignment if the type is not an aggregate. */
4730 if (type && !AGGREGATE_TYPE_P (type))
4731 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
4732 else
4733 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
4735 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
4738 /* If MODE represents an argument that can be passed or returned in
4739 floating-point registers, return the number of registers, else 0. */
4741 static unsigned
4742 riscv_pass_mode_in_fpr_p (machine_mode mode)
4744 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
4746 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4747 return 1;
4749 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4750 return 2;
4753 return 0;
4756 typedef struct {
4757 const_tree type;
4758 HOST_WIDE_INT offset;
4759 } riscv_aggregate_field;
4761 /* Identify subfields of aggregates that are candidates for passing in
4762 floating-point registers. */
4764 static int
4765 riscv_flatten_aggregate_field (const_tree type,
4766 riscv_aggregate_field fields[2],
4767 int n, HOST_WIDE_INT offset,
4768 bool ignore_zero_width_bit_field_p)
4770 switch (TREE_CODE (type))
4772 case RECORD_TYPE:
4773 /* Can't handle incomplete types or sizes that are not fixed. */
4774 if (!COMPLETE_TYPE_P (type)
4775 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4776 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
4777 return -1;
4779 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
4780 if (TREE_CODE (f) == FIELD_DECL)
4782 if (!TYPE_P (TREE_TYPE (f)))
4783 return -1;
4785 /* The C++ front end strips zero-length bit-fields from structs.
4786 So we need to ignore them in the C front end to make C code
4787 compatible with C++ code. */
4788 if (ignore_zero_width_bit_field_p
4789 && DECL_BIT_FIELD (f)
4790 && (DECL_SIZE (f) == NULL_TREE
4791 || integer_zerop (DECL_SIZE (f))))
4793 else
4795 HOST_WIDE_INT pos = offset + int_byte_position (f);
4796 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
4797 fields, n, pos,
4798 ignore_zero_width_bit_field_p);
4800 if (n < 0)
4801 return -1;
4803 return n;
4805 case ARRAY_TYPE:
4807 HOST_WIDE_INT n_elts;
4808 riscv_aggregate_field subfields[2];
4809 tree index = TYPE_DOMAIN (type);
4810 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
4811 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
4812 subfields, 0, offset,
4813 ignore_zero_width_bit_field_p);
4815 /* Can't handle incomplete types or sizes that are not fixed. */
4816 if (n_subfields <= 0
4817 || !COMPLETE_TYPE_P (type)
4818 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4819 || !index
4820 || !TYPE_MAX_VALUE (index)
4821 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4822 || !TYPE_MIN_VALUE (index)
4823 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4824 || !tree_fits_uhwi_p (elt_size))
4825 return -1;
4827 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4828 - tree_to_uhwi (TYPE_MIN_VALUE (index));
4829 gcc_assert (n_elts >= 0);
4831 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
4832 for (int j = 0; j < n_subfields; j++)
4834 if (n >= 2)
4835 return -1;
4837 fields[n] = subfields[j];
4838 fields[n++].offset += i * tree_to_uhwi (elt_size);
4841 return n;
4844 case COMPLEX_TYPE:
4846 /* A complex type consumes two fields, so N must be 0. */
4847 if (n != 0)
4848 return -1;
4850 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
4852 if (elt_size <= UNITS_PER_FP_ARG)
4854 fields[0].type = TREE_TYPE (type);
4855 fields[0].offset = offset;
4856 fields[1].type = TREE_TYPE (type);
4857 fields[1].offset = offset + elt_size;
4859 return 2;
4862 return -1;
4865 default:
4866 if (n < 2
4867 && ((SCALAR_FLOAT_TYPE_P (type)
4868 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
4869 || (INTEGRAL_TYPE_P (type)
4870 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
4872 fields[n].type = type;
4873 fields[n].offset = offset;
4874 return n + 1;
4876 else
4877 return -1;
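/* For example (a sketch): "struct { float x; float y; }" flattens to
   two SFmode fields at byte offsets 0 and 4, and "struct { float v[2]; }"
   flattens the same way via the ARRAY_TYPE case, while a struct with
   three floats returns -1 because at most two fields are allowed.  */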
4881 /* Identify candidate aggregates for passing in floating-point registers.
4882 Candidates have at most two fields after flattening. */
4884 static int
4885 riscv_flatten_aggregate_argument (const_tree type,
4886 riscv_aggregate_field fields[2],
4887 bool ignore_zero_width_bit_field_p)
4889 if (!type || TREE_CODE (type) != RECORD_TYPE)
4890 return -1;
4892 return riscv_flatten_aggregate_field (type, fields, 0, 0,
4893 ignore_zero_width_bit_field_p);
4896 /* See whether TYPE is a record whose fields should be returned in one or
4897 two floating-point registers. If so, populate FIELDS accordingly. */
4899 static unsigned
4900 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
4901 riscv_aggregate_field fields[2])
4903 static int warned = 0;
4905 /* This is the old ABI, which differs for C++ and C. */
4906 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4907 for (int i = 0; i < n_old; i++)
4908 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4910 n_old = -1;
4911 break;
4914 /* This is the new ABI, which is the same for C++ and C. */
4915 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4916 for (int i = 0; i < n_new; i++)
4917 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4919 n_new = -1;
4920 break;
4923 if ((n_old != n_new) && (warned == 0))
4925 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4926 "bit-fields changed in GCC 10");
4927 warned = 1;
4930 return n_new > 0 ? n_new : 0;
4933 /* See whether TYPE is a record whose fields should be returned in one
4934    floating-point register and one integer register. If so, populate
4935    FIELDS accordingly. */
4937 static bool
4938 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
4939 riscv_aggregate_field fields[2])
4941 static int warned = 0;
4943 /* This is the old ABI, which differs for C++ and C. */
4944 unsigned num_int_old = 0, num_float_old = 0;
4945 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4946 for (int i = 0; i < n_old; i++)
4948 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
4949 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
4952 /* This is the new ABI, which is the same for C++ and C. */
4953 unsigned num_int_new = 0, num_float_new = 0;
4954 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4955 for (int i = 0; i < n_new; i++)
4957 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
4958 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
4961 if (((num_int_old == 1 && num_float_old == 1
4962 && (num_int_old != num_int_new || num_float_old != num_float_new))
4963 || (num_int_new == 1 && num_float_new == 1
4964 && (num_int_old != num_int_new || num_float_old != num_float_new)))
4965 && (warned == 0))
4967 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4968 "bit-fields changed in GCC 10");
4969 warned = 1;
4972 return num_int_new == 1 && num_float_new == 1;
4975 /* Return the representation of an argument passed or returned in an FPR
4976 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
4977 two modes may be different for structures like:
4979 struct __attribute__((packed)) foo { float f; }
4981 where the SFmode value "f" is passed in REGNO but the struct itself
4982 has mode BLKmode. */
4984 static rtx
4985 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
4986 machine_mode value_mode,
4987 HOST_WIDE_INT offset)
4989 rtx x = gen_rtx_REG (value_mode, regno);
4991 if (type_mode != value_mode)
4993 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
4994 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
4996 return x;
4999 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
5000 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
5001 byte offset for the first value, likewise MODE2 and OFFSET2 for the
5002 second value. */
5004 static rtx
5005 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
5006 machine_mode mode1, HOST_WIDE_INT offset1,
5007 unsigned regno2, machine_mode mode2,
5008 HOST_WIDE_INT offset2)
5010 return gen_rtx_PARALLEL
5011 (mode,
5012 gen_rtvec (2,
5013 gen_rtx_EXPR_LIST (VOIDmode,
5014 gen_rtx_REG (mode1, regno1),
5015 GEN_INT (offset1)),
5016 gen_rtx_EXPR_LIST (VOIDmode,
5017 gen_rtx_REG (mode2, regno2),
5018 GEN_INT (offset2))));
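/* For illustration (a sketch): returning "struct { float x; float y; }"
   in FPRs produces

	(parallel [(expr_list (reg:SF fa0) (const_int 0))
		   (expr_list (reg:SF fa1) (const_int 4))])

   i.e. one value per register, each tagged with its byte offset.  */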
5021 static rtx
5022 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
5023 unsigned gpr_base)
5025 gcc_assert (riscv_v_ext_vls_mode_p (mode));
5027 unsigned count = 0;
5028 unsigned regnum = 0;
5029 machine_mode gpr_mode = VOIDmode;
5030 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
5031 unsigned gpr_size = GET_MODE_SIZE (Xmode);
5033 if (IN_RANGE (vls_size, 0, gpr_size * 2))
5035 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
5037 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5039 regnum = gpr_base + info->gpr_offset;
5040 info->num_gprs = count;
5041 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5045 if (!regnum)
5046 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5048 gcc_assert (gpr_mode != VOIDmode);
5050 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5051 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5053 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
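/* For example (a sketch, assuming rv64 where Xmode is DImode): a
   16-byte VLS value fits in two GPRs and is wrapped in a PARALLEL over
   a double-word register, while anything wider than two GPRs fails the
   IN_RANGE check and returns NULL_RTX.  */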
5056 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5057 for a call to a function whose data type is FNTYPE.
5058 For a library call, FNTYPE is 0. */
5060 void
5061 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5063 memset (cum, 0, sizeof (*cum));
5065 if (fntype)
5066 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5067 else
5068 cum->variant_cc = RISCV_CC_BASE;
5071 /* Return true if TYPE is a vector type that can be passed in vector registers. */
5074 static bool
5075 riscv_vector_type_p (const_tree type)
5077 /* Currently, only builtin scalable vector types are allowed; in the future,
5078    more vector types may be allowed, such as GNU vector types, etc. */
5079 return riscv_vector::builtin_type_p (type);
5082 static unsigned int
5083 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5085 /* Subroutine of riscv_get_arg_info. */
5087 static rtx
5088 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5089 machine_mode mode, bool return_p)
5091 gcc_assert (riscv_v_ext_mode_p (mode));
5093 info->mr_offset = cum->num_mrs;
5094 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5096 /* For scalable mask return value. */
5097 if (return_p)
5098 return gen_rtx_REG (mode, V_REG_FIRST);
5100 /* For the first scalable mask argument. */
5101 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5103 info->num_mrs = 1;
5104 return gen_rtx_REG (mode, V_REG_FIRST);
5106 else
5108 /* The remaining scalable mask arguments are treated as scalable data
5109    arguments. */
5113 /* The number and alignment of vector registers needed for this scalable
5114    vector argument. When the mode size is less than a full vector, we use
5115    one vector register to pass it. Just call TARGET_HARD_REGNO_NREGS for
5116    the count. */
5117 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5118 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5119 ? nregs / riscv_vector::get_nf (mode)
5120 : nregs;
5121 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5122 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5123 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5125 /* For scalable data and scalable tuple return value. */
5126 if (return_p)
5127 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5129 /* Iterate through the USED_VRS array to find a vector register group that
5130    has not been allocated and whose first register is aligned to LMUL. */
5131 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5133 /* The index in USED_VRS array. */
5134 int idx = i - arg_reg_start;
5135 /* Find the first register unused. */
5136 if (!cum->used_vrs[idx])
5138 bool find_set = true;
5139 /* Ensure there are NREGS continuous unused registers. */
5140 for (int j = 1; j < nregs; j++)
5141 if (cum->used_vrs[idx + j])
5143 find_set = false;
5144 /* Update I to the last aligned register which
5145 cannot be used and the next iteration will add
5146 LMUL step to I. */
5147 i += (j / LMUL) * LMUL;
5148 break;
5151 if (find_set)
5153 info->num_vrs = nregs;
5154 info->vr_offset = idx;
5155 return gen_rtx_REG (mode, i + V_REG_FIRST);
5160 return NULL_RTX;
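/* For illustration (a sketch, assuming V_ARG_FIRST corresponds to v8 as
   in the vector calling convention): the first LMUL=2 data argument is
   assigned v8-v9, and if that group is taken the next LMUL=2 argument
   goes to the next LMUL-aligned free group, e.g. v10-v11.  */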
5163 /* Fill INFO with information about a single argument, and return an RTL
5164 pattern to pass or return the argument. Return NULL_RTX if the argument
5165 cannot be passed or returned in registers; in that case it may be passed by
5166 reference or through the stack. CUM is the cumulative state for earlier arguments.
5167 MODE is the mode of this argument and TYPE is its type (if known). NAMED is
5168 true if this is a named (fixed) argument rather than a variable one. RETURN_P
5169 is true if returning the argument, or false if passing the argument. */
5171 static rtx
5172 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5173 machine_mode mode, const_tree type, bool named,
5174 bool return_p)
5176 unsigned num_bytes, num_words;
5177 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5178 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5179 unsigned alignment = riscv_function_arg_boundary (mode, type);
5181 memset (info, 0, sizeof (*info));
5182 info->gpr_offset = cum->num_gprs;
5183 info->fpr_offset = cum->num_fprs;
5185 /* Passed by reference when the scalable vector argument is anonymous. */
5186 if (riscv_v_ext_mode_p (mode) && !named)
5187 return NULL_RTX;
5189 if (named)
5191 riscv_aggregate_field fields[2];
5192 unsigned fregno = fpr_base + info->fpr_offset;
5193 unsigned gregno = gpr_base + info->gpr_offset;
5195 /* Pass one- or two-element floating-point aggregates in FPRs. */
5196 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5197 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5198 switch (info->num_fprs)
5200 case 1:
5201 return riscv_pass_fpr_single (mode, fregno,
5202 TYPE_MODE (fields[0].type),
5203 fields[0].offset);
5205 case 2:
5206 return riscv_pass_fpr_pair (mode, fregno,
5207 TYPE_MODE (fields[0].type),
5208 fields[0].offset,
5209 fregno + 1,
5210 TYPE_MODE (fields[1].type),
5211 fields[1].offset);
5213 default:
5214 gcc_unreachable ();
5217 /* Pass real and complex floating-point numbers in FPRs. */
5218 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5219 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5220 switch (GET_MODE_CLASS (mode))
5222 case MODE_FLOAT:
5223 return gen_rtx_REG (mode, fregno);
5225 case MODE_COMPLEX_FLOAT:
5226 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5227 fregno + 1, GET_MODE_INNER (mode),
5228 GET_MODE_UNIT_SIZE (mode));
5230 default:
5231 gcc_unreachable ();
5234 /* Pass structs with one float and one integer in an FPR and a GPR. */
5235 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5236 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5237 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5239 info->num_gprs = 1;
5240 info->num_fprs = 1;
5242 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5243 std::swap (fregno, gregno);
5245 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5246 fields[0].offset,
5247 gregno, TYPE_MODE (fields[1].type),
5248 fields[1].offset);
5251 /* For scalable vector argument. */
5252 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5253 return riscv_get_vector_arg (info, cum, mode, return_p);
5255 /* For VLS modes passed in GPRs. */
5256 if (riscv_v_ext_vls_mode_p (mode))
5257 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5260 /* Work out the size of the argument. */
5261 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5262 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5264 /* Doubleword-aligned varargs start on an even register boundary. */
5265 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5266 info->gpr_offset += info->gpr_offset & 1;
5268 /* Partition the argument between registers and stack. */
5269 info->num_fprs = 0;
5270 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5271 info->stack_p = (num_words - info->num_gprs) != 0;
5273 if (info->num_gprs || return_p)
5274 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5276 return NULL_RTX;
5279 /* Implement TARGET_FUNCTION_ARG. */
5281 static rtx
5282 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5285 struct riscv_arg_info info;
5287 if (arg.end_marker_p ())
5288 /* Return the calling convention used by the current function. */
5289 return gen_int_mode (cum->variant_cc, SImode);
5291 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5294 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5296 static void
5297 riscv_function_arg_advance (cumulative_args_t cum_v,
5298 const function_arg_info &arg)
5300 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5301 struct riscv_arg_info info;
5303 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5305 /* Set the corresponding register in USED_VRS to used status. */
5306 for (unsigned int i = 0; i < info.num_vrs; i++)
5308 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5309 cum->used_vrs[info.vr_offset + i] = true;
5312 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5314 error ("RVV type %qT cannot be passed to an unprototyped function",
5315 arg.type);
5316 /* Avoid repeating the message. */
5317 cum->variant_cc = RISCV_CC_V;
5320 /* Advance the register count. This has the effect of setting
5321 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5322 argument required us to skip the final GPR and pass the whole
5323 argument on the stack. */
5324 cum->num_fprs = info.fpr_offset + info.num_fprs;
5325 cum->num_gprs = info.gpr_offset + info.num_gprs;
5326 cum->num_mrs = info.mr_offset + info.num_mrs;
5329 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5331 static int
5332 riscv_arg_partial_bytes (cumulative_args_t cum,
5333 const function_arg_info &generic_arg)
5335 struct riscv_arg_info arg;
5337 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5338 generic_arg.type, generic_arg.named, false);
5339 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
5342 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5343 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5344 VALTYPE is null and MODE is the mode of the return value. */
5347 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5349 struct riscv_arg_info info;
5350 CUMULATIVE_ARGS args;
5352 if (type)
5354 int unsigned_p = TYPE_UNSIGNED (type);
5356 mode = TYPE_MODE (type);
5358 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5359    return values, promote the mode here too. */
5360 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5363 memset (&args, 0, sizeof args);
5365 return riscv_get_arg_info (&info, &args, mode, type, true, true);
5368 /* Implement TARGET_PASS_BY_REFERENCE. */
5370 static bool
5371 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5373 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5374 struct riscv_arg_info info;
5375 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5377 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5378 never pass variadic arguments in floating-point and vector registers,
5379 so we can avoid the call to riscv_get_arg_info in this case. */
5380 if (cum != NULL)
5382 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5384 /* Don't pass by reference if we can use a floating-point register. */
5385 if (info.num_fprs)
5386 return false;
5388 /* Don't pass by reference if we can use general register(s) for vls. */
5389 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
5390 return false;
5392 /* Don't pass by reference if we can use vector register groups. */
5393 if (info.num_vrs > 0 || info.num_mrs > 0)
5394 return false;
5397 /* Passed by reference when:
5398 1. The scalable vector argument is anonymous.
5399 2. Args cannot be passed through vector registers. */
5400 if (riscv_v_ext_mode_p (arg.mode))
5401 return true;
5403 /* Pass by reference if the data do not fit in two integer registers. */
5404 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
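/* For example (a sketch, assuming rv64 where UNITS_PER_WORD is 8): a
   16-byte struct still passes by value in up to two GPRs, while a
   24-byte struct fails the IN_RANGE test above and is passed by
   reference.  */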
5407 /* Implement TARGET_RETURN_IN_MEMORY. */
5409 static bool
5410 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5412 CUMULATIVE_ARGS args;
5413 cumulative_args_t cum = pack_cumulative_args (&args);
5415 /* The rules for returning in memory are the same as for passing the
5416 first named argument by reference. */
5417 memset (&args, 0, sizeof args);
5418 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5419 return riscv_pass_by_reference (cum, arg);
5422 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5424 static void
5425 riscv_setup_incoming_varargs (cumulative_args_t cum,
5426 const function_arg_info &arg,
5427 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5429 CUMULATIVE_ARGS local_cum;
5430 int gp_saved;
5432 /* The caller has advanced CUM up to, but not beyond, the last named
5433 argument. Advance a local copy of CUM past the last "real" named
5434 argument, to find out how many registers are left over. */
5435 local_cum = *get_cumulative_args (cum);
5436 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
5437 || arg.type != NULL_TREE)
5438 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5440 /* Find out how many registers we need to save. */
5441 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5443 if (!no_rtl && gp_saved > 0)
5445 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5446 REG_PARM_STACK_SPACE (cfun->decl)
5447 - gp_saved * UNITS_PER_WORD);
5448 rtx mem = gen_frame_mem (BLKmode, ptr);
5449 set_mem_alias_set (mem, get_varargs_alias_set ());
5451 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5452 mem, gp_saved);
5454 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5455 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
5458 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5460 static const predefined_function_abi &
5461 riscv_v_abi ()
5463 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5464 if (!v_abi.initialized_p ())
5466 HARD_REG_SET full_reg_clobbers
5467 = default_function_abi.full_reg_clobbers ();
5468 /* Callee-saved vector registers: v1-v7, v24-v31. */
5469 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5470 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5471 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5472 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5473 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5475 return v_abi;
5478 static bool
5479 riscv_vector_int_type_p (const_tree type)
5481 machine_mode mode = TYPE_MODE (type);
5483 if (VECTOR_MODE_P (mode))
5484 return INTEGRAL_MODE_P (GET_MODE_INNER (mode));
5486 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5487 return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
5490 static bool
5491 riscv_vector_float_type_p (const_tree type)
5493 machine_mode mode = TYPE_MODE (type);
5495 if (VECTOR_MODE_P (mode))
5496 return FLOAT_MODE_P (GET_MODE_INNER (mode));
5498 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5499 return strstr (name, "vfloat") != NULL;
5502 static unsigned
5503 riscv_vector_element_bitsize (const_tree type)
5505 machine_mode mode = TYPE_MODE (type);
5507 if (VECTOR_MODE_P (mode))
5508 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
5510 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5512 if (strstr (name, "bool") != NULL)
5513 return 1;
5514 else if (strstr (name, "int8") != NULL)
5515 return 8;
5516 else if (strstr (name, "int16") != NULL || strstr (name, "float16") != NULL)
5517 return 16;
5518 else if (strstr (name, "int32") != NULL || strstr (name, "float32") != NULL)
5519 return 32;
5520 else if (strstr (name, "int64") != NULL || strstr (name, "float64") != NULL)
5521 return 64;
5523 gcc_unreachable ();
5526 static unsigned
5527 riscv_vector_required_min_vlen (const_tree type)
5529 machine_mode mode = TYPE_MODE (type);
5531 if (riscv_v_ext_mode_p (mode))
5532 return TARGET_MIN_VLEN;
5534 unsigned element_bitsize = riscv_vector_element_bitsize (type);
5535 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5537 if (strstr (name, "bool64") != NULL)
5538 return element_bitsize * 64;
5539 else if (strstr (name, "bool32") != NULL)
5540 return element_bitsize * 32;
5541 else if (strstr (name, "bool16") != NULL)
5542 return element_bitsize * 16;
5543 else if (strstr (name, "bool8") != NULL)
5544 return element_bitsize * 8;
5545 else if (strstr (name, "bool4") != NULL)
5546 return element_bitsize * 4;
5547 else if (strstr (name, "bool2") != NULL)
5548 return element_bitsize * 2;
5550 if (strstr (name, "mf8") != NULL)
5551 return element_bitsize * 8;
5552 else if (strstr (name, "mf4") != NULL)
5553 return element_bitsize * 4;
5554 else if (strstr (name, "mf2") != NULL)
5555 return element_bitsize * 2;
5557 return element_bitsize;
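/* For example (a sketch): "vint32mf2_t" has 32-bit elements and a
   fractional LMUL of mf2, so it requires a minimum VLEN of 32 * 2 = 64;
   "vbool64_t" has 1-bit elements and likewise requires 1 * 64 = 64.  */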
5560 static void
5561 riscv_validate_vector_type (const_tree type, const char *hint)
5563 gcc_assert (riscv_vector_type_p (type));
5565 if (!TARGET_VECTOR)
5567 error_at (input_location, "%s %qT requires the V ISA extension",
5568 hint, type);
5569 return;
5572 unsigned element_bitsize = riscv_vector_element_bitsize (type);
5573 bool int_type_p = riscv_vector_int_type_p (type);
5575 if (int_type_p && element_bitsize == 64
5576 && !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
5578 error_at (input_location,
5579 "%s %qT requires the zve64x, zve64f, zve64d or v ISA extension",
5580 hint, type);
5581 return;
5584 bool float_type_p = riscv_vector_float_type_p (type);
5586 if (float_type_p && element_bitsize == 16
5587 && !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
5589 error_at (input_location,
5590 "%s %qT requires the zvfhmin or zvfh ISA extension",
5591 hint, type);
5592 return;
5595 if (float_type_p && element_bitsize == 32
5596 && !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
5598 error_at (input_location,
5599 "%s %qT requires the zve32f, zve64f, zve64d or v ISA extension",
5600 hint, type);
5601 return;
5604 if (float_type_p && element_bitsize == 64
5605 && !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
5607 error_at (input_location,
5608 "%s %qT requires the zve64d or v ISA extension", hint, type);
5609 return;
5612 unsigned required_min_vlen = riscv_vector_required_min_vlen (type);
5614 if (TARGET_MIN_VLEN < required_min_vlen)
5616 error_at (
5617 input_location,
5618 "%s %qT requires the minimal vector length %qd but %qd is given",
5619 hint, type, required_min_vlen, TARGET_MIN_VLEN);
5620 return;
5624 /* Return true if a function with type FNTYPE returns its value in
5625 RISC-V V registers. */
5627 static bool
5628 riscv_return_value_is_vector_type_p (const_tree fntype)
5630 tree return_type = TREE_TYPE (fntype);
5632 if (riscv_vector_type_p (return_type))
5634 riscv_validate_vector_type (return_type, "return type");
5635 return true;
5637 else
5638 return false;
5641 /* Return true if a function with type FNTYPE takes arguments in
5642 RISC-V V registers. */
5644 static bool
5645 riscv_arguments_is_vector_type_p (const_tree fntype)
5647 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
5648 chain = TREE_CHAIN (chain))
5650 tree arg_type = TREE_VALUE (chain);
5651 if (riscv_vector_type_p (arg_type))
5653 riscv_validate_vector_type (arg_type, "argument type");
5654 return true;
5658 return false;
5661 /* Return true if FUNC is a riscv_vector_cc function.
5662 For more details, see the link below.
5663 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
5664 static bool
5665 riscv_vector_cc_function_p (const_tree fntype)
5667 tree attr = TYPE_ATTRIBUTES (fntype);
5668 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
5669 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
5671 if (vector_cc_p && !TARGET_VECTOR)
5672 error_at (input_location,
5673 "function attribute %qs requires the V ISA extension",
5674 "riscv_vector_cc");
5676 return vector_cc_p;
5679 /* Implement TARGET_FNTYPE_ABI. */
5681 static const predefined_function_abi &
5682 riscv_fntype_abi (const_tree fntype)
5684 /* Implement the vector calling convention. For more details, see
5685    the link below.
5686 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
5687 if (riscv_return_value_is_vector_type_p (fntype)
5688 || riscv_arguments_is_vector_type_p (fntype)
5689 || riscv_vector_cc_function_p (fntype))
5690 return riscv_v_abi ();
5692 return default_function_abi;
5695 /* Return the RISC-V calling convention recorded in the USE rtx of a call insn. */
5696 riscv_cc
5697 get_riscv_cc (const rtx use)
5699 gcc_assert (GET_CODE (use) == USE);
5700 rtx unspec = XEXP (use, 0);
5701 gcc_assert (GET_CODE (unspec) == UNSPEC
5702 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
5703 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
5704 gcc_assert (cc < RISCV_CC_UNKNOWN);
5705 return cc;
5708 /* Implement TARGET_INSN_CALLEE_ABI. */
5710 const predefined_function_abi &
5711 riscv_insn_callee_abi (const rtx_insn *insn)
5713 rtx pat = PATTERN (insn);
5714 gcc_assert (GET_CODE (pat) == PARALLEL);
5715 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
5716 return function_abis[cc];
5719 /* Handle an attribute requiring a FUNCTION_DECL;
5720 arguments as in struct attribute_spec.handler. */
5721 static tree
5722 riscv_handle_fndecl_attribute (tree *node, tree name,
5723 tree args ATTRIBUTE_UNUSED,
5724 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5726 if (TREE_CODE (*node) != FUNCTION_DECL)
5728 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5729 name);
5730 *no_add_attrs = true;
5733 return NULL_TREE;
5736 /* Verify type-based attributes.  NODE is what the attribute is being
5737 applied to. NAME is the attribute name. ARGS are the attribute args.
5738 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
5739 the attribute should be ignored. */
5741 static tree
5742 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5743 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5745 /* Check for an argument. */
5746 if (is_attribute_p ("interrupt", name))
5748 if (args)
5750 tree cst = TREE_VALUE (args);
5751 const char *string;
5753 if (TREE_CODE (cst) != STRING_CST)
5755 warning (OPT_Wattributes,
5756 "%qE attribute requires a string argument",
5757 name);
5758 *no_add_attrs = true;
5759 return NULL_TREE;
5762 string = TREE_STRING_POINTER (cst);
5763 if (strcmp (string, "user") && strcmp (string, "supervisor")
5764 && strcmp (string, "machine"))
5766 warning (OPT_Wattributes,
5767 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
5768 "or %<\"machine\"%>", name);
5769 *no_add_attrs = true;
5774 return NULL_TREE;
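/* For illustration, the attribute validated above is typically written as

     void __attribute__ ((interrupt ("machine"))) handler (void);

   Any string other than "user", "supervisor" or "machine" triggers the
   warning emitted above.  */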
5777 static tree
5778 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
5779 ATTRIBUTE_UNUSED int flags,
5780 bool *no_add_attrs)
5782 if (!is_attribute_p ("riscv_rvv_vector_bits", name))
5783 return NULL_TREE;
5785 *no_add_attrs = true;
5787 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL)
5789 error (
5790 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified",
5791 "riscv_rvv_vector_bits");
5792 return NULL_TREE;
5795 tree type = *node;
5797 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type))
5799 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type);
5800 return NULL_TREE;
5803 tree size = TREE_VALUE (args);
5805 if (TREE_CODE (size) != INTEGER_CST)
5807 error ("%qs requires an integer constant", "riscv_rvv_vector_bits");
5808 return NULL_TREE;
5811 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size);
5812 unsigned HOST_WIDE_INT type_mode_bits
5813 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant ();
5815 if (args_in_bits != type_mode_bits)
5817 error ("invalid RVV vector size %qd, "
5818 "expected size is %qd based on LMUL of type and %qs",
5819 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl");
5820 return NULL_TREE;
5823 type = build_distinct_type_copy (type);
5824 TYPE_ATTRIBUTES (type)
5825 = remove_attribute ("RVV sizeless type",
5826 copy_list (TYPE_ATTRIBUTES (type)));
5828 /* Operations like ALU/compare on vbool*_t are not well defined;
5829 continue to treat vbool*_t as indivisible. */
5830 if (!VECTOR_BOOLEAN_TYPE_P (type))
5831 TYPE_INDIVISIBLE_P (type) = 0;
5833 *node = type;
5835 return NULL_TREE;
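/* Illustrative usage (a sketch assuming -mrvv-vector-bits=zvl, which
   defines __riscv_v_fixed_vlen):

     typedef vint32m1_t fixed_vint32m1_t
       __attribute__ ((riscv_rvv_vector_bits (__riscv_v_fixed_vlen)));

   The attribute argument must equal the mode precision computed above,
   i.e. the size implied by the type's LMUL and the zvl vector length.  */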
5838 /* Return true if function TYPE is an interrupt function. */
5839 static bool
5840 riscv_interrupt_type_p (tree type)
5842 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
5845 /* Return true if FUNC is a naked function. */
5846 static bool
5847 riscv_naked_function_p (tree func)
5849 tree func_decl = func;
5850 if (func == NULL_TREE)
5851 func_decl = current_function_decl;
5852 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
5855 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
5856 static bool
5857 riscv_allocate_stack_slots_for_args ()
5859 /* Naked functions should not allocate stack slots for arguments. */
5860 return !riscv_naked_function_p (current_function_decl);
5863 /* Implement TARGET_WARN_FUNC_RETURN. */
5864 static bool
5865 riscv_warn_func_return (tree decl)
5867 /* Naked functions are implemented entirely in assembly, including the
5868 return sequence, so suppress warnings about this. */
5869 return !riscv_naked_function_p (decl);
5872 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5874 static void
5875 riscv_va_start (tree valist, rtx nextarg)
5877 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5878 std_expand_builtin_va_start (valist, nextarg);
5881 /* Make ADDR suitable for use as a call or sibcall target. */
5883 rtx
5884 riscv_legitimize_call_address (rtx addr)
5886 if (!call_insn_operand (addr, VOIDmode))
5888 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
5889 riscv_emit_move (reg, addr);
5890 return reg;
5892 return addr;
5895 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
5896 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
5898 static void
5899 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
5901 const char *reloc;
5903 switch (riscv_classify_symbolic_expression (op))
5905 case SYMBOL_ABSOLUTE:
5906 reloc = hi_reloc ? "%hi" : "%lo";
5907 break;
5909 case SYMBOL_PCREL:
5910 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
5911 break;
5913 case SYMBOL_TLS_LE:
5914 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
5915 break;
5917 default:
5918 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
5919 return;
5922 fprintf (file, "%s(", reloc);
5923 output_addr_const (file, riscv_strip_unspec_address (op));
5924 fputc (')', file);
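/* For example, for a SYMBOL_ABSOLUTE operand this prints "%hi(sym)" for
   the high part and "%lo(sym)" for the low part, matching the assembler's
   relocation operators.  */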
5927 /* Return the memory model that encapsulates both given models. */
5929 enum memmodel
5930 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
5932 model1 = memmodel_base (model1);
5933 model2 = memmodel_base (model2);
5935 enum memmodel weaker = model1 <= model2 ? model1: model2;
5936 enum memmodel stronger = model1 > model2 ? model1: model2;
5938 switch (stronger)
5940 case MEMMODEL_SEQ_CST:
5941 case MEMMODEL_ACQ_REL:
5942 return stronger;
5943 case MEMMODEL_RELEASE:
5944 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
5945 return MEMMODEL_ACQ_REL;
5946 else
5947 return stronger;
5948 case MEMMODEL_ACQUIRE:
5949 case MEMMODEL_CONSUME:
5950 case MEMMODEL_RELAXED:
5951 return stronger;
5952 default:
5953 gcc_unreachable ();
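/* For example, riscv_union_memmodels (MEMMODEL_ACQUIRE, MEMMODEL_RELEASE)
   yields MEMMODEL_ACQ_REL: RELEASE is the stronger of the two, and the
   weaker ACQUIRE still contributes its acquire requirement.  */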
5957 /* Return true if the .AQ suffix should be added to an AMO to implement the
5958 acquire portion of memory model MODEL. */
5960 static bool
5961 riscv_memmodel_needs_amo_acquire (enum memmodel model)
5963 /* ZTSO amo mappings require no annotations. */
5964 if (TARGET_ZTSO)
5965 return false;
5967 switch (model)
5969 case MEMMODEL_ACQ_REL:
5970 case MEMMODEL_SEQ_CST:
5971 case MEMMODEL_ACQUIRE:
5972 case MEMMODEL_CONSUME:
5973 return true;
5975 case MEMMODEL_RELEASE:
5976 case MEMMODEL_RELAXED:
5977 return false;
5979 default:
5980 gcc_unreachable ();
5984 /* Return true if the .RL suffix should be added to an AMO to implement the
5985 release portion of memory model MODEL. */
5987 static bool
5988 riscv_memmodel_needs_amo_release (enum memmodel model)
5990 /* ZTSO amo mappings require no annotations. */
5991 if (TARGET_ZTSO)
5992 return false;
5994 switch (model)
5996 case MEMMODEL_ACQ_REL:
5997 case MEMMODEL_SEQ_CST:
5998 case MEMMODEL_RELEASE:
5999 return true;
6001 case MEMMODEL_ACQUIRE:
6002 case MEMMODEL_CONSUME:
6003 case MEMMODEL_RELAXED:
6004 return false;
6006 default:
6007 gcc_unreachable ();
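/* Taken together, the two predicates above yield the usual AMO suffixes
   when Ztso is not enabled; an illustrative sketch:

     amoadd.w       a0,a1,(a2)   MEMMODEL_RELAXED
     amoadd.w.aq    a0,a1,(a2)   MEMMODEL_ACQUIRE
     amoadd.w.rl    a0,a1,(a2)   MEMMODEL_RELEASE
     amoadd.w.aqrl  a0,a1,(a2)   MEMMODEL_SEQ_CST  */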
6011 /* Get the REGNO alignment of a vector mode.
6012 The alignment equals LMUL when LMUL >= 1.
6013 Otherwise, the alignment is 1. */
6014 int
6015 riscv_get_v_regno_alignment (machine_mode mode)
6017 /* Per spec 3.3.2, when LMUL = 2, 4 or 8, register numbers should be
6018 multiples of 2, 4 or 8; but mask vector registers can use any number. */
6019 int lmul = 1;
6020 machine_mode rvv_mode = mode;
6021 if (riscv_v_ext_vls_mode_p (rvv_mode))
6023 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
6024 if (size < TARGET_MIN_VLEN)
6025 return 1;
6026 else
6027 return size / TARGET_MIN_VLEN;
6029 if (riscv_v_ext_tuple_mode_p (rvv_mode))
6030 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
6031 poly_int64 size = GET_MODE_SIZE (rvv_mode);
6032 if (known_gt (size, UNITS_PER_V_REG))
6033 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
6034 return lmul;
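/* For instance, with TARGET_MIN_VLEN == 128 a 512-bit VLS mode yields an
   alignment of 4, so only register numbers v0, v4, v8, ... are valid;
   fractional-LMUL and mask modes yield an alignment of 1.  */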
6037 /* Define ASM_OUTPUT_OPCODE to do anything special before
6038 emitting an opcode. */
6039 const char *
6040 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
6042 if (TARGET_XTHEADVECTOR)
6043 return th_asm_output_opcode (asm_out_file, p);
6045 return p;
6048 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
6050 'h' Print the high-part relocation associated with OP, after stripping
6051 any outermost HIGH.
6052 'R' Print the low-part relocation associated with OP.
6053 'C' Print the integer branch condition for comparison OP.
6054 'N' Print the inverse of the integer branch condition for comparison OP.
6055 'A' Print the atomic operation suffix for memory model OP.
6056 'I' Print the LR suffix for memory model OP.
6057 'J' Print the SC suffix for memory model OP.
6058 'z' Print x0 if OP is zero, otherwise print OP normally.
6059 'i' Print i if the operand is not a register.
6060 'S' Print shift-index of single-bit mask OP.
6061 'T' Print shift-index of inverted single-bit mask OP.
6062 '~' Print w if TARGET_64BIT is true; otherwise print nothing.
6064 Note please keep this list and the list in riscv.md in sync. */
6066 static void
6067 riscv_print_operand (FILE *file, rtx op, int letter)
6069 /* `~` does not take an operand, so OP will be null.
6070 Check for this before accessing OP.  */
6072 if (letter == '~')
6074 if (TARGET_64BIT)
6075 fputc('w', file);
6076 return;
6078 machine_mode mode = GET_MODE (op);
6079 enum rtx_code code = GET_CODE (op);
6081 switch (letter)
6083 case 'o': {
6084 /* Print 'OP' variant for RVV instructions.
6085 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
6086 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
6087 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
6088 if (riscv_v_ext_mode_p (mode))
6090 if (REG_P (op))
6091 asm_fprintf (file, "v");
6092 else if (CONST_VECTOR_P (op))
6093 asm_fprintf (file, "i");
6094 else
6095 output_operand_lossage ("invalid vector operand");
6097 else
6099 if (CONST_INT_P (op))
6100 asm_fprintf (file, "i");
6101 else
6102 asm_fprintf (file, "x");
6104 break;
6106 case 'v': {
6107 rtx elt;
6109 if (REG_P (op))
6110 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
6111 else
6113 if (!const_vec_duplicate_p (op, &elt))
6114 output_operand_lossage ("invalid vector constant");
6115 else if (satisfies_constraint_Wc0 (op))
6116 asm_fprintf (file, "0");
6117 else if (satisfies_constraint_vi (op)
6118 || satisfies_constraint_vj (op)
6119 || satisfies_constraint_vk (op))
6120 asm_fprintf (file, "%wd", INTVAL (elt));
6121 else
6122 output_operand_lossage ("invalid vector constant");
6124 break;
6126 case 'V': {
6127 rtx elt;
6128 if (!const_vec_duplicate_p (op, &elt))
6129 output_operand_lossage ("invalid vector constant");
6130 else if (satisfies_constraint_vj (op))
6131 asm_fprintf (file, "%wd", -INTVAL (elt));
6132 else
6133 output_operand_lossage ("invalid vector constant");
6134 break;
6136 case 'm': {
6137 if (riscv_v_ext_mode_p (mode))
6139 /* Calculate lmul according to mode and print the value. */
6140 int lmul = riscv_get_v_regno_alignment (mode);
6141 asm_fprintf (file, "%d", lmul);
6143 else if (code == CONST_INT)
6145 /* If it is a const_int value, it denotes the VLMUL field enum. */
6146 unsigned int vlmul = UINTVAL (op);
6147 switch (vlmul)
6149 case riscv_vector::LMUL_1:
6150 asm_fprintf (file, "%s", "m1");
6151 break;
6152 case riscv_vector::LMUL_2:
6153 asm_fprintf (file, "%s", "m2");
6154 break;
6155 case riscv_vector::LMUL_4:
6156 asm_fprintf (file, "%s", "m4");
6157 break;
6158 case riscv_vector::LMUL_8:
6159 asm_fprintf (file, "%s", "m8");
6160 break;
6161 case riscv_vector::LMUL_F8:
6162 asm_fprintf (file, "%s", "mf8");
6163 break;
6164 case riscv_vector::LMUL_F4:
6165 asm_fprintf (file, "%s", "mf4");
6166 break;
6167 case riscv_vector::LMUL_F2:
6168 asm_fprintf (file, "%s", "mf2");
6169 break;
6170 default:
6171 gcc_unreachable ();
6174 else
6175 output_operand_lossage ("invalid vector constant");
6176 break;
6178 case 'p': {
6179 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
6181 /* Print an RVV mask operand.
6182 If OP is a reg, print ",v0.t".
6183 Otherwise, print nothing. */
6184 if (code == REG)
6185 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
6187 else if (code == CONST_INT)
6189 /* Tail && Mask policy. */
6190 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
6192 else
6193 output_operand_lossage ("invalid vector constant");
6194 break;
6196 case 'h':
6197 if (code == HIGH)
6198 op = XEXP (op, 0);
6199 riscv_print_operand_reloc (file, op, true);
6200 break;
6202 case 'R':
6203 riscv_print_operand_reloc (file, op, false);
6204 break;
6206 case 'C':
6207 /* The RTL names match the instruction names. */
6208 fputs (GET_RTX_NAME (code), file);
6209 break;
6211 case 'N':
6212 /* The RTL names match the instruction names. */
6213 fputs (GET_RTX_NAME (reverse_condition (code)), file);
6214 break;
6216 case 'A': {
6217 const enum memmodel model = memmodel_base (INTVAL (op));
6218 if (riscv_memmodel_needs_amo_acquire (model)
6219 && riscv_memmodel_needs_amo_release (model))
6220 fputs (".aqrl", file);
6221 else if (riscv_memmodel_needs_amo_acquire (model))
6222 fputs (".aq", file);
6223 else if (riscv_memmodel_needs_amo_release (model))
6224 fputs (".rl", file);
6225 break;
6228 case 'I': {
6229 const enum memmodel model = memmodel_base (INTVAL (op));
6230 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
6231 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
6232 break;
6233 else if (model == MEMMODEL_SEQ_CST)
6234 fputs (".aqrl", file);
6235 else if (riscv_memmodel_needs_amo_acquire (model))
6236 fputs (".aq", file);
6237 break;
6240 case 'J': {
6241 const enum memmodel model = memmodel_base (INTVAL (op));
6242 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
6243 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
6244 fputs (".rl", file);
6245 else if (TARGET_ZTSO)
6246 break;
6247 else if (riscv_memmodel_needs_amo_release (model))
6248 fputs (".rl", file);
6249 break;
6252 case 'i':
6253 if (code != REG)
6254 fputs ("i", file);
6255 break;
6257 case 'B':
6258 fputs (GET_RTX_NAME (code), file);
6259 break;
6261 case 'S':
6263 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6264 output_addr_const (file, newop);
6265 break;
6267 case 'T':
6269 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6270 output_addr_const (file, newop);
6271 break;
6273 case 'X':
6275 int ival = INTVAL (op) + 1;
6276 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6277 output_addr_const (file, newop);
6278 break;
6280 case 'Y':
6282 unsigned int imm = (UINTVAL (op) & 63);
6283 gcc_assert (imm <= 63);
6284 rtx newop = GEN_INT (imm);
6285 output_addr_const (file, newop);
6286 break;
6288 default:
6289 switch (code)
6291 case REG:
6292 if (letter && letter != 'z')
6293 output_operand_lossage ("invalid use of '%%%c'", letter);
6294 fprintf (file, "%s", reg_names[REGNO (op)]);
6295 break;
6297 case MEM:
6298 if (letter && letter != 'z')
6299 output_operand_lossage ("invalid use of '%%%c'", letter);
6300 else
6301 output_address (mode, XEXP (op, 0));
6302 break;
6304 case CONST_DOUBLE:
6306 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6308 fputs (reg_names[GP_REG_FIRST], file);
6309 break;
6312 int fli_index = riscv_float_const_rtx_index_for_fli (op);
6313 if (fli_index == -1 || fli_index > 31)
6315 output_operand_lossage ("invalid use of '%%%c'", letter);
6316 break;
6318 asm_fprintf (file, "%s", fli_value_print[fli_index]);
6319 break;
6322 default:
6323 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6324 fputs (reg_names[GP_REG_FIRST], file);
6325 else if (letter && letter != 'z')
6326 output_operand_lossage ("invalid use of '%%%c'", letter);
6327 else
6328 output_addr_const (file, riscv_strip_unspec_address (op));
6329 break;
6334 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
6335 static bool
6336 riscv_print_operand_punct_valid_p (unsigned char code)
6338 return (code == '~');
6341 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6343 static void
6344 riscv_print_operand_address (FILE *file, machine_mode mode, rtx x)
6346 struct riscv_address_info addr;
6348 if (th_print_operand_address (file, mode, x))
6349 return;
6351 if (riscv_classify_address (&addr, x, word_mode, true))
6352 switch (addr.type)
6354 case ADDRESS_REG:
6355 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
6356 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6357 return;
6359 case ADDRESS_LO_SUM:
6360 riscv_print_operand_reloc (file, addr.offset, false);
6361 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6362 return;
6364 case ADDRESS_CONST_INT:
6365 output_addr_const (file, x);
6366 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
6367 return;
6369 case ADDRESS_SYMBOLIC:
6370 output_addr_const (file, riscv_strip_unspec_address (x));
6371 return;
6373 default:
6374 gcc_unreachable ();
6377 gcc_unreachable ();
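/* Return true if an object of SIZE bytes is small enough for the small
   data area, as controlled by -msmall-data-limit (g_switch_value).  */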
6380 static bool
6381 riscv_size_ok_for_small_data_p (int size)
6383 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
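/* e.g. with -msmall-data-limit=8, objects of 1 to 8 bytes qualify, while
   a limit of 0 disables the small data area altogether.  */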
6386 /* Return true if EXP should be placed in the small data section. */
6388 static bool
6389 riscv_in_small_data_p (const_tree x)
6391 /* default_use_anchors_for_symbol_p doesn't gather small data to use
6392 the anchor symbol to address nearby objects.  In the large code model,
6393 we get better results from the anchor optimization, so skip small data. */
6394 if (riscv_cmodel == CM_LARGE)
6395 return false;
6397 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
6398 return false;
6400 if (VAR_P (x) && DECL_SECTION_NAME (x))
6402 const char *sec = DECL_SECTION_NAME (x);
6403 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
6406 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
6409 /* Switch to the appropriate section for output of DECL. */
6411 static section *
6412 riscv_select_section (tree decl, int reloc,
6413 unsigned HOST_WIDE_INT align)
6415 switch (categorize_decl_for_section (decl, reloc))
6417 case SECCAT_SRODATA:
6418 return get_named_section (decl, ".srodata", reloc);
6420 default:
6421 return default_elf_select_section (decl, reloc, align);
6425 /* Choose a unique section name for DECL, mapping small read-only data to .srodata. */
6427 static void
6428 riscv_unique_section (tree decl, int reloc)
6430 const char *prefix = NULL;
6431 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6433 switch (categorize_decl_for_section (decl, reloc))
6435 case SECCAT_SRODATA:
6436 prefix = one_only ? ".sr" : ".srodata";
6437 break;
6439 default:
6440 break;
6442 if (prefix)
6444 const char *name, *linkonce;
6445 char *string;
6447 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6448 name = targetm.strip_name_encoding (name);
6450 /* If we're using one_only, then there needs to be a .gnu.linkonce
6451 prefix to the section name. */
6452 linkonce = one_only ? ".gnu.linkonce" : "";
6454 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6456 set_decl_section_name (decl, string);
6457 return;
6459 default_unique_section (decl, reloc);
6462 /* Constant pools are per-function in the large code model. */
6464 static inline bool
6465 riscv_can_use_per_function_literal_pools_p (void)
6467 return riscv_cmodel == CM_LARGE;
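/* Decide whether constants are grouped into object blocks; this backs the
   TARGET_USE_BLOCKS_FOR_CONSTANT_P hook.  */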
6470 static bool
6471 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6473 /* We can't use blocks for constants when we're using a per-function
6474 constant pool. */
6475 return !riscv_can_use_per_function_literal_pools_p ();
6478 /* Return a section for X, handling small data. */
6480 static section *
6481 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6482 unsigned HOST_WIDE_INT align)
6484 /* The literal pool stays with the function. */
6485 if (riscv_can_use_per_function_literal_pools_p ())
6486 return function_section (current_function_decl);
6488 section *s = default_elf_select_rtx_section (mode, x, align);
6490 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6492 if (startswith (s->named.name, ".rodata.cst"))
6494 /* Rename .rodata.cst* to .srodata.cst*. */
6495 char *name = (char *) alloca (strlen (s->named.name) + 2);
6496 sprintf (name, ".s%s", s->named.name + 1);
6497 return get_section (name, s->named.common.flags, NULL);
6500 if (s == data_section)
6501 return sdata_section;
6504 return s;
6507 /* Make the last instruction frame-related and note that it performs
6508 the operation described by FRAME_PATTERN. */
6510 static void
6511 riscv_set_frame_expr (rtx frame_pattern)
6513 rtx insn;
6515 insn = get_last_insn ();
6516 RTX_FRAME_RELATED_P (insn) = 1;
6517 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6518 frame_pattern,
6519 REG_NOTES (insn));
6522 /* Return a frame-related rtx that stores REG at MEM.
6523 REG must be a single register. */
6525 static rtx
6526 riscv_frame_set (rtx mem, rtx reg)
6528 rtx set = gen_rtx_SET (mem, reg);
6529 RTX_FRAME_RELATED_P (set) = 1;
6530 return set;
6533 /* Returns true if the current function might contain a far jump. */
6535 static bool
6536 riscv_far_jump_used_p ()
6538 size_t func_size = 0;
6540 if (cfun->machine->far_jump_used)
6541 return true;
6543 /* We can't change far_jump_used during or after reload, as there is
6544 no chance to change stack frame layout. So we must rely on the
6545 conservative heuristic below having done the right thing. */
6546 if (reload_in_progress || reload_completed)
6547 return false;
6549 /* Estimate the function length. */
6550 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6551 func_size += get_attr_length (insn);
6553 /* Conservatively determine whether some jump might exceed 1 MiB
6554 displacement. */
6555 if (func_size * 2 >= 0x100000)
6556 cfun->machine->far_jump_used = true;
6558 return cfun->machine->far_jump_used;
6561 /* Return true if the current function must save the incoming return
6562 address. */
6564 static bool
6565 riscv_save_return_addr_reg_p (void)
6567 /* The ra register is call-clobbered: if this is not a leaf function,
6568 save it. */
6569 if (!crtl->is_leaf)
6570 return true;
6572 /* We need to save the incoming return address if __builtin_eh_return
6573 is being used to set a different return address. */
6574 if (crtl->calls_eh_return)
6575 return true;
6577 /* Far jumps/branches use ra as a temporary to set up the target jump
6578 location (clobbering the incoming return address). */
6579 if (riscv_far_jump_used_p ())
6580 return true;
6582 /* We need to save ra if anything has used it. */
6583 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
6584 return true;
6586 /* In a leaf function that needs a frame pointer, also save ra unless
6587 leaf frame pointers are omitted (-momit-leaf-frame-pointer). */
6588 if (frame_pointer_needed && crtl->is_leaf
6589 && !TARGET_OMIT_LEAF_FRAME_POINTER)
6590 return true;
6592 return false;
6595 /* Return true if the current function must save register REGNO. */
6597 static bool
6598 riscv_save_reg_p (unsigned int regno)
6600 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
6601 bool might_clobber = crtl->saves_all_registers
6602 || df_regs_ever_live_p (regno);
6604 if (call_saved && might_clobber)
6605 return true;
6607 /* Save callee-saved V registers. */
6608 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
6609 && might_clobber)
6610 return true;
6612 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
6613 return true;
6615 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
6616 return true;
6618 /* If this is an interrupt handler, then we must save extra registers. */
6619 if (cfun->machine->interrupt_handler_p)
6621 /* The zero register is always zero. */
6622 if (regno == GP_REG_FIRST)
6623 return false;
6625 /* The function will return the stack pointer to its original value. */
6626 if (regno == STACK_POINTER_REGNUM)
6627 return false;
6629 /* By convention, we assume that gp and tp are safe. */
6630 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
6631 return false;
6633 /* We must save every register used in this function. If this is not a
6634 leaf function, then we must save all temporary registers. */
6635 if (df_regs_ever_live_p (regno)
6636 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
6637 return true;
6640 return false;
6643 /* Return TRUE if Zcmp push and pop insns should be
6644 avoided, FALSE otherwise.
6645 Only use multi push & pop if all masked GPRs can be covered,
6646 stack accesses are SP-based,
6647 GPRs are at the top of the stack frame,
6648 and stack allocation does not conflict with other features. */
6649 static bool
6650 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
6652 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
6653 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
6654 || crtl->args.pretend_args_size != 0
6655 || (use_shrink_wrapping_separate ()
6656 && !riscv_avoid_shrink_wrapping_separate ())
6657 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
6658 return true;
6660 return false;
6663 /* Determine whether to use the multi push insn. */
6664 static bool
6665 riscv_use_multi_push (const struct riscv_frame_info *frame)
6667 if (riscv_avoid_multi_push (frame))
6668 return false;
6670 return (frame->multi_push_adj_base != 0);
6673 /* Return TRUE if a libcall to save/restore GPRs should be
6674 avoided, FALSE otherwise. */
6675 static bool
6676 riscv_avoid_save_libcall (void)
6678 if (!TARGET_SAVE_RESTORE
6679 || crtl->calls_eh_return
6680 || frame_pointer_needed
6681 || cfun->machine->interrupt_handler_p
6682 || cfun->machine->varargs_size != 0
6683 || crtl->args.pretend_args_size != 0)
6684 return true;
6686 return false;
6689 /* Determine whether to call GPR save/restore routines. */
6690 static bool
6691 riscv_use_save_libcall (const struct riscv_frame_info *frame)
6693 if (riscv_avoid_save_libcall ())
6694 return false;
6696 return frame->save_libcall_adjustment != 0;
6699 /* Determine which GPR save/restore routine to call. */
6701 static unsigned
6702 riscv_save_libcall_count (unsigned mask)
6704 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
6705 if (BITSET_P (mask, n))
6706 return CALLEE_SAVED_REG_NUMBER (n) + 1;
6707 abort ();
6710 /* Calculate the number of s regs in multi push and pop.
6711 Note that {s0-s10} is not valid in Zcmp, use {s0-s11} instead. */
6712 static unsigned
6713 riscv_multi_push_sregs_count (unsigned mask)
6715 unsigned num = riscv_save_libcall_count (mask);
6716 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
6717 : num;
6720 /* Calculate the number of regs (ra, s0-sx) in multi push and pop. */
6721 static unsigned
6722 riscv_multi_push_regs_count (unsigned mask)
6724 /* 1 is for ra */
6725 return riscv_multi_push_sregs_count (mask) + 1;
6728 /* Handle 16-byte alignment for poly_int. */
6729 static poly_int64
6730 riscv_16bytes_align (poly_int64 value)
6732 return aligned_upper_bound (value, 16);
6735 static HOST_WIDE_INT
6736 riscv_16bytes_align (HOST_WIDE_INT value)
6738 return ROUND_UP (value, 16);
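/* For example, riscv_16bytes_align (24) == 32 in either overload.  */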
6741 /* Handle stack alignment for poly_int. */
6742 static poly_int64
6743 riscv_stack_align (poly_int64 value)
6745 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
6748 static HOST_WIDE_INT
6749 riscv_stack_align (HOST_WIDE_INT value)
6751 return RISCV_STACK_ALIGN (value);
6754 /* Populate the current function's riscv_frame_info structure.
6756 RISC-V stack frames grow downward.  High addresses are at the top.
6758 +-------------------------------+
6760 | incoming stack arguments |
6762 +-------------------------------+ <-- incoming stack pointer
6764 | callee-allocated save area |
6765 | for arguments that are |
6766 | split between registers and |
6767 | the stack |
6769 +-------------------------------+ <-- arg_pointer_rtx
6771 | callee-allocated save area |
6772 | for register varargs |
6774 +-------------------------------+ <-- hard_frame_pointer_rtx;
6775 | | stack_pointer_rtx + gp_sp_offset
6776 | GPR save area | + UNITS_PER_WORD
6778 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
6779 | | + UNITS_PER_FP_REG
6780 | FPR save area |
6782 +-------------------------------+ <-- stack_pointer_rtx
6783 | | + v_sp_offset_top
6784 | Vector Registers save area |
6786 | ----------------------------- | <-- stack_pointer_rtx
6787 | padding | + v_sp_offset_bottom
6788 +-------------------------------+ <-- frame_pointer_rtx (virtual)
6790 | local variables |
6792 P +-------------------------------+
6794 | outgoing stack arguments |
6796 +-------------------------------+ <-- stack_pointer_rtx
6798 Dynamic stack allocations such as alloca insert data at point P.
6799 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
6800 hard_frame_pointer_rtx unchanged. */
6802 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
6804 static void
6805 riscv_compute_frame_info (void)
6807 struct riscv_frame_info *frame;
6808 poly_int64 offset;
6809 bool interrupt_save_prologue_temp = false;
6810 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
6811 unsigned int num_v_saved = 0;
6813 frame = &cfun->machine->frame;
6815 /* In an interrupt function, there are two cases in which t0 needs to be used:
6816 1. If we have a large frame, then we need to save/restore t0.  We check
6817 for this before clearing the frame struct.
6818 2. We need to save and restore some CSRs in the frame. */
6819 if (cfun->machine->interrupt_handler_p)
6821 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
6822 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
6823 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
6824 interrupt_save_prologue_temp = true;
6827 frame->reset ();
6829 if (!cfun->machine->naked_p)
6831 /* Find out which GPRs we need to save. */
6832 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
6833 if (riscv_save_reg_p (regno)
6834 || (interrupt_save_prologue_temp
6835 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
6836 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6838 /* If this function calls eh_return, we must also save and restore the
6839 EH data registers. */
6840 if (crtl->calls_eh_return)
6841 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6842 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6844 /* Find out which FPRs we need to save. This loop must iterate over
6845 the same space as its companion in riscv_for_each_saved_reg. */
6846 if (TARGET_HARD_FLOAT)
6847 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6848 if (riscv_save_reg_p (regno))
6849 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
6851 /* Find out which V registers we need to save. */
6852 if (TARGET_VECTOR)
6853 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6854 if (riscv_save_reg_p (regno))
6856 frame->vmask |= 1 << (regno - V_REG_FIRST);
6857 num_v_saved++;
6861 if (frame->mask)
6863 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
6865 /* 1 is for ra */
6866 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
6867 /* Only use save/restore routines if they don't alter the stack size. */
6868 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
6869 && !riscv_avoid_save_libcall ())
6871 /* The libcall saves/restores 3 registers at once, so we need to
6872 allocate 12 bytes for the callee-saved registers. */
6873 if (TARGET_RVE)
6874 x_save_size = 3 * UNITS_PER_WORD;
6876 frame->save_libcall_adjustment = x_save_size;
6879 if (!riscv_avoid_multi_push (frame))
6881 /* num(ra, s0-sx) */
6882 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
6883 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
6884 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
6888 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
6889 if (cfun->machine->interrupt_handler_p
6890 && ((TARGET_HARD_FLOAT && frame->fmask)
6891 || (TARGET_ZFINX
6892 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
6893 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6894 /* Save and restore FCSR. */
6895 /* TODO: When P or V extensions support interrupts, some of their CSRs
6896 may also need to be saved and restored. */
6897 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
6899 /* At the bottom of the frame are any outgoing stack arguments. */
6900 offset = riscv_stack_align (crtl->outgoing_args_size);
6901 /* Next are local stack variables. */
6902 offset += riscv_stack_align (get_frame_size ());
6903 /* The virtual frame pointer points above the local variables. */
6904 frame->frame_pointer_offset = offset;
6905 /* Next are the callee-saved VRs. */
6906 if (frame->vmask)
6907 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
6908 frame->v_sp_offset_top = offset;
6909 frame->v_sp_offset_bottom
6910 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
6911 /* Next are the callee-saved FPRs. */
6912 if (frame->fmask)
6913 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
6914 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
6915 /* Next are the callee-saved GPRs. */
6916 if (frame->mask)
6918 offset += x_save_size;
6919 /* Align to 16 bytes and add padding to the GPR part to honor
6920 both the stack alignment and the Zcmp push/pop size alignment. */
6921 if (riscv_use_multi_push (frame)
6922 && known_lt (offset, frame->multi_push_adj_base
6923 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
6924 offset = riscv_16bytes_align (offset);
6926 frame->gp_sp_offset = offset - UNITS_PER_WORD;
6927 /* The hard frame pointer points above the callee-saved GPRs. */
6928 frame->hard_frame_pointer_offset = offset;
6929 /* Above the hard frame pointer is the callee-allocated varargs save area. */
6930 offset += riscv_stack_align (cfun->machine->varargs_size);
6931 /* Next is the callee-allocated area for pretend stack arguments. */
6932 offset += riscv_stack_align (crtl->args.pretend_args_size);
6933 /* Arg pointer must be below pretend args, but must be above alignment
6934 padding. */
6935 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
6936 frame->total_size = offset;
6938 /* Next come the incoming stack pointer and any incoming arguments. */
6941 /* Make sure that we're not trying to eliminate to the wrong hard frame
6942 pointer. */
6944 static bool
6945 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6947 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6950 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
6951 or argument pointer. TO is either the stack pointer or hard frame
6952 pointer. */
6954 poly_int64
6955 riscv_initial_elimination_offset (int from, int to)
6957 poly_int64 src, dest;
6959 riscv_compute_frame_info ();
6961 if (to == HARD_FRAME_POINTER_REGNUM)
6962 dest = cfun->machine->frame.hard_frame_pointer_offset;
6963 else if (to == STACK_POINTER_REGNUM)
6964 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
6965 else
6966 gcc_unreachable ();
6968 if (from == FRAME_POINTER_REGNUM)
6969 src = cfun->machine->frame.frame_pointer_offset;
6970 else if (from == ARG_POINTER_REGNUM)
6971 src = cfun->machine->frame.arg_pointer_offset;
6972 else
6973 gcc_unreachable ();
6975 return src - dest;
6978 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6979 previous frame. */
6981 rtx
6982 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6984 if (count != 0)
6985 return const0_rtx;
6987 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6990 /* Emit code to change the current function's return address to
6991 ADDRESS. SCRATCH is available as a scratch register, if needed.
6992 ADDRESS and SCRATCH are both word-mode GPRs. */
6994 void
6995 riscv_set_return_address (rtx address, rtx scratch)
6997 rtx slot_address;
6999 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
7000 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
7001 cfun->machine->frame.gp_sp_offset.to_constant());
7002 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
7005 /* Save register REG to MEM. Make the instruction frame-related. */
7007 static void
7008 riscv_save_reg (rtx reg, rtx mem)
7010 riscv_emit_move (mem, reg);
7011 riscv_set_frame_expr (riscv_frame_set (mem, reg));
7014 /* Restore register REG from MEM. */
7016 static void
7017 riscv_restore_reg (rtx reg, rtx mem)
7019 rtx insn = riscv_emit_move (reg, mem);
7020 rtx dwarf = NULL_RTX;
7021 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7023 if (known_gt (epilogue_cfa_sp_offset, 0)
7024 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
7026 rtx cfa_adjust_rtx
7027 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7028 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
7029 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7032 REG_NOTES (insn) = dwarf;
7033 RTX_FRAME_RELATED_P (insn) = 1;
7036 /* A function to save or store a register. The first argument is the
7037 register and the second is the stack slot. */
7038 typedef void (*riscv_save_restore_fn) (rtx, rtx);
7040 /* Use FN to save or restore register REGNO. MODE is the register's
7041 mode and OFFSET is the offset of its save slot from the current
7042 stack pointer. */
7044 static void
7045 riscv_save_restore_reg (machine_mode mode, int regno,
7046 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
7048 rtx mem;
7050 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
7051 fn (gen_rtx_REG (mode, regno), mem);
7054 /* Return the next register up from REGNO up to LIMIT for the callee
7055 to save or restore. OFFSET will be adjusted accordingly.
7056 If INC is set, then REGNO will be incremented first.
7057 Returns INVALID_REGNUM if there is no such next register. */
7059 static unsigned int
7060 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
7061 HOST_WIDE_INT *offset, bool inc = true)
7063 if (inc)
7064 regno++;
7066 while (regno <= limit)
7068 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7070 *offset = *offset - UNITS_PER_WORD;
7071 return regno;
7074 regno++;
7076 return INVALID_REGNUM;
7079 /* Return TRUE if the provided REGNO is an EH return data register. */
7081 static bool
7082 riscv_is_eh_return_data_register (unsigned int regno)
7084 unsigned int i, regnum;
7086 if (!crtl->calls_eh_return)
7087 return false;
7089 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7090 if (regno == regnum)
7092 return true;
7095 return false;
7098 /* Call FN for each register that is saved by the current function.
7099 SP_OFFSET is the offset of the current stack pointer from the start
7100 of the frame. */
7102 static void
7103 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
7104 bool epilogue, bool maybe_eh_return)
7106 HOST_WIDE_INT offset, first_fp_offset;
7107 unsigned int regno, num_masked_fp = 0;
7108 unsigned int start = GP_REG_FIRST;
7109 unsigned int limit = GP_REG_LAST;
7111 /* Save the link register and s-registers. */
7112 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
7113 + UNITS_PER_WORD;
7114 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
7115 regno != INVALID_REGNUM;
7116 regno = riscv_next_saved_reg (regno, limit, &offset))
7118 if (cfun->machine->reg_is_wrapped_separately[regno])
7119 continue;
7121 /* If this is a normal return in a function that calls the eh_return
7122 builtin, then do not restore the eh return data registers as that
7123 would clobber the return value. But we do still need to save them
7124 in the prologue, and restore them for an exception return, so we
7125 need special handling here. */
7126 if (epilogue && !maybe_eh_return
7127 && riscv_is_eh_return_data_register (regno))
7128 continue;
7130 /* In an interrupt function, save and restore some necessary CSRs on the
7131 stack so that their values are preserved. */
7132 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
7133 && cfun->machine->interrupt_handler_p
7134 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
7135 || (TARGET_ZFINX
7136 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7138 /* Always assume FCSR occupies UNITS_PER_WORD, to prevent the stack
7139 offset from becoming misaligned later. */
7140 unsigned int fcsr_size = UNITS_PER_WORD;
7141 if (!epilogue)
7143 riscv_save_restore_reg (word_mode, regno, offset, fn);
7144 offset -= fcsr_size;
7145 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
7146 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7147 offset, riscv_save_reg);
7149 else
7151 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7152 offset - fcsr_size, riscv_restore_reg);
7153 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
7154 riscv_save_restore_reg (word_mode, regno, offset, fn);
7155 offset -= fcsr_size;
7157 continue;
7160 if (TARGET_XTHEADMEMPAIR)
7162 /* Get the next reg/offset pair. */
7163 HOST_WIDE_INT offset2 = offset;
7164 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
7166 /* Validate everything before emitting a mempair instruction. */
7167 if (regno2 != INVALID_REGNUM
7168 && !cfun->machine->reg_is_wrapped_separately[regno2]
7169 && !(epilogue && !maybe_eh_return
7170 && riscv_is_eh_return_data_register (regno2)))
7172 bool load_p = (fn == riscv_restore_reg);
7173 rtx operands[4];
7174 th_mempair_prepare_save_restore_operands (operands,
7175 load_p, word_mode,
7176 regno, offset,
7177 regno2, offset2);
7179 /* If the operands fit into a mempair insn, then emit one. */
7180 if (th_mempair_operands_p (operands, load_p, word_mode))
7182 th_mempair_save_restore_regs (operands, load_p, word_mode);
7183 offset = offset2;
7184 regno = regno2;
7185 continue;
7190 riscv_save_restore_reg (word_mode, regno, offset, fn);
7193 /* This loop must iterate over the same space as its companion in
7194 riscv_compute_frame_info. */
7195 first_fp_offset
7196 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
7197 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7198 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7200 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7201 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7202 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
7203 ? CALLEE_SAVED_FREG_NUMBER (regno)
7204 : num_masked_fp;
7205 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
7206 if (handle_reg)
7207 riscv_save_restore_reg (mode, regno, offset, fn);
7208 num_masked_fp++;
7212 /* Call FN for each V register that is saved by the current function. */
7214 static void
7215 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
7216 riscv_save_restore_fn fn, bool prologue)
7218 rtx vlen = NULL_RTX;
7219 if (cfun->machine->frame.vmask != 0)
7221 if (UNITS_PER_V_REG.is_constant ()
7222 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
7223 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
7224 else
7226 vlen = RISCV_PROLOGUE_TEMP (Pmode);
7227 rtx insn
7228 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
7229 RTX_FRAME_RELATED_P (insn) = 1;
7233 /* Select the mode where LMUL is 1 and SEW is largest. */
7234 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
7236 if (prologue)
7238 /* This loop must iterate over the same space as its companion in
7239 riscv_compute_frame_info. */
7240 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7241 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7243 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7244 if (handle_reg)
7246 rtx insn = NULL_RTX;
7247 if (CONST_INT_P (vlen))
7249 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
7250 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7251 stack_pointer_rtx,
7252 GEN_INT (-INTVAL (vlen))));
7254 else
7255 insn = emit_insn (
7256 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7257 gcc_assert (insn != NULL_RTX);
7258 RTX_FRAME_RELATED_P (insn) = 1;
7259 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7260 remaining_size -= UNITS_PER_V_REG;
7264 else
7266 /* This loop must iterate over the same space as its companion in
7267 riscv_compute_frame_info. */
7268 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
7269 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7271 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7272 if (handle_reg)
7274 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7275 rtx insn = emit_insn (
7276 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7277 gcc_assert (insn != NULL_RTX);
7278 RTX_FRAME_RELATED_P (insn) = 1;
7279 remaining_size -= UNITS_PER_V_REG;
7285 /* For stack frames that can't be allocated with a single ADDI instruction,
7286 compute the best value to initially allocate. It must at a minimum
7287 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
7288 try to pick a value that will allow compression of the register saves
7289 without adding extra instructions. */
7291 static HOST_WIDE_INT
7292 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
7294 HOST_WIDE_INT remaining_const_size;
7295 if (!remaining_size.is_constant ())
7296 remaining_const_size
7297 = riscv_stack_align (remaining_size.coeffs[0])
7298 - riscv_stack_align (remaining_size.coeffs[1]);
7299 else
7300 remaining_const_size = remaining_size.to_constant ();
7302 /* The first step must reach the top of the vector register save area if
7303 any vector registers need to be preserved. */
7304 if (frame->vmask != 0)
7305 return (remaining_size - frame->v_sp_offset_top).to_constant ();
7307 if (SMALL_OPERAND (remaining_const_size))
7308 return remaining_const_size;
7310 poly_int64 callee_saved_first_step =
7311 remaining_size - frame->frame_pointer_offset;
7312 gcc_assert(callee_saved_first_step.is_constant ());
7313 HOST_WIDE_INT min_first_step =
7314 riscv_stack_align (callee_saved_first_step.to_constant ());
7315 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
7316 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
7317 gcc_assert (min_first_step <= max_first_step);
7319 /* As an optimization, use the least-significant bits of the total frame
7320 size, so that the second adjustment step is just LUI + ADD. */
7321 if (!SMALL_OPERAND (min_second_step)
7322 && remaining_const_size % IMM_REACH <= max_first_step
7323 && remaining_const_size % IMM_REACH >= min_first_step)
7324 return remaining_const_size % IMM_REACH;
7326 if (TARGET_RVC || TARGET_ZCA)
7328 /* If we need two subtracts, and one is small enough to allow compressed
7329 loads and stores, then put that one first. */
7330 if (IN_RANGE (min_second_step, 0,
7331 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
7332 return MAX (min_second_step, min_first_step);
7334 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
7335 with the minimum first step, so that we can get compressed loads and
7336 stores. */
7337 else if (!SMALL_OPERAND (min_second_step))
7338 return min_first_step;
7341 return max_first_step;
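/* A worked example (a sketch; IMM_REACH == 4096): a 4128-byte frame with
   32 bytes of callee saves has no 12-bit immediate encoding, and
   min_second_step == 2096 is not a small operand either; but
   4128 % IMM_REACH == 32 lies within [min_first_step, max_first_step],
   so the first step is 32 bytes and the remaining 4096 needs only a
   single LUI.  */

/* Generate the REG_CFA notes describing the GPR stores performed by the
   save libcall prologue, using the fixed save order documented below.  */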
7344 static rtx
7345 riscv_adjust_libcall_cfi_prologue ()
7347 rtx dwarf = NULL_RTX;
7348 rtx adjust_sp_rtx, reg, mem, insn;
7349 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7350 int offset;
7352 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7353 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7355 /* The save order is ra, s0, s1, s2 to s11. */
7356 if (regno == RETURN_ADDR_REGNUM)
7357 offset = saved_size - UNITS_PER_WORD;
7358 else if (regno == S0_REGNUM)
7359 offset = saved_size - UNITS_PER_WORD * 2;
7360 else if (regno == S1_REGNUM)
7361 offset = saved_size - UNITS_PER_WORD * 3;
7362 else
7363 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
7365 reg = gen_rtx_REG (Pmode, regno);
7366 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
7367 stack_pointer_rtx,
7368 offset));
7370 insn = gen_rtx_SET (mem, reg);
7371 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7374 /* Debug info for adjusting sp. */
7375 adjust_sp_rtx =
7376 gen_rtx_SET (stack_pointer_rtx,
7377 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
7378 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7379 dwarf);
7380 return dwarf;
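/* Likewise, but for the Zcmp multi-push prologue: describe the GPR stores
   and the SAVED_SIZE stack adjustment performed by cm.push.  */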
7383 static rtx
7384 riscv_adjust_multi_push_cfi_prologue (int saved_size)
7386 rtx dwarf = NULL_RTX;
7387 rtx adjust_sp_rtx, reg, mem, insn;
7388 unsigned int mask = cfun->machine->frame.mask;
7389 int offset;
7390 int saved_cnt = 0;
7392 if (mask & S10_MASK)
7393 mask |= S11_MASK;
7395 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
7396 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
7398 /* The save order is s11-s0, ra,
7399 from high to low address. */
7400 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
7402 reg = gen_rtx_REG (Pmode, regno);
7403 mem = gen_frame_mem (Pmode,
7404 plus_constant (Pmode, stack_pointer_rtx, offset));
7406 insn = gen_rtx_SET (mem, reg);
7407 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7410 /* Debug info for adjusting sp. */
7411 adjust_sp_rtx
7412 = gen_rtx_SET (stack_pointer_rtx,
7413 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
7414 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7415 return dwarf;
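/* Emit a stack tie, a scheduling barrier that keeps stack accesses from
   being reordered across the sp/fp adjustments around it.  */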
7418 static void
7419 riscv_emit_stack_tie (void)
7421 if (Pmode == SImode)
7422 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7423 else
7424 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
7427 /* Zcmp multi push and pop code_for_push_pop function pointer array. */
7428 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7429 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7430 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7431 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7432 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7433 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7434 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7435 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7436 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7437 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7438 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7439 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7440 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7441 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7442 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7443 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7444 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7445 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7446 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7447 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7448 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7449 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7450 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7451 {nullptr, nullptr, nullptr, nullptr},
7452 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7453 code_for_gpr_multi_popret_up_to_s11,
7454 code_for_gpr_multi_popretz_up_to_s11}};
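/* Generate a Zcmp multi push/pop (or popret/popretz) insn of kind OP
   covering REGS_NUM registers, adjusting the stack by ADJ_SIZE bytes,
   using the code_for_push_pop table above.  */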
7456 static rtx
7457 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7458 unsigned int regs_num)
7460 gcc_assert (op < ZCMP_OP_NUM);
7461 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
7462 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra.  */
7463 rtx stack_adj = GEN_INT (adj_size);
7464 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
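/* Return a mask of the first MAX_FPRS_PUSH FPRs that need saving, so that
   they can be stored in the extra space reserved by cm.push.  */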
7467 static unsigned
7468 get_multi_push_fpr_mask (unsigned max_fprs_push)
7470 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7471 for (unsigned regno = FP_REG_FIRST;
7472 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7473 if (riscv_save_reg_p (regno))
7474 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7475 return mask_fprs_push;
7478 /* Expand the "prologue" pattern. */
7480 void
7481 riscv_expand_prologue (void)
7483 struct riscv_frame_info *frame = &cfun->machine->frame;
7484 poly_int64 remaining_size = frame->total_size;
7485 unsigned mask = frame->mask;
7486 unsigned fmask = frame->fmask;
7487 int spimm, multi_push_additional, stack_adj;
7488 rtx insn, dwarf = NULL_RTX;
7489 unsigned th_int_mask = 0;
7491 if (flag_stack_usage_info)
7492 current_function_static_stack_size = constant_lower_bound (remaining_size);
7494 if (cfun->machine->naked_p)
7495 return;
7497 /* Prefer multi-push to the save-restore libcall. */
7498 if (riscv_use_multi_push (frame))
7500 remaining_size -= frame->multi_push_adj_base;
7501 /* If there are vector registers that need to be saved, the SP can only
7502 be lowered to the frame->v_sp_offset_top position at most, since the
7503 vector registers will need to be saved one by one by decrementing the
7504 SP later. */
7505 poly_int64 remaining_size_above_varea
7506 = frame->vmask != 0
7507 ? remaining_size - frame->v_sp_offset_top
7508 : remaining_size;
7510 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7511 spimm = 3;
7512 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7513 spimm = 2;
7514 else if (known_gt (remaining_size_above_varea, 0))
7515 spimm = 1;
7516 else
7517 spimm = 0;
7518 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7519 frame->multi_push_adj_addi = multi_push_additional;
7520 remaining_size -= multi_push_additional;
7522 /* Emit the multi-push insn and the DWARF info along with it. */
7523 stack_adj = frame->multi_push_adj_base + multi_push_additional;
7524 insn = emit_insn (riscv_gen_multi_push_pop_insn (
7525 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
7526 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
7527 RTX_FRAME_RELATED_P (insn) = 1;
7528 REG_NOTES (insn) = dwarf;
7530 /* Temporarily fib that we need not save GPRs. */
7531 frame->mask = 0;
7533 /* Push FPRs into the additional space reserved by cm.push. */
7534 if (fmask)
7536 unsigned mask_fprs_push
7537 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
7538 frame->fmask &= mask_fprs_push;
7539 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
7540 false);
7541 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs. */
7544 /* When optimizing for size, call a subroutine to save the registers. */
7545 else if (riscv_use_save_libcall (frame))
7547 rtx dwarf = NULL_RTX;
7548 dwarf = riscv_adjust_libcall_cfi_prologue ();
7550 remaining_size -= frame->save_libcall_adjustment;
7551 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
7552 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
7554 RTX_FRAME_RELATED_P (insn) = 1;
7555 REG_NOTES (insn) = dwarf;
7558 th_int_mask = th_int_get_mask (frame->mask);
7559 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7561 frame->mask &= ~th_int_mask;
7563 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
7564 interrupts, such as fcsr. */
7565 if ((TARGET_HARD_FLOAT && frame->fmask)
7566 || (TARGET_ZFINX && frame->mask))
7567 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7569 unsigned save_adjustment = th_int_get_save_adjustment ();
7570 frame->gp_sp_offset -= save_adjustment;
7571 remaining_size -= save_adjustment;
7573 insn = emit_insn (gen_th_int_push ());
7575 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
7576 RTX_FRAME_RELATED_P (insn) = 1;
7577 REG_NOTES (insn) = dwarf;
7580 /* Save the GP, FP registers. */
7581 if ((frame->mask | frame->fmask) != 0)
7583 if (known_gt (remaining_size, frame->frame_pointer_offset))
7585 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
7586 remaining_size -= step1;
7587 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7588 GEN_INT (-step1));
7589 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7591 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
7594 /* Undo the above fib. */
7595 frame->mask = mask;
7596 frame->fmask = fmask;
7598 /* Set up the frame pointer, if we're using one. */
7599 if (frame_pointer_needed)
7601 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
7602 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
7603 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7605 riscv_emit_stack_tie ();
7608 /* Save the V registers. */
7609 if (frame->vmask != 0)
7610 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
7612 /* Allocate the rest of the frame. */
7613 if (known_gt (remaining_size, 0))
7615 /* Two-step adjustment:
7616 1. scalable frame, 2. constant frame. */
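/* For example, if remaining_size is the poly_int64 [40, 16]
   (i.e. 40 + 16x bytes, where x counts vector chunks beyond the
   minimum), the first step peels off the runtime part [16, 16] with a
   vlenb-scaled adjustment and leaves a constant 24 bytes for the
   second step.  */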
7617 poly_int64 scalable_frame (0, 0);
7618 if (!remaining_size.is_constant ())
7620 /* First for scalable frame. */
7621 poly_int64 scalable_frame = remaining_size;
7622 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
7623 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
7624 remaining_size -= scalable_frame;
7627 /* Second step for constant frame. */
7628 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
7629 if (constant_frame == 0)
7631 /* We must have allocated stack space for the scalable frame.
7632 Emit a stack tie if we have a frame pointer so that the
7633 allocation is ordered WRT fp setup and subsequent writes
7634 into the frame. */
7635 if (frame_pointer_needed)
7636 riscv_emit_stack_tie ();
7637 return;
7640 if (SMALL_OPERAND (-constant_frame))
7642 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7643 GEN_INT (-constant_frame));
7644 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7646 else
7648 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
7649 emit_insn (gen_add3_insn (stack_pointer_rtx,
7650 stack_pointer_rtx,
7651 RISCV_PROLOGUE_TEMP (Pmode)));
7653 /* Describe the effect of the previous instructions. */
7654 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
7655 insn = gen_rtx_SET (stack_pointer_rtx, insn);
7656 riscv_set_frame_expr (insn);
7659 /* We must have allocated the remainder of the stack frame.
7660 Emit a stack tie if we have a frame pointer so that the
7661 allocation is ordered WRT fp setup and subsequent writes
7662 into the frame. */
7663 if (frame_pointer_needed)
7664 riscv_emit_stack_tie ();
7668 static rtx
7669 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
7671 rtx dwarf = NULL_RTX;
7672 rtx adjust_sp_rtx, reg;
7673 unsigned int mask = cfun->machine->frame.mask;
7675 if (mask & S10_MASK)
7676 mask |= S11_MASK;
7678 /* Debug info for adjust sp. */
7679 adjust_sp_rtx
7680 = gen_rtx_SET (stack_pointer_rtx,
7681 plus_constant (Pmode, stack_pointer_rtx, saved_size));
7682 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7684 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7685 if (BITSET_P (mask, regno - GP_REG_FIRST))
7687 reg = gen_rtx_REG (Pmode, regno);
7688 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7691 return dwarf;
7694 static rtx
7695 riscv_adjust_libcall_cfi_epilogue ()
7697 rtx dwarf = NULL_RTX;
7698 rtx adjust_sp_rtx, reg;
7699 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7701 /* Debug info for adjust sp. */
7702 adjust_sp_rtx =
7703 gen_rtx_SET (stack_pointer_rtx,
7704 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
7705 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7706 dwarf);
7708 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7709 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7711 reg = gen_rtx_REG (Pmode, regno);
7712 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7715 return dwarf;
7718 /* Return the insn clearing a0 if the popretz pattern can be matched,
otherwise NULL:
7719 set (reg 10 a0) (const_int 0)
7720 use (reg 10 a0)
7721 NOTE_INSN_EPILOGUE_BEG */
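/* Such a sequence is typically produced by a function that ends with
   "return 0;": a0 is cleared, then marked as used by the return,
   immediately before the epilogue begins.  */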
7722 static rtx_insn *
7723 riscv_zcmp_can_use_popretz (void)
7725 rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
7727 /* Sequence stack for NOTE_INSN_EPILOGUE_BEG. */
7728 struct sequence_stack *outer_seq = get_current_sequence ()->next;
7729 if (!outer_seq)
7730 return NULL;
7731 insn = outer_seq->first;
7732 if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
7733 return NULL;
7735 /* Sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG. */
7736 outer_seq = outer_seq->next;
7737 if (outer_seq)
7738 insn = outer_seq->last;
7740 /* Skip notes. */
7741 while (insn && NOTE_P (insn))
7743 insn = PREV_INSN (insn);
7745 use = insn;
7747 /* Match use (reg 10 a0). */
7748 if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
7749 || !REG_P (XEXP (PATTERN (use), 0))
7750 || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
7751 return NULL;
7753 /* Match set (reg 10 a0) (const_int 0 [0]). */
7754 clear = PREV_INSN (use);
7755 if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
7756 && REG_P (SET_DEST (PATTERN (clear)))
7757 && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
7758 && SET_SRC (PATTERN (clear)) == const0_rtx)
7759 return clear;
7761 return NULL;
7764 static void
7765 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
7766 unsigned multipop_size)
7768 rtx insn;
7769 unsigned regs_count = riscv_multi_push_regs_count (mask);
7771 if (!use_multi_pop_normal)
7772 insn = emit_insn (
7773 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
7774 else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
7776 delete_insn (NEXT_INSN (clear_a0_insn));
7777 delete_insn (clear_a0_insn);
7778 insn = emit_jump_insn (
7779 riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
7781 else
7782 insn = emit_jump_insn (
7783 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
7785 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
7786 RTX_FRAME_RELATED_P (insn) = 1;
7787 REG_NOTES (insn) = dwarf;
7790 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
7791 style says which. */
7793 void
7794 riscv_expand_epilogue (int style)
7796 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
7797 deallocate before restoring the registers. STEP2 is the amount we
7798 should deallocate afterwards, including the callee-saved regs. STEP3
7799 is the amount deallocated by the save-restore libcall.
7801 Start off by assuming that no registers need to be restored. */
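/* As an illustration: for a plain constant-size frame restored with
   ordinary loads (no libcall, no cm.pop), STEP3 is 0 and the frame is
   released as sp += STEP1 before the register restores and
   sp += STEP2 after them, with STEP1 + STEP2 equal to the total frame
   size.  */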
7802 struct riscv_frame_info *frame = &cfun->machine->frame;
7803 unsigned mask = frame->mask;
7804 unsigned fmask = frame->fmask;
7805 unsigned mask_fprs_push = 0;
7806 poly_int64 step2 = 0;
7807 bool use_multi_pop_normal
7808 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
7809 bool use_multi_pop_sibcall
7810 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
7811 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
7813 bool use_restore_libcall
7814 = !use_multi_pop
7815 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
7816 unsigned libcall_size = use_restore_libcall && !use_multi_pop
7817 ? frame->save_libcall_adjustment
7818 : 0;
7819 unsigned multipop_size
7820 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
7821 : 0;
7822 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7823 unsigned th_int_mask = 0;
7824 rtx insn;
7826 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
7827 bool need_barrier_p = known_ne (get_frame_size ()
7828 + cfun->machine->frame.arg_pointer_offset, 0);
7830 if (cfun->machine->naked_p)
7832 gcc_assert (style == NORMAL_RETURN);
7834 emit_jump_insn (gen_return ());
7836 return;
7839 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
7841 emit_jump_insn (gen_return ());
7842 return;
7845 /* Reset the epilogue cfa info before starting to emit the epilogue. */
7846 epilogue_cfa_sp_offset = 0;
7848 /* Move past any dynamic stack allocations. */
7849 if (cfun->calls_alloca)
7851 /* Emit a barrier to prevent loads from a deallocated stack. */
7852 riscv_emit_stack_tie ();
7853 need_barrier_p = false;
7855 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
7856 rtx adjust = NULL_RTX;
7858 if (!adjust_offset.is_constant ())
7860 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
7861 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
7862 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
7863 gen_int_mode (adjust_offset, Pmode));
7864 adjust = tmp1;
7866 else
7868 if (!SMALL_OPERAND (adjust_offset.to_constant ()))
7870 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
7871 GEN_INT (adjust_offset.to_constant ()));
7872 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7874 else
7875 adjust = GEN_INT (adjust_offset.to_constant ());
7878 insn = emit_insn (
7879 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
7880 adjust));
7882 rtx dwarf = NULL_RTX;
7883 rtx cfa_adjust_value = gen_rtx_PLUS (
7884 Pmode, hard_frame_pointer_rtx,
7885 gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
7886 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
7887 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
7888 RTX_FRAME_RELATED_P (insn) = 1;
7890 REG_NOTES (insn) = dwarf;
7893 if (use_restore_libcall || use_multi_pop)
7894 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7896 /* If we need to restore registers, deallocate as much stack as
7897 possible in the second step without going out of range. */
7898 if (use_multi_pop)
7900 if (frame->fmask
7901 && known_gt (frame->total_size - multipop_size,
7902 frame->frame_pointer_offset))
7903 step2
7904 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
7906 else if ((frame->mask | frame->fmask) != 0)
7907 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
7909 if (use_restore_libcall || use_multi_pop)
7910 frame->mask = mask; /* Undo the above fib. */
7912 poly_int64 step1;
7913 /* STEP1 must be set to the bottom of the vector register save area if any
7914 vector registers need to be preserved. */
7915 if (frame->vmask != 0)
7917 step1 = frame->v_sp_offset_bottom;
7918 step2 = frame->total_size - step1 - libcall_size - multipop_size;
7920 else
7921 step1 = frame->total_size - step2 - libcall_size - multipop_size;
7923 /* Set TARGET to BASE + STEP1. */
7924 if (known_gt (step1, 0))
7926 /* Emit a barrier to prevent loads from a deallocated stack. */
7927 riscv_emit_stack_tie ();
7928 need_barrier_p = false;
7930 /* Restore the scalable frame that was allocated in the prologue. */
7931 if (!step1.is_constant ())
7933 poly_int64 scalable_frame = step1;
7934 scalable_frame.coeffs[0] = step1.coeffs[1];
7935 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
7936 true);
7937 step1 -= scalable_frame;
7940 /* Get an rtx for STEP1 that we can add to BASE.
7941 Skip if the adjustment is zero. */
7942 if (step1.to_constant () != 0)
7944 rtx adjust = GEN_INT (step1.to_constant ());
7945 if (!SMALL_OPERAND (step1.to_constant ()))
7947 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
7948 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7951 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7952 stack_pointer_rtx,
7953 adjust));
7954 rtx dwarf = NULL_RTX;
7955 rtx cfa_adjust_rtx
7956 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7957 gen_int_mode (step2 + libcall_size + multipop_size,
7958 Pmode));
7960 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7961 RTX_FRAME_RELATED_P (insn) = 1;
7963 REG_NOTES (insn) = dwarf;
7966 else if (frame_pointer_needed)
7968 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
7969 old value of FP. */
7970 epilogue_cfa_sp_offset = step2;
7973 if (use_multi_pop)
7975 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7976 if (fmask)
7978 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
7979 / UNITS_PER_WORD);
7980 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push. */
7983 else if (use_restore_libcall)
7984 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7986 th_int_mask = th_int_get_mask (frame->mask);
7987 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7989 frame->mask &= ~th_int_mask;
7991 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
7992 interrupts, such as fcsr. */
7993 if ((TARGET_HARD_FLOAT && frame->fmask)
7994 || (TARGET_ZFINX && frame->mask))
7995 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7998 /* Restore the registers. */
7999 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
8000 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
8001 - multipop_size,
8002 riscv_restore_reg, true, style == EXCEPTION_RETURN);
8004 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8006 frame->mask = mask; /* Undo the above fib. */
8007 unsigned save_adjustment = th_int_get_save_adjustment ();
8008 gcc_assert (step2.to_constant () >= save_adjustment);
8009 step2 -= save_adjustment;
8012 if (use_restore_libcall)
8013 frame->mask = mask; /* Undo the above fib. */
8015 if (need_barrier_p)
8016 riscv_emit_stack_tie ();
8018 /* Deallocate the final bit of the frame. */
8019 if (step2.to_constant () > 0)
8021 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8022 GEN_INT (step2.to_constant ())));
8024 rtx dwarf = NULL_RTX;
8025 rtx cfa_adjust_rtx
8026 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8027 GEN_INT (libcall_size + multipop_size));
8028 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8029 RTX_FRAME_RELATED_P (insn) = 1;
8031 REG_NOTES (insn) = dwarf;
8034 if (use_multi_pop)
8036 /* Restore FPRs pushed by cm.push. */
8037 frame->fmask = fmask & mask_fprs_push;
8038 if (frame->fmask)
8039 riscv_for_each_saved_reg (frame->total_size - libcall_size
8040 - multipop_size,
8041 riscv_restore_reg, true,
8042 style == EXCEPTION_RETURN);
8043 /* Undo the above fib. */
8044 frame->mask = mask;
8045 frame->fmask = fmask;
8046 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
8047 multipop_size);
8048 if (use_multi_pop_normal)
8049 return;
8051 else if (use_restore_libcall)
8053 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
8054 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
8055 RTX_FRAME_RELATED_P (insn) = 1;
8056 REG_NOTES (insn) = dwarf;
8058 emit_jump_insn (gen_gpr_restore_return (ra));
8059 return;
8062 /* Add in the __builtin_eh_return stack adjustment. */
8063 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
8064 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8065 EH_RETURN_STACKADJ_RTX));
8067 /* Return from interrupt. */
8068 if (cfun->machine->interrupt_handler_p)
8070 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
8072 gcc_assert (mode != UNKNOWN_MODE);
8074 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8075 emit_jump_insn (gen_th_int_pop ());
8076 else if (mode == MACHINE_MODE)
8077 emit_jump_insn (gen_riscv_mret ());
8078 else if (mode == SUPERVISOR_MODE)
8079 emit_jump_insn (gen_riscv_sret ());
8080 else
8081 emit_jump_insn (gen_riscv_uret ());
8083 else if (style != SIBCALL_RETURN)
8084 emit_jump_insn (gen_simple_return_internal (ra));
8087 /* Implement EPILOGUE_USES. */
8089 bool
8090 riscv_epilogue_uses (unsigned int regno)
8092 if (regno == RETURN_ADDR_REGNUM)
8093 return true;
8095 if (epilogue_completed && cfun->machine->interrupt_handler_p)
8097 /* An interrupt function restores temp regs, so we must indicate that
8098 they are live at function end. */
8099 if (df_regs_ever_live_p (regno)
8100 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
8101 return true;
8104 return false;
8107 static bool
8108 riscv_avoid_shrink_wrapping_separate ()
8110 if (riscv_use_save_libcall (&cfun->machine->frame)
8111 || cfun->machine->interrupt_handler_p
8112 || !cfun->machine->frame.gp_sp_offset.is_constant ())
8113 return true;
8115 return false;
8118 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
8120 static sbitmap
8121 riscv_get_separate_components (void)
8123 HOST_WIDE_INT offset;
8124 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8125 bitmap_clear (components);
8127 if (riscv_avoid_shrink_wrapping_separate ())
8128 return components;
8130 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8131 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8132 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8134 /* We can only wrap registers that have small operand offsets.
8135 For large offsets a pseudo register might be needed which
8136 cannot be created during the shrink wrapping pass. */
8137 if (SMALL_OPERAND (offset))
8138 bitmap_set_bit (components, regno);
8140 offset -= UNITS_PER_WORD;
8143 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8144 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8145 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8147 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8149 /* We can only wrap registers that have small operand offsets.
8150 For large offsets a pseudo register might be needed which
8151 cannot be created during the shrink wrapping pass. */
8152 if (SMALL_OPERAND (offset))
8153 bitmap_set_bit (components, regno);
8155 offset -= GET_MODE_SIZE (mode).to_constant ();
8158 /* Don't mess with the hard frame pointer. */
8159 if (frame_pointer_needed)
8160 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
8162 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
8164 return components;
8167 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
8169 static sbitmap
8170 riscv_components_for_bb (basic_block bb)
8172 bitmap in = DF_LIVE_IN (bb);
8173 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
8174 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
8176 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8177 bitmap_clear (components);
8179 function_abi_aggregator callee_abis;
8180 rtx_insn *insn;
8181 FOR_BB_INSNS (bb, insn)
8182 if (CALL_P (insn))
8183 callee_abis.note_callee_abi (insn_callee_abi (insn));
8184 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
8186 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
8187 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8188 if (!fixed_regs[regno]
8189 && !crtl->abi->clobbers_full_reg_p (regno)
8190 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8191 || bitmap_bit_p (in, regno)
8192 || bitmap_bit_p (gen, regno)
8193 || bitmap_bit_p (kill, regno)))
8194 bitmap_set_bit (components, regno);
8196 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8197 if (!fixed_regs[regno]
8198 && !crtl->abi->clobbers_full_reg_p (regno)
8199 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8200 || bitmap_bit_p (in, regno)
8201 || bitmap_bit_p (gen, regno)
8202 || bitmap_bit_p (kill, regno)))
8203 bitmap_set_bit (components, regno);
8205 return components;
8208 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
8210 static void
8211 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
8213 /* Nothing to do for riscv. */
8216 static void
8217 riscv_process_components (sbitmap components, bool prologue_p)
8219 HOST_WIDE_INT offset;
8220 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
8222 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8223 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8224 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8226 if (bitmap_bit_p (components, regno))
8227 riscv_save_restore_reg (word_mode, regno, offset, fn);
8229 offset -= UNITS_PER_WORD;
8232 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8233 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8234 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8236 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8238 if (bitmap_bit_p (components, regno))
8239 riscv_save_restore_reg (mode, regno, offset, fn);
8241 offset -= GET_MODE_SIZE (mode).to_constant ();
8245 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
8247 static void
8248 riscv_emit_prologue_components (sbitmap components)
8250 riscv_process_components (components, true);
8253 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
8255 static void
8256 riscv_emit_epilogue_components (sbitmap components)
8258 riscv_process_components (components, false);
8261 static void
8262 riscv_set_handled_components (sbitmap components)
8264 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8265 if (bitmap_bit_p (components, regno))
8266 cfun->machine->reg_is_wrapped_separately[regno] = true;
8268 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8269 if (bitmap_bit_p (components, regno))
8270 cfun->machine->reg_is_wrapped_separately[regno] = true;
8273 /* Return nonzero if this function is known to have a null epilogue.
8274 This allows the optimizer to omit jumps to jumps if no stack
8275 was created. */
8277 bool
8278 riscv_can_use_return_insn (void)
8280 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
8281 && ! cfun->machine->interrupt_handler_p);
8284 /* Given that there exists at least one variable that is set (produced)
8285 by OUT_INSN and read (consumed) by IN_INSN, return true iff
8286 IN_INSN represents one or more memory store operations and none of
8287 the variables set by OUT_INSN is used by IN_INSN as the address of a
8288 store operation. If either IN_INSN or OUT_INSN does not represent
8289 a "single" RTL SET expression (as loosely defined by the
8290 implementation of the single_set function) or a PARALLEL with only
8291 SETs, CLOBBERs, and USEs inside, this function returns false.
8293 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
8294 conditions that result in assertion failures in the generic
8295 store_data_bypass_p function and returns FALSE in such cases.
8297 This is required to make -msave-restore work with the sifive-7
8298 pipeline description. */
8300 bool
8301 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
8303 rtx out_set, in_set;
8304 rtx out_pat, in_pat;
8305 rtx out_exp, in_exp;
8306 int i, j;
8308 in_set = single_set (in_insn);
8309 if (in_set)
8311 if (MEM_P (SET_DEST (in_set)))
8313 out_set = single_set (out_insn);
8314 if (!out_set)
8316 out_pat = PATTERN (out_insn);
8317 if (GET_CODE (out_pat) == PARALLEL)
8319 for (i = 0; i < XVECLEN (out_pat, 0); i++)
8321 out_exp = XVECEXP (out_pat, 0, i);
8322 if ((GET_CODE (out_exp) == CLOBBER)
8323 || (GET_CODE (out_exp) == USE))
8324 continue;
8325 else if (GET_CODE (out_exp) != SET)
8326 return false;
8332 else
8334 in_pat = PATTERN (in_insn);
8335 if (GET_CODE (in_pat) != PARALLEL)
8336 return false;
8338 for (i = 0; i < XVECLEN (in_pat, 0); i++)
8340 in_exp = XVECEXP (in_pat, 0, i);
8341 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
8342 continue;
8343 else if (GET_CODE (in_exp) != SET)
8344 return false;
8346 if (MEM_P (SET_DEST (in_exp)))
8348 out_set = single_set (out_insn);
8349 if (!out_set)
8351 out_pat = PATTERN (out_insn);
8352 if (GET_CODE (out_pat) != PARALLEL)
8353 return false;
8354 for (j = 0; j < XVECLEN (out_pat, 0); j++)
8356 out_exp = XVECEXP (out_pat, 0, j);
8357 if ((GET_CODE (out_exp) == CLOBBER)
8358 || (GET_CODE (out_exp) == USE))
8359 continue;
8360 else if (GET_CODE (out_exp) != SET)
8361 return false;
8368 return store_data_bypass_p (out_insn, in_insn);
8371 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
8373 When floating-point registers are wider than integer ones, moves between
8374 them must go through memory. */
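/* E.g. on RV32D a DFmode value is 8 bytes while UNITS_PER_WORD is 4,
   so moving it between GR_REGS and FP_REGS must bounce through a
   stack slot unless XTheadFmv or Zfa provides a direct move.  */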
8376 static bool
8377 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
8378 reg_class_t class2)
8380 return (!riscv_v_ext_mode_p (mode)
8381 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
8382 && (class1 == FP_REGS) != (class2 == FP_REGS)
8383 && !TARGET_XTHEADFMV
8384 && !TARGET_ZFA);
8387 /* Implement TARGET_REGISTER_MOVE_COST. */
8389 static int
8390 riscv_register_move_cost (machine_mode mode,
8391 reg_class_t from, reg_class_t to)
8393 if ((from == FP_REGS && to == GR_REGS)
8394 || (from == GR_REGS && to == FP_REGS))
8395 return tune_param->fmv_cost;
8397 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
8400 /* Implement TARGET_HARD_REGNO_NREGS. */
8402 static unsigned int
8403 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
8405 if (riscv_v_ext_vector_mode_p (mode))
8407 /* Handle fractional LMUL: the value occupies only part of a vector
8408 register but still needs a whole register to hold it. */
8409 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
8410 return 1;
8412 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
8415 /* For tuple modes, the number of registers = NF * LMUL. */
8416 if (riscv_v_ext_tuple_mode_p (mode))
8418 unsigned int nf = riscv_vector::get_nf (mode);
8419 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
8420 poly_int64 size = GET_MODE_SIZE (subpart_mode);
8421 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
8422 if (maybe_lt (size, UNITS_PER_V_REG))
8423 return nf;
8424 else
8426 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
8427 return nf * lmul;
8431 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
8432 if (riscv_v_ext_vls_mode_p (mode))
8434 int size = GET_MODE_BITSIZE (mode).to_constant ();
8435 if (size < TARGET_MIN_VLEN)
8436 return 1;
8437 else
8438 return size / TARGET_MIN_VLEN;
8441 /* Modes for VL or VTYPE are just markers and do not hold a value,
8442 so they always consume one register. */
8443 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8444 || FRM_REG_P (regno))
8445 return 1;
8447 /* Assume every valid non-vector mode fits in one vector register. */
8448 if (V_REG_P (regno))
8449 return 1;
8451 if (FP_REG_P (regno))
8452 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
8454 /* All other registers are word-sized. */
8455 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
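/* Worked examples of the rules above, assuming TARGET_MIN_VLEN is 128
   with scalable vectors (UNITS_PER_V_REG == [16, 16]): an LMUL == 2
   vector mode occupies 2 vector registers, a fractional LMUL == 1/2
   mode occupies 1, a tuple mode with NF == 3 at LMUL == 2 occupies
   3 * 2 == 6, and a 256-bit VLS mode occupies 256 / 128 == 2.  */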
8458 /* Implement TARGET_HARD_REGNO_MODE_OK. */
8460 static bool
8461 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8463 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
8465 if (GP_REG_P (regno))
8467 if (riscv_v_ext_mode_p (mode))
8468 return false;
8470 if (!GP_REG_P (regno + nregs - 1))
8471 return false;
8473 else if (FP_REG_P (regno))
8475 if (riscv_v_ext_mode_p (mode))
8476 return false;
8478 if (!FP_REG_P (regno + nregs - 1))
8479 return false;
8481 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8482 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8483 return false;
8485 /* Only use callee-saved registers if a potential callee is guaranteed
8486 to spill the requisite width. */
8487 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8488 || (!call_used_or_fixed_reg_p (regno)
8489 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8490 return false;
8492 else if (V_REG_P (regno))
8494 if (!riscv_v_ext_mode_p (mode))
8495 return false;
8497 if (!V_REG_P (regno + nregs - 1))
8498 return false;
8500 int regno_alignment = riscv_get_v_regno_alignment (mode);
8501 if (regno_alignment != 1)
8502 return ((regno % regno_alignment) == 0);
8504 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8505 || FRM_REG_P (regno))
8506 return true;
8507 else
8508 return false;
8510 /* Require same callee-savedness for all registers. */
8511 for (unsigned i = 1; i < nregs; i++)
8512 if (call_used_or_fixed_reg_p (regno)
8513 != call_used_or_fixed_reg_p (regno + i))
8514 return false;
8516 /* Only use even registers in RV32 ZDINX. */
8517 if (!TARGET_64BIT && TARGET_ZDINX)
8518 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8519 && GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
8520 return !(regno & 1);
8523 return true;
8526 /* Implement TARGET_MODES_TIEABLE_P.
8528 Don't allow floating-point modes to be tied, since type punning of
8529 single-precision and double-precision is implementation-defined. */
8531 static bool
8532 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
8534 /* We don't allow modes from different REG_CLASSes to be tied, since
8535 that causes an ICE in register allocation (RA).
8536 E.g. V2SI and DI are not tieable. */
8537 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
8538 return false;
8539 return (mode1 == mode2
8540 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
8541 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
8544 /* Implement CLASS_MAX_NREGS. */
8546 static unsigned char
8547 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
8549 if (reg_class_subset_p (rclass, FP_REGS))
8550 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
8552 if (reg_class_subset_p (rclass, GR_REGS))
8553 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
8555 if (reg_class_subset_p (rclass, V_REGS))
8556 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
8558 return 0;
8561 /* Implement TARGET_MEMORY_MOVE_COST. */
8563 static int
8564 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
8566 return (tune_param->memory_cost
8567 + memory_move_secondary_cost (mode, rclass, in));
8570 /* Return the number of instructions that can be issued per cycle. */
8572 static int
8573 riscv_issue_rate (void)
8575 return tune_param->issue_rate;
8578 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
8579 static int
8580 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
8582 if (DEBUG_INSN_P (insn))
8583 return more;
8585 rtx_code code = GET_CODE (PATTERN (insn));
8586 if (code == USE || code == CLOBBER)
8587 return more;
8589 /* GHOST insns are used for blockage and similar cases which
8590 effectively end a cycle. */
8591 if (get_attr_type (insn) == TYPE_GHOST)
8592 return 0;
8594 /* If we ever encounter an insn with an unknown type, trip
8595 an assert so we can find and fix this problem. */
8596 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
8598 /* If we ever encounter an insn without an insn reservation, trip
8599 an assert so we can find and fix this problem. */
8600 gcc_assert (insn_has_dfa_reservation_p (insn));
8602 return more - 1;
8605 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
8606 instruction fusion of some sort. */
8608 static bool
8609 riscv_macro_fusion_p (void)
8611 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
8614 /* Return true iff the instruction fusion described by OP is enabled. */
8616 static bool
8617 riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
8619 return tune_param->fusible_ops & op;
8622 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
8623 should be kept together during scheduling. */
8625 static bool
8626 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
8628 rtx prev_set = single_set (prev);
8629 rtx curr_set = single_set (curr);
8630 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
8631 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
8633 if (!riscv_macro_fusion_p ())
8634 return false;
8636 if (simple_sets_p
8637 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
8638 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
8640 /* We are trying to match the following:
8641 prev (slli) == (set (reg:DI rD)
8642 (ashift:DI (reg:DI rS) (const_int 32)))
8643 curr (slri) == (set (reg:DI rD)
8644 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
8645 with <shift> being either 32 for FUSE_ZEXTW, or
8646 less than 32 for FUSE_ZEXTWS. */
8648 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8649 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8650 && REG_P (SET_DEST (prev_set))
8651 && REG_P (SET_DEST (curr_set))
8652 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8653 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8654 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8655 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8656 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
8657 && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
8658 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
8659 || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
8660 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
8661 return true;
8664 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
8666 /* We are trying to match the following:
8667 prev (slli) == (set (reg:DI rD)
8668 (ashift:DI (reg:DI rS) (const_int 48)))
8669 curr (slri) == (set (reg:DI rD)
8670 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
8672 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8673 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8674 && REG_P (SET_DEST (prev_set))
8675 && REG_P (SET_DEST (curr_set))
8676 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8677 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8678 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8679 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8680 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
8681 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
8682 return true;
8685 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
8687 /* We are trying to match the following:
8688 prev (add) == (set (reg:DI rD)
8689 (plus:DI (reg:DI rS1) (reg:DI rS2))
8690 curr (ld) == (set (reg:DI rD)
8691 (mem:DI (reg:DI rD))) */
8693 if (MEM_P (SET_SRC (curr_set))
8694 && REG_P (XEXP (SET_SRC (curr_set), 0))
8695 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8696 && GET_CODE (SET_SRC (prev_set)) == PLUS
8697 && REG_P (XEXP (SET_SRC (prev_set), 0))
8698 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8699 return true;
8701 /* We are trying to match the following:
8702 prev (add) == (set (reg:DI rD)
8703 (plus:DI (reg:DI rS1) (reg:DI rS2)))
8704 curr (lw) == (set (reg:DI rD2) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
8706 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8707 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
8708 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8709 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
8710 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
8711 && GET_CODE (SET_SRC (prev_set)) == PLUS
8712 && REG_P (XEXP (SET_SRC (prev_set), 0))
8713 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8714 return true;
8717 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
8719 /* We are trying to match the following:
8720 prev (add) == (set (reg:DI rS)
8721 (plus:DI (reg:DI rS) (const_int))
8722 curr (ld) == (set (reg:DI rD)
8723 (mem:DI (reg:DI rS))) */
8725 if (MEM_P (SET_SRC (curr_set))
8726 && REG_P (XEXP (SET_SRC (curr_set), 0))
8727 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8728 && GET_CODE (SET_SRC (prev_set)) == PLUS
8729 && REG_P (XEXP (SET_SRC (prev_set), 0))
8730 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
8731 return true;
8734 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
8736 /* We are trying to match the following:
8737 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8738 curr (addi) == (set (reg:DI rD)
8739 (plus:DI (reg:DI rD) (const_int IMM12))) */
8741 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
8742 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8743 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8744 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
8745 && (GET_CODE (SET_SRC (prev_set)) == HIGH
8746 || (CONST_INT_P (SET_SRC (prev_set))
8747 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
8748 return true;
8751 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
8753 /* We are trying to match the following:
8754 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8755 curr (addi) == (set (reg:DI rD)
8756 (plus:DI (reg:DI rD) (const_int IMM12)))
8758 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8759 curr (addi) == (set (reg:DI rD)
8760 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
8762 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8763 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8764 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
8765 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8766 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
8768 return true;
8771 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
8773 /* We are trying to match the following:
8774 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8775 curr (ld) == (set (reg:DI rD)
8776 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
8778 if (CONST_INT_P (SET_SRC (prev_set))
8779 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
8780 && MEM_P (SET_SRC (curr_set))
8781 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8782 return true;
8784 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8785 && MEM_P (SET_SRC (curr_set))
8786 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
8787 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
8788 return true;
8790 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8791 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8792 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
8793 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8794 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
8795 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
8796 return true;
8799 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
8801 /* We are trying to match the following:
8802 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8803 curr (ld) == (set (reg:DI rD)
8804 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
8806 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8807 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8808 && MEM_P (SET_SRC (curr_set))
8809 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8810 return true;
8813 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
8815 /* We are trying to match the following:
8816 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8817 (reg rS1))
8818 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8819 (reg rS2)) */
8821 if (MEM_P (SET_DEST (prev_set))
8822 && MEM_P (SET_DEST (curr_set))
8823 /* We can probably relax this condition. The documentation is a bit
8824 unclear about sub-word cases. So we just model DImode for now. */
8825 && GET_MODE (SET_DEST (curr_set)) == DImode
8826 && GET_MODE (SET_DEST (prev_set)) == DImode)
8828 rtx base_prev, base_curr, offset_prev, offset_curr;
8830 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
8831 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
8833 /* The two stores must be contained within opposite halves of the same
8834 16 byte aligned block of memory. We know that the stack pointer and
8835 the frame pointer have suitable alignment. So we just need to check
8836 the offsets of the two stores for suitable alignment.
8838 Originally the thought was to check MEM_ALIGN, but that was reporting
8839 incorrect alignments, even for SP/FP accesses, so we gave up on that
8840 approach. */
8841 if (base_prev != NULL_RTX
8842 && base_curr != NULL_RTX
8843 && REG_P (base_prev)
8844 && REG_P (base_curr)
8845 && REGNO (base_prev) == REGNO (base_curr)
8846 && (REGNO (base_prev) == STACK_POINTER_REGNUM
8847 || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
8848 && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
8849 && (INTVAL (offset_prev) % 16) == 0)
8850 || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
8851 && (INTVAL (offset_curr) % 16) == 0)))
8852 return true;
8856 return false;
8859 /* Adjust the cost/latency of instructions for scheduling.
8860 For now this is just used to change the latency of vector instructions
8861 according to their LMUL. We assume that an insn with LMUL == 8 requires
8862 eight times more execution cycles than the same insn with LMUL == 1.
8863 As this may cause very high latencies which lead to scheduling artifacts
8864 we currently only perform the adjustment when -madjust-lmul-cost is given. */
8866 static int
8867 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
8868 unsigned int)
8870 /* Only do adjustments for the generic out-of-order scheduling model. */
8871 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
8872 return cost;
8874 if (recog_memoized (insn) < 0)
8875 return cost;
8877 enum attr_type type = get_attr_type (insn);
8879 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
8881 /* TODO: For ordered reductions scale the base cost relative to the
8882 number of units. */
8886 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
8887 if (!TARGET_ADJUST_LMUL_COST)
8888 return cost;
8890 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
8891 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
8892 return cost;
8894 enum riscv_vector::vlmul_type lmul =
8895 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
8897 double factor = 1;
8898 switch (lmul)
8900 case riscv_vector::LMUL_2:
8901 factor = 2;
8902 break;
8903 case riscv_vector::LMUL_4:
8904 factor = 4;
8905 break;
8906 case riscv_vector::LMUL_8:
8907 factor = 8;
8908 break;
8909 case riscv_vector::LMUL_F2:
8910 factor = 0.5;
8911 break;
8912 case riscv_vector::LMUL_F4:
8913 factor = 0.25;
8914 break;
8915 case riscv_vector::LMUL_F8:
8916 factor = 0.125;
8917 break;
8918 default:
8919 factor = 1;
8922 /* If the latency was nonzero, keep it that way. */
8923 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
8925 return new_cost;
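/* A minimal standalone sketch of the scaling performed above; the
   helper name is illustrative, not GCC API:

     static int
     scale_cost_by_lmul (int cost, double factor)
     {
       int lower_bound = cost > 0 ? 1 : 0;  // keep nonzero latencies nonzero
       int scaled = (int) (cost * factor);
       return scaled > lower_bound ? scaled : lower_bound;
     }

   e.g. a base cost of 10 becomes 40 at LMUL == 4 and 2 at
   LMUL == 1/4.  */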
8928 /* Auxiliary function to emit RISC-V ELF attributes. */
8929 static void
8930 riscv_emit_attribute ()
8932 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
8933 riscv_arch_str ().c_str ());
8935 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
8936 TARGET_STRICT_ALIGN ? 0 : 1);
8938 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
8939 riscv_stack_boundary / 8);
8942 /* Output .variant_cc for a function symbol that follows the vector
8943 calling convention. */
8945 static void
8946 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
8948 if (TREE_CODE (decl) == FUNCTION_DECL)
8950 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
8951 if (cc == RISCV_CC_V)
8953 fprintf (stream, "\t.variant_cc\t");
8954 assemble_name (stream, name);
8955 fprintf (stream, "\n");
8960 /* Implement ASM_DECLARE_FUNCTION_NAME. */
8962 void
8963 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
8965 riscv_asm_output_variant_cc (stream, fndecl, name);
8966 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
8967 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
8968 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8970 fprintf (stream, "\t.option push\n");
8972 std::string *target_name = riscv_func_target_get (fndecl);
8973 std::string isa = target_name != NULL
8974 ? *target_name
8975 : riscv_cmdline_subset_list ()->to_string (true);
8976 fprintf (stream, "\t.option arch, %s\n", isa.c_str ());
8977 riscv_func_target_remove_and_destory (fndecl);
8979 struct cl_target_option *local_cl_target =
8980 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
8981 struct cl_target_option *global_cl_target =
8982 TREE_TARGET_OPTION (target_option_default_node);
8983 const char *local_tune_str = get_tune_str (local_cl_target);
8984 const char *global_tune_str = get_tune_str (global_cl_target);
8985 if (strcmp (local_tune_str, global_tune_str) != 0)
8986 fprintf (stream, "\t# tune = %s\n", local_tune_str);
8990 void
8991 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
8993 if (!flag_inhibit_size_directive)
8994 ASM_OUTPUT_MEASURED_SIZE (stream, name);
8996 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8998 fprintf (stream, "\t.option pop\n");
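/* For a function foo carrying a target attribute, the two hooks above
   therefore bracket the body with output along these lines (the exact
   strings are illustrative):

	.type	foo, @function
   foo:
	.option push
	.option arch, <ISA string for foo's target>
	...
	.size	foo, .-foo
	.option pop
*/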
9002 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
9004 void
9005 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
9007 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
9008 const char *value = IDENTIFIER_POINTER (target);
9009 riscv_asm_output_variant_cc (stream, decl, name);
9010 ASM_OUTPUT_DEF (stream, name, value);
9013 /* Implement ASM_OUTPUT_EXTERNAL. */
9015 void
9016 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
9018 default_elf_asm_output_external (stream, decl, name);
9019 riscv_asm_output_variant_cc (stream, decl, name);
9022 /* Implement TARGET_ASM_FILE_START. */
9024 static void
9025 riscv_file_start (void)
9027 default_file_start ();
9029 /* Instruct GAS to generate position-[in]dependent code. */
9030 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
9032 /* If the user specifies "-mno-relax" on the command line then disable linker
9033 relaxation in the assembler. */
9034 if (! riscv_mrelax)
9035 fprintf (asm_out_file, "\t.option norelax\n");
9037 /* If the user specifies "-mcsr-check" on the command line then enable csr
9038 check in the assembler. */
9039 if (riscv_mcsr_check)
9040 fprintf (asm_out_file, "\t.option csr-check\n");
9042 if (riscv_emit_attribute_p)
9043 riscv_emit_attribute ();
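/* A typical non-PIC file prologue emitted by this hook (illustrative;
   the exact strings depend on the options and the assembler):

	.option nopic
	.attribute arch, "<arch string>"
	.attribute unaligned_access, 1
	.attribute stack_align, 16

   with ".option norelax" and ".option csr-check" added when -mno-relax
   or -mcsr-check are given, and the .attribute lines present only when
   attribute emission is enabled.  */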
9046 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
9047 in order to avoid duplicating too much logic from elsewhere. */
9049 static void
9050 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9051 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9052 tree function)
9054 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
9055 rtx this_rtx, temp1, temp2, fnaddr;
9056 rtx_insn *insn;
9058 riscv_in_thunk_func = true;
9060 /* Pretend to be a post-reload pass while generating rtl. */
9061 reload_completed = 1;
9063 /* Mark the end of the (empty) prologue. */
9064 emit_note (NOTE_INSN_PROLOGUE_END);
9066 /* Determine if we can use a sibcall to call FUNCTION directly. */
9067 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
9069 /* We need two temporary registers in some cases. */
9070 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
9071 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
9073 /* Find out which register contains the "this" pointer. */
9074 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9075 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
9076 else
9077 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
9079 /* Add DELTA to THIS_RTX. */
9080 if (delta != 0)
9082 rtx offset = GEN_INT (delta);
9083 if (!SMALL_OPERAND (delta))
9085 riscv_emit_move (temp1, offset);
9086 offset = temp1;
9088 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
9091 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
9092 if (vcall_offset != 0)
9094 rtx addr;
9096 /* Set TEMP1 to *THIS_RTX. */
9097 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
9099 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
9100 addr = riscv_add_offset (temp2, temp1, vcall_offset);
9102 /* Load the offset and add it to THIS_RTX. */
9103 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
9104 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
9107 /* Jump to the target function. */
9108 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
9109 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
9110 SIBLING_CALL_P (insn) = 1;
9112 /* Run just enough of rest_of_compilation. This sequence was
9113 "borrowed" from alpha.cc. */
9114 insn = get_insns ();
9115 split_all_insns_noflow ();
9116 shorten_branches (insn);
9117 assemble_start_function (thunk_fndecl, fnname);
9118 final_start_function (insn, file, 1);
9119 final (insn, file, 1);
9120 final_end_function ();
9121 assemble_end_function (thunk_fndecl, fnname);
9123 /* Clean up the vars set above. Note that final_end_function resets
9124 the global pointer for us. */
9125 reload_completed = 0;
9126 riscv_in_thunk_func = false;
9129 /* Allocate a chunk of memory for per-function machine-dependent data. */
9131 static struct machine_function *
9132 riscv_init_machine_status (void)
9134 return ggc_cleared_alloc<machine_function> ();
9137 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
9138 TODO: So far we only support the length-agnostic value. */
9139 static poly_uint16
9140 riscv_convert_vector_chunks (struct gcc_options *opts)
9142 int chunk_num;
9143 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
9144 if (min_vlen > 32)
9146 /* When targeting minimum VLEN > 32, we should use a 64-bit chunk size.
9147 Otherwise we cannot include SEW = 64 bits.
9148 Runtime invariant: The single indeterminate represents the
9149 number of 64-bit chunks in a vector beyond minimum length of 64 bits.
9150 Thus the number of bytes in a vector is 8 + 8 * x1 which is
9151 riscv_vector_chunks * 8 = poly_int (8, 8). */
9152 riscv_bytes_per_vector_chunk = 8;
9153 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
9154 - TARGET_MIN_VLEN = 64bit: [8,8]
9155 - TARGET_MIN_VLEN = 128bit: [16,16]
9156 - TARGET_MIN_VLEN = 256bit: [32,32]
9157 - TARGET_MIN_VLEN = 512bit: [64,64]
9158 - TARGET_MIN_VLEN = 1024bit: [128,128]
9159 - TARGET_MIN_VLEN = 2048bit: [256,256]
9160 - TARGET_MIN_VLEN = 4096bit: [512,512]
9161 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
9162 chunk_num = min_vlen / 64;
9164 else
9166 /* When targeting minimum VLEN = 32, we should use a 32-bit
9167 chunk size. Runtime invariant: The single indeterminate represents the
9168 number of 32-bit chunks in a vector beyond minimum length of 32 bits.
9169 Thus the number of bytes in a vector is 4 + 4 * x1 which is
9170 riscv_vector_chunks * 4 = poly_int (4, 4). */
9171 riscv_bytes_per_vector_chunk = 4;
9172 chunk_num = 1;
9175 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR
9176 is enabled. Set riscv_vector_chunks as 1 compile-time constant if
9177 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
9178 to set RVV mode size. The RVV machine modes size are run-time constant if
9179 TARGET_VECTOR is enabled. The RVV machine modes size remains default
9180 compile-time constant if TARGET_VECTOR is disabled. */
9181 if (TARGET_VECTOR_OPTS_P (opts))
9183 switch (opts->x_rvv_vector_bits)
9185 case RVV_VECTOR_BITS_SCALABLE:
9186 return poly_uint16 (chunk_num, chunk_num);
9187 case RVV_VECTOR_BITS_ZVL:
9188 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
9189 default:
9190 gcc_unreachable ();
9193 else
9194 return 1;
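/* Worked example: rv64gcv implies a minimum VLEN of 128, so
   riscv_bytes_per_vector_chunk is 8 and chunk_num is 128 / 64 == 2.
   With -mrvv-vector-bits=scalable the result is the runtime-scalable
   poly_uint16 [2, 2], giving BYTES_PER_RISCV_VECTOR == [16, 16]; with
   -mrvv-vector-bits=zvl it is the compile-time constant
   128 / (8 * 8) == 2.  */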
9197 /* 'Unpack' the internal tuning structs and update the options
9198 in OPTS. The caller must have set up selected_tune and selected_arch
9199 as all the other target-specific codegen decisions are
9200 derived from them. */
9201 void
9202 riscv_override_options_internal (struct gcc_options *opts)
9204 const struct riscv_tune_info *cpu;
9206 /* The presence of the M extension implies that division instructions
9207 are present, so include them unless explicitly disabled. */
9208 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
9209 opts->x_target_flags |= MASK_DIV;
9210 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
9211 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
9213 /* Likewise floating-point division and square root. */
9214 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
9215 && ((target_flags_explicit & MASK_FDIV) == 0))
9216 opts->x_target_flags |= MASK_FDIV;
9218 /* Handle -mtune; use -mcpu if -mtune is not given, and use the default
9219 -mtune if neither -mtune nor -mcpu is given. */
9220 const char *tune_string = get_tune_str (opts);
9221 cpu = riscv_parse_tune (tune_string, false);
9222 riscv_microarchitecture = cpu->microarchitecture;
9223 tune_param = opts->x_optimize_size
9224 ? &optimize_size_tune_info
9225 : cpu->tune_param;
9227 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
9228 for size. For architectures that trap and emulate unaligned accesses,
9229 the performance cost is too great, even for -Os. Similarly, if
9230 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
9231 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
9232 || TARGET_STRICT_ALIGN);
9234 /* Make a note if the user explicitly passed -mstrict-align for later
9235 builtin macro generation. Can't use target_flags_explicit since
9236 it is set even for -mno-strict-align. */
9237 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
9239 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
9240 && cpu->tune_param->slow_unaligned_access)
9241 opts->x_target_flags |= MASK_STRICT_ALIGN;
9243 /* If the user hasn't specified a branch cost, use the processor's
9244 default. */
9245 if (opts->x_riscv_branch_cost == 0)
9246 opts->x_riscv_branch_cost = tune_param->branch_cost;
9248 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
9249 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
9251 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
9252 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
9253 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
9254 "'V' Extension");
9256 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
9257 big-endian after finishing full coverage testing. */
9258 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
9259 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
9261 /* Convert -march and -mrvv-vector-bits to a chunks count. */
9262 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
9265 /* Implement TARGET_OPTION_OVERRIDE. */
9267 void
9268 riscv_option_override (void)
9270 #ifdef SUBTARGET_OVERRIDE_OPTIONS
9271 SUBTARGET_OVERRIDE_OPTIONS;
9272 #endif
9274 flag_pcc_struct_return = 0;
9276 if (flag_pic)
9277 g_switch_value = 0;
9279 /* Always prefer medlow to medany for RV32, since medlow can access
9280 the full address space. */
9281 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
9282 riscv_cmodel = CM_MEDLOW;
9284 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
9285 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
9287 if (riscv_cmodel == CM_LARGE && flag_pic)
9288 sorry ("code model %qs with %qs", "large",
9289 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
9291 if (flag_pic)
9292 riscv_cmodel = CM_PIC;
9294 /* With no-omit-frame-pointer combined with omit-leaf-frame-pointer we
9295 need to save fp together with ra for non-leaf functions, and neither
9296 for leaf functions. The x_flag_omit_frame_pointer flag has the first
9297 priority in determining whether the frame pointer is needed. If we did
9298 not override it, fp and ra would also be stored for leaf functions,
9299 which is not what we want. */
9300 riscv_save_frame_pointer = false;
9301 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
9303 if (!global_options.x_flag_omit_frame_pointer)
9304 riscv_save_frame_pointer = true;
9306 global_options.x_flag_omit_frame_pointer = 1;
9309 /* We get better code with explicit relocs for CM_MEDLOW, but
9310 worse code for the others (for now). Pick the best default. */
9311 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
9312 if (riscv_cmodel == CM_MEDLOW)
9313 target_flags |= MASK_EXPLICIT_RELOCS;
9315 /* Require that the ISA supports the requested floating-point ABI. */
9316 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
9317 error ("requested ABI requires %<-march%> to subsume the %qc extension",
9318 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
9320 /* RVE requires a specific ABI. */
9321 if (TARGET_RVE)
9323 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
9324 error ("rv32e requires ilp32e ABI");
9325 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
9326 error ("rv64e requires lp64e ABI");
9329 /* Zfinx requires ABI ilp32, ilp32e, lp64 or lp64e. */
9330 if (TARGET_ZFINX
9331 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
9332 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
9333 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
9335 /* We do not yet support ILP32 on RV64. */
9336 if (BITS_PER_WORD != POINTER_SIZE)
9337 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
9339 /* Validate -mpreferred-stack-boundary= value. */
9340 riscv_stack_boundary = ABI_STACK_BOUNDARY;
9341 if (riscv_preferred_stack_boundary_arg)
9343 int min = ctz_hwi (STACK_BOUNDARY / 8);
9344 int max = 8;
9346 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
9347 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
9348 riscv_preferred_stack_boundary_arg, min, max);
9350 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
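/* E.g. -mpreferred-stack-boundary=4 yields 8 << 4 == 128 bits,
   i.e. a 16-byte stack alignment, which matches the default
   ABI_STACK_BOUNDARY of most RISC-V ABIs.  */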
9353 if (riscv_emit_attribute_p < 0)
9354 #ifdef HAVE_AS_RISCV_ATTRIBUTE
9355 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
9356 #else
9357 riscv_emit_attribute_p = 0;
9359 if (riscv_emit_attribute_p)
9360 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
9361 " [%<-mriscv-attribute%>]");
9362 #endif
9364 if (riscv_stack_protector_guard == SSP_GLOBAL
9365 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9367 error ("incompatible options %<-mstack-protector-guard=global%> and "
9368 "%<-mstack-protector-guard-offset=%s%>",
9369 riscv_stack_protector_guard_offset_str);
9372 if (riscv_stack_protector_guard == SSP_TLS
9373 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
9374 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
9376 error ("both %<-mstack-protector-guard-offset%> and "
9377 "%<-mstack-protector-guard-reg%> must be used "
9378 "with %<-mstack-protector-guard=sysreg%>");
9381 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
9383 const char *str = riscv_stack_protector_guard_reg_str;
9384 int reg = decode_reg_name (str);
9386 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
9387 error ("%qs is not a valid base register in %qs", str,
9388 "-mstack-protector-guard-reg=");
9390 riscv_stack_protector_guard_reg = reg;
9393 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9395 char *end;
9396 const char *str = riscv_stack_protector_guard_offset_str;
9397 errno = 0;
9398 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
9400 if (!*str || *end || errno)
9401 error ("%qs is not a valid number in %qs", str,
9402 "-mstack-protector-guard-offset=");
9404 if (!SMALL_OPERAND (offs))
9405 error ("%qs is not a valid offset in %qs", str,
9406 "-mstack-protector-guard-offset=");
9408 riscv_stack_protector_guard_offset = offs;
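/* Example usage (hypothetical values): -mstack-protector-guard=tls
   -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=0x28
   loads the canary from 0x28(tp); the offset must fit the signed 12-bit
   immediate range checked by SMALL_OPERAND above. */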
9411 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
9412 param_sched_pressure_algorithm,
9413 SCHED_PRESSURE_MODEL);
9415 /* Function to allocate machine-dependent function status. */
9416 init_machine_status = &riscv_init_machine_status;
9418 riscv_override_options_internal (&global_options);
9420 /* Save these options as the default ones in case we push and pop them later
9421 while processing functions with potential target attributes. */
9422 target_option_default_node = target_option_current_node
9423 = build_target_option_node (&global_options, &global_options_set);
9426 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9427 Used by riscv_set_current_function to
9428 make sure optab availability predicates are recomputed when necessary. */
9430 void
9431 riscv_save_restore_target_globals (tree new_tree)
9433 if (TREE_TARGET_GLOBALS (new_tree))
9434 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9435 else if (new_tree == target_option_default_node)
9436 restore_target_globals (&default_target_globals);
9437 else
9438 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9441 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9442 using the information saved in PTR. */
9444 static void
9445 riscv_option_restore (struct gcc_options *opts,
9446 struct gcc_options * /* opts_set */,
9447 struct cl_target_option * /* ptr */)
9449 riscv_override_options_internal (opts);
9452 static GTY (()) tree riscv_previous_fndecl;
9454 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9456 static void
9457 riscv_conditional_register_usage (void)
9459 /* We have only x0~x15 on RV32E/RV64E. */
9460 if (TARGET_RVE)
9462 for (int r = 16; r <= 31; r++)
9463 fixed_regs[r] = 1;
9466 if (riscv_abi == ABI_ILP32E)
9468 for (int r = 16; r <= 31; r++)
9469 call_used_regs[r] = 1;
9472 if (!TARGET_HARD_FLOAT)
9474 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9475 fixed_regs[regno] = call_used_regs[regno] = 1;
9478 /* In the soft-float ABI, there are no callee-saved FP registers. */
9479 if (UNITS_PER_FP_ARG == 0)
9481 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9482 call_used_regs[regno] = 1;
9485 if (!TARGET_VECTOR)
9487 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
9488 fixed_regs[regno] = call_used_regs[regno] = 1;
9490 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
9491 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
9492 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9493 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
9497 /* Return a register priority for hard reg REGNO. */
9499 static int
9500 riscv_register_priority (int regno)
9502 /* Favor compressed registers to improve the odds of RVC instruction
9503 selection. */
9504 if (riscv_compressed_reg_p (regno))
9505 return 1;
9507 return 0;
9510 /* Implement TARGET_TRAMPOLINE_INIT. */
9512 static void
9513 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9515 rtx addr, end_addr, mem;
9516 uint32_t trampoline[4];
9517 unsigned int i;
9518 HOST_WIDE_INT static_chain_offset, target_function_offset;
9520 /* Work out the offsets of the pointers from the start of the
9521 trampoline code. */
9522 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
9524 /* Get pointers to the beginning and end of the code block. */
9525 addr = force_reg (Pmode, XEXP (m_tramp, 0));
9526 end_addr = riscv_force_binary (Pmode, PLUS, addr,
9527 GEN_INT (TRAMPOLINE_CODE_SIZE));
9530 if (Pmode == SImode)
9532 chain_value = force_reg (Pmode, chain_value);
9534 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9535 /* lui t2, hi(chain)
9536 lui t0, hi(func)
9537 addi t2, t2, lo(chain)
9538 jr t0, lo(func)
9540 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
9541 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
9543 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
9545 /* 0xfff. */
9546 rtx imm12_mask = gen_reg_rtx (SImode);
9547 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
9549 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
9551 /* Gen lui t2, hi(chain). */
9552 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
9553 fixup_value);
9554 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
9555 uimm_mask);
9556 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9557 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
9558 gen_int_mode (lui_hi_chain_code, SImode));
9560 mem = adjust_address (m_tramp, SImode, 0);
9561 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
9563 /* Gen lui t0, hi(func). */
9564 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
9565 fixup_value);
9566 hi_func = riscv_force_binary (SImode, AND, hi_func,
9567 uimm_mask);
9568 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
9569 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
9570 gen_int_mode (lui_hi_func_code, SImode));
9572 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
9573 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
9575 /* Gen addi t2, t2, lo(chain). */
9576 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
9577 imm12_mask);
9578 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
9580 lo_chain_code = OPCODE_ADDI
9581 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9582 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
9584 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
9585 force_reg (SImode, GEN_INT (lo_chain_code)));
9587 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
9588 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
9590 /* Gen jr t0, lo(func). */
9591 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
9592 imm12_mask);
9593 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
9595 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9597 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
9598 force_reg (SImode, GEN_INT (lo_func_code)));
9600 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
9601 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
9603 else
9605 static_chain_offset = TRAMPOLINE_CODE_SIZE;
9606 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
9608 /* auipc t2, 0
9609 l[wd] t0, target_function_offset(t2)
9610 l[wd] t2, static_chain_offset(t2)
9611 jr t0
9613 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9614 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9615 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
9616 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9617 | (target_function_offset << SHIFT_IMM);
9618 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9619 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9620 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9621 | (static_chain_offset << SHIFT_IMM);
9622 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9624 /* Copy the trampoline code. */
9625 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
9627 if (BYTES_BIG_ENDIAN)
9628 trampoline[i] = __builtin_bswap32(trampoline[i]);
9629 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
9630 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
9633 /* Set up the static chain pointer field. */
9634 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
9635 riscv_emit_move (mem, chain_value);
9637 /* Set up the target function field. */
9638 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
9639 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
9642 /* Flush the code part of the trampoline. */
9643 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
9644 emit_insn (gen_clear_cache (addr, end_addr));
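/* A note on the RV32 hi/lo split used above: for an address A,
   hi(A) = (A + 0x800) & 0xfffff000 and lo(A) = A & 0xfff, so that
     lui  reg, hi(A)
     addi reg, reg, lo(A)
   reconstructs A even when bit 11 is set, because addi sign-extends its
   immediate and the +0x800 (IMM_REACH/2) rounding in hi(A) compensates. */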
9647 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
9649 static bool
9650 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
9651 tree exp ATTRIBUTE_UNUSED)
9653 /* Don't use sibcalls when using the save-restore routines. */
9654 if (TARGET_SAVE_RESTORE)
9655 return false;
9657 /* Don't use sibcall for naked functions. */
9658 if (cfun->machine->naked_p)
9659 return false;
9661 /* Don't use sibcall for interrupt functions. */
9662 if (cfun->machine->interrupt_handler_p)
9663 return false;
9665 /* Don't use sibcalls in the large code model, because sibcall expansion
9666 and epilogue expansion both use the RISCV_PROLOGUE_TEMP
9667 register. */
9668 if (riscv_cmodel == CM_LARGE)
9669 return false;
9671 return true;
9674 /* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
9675 interrupt function. */
9676 static enum riscv_privilege_levels
9677 riscv_get_interrupt_type (tree decl)
9679 gcc_assert (decl != NULL_TREE);
9681 if ((TREE_CODE(decl) != FUNCTION_DECL)
9682 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
9683 return UNKNOWN_MODE;
9685 tree attr_args
9686 = TREE_VALUE (lookup_attribute ("interrupt",
9687 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
9689 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
9691 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
9693 if (!strcmp (string, "user"))
9694 return USER_MODE;
9695 else if (!strcmp (string, "supervisor"))
9696 return SUPERVISOR_MODE;
9697 else /* Must be "machine". */
9698 return MACHINE_MODE;
9700 else
9701 /* Interrupt attributes are machine mode by default. */
9702 return MACHINE_MODE;
9705 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
9706 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9707 of the function, if such exists. This function may be called multiple
9708 times on a single function so use riscv_previous_fndecl to avoid
9709 setting up identical state. */
9711 /* Sanity checking for the above function attributes. */
9712 static void
9713 riscv_set_current_function (tree decl)
9715 if (decl == NULL_TREE
9716 || current_function_decl == NULL_TREE
9717 || current_function_decl == error_mark_node
9718 || ! cfun->machine)
9719 return;
9721 if (!cfun->machine->attributes_checked_p)
9723 cfun->machine->naked_p = riscv_naked_function_p (decl);
9724 cfun->machine->interrupt_handler_p
9725 = riscv_interrupt_type_p (TREE_TYPE (decl));
9727 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
9728 error ("function attributes %qs and %qs are mutually exclusive",
9729 "interrupt", "naked");
9731 if (cfun->machine->interrupt_handler_p)
9733 tree ret = TREE_TYPE (TREE_TYPE (decl));
9734 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
9736 if (TREE_CODE (ret) != VOID_TYPE)
9737 error ("%qs function cannot return a value", "interrupt");
9739 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
9740 error ("%qs function cannot have arguments", "interrupt");
9742 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
9744 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
9747 /* Don't print the above diagnostics more than once. */
9748 cfun->machine->attributes_checked_p = 1;
9751 if (!decl || decl == riscv_previous_fndecl)
9752 return;
9754 tree old_tree = (riscv_previous_fndecl
9755 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
9756 : NULL_TREE);
9758 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
9760 /* If current function has no attributes but the previous one did,
9761 use the default node. */
9762 if (!new_tree && old_tree)
9763 new_tree = target_option_default_node;
9765 /* If nothing to do, return. A #pragma GCC reset or #pragma GCC pop to
9766 the default has already been handled by
9767 riscv_save_restore_target_globals. */
9768 if (old_tree == new_tree)
9769 return;
9771 riscv_previous_fndecl = decl;
9773 /* First set the target options. */
9774 cl_target_option_restore (&global_options, &global_options_set,
9775 TREE_TARGET_OPTION (new_tree));
10777 /* The ISA extensions can vary per function via the target attribute,
10778 so make sure that the machine modes are reflected correctly here. */
9779 init_adjust_machine_modes ();
9781 riscv_save_restore_target_globals (new_tree);
9784 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
9785 static tree
9786 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
9788 tree combined_attrs;
9790 enum riscv_privilege_levels old_interrupt_type
9791 = riscv_get_interrupt_type (olddecl);
9792 enum riscv_privilege_levels new_interrupt_type
9793 = riscv_get_interrupt_type (newdecl);
9795 /* Check that the old and new declarations have the same interrupt type. */
9796 if ((old_interrupt_type != UNKNOWN_MODE)
9797 && (new_interrupt_type != UNKNOWN_MODE)
9798 && (old_interrupt_type != new_interrupt_type))
9799 error ("%qs function cannot have different interrupt type", "interrupt");
9801 /* Create combined attributes. */
9802 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
9803 DECL_ATTRIBUTES (newdecl));
9805 return combined_attrs;
9808 /* Implement TARGET_CANNOT_COPY_INSN_P. */
9810 static bool
9811 riscv_cannot_copy_insn_p (rtx_insn *insn)
9813 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
9816 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
9818 static bool
9819 riscv_slow_unaligned_access (machine_mode, unsigned int)
9821 return riscv_slow_unaligned_access_p;
9824 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9826 static bool
9827 riscv_can_change_mode_class (machine_mode from, machine_mode to,
9828 reg_class_t rclass)
9830 /* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
9831 In the 'cprop_hardreg' pass, we will try to do hard reg copy propagation
9832 between a wider mode (FROM) and a narrower mode (TO).
9834 E.g. we should not allow copy propagation
9835 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
9836 since we can't order their sizes, which would cause an ICE in regcprop.
9838 TODO: Even though they have different sizes, they always change
9839 the whole register. We may enhance such cases in regcprop to
9840 optimize them in the future. */
9841 if (reg_classes_intersect_p (V_REGS, rclass)
9842 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
9843 return false;
9844 return !reg_classes_intersect_p (FP_REGS, rclass);
9847 /* Implement TARGET_CONSTANT_ALIGNMENT. */
9849 static HOST_WIDE_INT
9850 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
9852 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
9853 && (riscv_align_data_type == riscv_align_data_type_xlen))
9854 return MAX (align, BITS_PER_WORD);
9855 return align;
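/* E.g. with -malign-data=xlen on rv64, a string constant that would
   naturally be byte-aligned is instead given BITS_PER_WORD (64-bit)
   alignment, allowing full-word accesses to its start. */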
9858 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
9860 /* This function is equivalent to default_promote_function_mode_always_promote
9861 except that it returns a promoted mode even if type is NULL_TREE. This is
9862 needed by libcalls which have no type (only a mode) such as fixed conversion
9863 routines that take a signed or unsigned char/short/int argument and convert
9864 it to a fixed type. */
9866 static machine_mode
9867 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9868 machine_mode mode,
9869 int *punsignedp ATTRIBUTE_UNUSED,
9870 const_tree fntype ATTRIBUTE_UNUSED,
9871 int for_return ATTRIBUTE_UNUSED)
9873 int unsignedp;
9875 if (type != NULL_TREE)
9876 return promote_mode (type, mode, punsignedp);
9878 unsignedp = *punsignedp;
9879 scalar_mode smode = as_a <scalar_mode> (mode);
9880 PROMOTE_MODE (smode, unsignedp, type);
9881 *punsignedp = unsignedp;
9882 return smode;
9885 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
9887 static void
9888 riscv_reorg (void)
9890 /* Do nothing unless we have -msave-restore. */
9891 if (TARGET_SAVE_RESTORE)
9892 riscv_remove_unneeded_save_restore_calls ();
9895 /* Return nonzero if register FROM_REGNO can be renamed to register
9896 TO_REGNO. */
9898 bool
9899 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
9900 unsigned to_regno)
9902 /* Interrupt functions can only use registers that have already been
9903 saved by the prologue, even if they would normally be
9904 call-clobbered. */
9905 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
9908 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
9910 bool
9911 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
9913 /* Prefer old address if it is less expensive. */
9914 addr_space_t as = MEM_ADDR_SPACE (memref);
9915 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
9916 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
9917 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
9918 return new_cost <= old_cost;
9921 /* Helper function for generating gpr_save pattern. */
9923 rtx
9924 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
9926 unsigned count = riscv_save_libcall_count (frame->mask);
9927 /* 1 for the unspec, 2 for the clobbers of t0/t1, and 1 for ra. */
9928 unsigned veclen = 1 + 2 + 1 + count;
9929 rtvec vec = rtvec_alloc (veclen);
9931 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
9933 RTVEC_ELT (vec, 0) =
9934 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
9935 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
9937 for (unsigned i = 1; i < veclen; ++i)
9939 unsigned regno = gpr_save_reg_order[i];
9940 rtx reg = gen_rtx_REG (Pmode, regno);
9941 rtx elt;
9943 /* t0 and t1 are CLOBBERs, others are USEs. */
9944 if (i < 3)
9945 elt = gen_rtx_CLOBBER (Pmode, reg);
9946 else
9947 elt = gen_rtx_USE (Pmode, reg);
9949 RTVEC_ELT (vec, i) = elt;
9952 /* The largest-numbered register to be saved must be set in the mask if
9953 we are not using __riscv_save_0. */
9954 gcc_assert ((count == 0) ||
9955 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
9957 return gen_rtx_PARALLEL (VOIDmode, vec);
9960 static HOST_WIDE_INT
9961 zcmp_base_adj (int regs_num)
9963 return riscv_16bytes_align ((regs_num) *GET_MODE_SIZE (word_mode));
9966 static HOST_WIDE_INT
9967 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
9969 return total - zcmp_base_adj (regs_num);
9972 bool
9973 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
9975 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
9976 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
9977 || additional_bytes == 2 * ZCMP_SP_INC_STEP
9978 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
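/* Worked example, assuming rv64 with ZCMP_SP_INC_STEP == 16 and
   ZCMP_MAX_SPIMM == 3: saving three 8-byte registers gives
   zcmp_base_adj == 32 (24 rounded up to 16-byte alignment), so the valid
   total adjustments are 32, 48, 64 and 80 bytes -- the base plus 0..3
   steps encodable in cm.push/cm.popret. */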
9981 /* Return true if OP is a valid gpr_save pattern. */
9983 bool
9984 riscv_gpr_save_operation_p (rtx op)
9986 unsigned len = XVECLEN (op, 0);
9988 if (len > ARRAY_SIZE (gpr_save_reg_order))
9989 return false;
9991 for (unsigned i = 0; i < len; i++)
9993 rtx elt = XVECEXP (op, 0, i);
9994 if (i == 0)
9996 /* First element in parallel is unspec. */
9997 if (GET_CODE (elt) != UNSPEC_VOLATILE
9998 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
9999 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
10000 return false;
10002 else
10004 /* Two CLOBBERs (t0/t1) followed by USEs; must check the order. */
10005 unsigned expect_code = i < 3 ? CLOBBER : USE;
10006 if (GET_CODE (elt) != expect_code
10007 || !REG_P (XEXP (elt, 0))
10008 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
10009 return false;
10013 return true;
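/* For example, with two registers saved via the libcalls the PARALLEL
   produced by riscv_gen_gpr_save_insn above is roughly:
     (parallel [(unspec_volatile [(const_int 2)] UNSPECV_GPR_SAVE)
                (clobber (reg t0)) (clobber (reg t1))
                (use (reg ra)) (use (reg s0)) (use (reg s1))])
   and this predicate verifies exactly that element order. */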
10016 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
10018 static unsigned HOST_WIDE_INT
10019 riscv_asan_shadow_offset (void)
10021 /* We only have libsanitizer support for RV64 at present.
10023 This number must match ASAN_SHADOW_OFFSET_CONST in the file
10024 libsanitizer/asan/asan_mapping.h. */
10025 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
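/* ASan then computes shadow = (addr >> ASAN_SHADOW_SHIFT) + this offset
   on RV64, i.e. one shadow byte covering eight application bytes. */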
10028 /* Implement TARGET_MANGLE_TYPE. */
10030 static const char *
10031 riscv_mangle_type (const_tree type)
10033 /* Half-precision float: _Float16 mangles to "DF16_". */
10034 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
10035 return "DF16_";
10037 /* Mangle all vector types for the vector extension. */
10038 /* The mangled name follows the RVV LLVM rule,
10039 that is, "u" + the length of abi_name + abi_name. */
10040 if (TYPE_NAME (type) != NULL)
10042 const char *res = riscv_vector::mangle_builtin_type (type);
10043 if (res)
10044 return res;
10047 /* Use the default mangling. */
10048 return NULL;
10051 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
10053 static bool
10054 riscv_scalar_mode_supported_p (scalar_mode mode)
10056 if (mode == HFmode)
10057 return true;
10058 else
10059 return default_scalar_mode_supported_p (mode);
10062 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
10063 if MODE is HFmode, and punt to the generic implementation otherwise. */
10065 static bool
10066 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
10068 if (mode == HFmode)
10069 return true;
10070 else
10071 return default_libgcc_floating_mode_supported_p (mode);
10074 /* Set the value of FLT_EVAL_METHOD.
10075 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
10077 0: evaluate all operations and constants, whose semantic type has at
10078 most the range and precision of type float, to the range and
10079 precision of float; evaluate all other operations and constants to
10080 the range and precision of the semantic type;
10082 N, where _FloatN is a supported interchange floating type
10083 evaluate all operations and constants, whose semantic type has at
10084 most the range and precision of _FloatN type, to the range and
10085 precision of the _FloatN type; evaluate all other operations and
10086 constants to the range and precision of the semantic type;
10088 If we have the zfh/zhinx/zvfh extensions then we support _Float16
10089 in native precision, so we should set this to 16. */
10090 static enum flt_eval_method
10091 riscv_excess_precision (enum excess_precision_type type)
10093 switch (type)
10095 case EXCESS_PRECISION_TYPE_FAST:
10096 case EXCESS_PRECISION_TYPE_STANDARD:
10097 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
10098 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
10099 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
10100 case EXCESS_PRECISION_TYPE_IMPLICIT:
10101 case EXCESS_PRECISION_TYPE_FLOAT16:
10102 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
10103 default:
10104 gcc_unreachable ();
10106 return FLT_EVAL_METHOD_UNPREDICTABLE;
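/* Illustration: with zfh available the excess-precision setting is 16, so
     _Float16 a, b, c;  c = a + b;
   is evaluated directly in HFmode; without zfh/zhinx/zvfh the operands are
   promoted to float, the addition happens in SFmode, and the result is
   truncated back to _Float16. */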
10109 /* Implement TARGET_FLOATN_MODE. */
10110 static opt_scalar_float_mode
10111 riscv_floatn_mode (int n, bool extended)
10113 if (!extended && n == 16)
10114 return HFmode;
10116 return default_floatn_mode (n, extended);
10119 static void
10120 riscv_init_libfuncs (void)
10122 /* Half-precision float operations. The compiler handles all operations
10123 with NULL libfuncs by converting to SFmode. */
10125 /* Arithmetic. */
10126 set_optab_libfunc (add_optab, HFmode, NULL);
10127 set_optab_libfunc (sdiv_optab, HFmode, NULL);
10128 set_optab_libfunc (smul_optab, HFmode, NULL);
10129 set_optab_libfunc (neg_optab, HFmode, NULL);
10130 set_optab_libfunc (sub_optab, HFmode, NULL);
10132 /* Comparisons. */
10133 set_optab_libfunc (eq_optab, HFmode, NULL);
10134 set_optab_libfunc (ne_optab, HFmode, NULL);
10135 set_optab_libfunc (lt_optab, HFmode, NULL);
10136 set_optab_libfunc (le_optab, HFmode, NULL);
10137 set_optab_libfunc (ge_optab, HFmode, NULL);
10138 set_optab_libfunc (gt_optab, HFmode, NULL);
10139 set_optab_libfunc (unord_optab, HFmode, NULL);
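/* With the optab libfuncs cleared above, an HFmode addition, say, is not a
   call to some __addhf3 routine; it is expanded as extend-to-SFmode,
   SFmode add, truncate-back, using libgcc's __extendhfsf2/__truncsfhf2
   where no hardware support exists. */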
10142 #if CHECKING_P
10143 void
10144 riscv_reinit (void)
10146 riscv_option_override ();
10147 init_adjust_machine_modes ();
10148 init_derived_machine_modes ();
10149 reinit_regs ();
10150 init_optabs ();
10152 #endif
10154 #if CHECKING_P
10155 #undef TARGET_RUN_TARGET_SELFTESTS
10156 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
10157 #endif /* #if CHECKING_P */
10159 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
10161 static bool
10162 riscv_vector_mode_supported_p (machine_mode mode)
10164 if (TARGET_VECTOR)
10165 return riscv_v_ext_mode_p (mode);
10167 return false;
10170 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
10172 static bool
10173 riscv_verify_type_context (location_t loc, type_context_kind context,
10174 const_tree type, bool silent_p)
10176 return riscv_vector::verify_type_context (loc, context, type, silent_p);
10179 /* Implement TARGET_VECTOR_ALIGNMENT. */
10181 static HOST_WIDE_INT
10182 riscv_vector_alignment (const_tree type)
10184 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
10185 be set for non-predicate vectors of booleans. Modes are the most
10186 direct way we have of identifying real RVV predicate types. */
10187 /* FIXME: The RVV spec doesn't specify an alignment for bool, so we
10188 use one-byte alignment. */
10189 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
10190 return 8;
10192 widest_int min_size
10193 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
10194 return wi::umin (min_size, 128).to_uhwi ();
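/* Worked example: for a 4 x int32 vector type, TYPE_SIZE is 128 bits, so
   the result is min (128, 128) = 128-bit alignment; any wider vector type
   is still capped at 128-bit alignment by the umin above. */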
10197 /* Implement REGMODE_NATURAL_SIZE. */
10199 poly_uint64
10200 riscv_regmode_natural_size (machine_mode mode)
10202 /* The natural size for RVV data modes is one RVV data vector,
10203 and similarly for predicates. We can't independently modify
10204 anything smaller than that. */
10205 /* ??? For now, only do this for variable-width RVV registers.
10206 Doing it for constant-sized registers breaks lower-subreg.c. */
10208 if (riscv_v_ext_mode_p (mode))
10210 poly_uint64 size = GET_MODE_SIZE (mode);
10211 if (riscv_v_ext_tuple_mode_p (mode))
10213 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
10214 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
10215 return size;
10217 else if (riscv_v_ext_vector_mode_p (mode))
10219 /* RVV mask modes always consume a single register. */
10220 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
10221 return BYTES_PER_RISCV_VECTOR;
10223 if (!size.is_constant ())
10224 return BYTES_PER_RISCV_VECTOR;
10225 else if (!riscv_v_ext_vls_mode_p (mode))
10226 /* For -march=rv64gc_zve32f, the natural vector register size
10227 is 32 bits, which is smaller than the scalar register size, so we
10228 return the minimum of the vector register size and the scalar
10229 register size. */
10230 return MIN (size.to_constant (), UNITS_PER_WORD);
10232 return UNITS_PER_WORD;
10235 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
10237 static unsigned int
10238 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
10239 int *offset)
10241 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
10242 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
10243 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
10245 gcc_assert (i == 1);
10246 *factor = riscv_bytes_per_vector_chunk;
10247 *offset = 1;
10248 return RISCV_DWARF_VLENB;
10251 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
10253 static HOST_WIDE_INT
10254 riscv_estimated_poly_value (poly_int64 val,
10255 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
10257 if (TARGET_VECTOR)
10258 return riscv_vector::estimated_poly_value (val, kind);
10259 return default_estimated_poly_value (val, kind);
10262 /* Return true if the vector misalignment factor is supported by the
10263 target. */
10264 bool
10265 riscv_support_vector_misalignment (machine_mode mode,
10266 const_tree type ATTRIBUTE_UNUSED,
10267 int misalignment,
10268 bool is_packed ATTRIBUTE_UNUSED)
10270 /* Depend on movmisalign pattern. */
10271 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10272 is_packed);
10275 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
10277 static opt_machine_mode
10278 riscv_get_mask_mode (machine_mode mode)
10280 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
10281 return riscv_vector::get_mask_mode (mode);
10283 return default_get_mask_mode (mode);
10286 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
10287 it isn't worth branching around empty masked ops (including masked
10288 stores). */
10290 static bool
10291 riscv_empty_mask_is_expensive (unsigned)
10293 return false;
10296 /* Return true if a shift-amount matches the trailing cleared bits on
10297 a bitmask. */
10299 bool
10300 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
10302 return shamt == ctz_hwi (mask);
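/* E.g. a shift amount of 3 matches the mask 0xfffffff8, whose trailing
   zero count is 3; patterns can use this to pair a masking AND with an
   adjacent shift. */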
10305 static HARD_REG_SET
10306 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10308 HARD_REG_SET zeroed_hardregs;
10309 CLEAR_HARD_REG_SET (zeroed_hardregs);
10311 /* Find a register to hold vl. */
10312 unsigned vl_regno = INVALID_REGNUM;
10313 /* Skip the first GPR; using it would keep the existing vl, since vl
10314 and avl would be the same. */
10315 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
10317 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10319 vl_regno = regno;
10320 break;
10324 if (vl_regno > GP_REG_LAST)
10325 sorry ("cannot allocate vl register for %qs on this target",
10326 "-fzero-call-used-regs");
10328 /* Vector configurations need not be saved and restored here. The
10329 -fzero-call-used-regs=* option will zero all vector registers and
10330 return, so there are no vector operations between them. */
10332 bool emitted_vlmax_vsetvl = false;
10333 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
10334 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
10336 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10338 rtx target = regno_reg_rtx[regno];
10339 machine_mode mode = GET_MODE (target);
10341 if (!emitted_vlmax_vsetvl)
10343 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
10344 emitted_vlmax_vsetvl = true;
10347 rtx ops[] = {target, CONST0_RTX (mode)};
10348 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
10349 riscv_vector::UNARY_OP, ops, vl);
10351 SET_HARD_REG_BIT (zeroed_hardregs, regno);
10355 return zeroed_hardregs;
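/* The emitted sequence is roughly one vsetvl obtaining VLMAX into the
   chosen GPR followed by a zeroing vector move per register, e.g.:
     vsetvli a0, zero, ...
     vmv.v.i v1, 0
     vmv.v.i v2, 0
   with no further vsetvl needed between the moves. */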
10358 /* Generate a sequence of instructions that zero registers specified by
10359 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
10360 zeroed. */
10361 HARD_REG_SET
10362 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10364 HARD_REG_SET zeroed_hardregs;
10365 CLEAR_HARD_REG_SET (zeroed_hardregs);
10367 if (TARGET_VECTOR)
10368 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
10370 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
10371 & ~zeroed_hardregs);
10374 /* Implement target hook TARGET_ARRAY_MODE. */
10376 static opt_machine_mode
10377 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
10379 machine_mode vmode;
10380 if (TARGET_VECTOR
10381 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
10382 return vmode;
10384 return opt_machine_mode ();
10387 /* Given memory reference MEM, expand code to compute the aligned
10388 memory address, shift and mask values and store them into
10389 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
10391 void
10392 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
10393 rtx *not_mask)
10395 /* Align the memory address to a word. */
10396 rtx addr = force_reg (Pmode, XEXP (mem, 0));
10398 rtx addr_mask = gen_int_mode (-4, Pmode);
10400 rtx aligned_addr = gen_reg_rtx (Pmode);
10401 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
10403 *aligned_mem = change_address (mem, SImode, aligned_addr);
10405 /* Calculate the shift amount. */
10406 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
10407 gen_int_mode (3, SImode)));
10408 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
10409 gen_int_mode (3, SImode)));
10411 /* Calculate the mask. */
10412 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
10414 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
10416 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
10417 gen_lowpart (QImode, *shift)));
10419 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
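/* Worked example: for a QImode access at an address ending in 0x7, the
   aligned SImode word is at ...0x4, the byte index within the word is 3,
   so *SHIFT becomes 3 << 3 = 24 and *MASK becomes 0xff << 24, with
   *NOT_MASK its complement. */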
10422 /* Leftshift a subword within an SImode register. */
10424 void
10425 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
10426 rtx *shifted_value)
10428 rtx value_reg = gen_reg_rtx (SImode);
10429 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
10430 mode, 0));
10432 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
10433 gen_lowpart (QImode, shift)));
10436 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
10437 allows the behavior to be tuned for specific implementations as well as
10438 when optimizing for size. */
10440 bool
10441 riscv_use_divmod_expander (void)
10443 return tune_param->use_divmod_expansion;
10446 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
10448 static machine_mode
10449 riscv_preferred_simd_mode (scalar_mode mode)
10451 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10452 return riscv_vector::preferred_simd_mode (mode);
10454 return word_mode;
10457 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
10459 static poly_uint64
10460 riscv_vectorize_preferred_vector_alignment (const_tree type)
10462 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
10463 return TYPE_ALIGN (TREE_TYPE (type));
10464 return TYPE_ALIGN (type);
10467 /* Return true if MODE is a static FRM rounding mode. */
10469 static bool
10470 riscv_static_frm_mode_p (int mode)
10472 switch (mode)
10474 case riscv_vector::FRM_RDN:
10475 case riscv_vector::FRM_RUP:
10476 case riscv_vector::FRM_RTZ:
10477 case riscv_vector::FRM_RMM:
10478 case riscv_vector::FRM_RNE:
10479 return true;
10480 default:
10481 return false;
10484 gcc_unreachable ();
10487 /* Implement the floating-point Mode Switching. */
10489 static void
10490 riscv_emit_frm_mode_set (int mode, int prev_mode)
10492 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
10494 if (prev_mode == riscv_vector::FRM_DYN_CALL)
10495 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
10497 if (mode != prev_mode)
10499 rtx frm = gen_int_mode (mode, SImode);
10501 if (mode == riscv_vector::FRM_DYN_CALL
10502 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
10503 /* No need to emit when prev mode is DYN already. */
10504 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10505 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
10506 && prev_mode != riscv_vector::FRM_DYN
10507 && prev_mode != riscv_vector::FRM_DYN_CALL)
10508 /* No need to emit when prev mode is DYN or DYN_CALL already. */
10509 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10510 else if (mode == riscv_vector::FRM_DYN
10511 && prev_mode != riscv_vector::FRM_DYN_CALL)
10512 /* Restore frm value from backup when switch to DYN mode. */
10513 emit_insn (gen_fsrmsi_restore (backup_reg));
10514 else if (riscv_static_frm_mode_p (mode))
10515 /* Set frm value when switch to static mode. */
10516 emit_insn (gen_fsrmsi_restore (frm));
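/* For instance, a switch from FRM_DYN to the static FRM_RTZ writes RTZ
   (encoded as 1) into the frm CSR, while a later switch back to FRM_DYN
   restores the dynamic value saved in the backup register. */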
10520 /* Implement Mode switching. */
10522 static void
10523 riscv_emit_mode_set (int entity, int mode, int prev_mode,
10524 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
10526 switch (entity)
10528 case RISCV_VXRM:
10529 if (mode != VXRM_MODE_NONE && mode != prev_mode)
10530 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
10531 break;
10532 case RISCV_FRM:
10533 riscv_emit_frm_mode_set (mode, prev_mode);
10534 break;
10535 default:
10536 gcc_unreachable ();
10540 /* Adjust an insn's FRM_NONE mode to FRM_DYN when it directly follows
10541 a call, for the underlying mode-set emit. */
10543 static int
10544 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
10546 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
10548 if (insn && CALL_P (insn))
10549 return riscv_vector::FRM_DYN;
10551 return mode;
10554 /* Insert the backup frm insn at the end of the bb if and only if the call
10555 is the last insn of this bb. */
10557 static void
10558 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
10560 edge eg;
10561 bool abnormal_edge_p = false;
10562 edge_iterator eg_iterator;
10563 basic_block bb = BLOCK_FOR_INSN (cur_insn);
10565 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
10567 if (eg->flags & EDGE_ABNORMAL)
10568 abnormal_edge_p = true;
10569 else
10571 start_sequence ();
10572 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10573 rtx_insn *backup_insn = get_insns ();
10574 end_sequence ();
10576 insert_insn_on_edge (backup_insn, eg);
10580 if (abnormal_edge_p)
10582 start_sequence ();
10583 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10584 rtx_insn *backup_insn = get_insns ();
10585 end_sequence ();
10587 insert_insn_end_basic_block (backup_insn, bb);
10590 commit_edge_insertions ();
10593 /* Return mode that frm must be switched into
10594 prior to the execution of insn. */
10596 static int
10597 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
10599 if (!DYNAMIC_FRM_RTL(cfun))
10601 /* The dynamic frm will be initialized only once per cfun. */
10602 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
10603 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10606 if (CALL_P (cur_insn))
10608 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
10610 if (!insn)
10611 riscv_frm_emit_after_bb_end (cur_insn);
10613 return riscv_vector::FRM_DYN_CALL;
10616 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
10618 if (mode == riscv_vector::FRM_NONE)
10619 /* After a call, we need to back up the frm because it may be
10620 updated during the call. Here, for each insn, we check whether
10621 the previous insn is a call. When the previous insn is a call,
10622 there are 2 cases for the emit mode set.
10624 1. The current insn is not MODE_NONE; the mode switching framework
10625 will do the mode switch from MODE_CALL to MODE_NONE natively.
10626 2. The current insn is MODE_NONE; we need to adjust MODE_NONE to
10627 MODE_DYN, and leave the mode switch itself to perform
10628 the emit mode set. */
10630 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
10632 return mode;
10635 /* Return mode that entity must be switched into
10636 prior to the execution of insn. */
10638 static int
10639 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
10641 int code = recog_memoized (insn);
10643 switch (entity)
10645 case RISCV_VXRM:
10646 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
10647 case RISCV_FRM:
10648 return riscv_frm_mode_needed (insn, code);
10649 default:
10650 gcc_unreachable ();
10654 /* Return TRUE if INSN is an asm. */
10656 static bool
10657 asm_insn_p (rtx_insn *insn)
10659 extract_insn (insn);
10661 return recog_data.is_asm;
10664 /* Return TRUE if INSN makes the mode of VXRM unknown. */
10666 static bool
10667 vxrm_unknown_p (rtx_insn *insn)
10669 /* Return true if there is a definition of VXRM. */
10670 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
10671 return true;
10673 /* A call may contain an instruction that modifies the VXRM;
10674 return true in this situation. */
10675 if (CALL_P (insn))
10676 return true;
10678 /* Return true for all inline assembly, since users may hardcode assembly
10679 like this: asm volatile ("csrwi vxrm, 0"). */
10680 if (asm_insn_p (insn))
10681 return true;
10683 return false;
10686 /* Return TRUE if INSN makes the FRM mode unknown (dynamic). */
10688 static bool
10689 frm_unknown_dynamic_p (rtx_insn *insn)
10691 /* Return true if there is a definition of FRM. */
10692 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
10693 return true;
10695 return false;
10698 /* Return the mode that an insn results in for VXRM. */
10700 static int
10701 riscv_vxrm_mode_after (rtx_insn *insn, int mode)
10703 if (vxrm_unknown_p (insn))
10704 return VXRM_MODE_NONE;
10706 if (recog_memoized (insn) < 0)
10707 return mode;
10709 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
10710 return get_attr_vxrm_mode (insn);
10711 else
10712 return mode;
10715 /* Return the mode that an insn results in for FRM. */
10717 static int
10718 riscv_frm_mode_after (rtx_insn *insn, int mode)
10720 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);
10722 if (CALL_P (insn))
10723 return mode;
10725 if (frm_unknown_dynamic_p (insn))
10726 return riscv_vector::FRM_DYN;
10728 if (recog_memoized (insn) < 0)
10729 return mode;
10731 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
10732 return get_attr_frm_mode (insn);
10733 else
10734 return mode;
10737 /* Return the mode that an insn results in. */
10739 static int
10740 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
10742 switch (entity)
10744 case RISCV_VXRM:
10745 return riscv_vxrm_mode_after (insn, mode);
10746 case RISCV_FRM:
10747 return riscv_frm_mode_after (insn, mode);
10748 default:
10749 gcc_unreachable ();
10753 /* Return a mode that ENTITY is assumed to be
10754 switched to at function entry. */
10756 static int
10757 riscv_mode_entry (int entity)
10759 switch (entity)
10761 case RISCV_VXRM:
10762 return VXRM_MODE_NONE;
10763 case RISCV_FRM:
10765 /* According to RVV 1.0 spec, all vector floating-point operations use
10766 the dynamic rounding mode in the frm register. Likewise in other
10767 similar places. */
10768 return riscv_vector::FRM_DYN;
10770 default:
10771 gcc_unreachable ();
10775 /* Return a mode that ENTITY is assumed to be
10776 switched to at function exit. */
10778 static int
10779 riscv_mode_exit (int entity)
10781 switch (entity)
10783 case RISCV_VXRM:
10784 return VXRM_MODE_NONE;
10785 case RISCV_FRM:
10786 return riscv_vector::FRM_DYN_EXIT;
10787 default:
10788 gcc_unreachable ();
10792 static int
10793 riscv_mode_priority (int, int n)
10795 return n;
10798 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
10799 unsigned int
10800 riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
10802 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10803 return riscv_vector::autovectorize_vector_modes (modes, all);
10805 return default_autovectorize_vector_modes (modes, all);
10808 /* Implement TARGET_VECTORIZE_RELATED_MODE. */
10809 opt_machine_mode
10810 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
10811 poly_uint64 nunits)
10813 if (TARGET_VECTOR)
10814 return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
10815 nunits);
10816 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
10819 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
10821 static bool
10822 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
10823 rtx target, rtx op0, rtx op1,
10824 const vec_perm_indices &sel)
10826 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
10827 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
10828 op1, sel);
10830 return false;
10833 static bool
10834 riscv_frame_pointer_required (void)
10836 return riscv_save_frame_pointer && !crtl->is_leaf;
10839 /* Return the appropriate common costs according to VECTYPE from COSTS. */
10840 static const common_vector_cost *
10841 get_common_costs (const cpu_vector_cost *costs, tree vectype)
10843 gcc_assert (costs);
10845 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
10846 return costs->vls;
10847 return costs->vla;
10850 /* Return the CPU vector costs according to -mtune if the tune info has
10851 a non-NULL vector cost. Otherwise, return the default generic vector costs. */
10852 const cpu_vector_cost *
10853 get_vector_costs ()
10855 const cpu_vector_cost *costs = tune_param->vec_costs;
10856 if (!costs)
10857 return &generic_vector_cost;
10858 return costs;
10861 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10863 static int
10864 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10865 tree vectype, int misalign ATTRIBUTE_UNUSED)
10867 const cpu_vector_cost *costs = get_vector_costs ();
10868 bool fp = false;
10870 if (vectype != NULL)
10871 fp = FLOAT_TYPE_P (vectype);
10873 const common_vector_cost *common_costs = get_common_costs (costs, vectype);
10874 gcc_assert (common_costs != NULL);
10875 switch (type_of_cost)
10877 case scalar_stmt:
10878 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
10880 case scalar_load:
10881 return costs->scalar_load_cost;
10883 case scalar_store:
10884 return costs->scalar_store_cost;
10886 case vector_stmt:
10887 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10889 case vector_load:
10890 return common_costs->align_load_cost;
10892 case vector_store:
10893 return common_costs->align_store_cost;
10895 case vec_to_scalar:
10896 return common_costs->vec_to_scalar_cost;
10898 case scalar_to_vec:
10899 return common_costs->scalar_to_vec_cost;
10901 case unaligned_load:
10902 return common_costs->unalign_load_cost;
10903 case vector_gather_load:
10904 return common_costs->gather_load_cost;
10906 case unaligned_store:
10907 return common_costs->unalign_store_cost;
10908 case vector_scatter_store:
10909 return common_costs->scatter_store_cost;
10911 case cond_branch_taken:
10912 return costs->cond_taken_branch_cost;
10914 case cond_branch_not_taken:
10915 return costs->cond_not_taken_branch_cost;
10917 case vec_perm:
10918 return common_costs->permute_cost;
10920 case vec_promote_demote:
10921 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10923 case vec_construct:
10924 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
10926 default:
10927 gcc_unreachable ();
10930 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
10933 /* Implement targetm.vectorize.create_costs. */
10935 static vector_costs *
10936 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
10938 if (TARGET_VECTOR)
10939 return new riscv_vector::costs (vinfo, costing_for_scalar);
10940 /* Default vector costs. */
10941 return new vector_costs (vinfo, costing_for_scalar);
10944 /* Implement TARGET_PREFERRED_ELSE_VALUE. */
10946 static tree
10947 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
10948 tree *ops)
10950 if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
10951 return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
10953 return default_preferred_else_value (ifn, vectype, nops, ops);
10956 /* If MEM is in the form of "base+offset", extract the two parts
10957 of the address into BASE and OFFSET; otherwise return false
10958 after clearing BASE and OFFSET. */
10960 bool
10961 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10963 rtx addr;
10965 gcc_assert (MEM_P (mem));
10967 addr = XEXP (mem, 0);
10969 if (REG_P (addr))
10971 *base = addr;
10972 *offset = const0_rtx;
10973 return true;
10976 if (GET_CODE (addr) == PLUS
10977 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10979 *base = XEXP (addr, 0);
10980 *offset = XEXP (addr, 1);
10981 return true;
10984 *base = NULL_RTX;
10985 *offset = NULL_RTX;
10987 return false;
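/* E.g. (mem (plus (reg a0) (const_int 8))) sets *BASE to the a0 reg and
   *OFFSET to (const_int 8); a bare (mem (reg a0)) yields a zero offset. */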
10990 /* Implements target hook vector_mode_supported_any_target_p. */
10992 static bool
10993 riscv_vector_mode_supported_any_target_p (machine_mode)
10995 if (TARGET_XTHEADVECTOR)
10996 return false;
10997 return true;
11000 /* Initialize the GCC target structure. */
11001 #undef TARGET_ASM_ALIGNED_HI_OP
11002 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
11003 #undef TARGET_ASM_ALIGNED_SI_OP
11004 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11005 #undef TARGET_ASM_ALIGNED_DI_OP
11006 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
11008 #undef TARGET_OPTION_OVERRIDE
11009 #define TARGET_OPTION_OVERRIDE riscv_option_override
11011 #undef TARGET_OPTION_RESTORE
11012 #define TARGET_OPTION_RESTORE riscv_option_restore
11014 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
11015 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
11017 #undef TARGET_LEGITIMIZE_ADDRESS
11018 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
11020 #undef TARGET_SCHED_ISSUE_RATE
11021 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
11022 #undef TARGET_SCHED_MACRO_FUSION_P
11023 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
11024 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11025 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
11027 #undef TARGET_SCHED_VARIABLE_ISSUE
11028 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
11030 #undef TARGET_SCHED_ADJUST_COST
11031 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
11033 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11034 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
11036 #undef TARGET_SET_CURRENT_FUNCTION
11037 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function
11039 #undef TARGET_REGISTER_MOVE_COST
11040 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
11041 #undef TARGET_MEMORY_MOVE_COST
11042 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
11043 #undef TARGET_RTX_COSTS
11044 #define TARGET_RTX_COSTS riscv_rtx_costs
11045 #undef TARGET_ADDRESS_COST
11046 #define TARGET_ADDRESS_COST riscv_address_cost
11047 #undef TARGET_INSN_COST
11048 #define TARGET_INSN_COST riscv_insn_cost
11050 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
11051 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
11052 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
11053 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p
11055 #undef TARGET_ASM_FILE_START
11056 #define TARGET_ASM_FILE_START riscv_file_start
11057 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
11058 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
11059 #undef TARGET_ASM_FILE_END
11060 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
11062 #undef TARGET_EXPAND_BUILTIN_VA_START
11063 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
11065 #undef TARGET_PROMOTE_FUNCTION_MODE
11066 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode
11068 #undef TARGET_RETURN_IN_MEMORY
11069 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
11071 #undef TARGET_ASM_OUTPUT_MI_THUNK
11072 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
11073 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11074 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11076 #undef TARGET_PRINT_OPERAND
11077 #define TARGET_PRINT_OPERAND riscv_print_operand
11078 #undef TARGET_PRINT_OPERAND_ADDRESS
11079 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
11080 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
11081 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
11083 #undef TARGET_SETUP_INCOMING_VARARGS
11084 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
11085 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
11086 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
11087 #undef TARGET_STRICT_ARGUMENT_NAMING
11088 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
11089 #undef TARGET_MUST_PASS_IN_STACK
11090 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11091 #undef TARGET_PASS_BY_REFERENCE
11092 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
11093 #undef TARGET_ARG_PARTIAL_BYTES
11094 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
11095 #undef TARGET_FUNCTION_ARG
11096 #define TARGET_FUNCTION_ARG riscv_function_arg
11097 #undef TARGET_FUNCTION_ARG_ADVANCE
11098 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
11099 #undef TARGET_FUNCTION_ARG_BOUNDARY
11100 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
11101 #undef TARGET_FNTYPE_ABI
11102 #define TARGET_FNTYPE_ABI riscv_fntype_abi
11103 #undef TARGET_INSN_CALLEE_ABI
11104 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi
11106 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
11107 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
11108 riscv_get_separate_components
11110 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
11111 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
11112 riscv_components_for_bb
11114 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
11115 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
11116 riscv_disqualify_components
11118 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
11119 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
11120 riscv_emit_prologue_components
11122 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
11123 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
11124 riscv_emit_epilogue_components
11126 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
11127 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
11128 riscv_set_handled_components
11130 /* The generic ELF target does not always have TLS support. */
11131 #ifdef HAVE_AS_TLS
11132 #undef TARGET_HAVE_TLS
11133 #define TARGET_HAVE_TLS true
11134 #endif
11136 #undef TARGET_CANNOT_FORCE_CONST_MEM
11137 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem
11139 #undef TARGET_LEGITIMATE_CONSTANT_P
11140 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p
11142 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11143 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p
11145 #undef TARGET_LEGITIMATE_ADDRESS_P
11146 #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p
11148 #undef TARGET_CAN_ELIMINATE
11149 #define TARGET_CAN_ELIMINATE riscv_can_eliminate
11151 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11152 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage
11154 #undef TARGET_CLASS_MAX_NREGS
11155 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs
11157 #undef TARGET_TRAMPOLINE_INIT
11158 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init
11160 #undef TARGET_IN_SMALL_DATA_P
11161 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
11163 #undef TARGET_HAVE_SRODATA_SECTION
11164 #define TARGET_HAVE_SRODATA_SECTION true
11166 #undef TARGET_ASM_SELECT_SECTION
11167 #define TARGET_ASM_SELECT_SECTION riscv_select_section
11169 #undef TARGET_ASM_UNIQUE_SECTION
11170 #define TARGET_ASM_UNIQUE_SECTION riscv_unique_section
11172 #undef TARGET_ASM_SELECT_RTX_SECTION
11173 #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY riscv_register_priority

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS riscv_init_builtins

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL riscv_builtin_decl

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN riscv_expand_builtin

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment

#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE riscv_attribute_table

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN riscv_warn_func_return

/* The low bit is ignored by jump instructions so is safe to use.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
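/* A value of 1 exposes one spare low-order bit: a function pointer with
   bit 0 set can tag a descriptor for a nested function (used in place of
   an executable-stack trampoline, see -fno-trampolines), while ordinary
   indirect calls are unaffected because jumps ignore that bit.  */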

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg

#undef TARGET_NEW_ADDRESS_PROFITABLE_P
#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE riscv_mangle_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  riscv_libgcc_floating_mode_supported_p

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS riscv_init_libfuncs

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION riscv_excess_precision

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE riscv_floatn_mode

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p

#undef TARGET_VERIFY_TYPE_CONTEXT
#define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode

#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE riscv_array_mode

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  riscv_vectorize_preferred_vector_alignment

/* Mode switching hooks.  These drive the mode-switching pass that tracks
   the dynamic floating-point rounding mode (frm) and the vector
   fixed-point rounding mode (vxrm) CSRs.  */

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT riscv_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED riscv_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER riscv_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY riscv_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT riscv_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY riscv_mode_priority

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  riscv_autovectorize_vector_modes

#undef TARGET_VECTORIZE_RELATED_MODE
#define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  riscv_builtin_vectorization_cost

#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs

#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value

#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
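
/* TARGET_INITIALIZER comes from target-hooks-def.h: it combines the hook
   macros #defined above with the default for every hook left untouched,
   producing the complete vector of target callbacks.  */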
struct gcc_target targetm = TARGET_INITIALIZER;
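
/* gt-riscv.h is generated by gengtype; including it registers this
   file's GTY-marked data with the garbage collector.  */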
#include "gt-riscv.h"