/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))

/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching has a static frm, false otherwise.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)
/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)
/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, clearing all fields to zero.  */
  void reset(void);
};

enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding-mode instruction in the
     function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};

struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};
/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8
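
/* For example, the constant 0x12345678 can be built in two operations:
       A = 0x12345000		(LUI)
       A = A + 0x678		(ADDI)
   i.e. codes[] = { {UNKNOWN, 0x12345000}, {PLUS, 0x678} }.  */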
enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};
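
/* For example, RISCV_FUSE_LUI_ADDI marks cores that can fuse the pair
   "lui rd, imm20" + "addi rd, rd, imm12" (the usual way to materialize a
   32-bit constant or symbol address) into a single macro-op.  */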
/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool use_divmod_expansion;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};

/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;
/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS, GR_REGS, GR_REGS, GR_REGS,
  GR_REGS, GR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  JALR_REGS, JALR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FRAME_REGS, FRAME_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  VM_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
};

/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};

/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};

/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};
/* Generic costs for vector insn classes.  These are the vector cost models
   used by default if no other cost model is specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* cond_taken_branch_cost */
  1, /* cond_not_taken_branch_cost */
  &rvv_vls_vector_cost, /* vls */
  &rvv_vla_vector_cost, /* vla */
  &rvv_regmove_vector_cost, /* regmove */
};
/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  2, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  3, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  4, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  4, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  4, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  6, /* issue_rate */
  3, /* branch_cost */
  3, /* memory_cost */
  3, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)}, /* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)}, /* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  4, /* memory_cost */
  4, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_div */
  1, /* issue_rate */
  1, /* branch_cost */
  2, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);
/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates a prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, to specify the exact
     bit size of the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};

static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, to specify the exact
     bit size of the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};
/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};

/* Global variable recording whether we should save and restore s0/fp for
   the current function.  */
static bool riscv_save_frame_pointer;
typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

typedef insn_code (*code_for_push_pop_t) (machine_mode);

void riscv_frame_info::reset(void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}
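
/* Return the name of the tuning model to use: -mtune if given, otherwise
   fall back to -mcpu, otherwise to the build-time default.  */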
template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}
/* Return the riscv_tune_info entry for the given name string.  If NULL_P
   is true, return nullptr when no entry is found; otherwise report an
   error and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}
/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
	shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  cost = 2;
	}
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else
	{
	  int upper_trailing_ones = ctz_hwi (~value >> 32);
	  int lower_leading_ones = clz_hwi (~value << 32);

	  if (upper_trailing_ones < 32 && lower_leading_ones < 32
	      && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	    {
	      codes[0].code = UNKNOWN;
	      /* The sign-bit might be zero, so just rotate to be safe.  */
	      codes[0].value = ((value << (32 - upper_trailing_ones))
				| ((unsigned HOST_WIDE_INT) value
				   >> (32 + upper_trailing_ones)));
	      codes[1].code = ROTATERT;
	      codes[1].value = 32 - upper_trailing_ones;
	      cost = 2;
	    }
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
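
/* For example, with ZBB on RV64 the all-ones-except-a-hole constant
   0xfffffffffffff807 costs two operations: rotating it right by its three
   trailing ones yields 0xffffffffffffff00 (-256, a valid ADDI immediate),
   which is then rotated back with "rori rd, rd, 61".  */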
/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  return cost;
}

/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}

/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}
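
/* For example, splitting 0x1234567887654321: LOVAL is the sign-extended
   low word 0xffffffff87654321, so HIVAL becomes 0x12345679 rather than
   0x12345678 to compensate for the borrow; (HIVAL << 32) + LOVAL then
   reproduces the original value.  */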
/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}
/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling, ensuring that 29 and 30 of HF are the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};
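
/* The tables above use hex floating-point notation for the bit patterns:
   e.g. fli_value_sf[16] == 0x3f8p20 == 0x3f800000, the IEEE-754
   single-precision encoding of 1.0 (see fli_value_print below).  */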
/* Display floating-point values at the assembly level, which is consistent
   with the zfa extension of llvm:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};
/* Return the index into the FLI instruction table if rtx X is an immediate
   constant that can be moved using a single FLI instruction from the zfa
   extension.  Return -1 if not found.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P(x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* If the lower 32 bits are not all zero, the value cannot be in the
	 table.  */
      if (ival & (unsigned HOST_WIDE_INT)0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;

  /* Perform a binary search to find the target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;

  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m+1;
      else
	r = m-1;
    }

  return -1;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
	return false;

      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Get valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}
/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
		      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting
     a decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
	 so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
	return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
	       ? DECL_ALIGN (SYMBOL_REF_DECL (x))
	       : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
	      ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      : 2*BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}
/* Return true if MODE is an enabled RVV mode.
   For example: 'RVVMF2SI' mode is disabled,
   whereas 'RVVM1SI' mode is enabled if MIN_VLEN == 32.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...)					\
  case MODE##mode:							\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}
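
/* Each ENTRY in riscv-vector-switch.def expands to one case of the switch
   above; schematically (the actual entries and predicates live in the .def
   file):
     ENTRY (RVVM1SI, <predicate>, ...)
   becomes
     case RVVM1SImode: return <predicate>;  */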
/* Return true if MODE is an enabled RVV tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...)				\
  case MODE##mode:							\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is an enabled RVV vls mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT)					\
  case MODE##mode:							\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is any of the following:
   1. An RVV vector mode.
   2. An RVV tuple mode.
   3. An RVV vls mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
	 || riscv_v_ext_vls_mode_p (mode);
}
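
/* Return the number of registers (of SCALAR_UNIT_SIZE bytes each) needed to
   hold a VLS-mode aggregate of VLS_UNIT_SIZE bytes, with a minimum of one.  */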
static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
				      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size < scalar_unit_size)
    return 1;

  /* Ensure the VLS unit size is exactly divisible by scalar_unit_size.  */
  gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

  return vls_unit_size / scalar_unit_size;
}
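
/* Return the scalar integer mode (QImode ... TImode) whose byte size equals
   VLS_MODE_SIZE.  */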
static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  switch (vls_mode_size)
    {
    case 16:
      return TImode;
    case 8:
      return DImode;
    case 4:
      return SImode;
    case 2:
      return HImode;
    case 1:
      return QImode;
    default:
      gcc_unreachable ();
    }
}

/* Call from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, int scale)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
  if (riscv_v_ext_mode_p (mode))
    {
      if (TARGET_MIN_VLEN == 32)
	scale = scale / 2;
      return riscv_vector_chunks * scale;
    }
  return scale;
}

/* Call from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
{
  if (riscv_v_ext_mode_p (mode))
    {
      scalar_mode smode = GET_MODE_INNER (mode);
      int size = GET_MODE_SIZE (smode);
      int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
      if (fractional_p)
	return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
      else
	return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
    }

  /* Set the disabled RVV modes size as 1 by default.  */
  return 1;
}

/* Call from ADJUST_BYTESIZE in riscv-modes.def.  Return the correct
   BYTE size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_bytesize (machine_mode mode, int scale)
{
  if (riscv_v_ext_vector_mode_p (mode))
    {
      if (TARGET_XTHEADVECTOR)
	return BYTES_PER_RISCV_VECTOR;

      poly_int64 nunits = GET_MODE_NUNITS (mode);

      if (nunits.coeffs[0] > 8)
	return exact_div (nunits, 8);
      else if (nunits.is_constant ())
	return 1;
      else
	return poly_int64 (1, 1);
    }

  return scale;
}

/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
   PRECISION size for corresponding machine_mode.  */

poly_int64
riscv_v_adjust_precision (machine_mode mode, int scale)
{
  return riscv_v_adjust_nunits (mode, scale);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  if (th_classify_address (info, x, mode, strict_p))
    return true;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      /* RVV load/store disallow any offset.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      /* RVV load/store disallow LO_SUM.  */
      if (riscv_v_ext_mode_p (mode))
	return false;

      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
	 sane.  Target-independent code that creates a LO_SUM should also
	 create and verify the matching HIGH.  Target-independent code that
	 adds an offset to a LO_SUM must prove that the offset will not
	 induce a carry.  Failure to do either of these things would be
	 a bug, and we are not required to check for it here.  The RISC-V
	 backend itself should only create LO_SUMs for valid symbolic
	 constants, with the high part being either a HIGH or a copy
	 of _gp.  */
      info->symbol_type
	= riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));

    case CONST_INT:
      /* We only allow the const0_rtx for the RVV load/store.  For example:
	 +----------------------------------------------------------+
	 | li      a5,0                                             |
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(a5)  <- propagate the const 0 to a5 here.  |
	 | vs1r.v  v24,0(a0)                                        |
	 +----------------------------------------------------------+
	 It can be folded to:
	 +----------------------------------------------------------+
	 | vsetvli zero,a1,e32,m1,ta,ma                             |
	 | vle32.v v24,0(zero)                                      |
	 | vs1r.v  v24,0(a0)                                        |
	 +----------------------------------------------------------+
	 This behavior will benefit the underlying RVV auto vectorization.  */
      if (riscv_v_ext_mode_p (mode))
	return x == const0_rtx;

      /* Small-integer addresses don't occur very often, but they
	 are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
			    code_helper = ERROR_MARK)
{
  /* Disallow RVV modes base address.
     E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0).  */
  if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
    return false;
  struct riscv_address_info addr;

  return riscv_classify_address (&addr, x, mode, strict_p);
}
/* Return true if hard reg REGNO can be used in compressed instructions.  */

static bool
riscv_compressed_reg_p (int regno)
{
  /* x8-x15/f8-f15 are compressible registers.  */
  return ((TARGET_RVC || TARGET_ZCA)
	  && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
	      || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
}

/* Return true if X is an unsigned 5-bit immediate scaled by 4.  */

static bool
riscv_compressed_lw_offset_p (rtx x)
{
  return (CONST_INT_P (x)
	  && (INTVAL (x) & 3) == 0
	  && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
}
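
/* That is, one of 0, 4, 8, ..., CSW_MAX_OFFSET: the offsets reachable by
   the compressed c.lw/c.sw instructions.  */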
/* Return true if load/store from/to address X can be compressed.  */

static bool
riscv_compressed_lw_address_p (rtx x)
{
  struct riscv_address_info addr;
  bool result = riscv_classify_address (&addr, x, GET_MODE (x),
					reload_completed);

  /* Return false if address is not compressed_reg + small_offset.  */
  if (!result
      || addr.type != ADDRESS_REG
      /* Before reload, assume all registers are OK.  */
      || (reload_completed
	  && !riscv_compressed_reg_p (REGNO (addr.reg))
	  && addr.reg != stack_pointer_rtx)
      || !riscv_compressed_lw_offset_p (addr.offset))
    return false;

  return result;
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr = {};
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    {
      /* This could be a pattern from the pic.md file.  In which case we want
	 this address to always have a cost of 3 to make it as expensive as
	 the most expensive symbol.  This prevents constant propagation from
	 preferring symbols over register plus offset.  */
      return 3;
    }

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}
1761 /* Return the number of instructions needed to load constant X.
1762 Return 0 if X isn't a valid constant. */
1765 riscv_const_insns (rtx x)
1767 enum riscv_symbol_type symbol_type;
1768 rtx offset;
1770 switch (GET_CODE (x))
1772 case HIGH:
1773 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
1774 || !riscv_split_symbol_type (symbol_type))
1775 return 0;
1777 /* This is simply an LUI. */
1778 return 1;
1780 case CONST_INT:
1782 int cost = riscv_integer_cost (INTVAL (x));
1783 /* Force complicated constants to memory. */
1784 return cost < 4 ? cost : 0;
1787 case CONST_DOUBLE:
1788 /* See if we can use FMV directly. */
1789 if (satisfies_constraint_zfli (x))
1790 return 1;
1792 /* We can use x0 to load floating-point zero. */
1793 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
1794 case CONST_VECTOR:
1796 /* TODO: This is not accurate, we will need to
1797 adapt the COST of CONST_VECTOR in the future
1798 for the following cases:
1800 - 1. const duplicate vector with element value
1801 in range of [-16, 15].
1802 - 2. const duplicate vector with element value
1803 out range of [-16, 15].
1804 - 3. const series vector.
1805 ...etc. */
1806 if (riscv_v_ext_mode_p (GET_MODE (x)))
1808 /* const series vector. */
1809 rtx base, step;
1810 if (const_vec_series_p (x, &base, &step))
1812 /* This is not accurate, we will need to adapt the COST
1813 * accurately according to BASE && STEP. */
1814 return 1;
1817 rtx elt;
1818 if (const_vec_duplicate_p (x, &elt))
1820 /* We don't allow CONST_VECTOR for DI vector on RV32
1821 system since the ELT constant value can not held
1822 within a single register to disable reload a DI
1823 register vec_duplicate into vmv.v.x. */
1824 scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
1825 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
1826 && !immediate_operand (elt, Pmode))
1827 return 0;
1828 /* Constants from -16 to 15 can be loaded with vmv.v.i.
1829 The Wc0, Wc1 constraints are already covered by the
1830 vi constraint so we do not need to check them here
1831 separately. */
1832 if (satisfies_constraint_vi (x))
1833 return 1;
1835 /* Any int/FP constants can always be broadcast from a
1836 scalar register. Loading of a floating-point
1837 constant incurs a literal-pool access. Allow this in
1838 order to increase vectorization possibilities. */
1839 int n = riscv_const_insns (elt);
1840 if (CONST_DOUBLE_P (elt))
1841 return 1 + 4; /* vfmv.v.f + memory access. */
1842 else
1844 /* We need as many insns as it takes to load the constant
1845 into a GPR and one vmv.v.x. */
1846 if (n != 0)
1847 return 1 + n;
1848 else
1849 return 1 + 4; /*vmv.v.x + memory access. */
1854 /* TODO: We may support more const vector in the future. */
1855 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
1858 case CONST:
1859 /* See if we can refer to X directly. */
1860 if (riscv_symbolic_constant_p (x, &symbol_type))
1861 return riscv_symbol_insns (symbol_type);
1863 /* Otherwise try splitting the constant into a base and offset. */
1864 split_const (x, &x, &offset);
1865 if (offset != 0)
1867 int n = riscv_const_insns (x);
1868 if (n != 0)
1869 return n + riscv_integer_cost (INTVAL (offset));
1871 return 0;
1873 case SYMBOL_REF:
1874 case LABEL_REF:
1875 return riscv_symbol_insns (riscv_classify_symbol (x));
1877 /* TODO: In RVV, a CONST_POLY_INT is materialized with a csrr vlenb
1878 instruction plus several scalar shift or mult instructions, so the
1879 exact count is so far unknown. We set it to 4 temporarily. */
1880 case CONST_POLY_INT:
1881 return 4;
1883 default:
1884 return 0;
1888 /* X is a doubleword constant that can be handled by splitting it into
1889 two words and loading each word separately. Return the number of
1890 instructions required to do this. */
1893 riscv_split_const_insns (rtx x)
1895 unsigned int low, high;
1897 low = riscv_const_insns (riscv_subword (x, false));
1898 high = riscv_const_insns (riscv_subword (x, true));
1899 gcc_assert (low > 0 && high > 0);
1900 return low + high;
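/* Worked example (a sketch): on RV32, the DImode constant
   0x1234567800000001 splits into a high word of 0x12345678
   (lui + addi, 2 insns) and a low word of 0x1 (addi, 1 insn),
   so this returns 3.  */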
1903 /* Return the number of instructions needed to implement INSN,
1904 given that it loads from or stores to MEM. */
1907 riscv_load_store_insns (rtx mem, rtx_insn *insn)
1909 machine_mode mode;
1910 bool might_split_p;
1911 rtx set;
1913 gcc_assert (MEM_P (mem));
1914 mode = GET_MODE (mem);
1916 /* Try to prove that INSN does not need to be split. */
1917 might_split_p = true;
1918 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
1919 might_split_p = false;
1920 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
1922 set = single_set (insn);
1923 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
1924 might_split_p = false;
1927 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
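/* E.g. (a sketch): a DImode access on RV32 whose move must be split
   sets MIGHT_SPLIT_P and so counts the mode's word count on top of the
   base address cost, whereas on RV64 the same access is a single ld/sd
   and only the address cost is counted.  */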
1930 /* Emit a move from SRC to DEST. Assume that the move expanders can
1931 handle all moves if !can_create_pseudo_p (). The distinction is
1932 important because, unlike emit_move_insn, the move expanders know
1933 how to force Pmode objects into the constant pool even when the
1934 constant pool address is not itself legitimate. */
1937 riscv_emit_move (rtx dest, rtx src)
1939 return (can_create_pseudo_p ()
1940 ? emit_move_insn (dest, src)
1941 : emit_move_insn_1 (dest, src));
1944 /* Emit an instruction of the form (set TARGET SRC). */
1946 static rtx
1947 riscv_emit_set (rtx target, rtx src)
1949 emit_insn (gen_rtx_SET (target, src));
1950 return target;
1953 /* Emit an instruction of the form (set DEST (CODE X)). */
1956 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
1958 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
1961 /* Emit an instruction of the form (set DEST (CODE X Y)). */
1964 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
1966 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
1969 /* Compute (CODE X Y) and store the result in a new register
1970 of mode MODE. Return that new register. */
1972 static rtx
1973 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
1975 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
1978 static rtx
1979 riscv_swap_instruction (rtx inst)
1981 gcc_assert (GET_MODE (inst) == SImode);
1982 if (BYTES_BIG_ENDIAN)
1983 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
1984 return inst;
1987 /* Copy VALUE to a register and return that register. If new pseudos
1988 are allowed, copy it into a new register, otherwise use DEST. */
1990 static rtx
1991 riscv_force_temporary (rtx dest, rtx value)
1993 if (can_create_pseudo_p ())
1994 return force_reg (Pmode, value);
1995 else
1997 riscv_emit_move (dest, value);
1998 return dest;
2002 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2003 then add CONST_INT OFFSET to the result. */
2005 static rtx
2006 riscv_unspec_address_offset (rtx base, rtx offset,
2007 enum riscv_symbol_type symbol_type)
2009 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2010 UNSPEC_ADDRESS_FIRST + symbol_type);
2011 if (offset != const0_rtx)
2012 base = gen_rtx_PLUS (Pmode, base, offset);
2013 return gen_rtx_CONST (Pmode, base);
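/* For instance (illustrative RTL; "foo" and the offset 4 are
   hypothetical inputs), a SYMBOL_ABSOLUTE wrap produces:
     (const:DI (plus:DI (unspec:DI [(symbol_ref:DI "foo")]
			  UNSPEC_ADDRESS_FIRST + SYMBOL_ABSOLUTE)
			(const_int 4)))  */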
2016 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2017 type SYMBOL_TYPE. */
2020 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2022 rtx base, offset;
2024 split_const (address, &base, &offset);
2025 return riscv_unspec_address_offset (base, offset, symbol_type);
2028 /* If OP is an UNSPEC address, return the address to which it refers,
2029 otherwise return OP itself. */
2031 static rtx
2032 riscv_strip_unspec_address (rtx op)
2034 rtx base, offset;
2036 split_const (op, &base, &offset);
2037 if (UNSPEC_ADDRESS_P (base))
2038 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2039 return op;
2042 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
2043 high part to BASE and return the result. Just return BASE otherwise.
2044 TEMP is as for riscv_force_temporary.
2046 The returned expression can be used as the first operand to a LO_SUM. */
2048 static rtx
2049 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2051 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2052 return riscv_force_temporary (temp, addr);
2055 /* Load an entry from the GOT for a TLS GD access. */
2057 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2059 if (Pmode == DImode)
2060 return gen_got_load_tls_gddi (dest, sym);
2061 else
2062 return gen_got_load_tls_gdsi (dest, sym);
2065 /* Load an entry from the GOT for a TLS IE access. */
2067 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2069 if (Pmode == DImode)
2070 return gen_got_load_tls_iedi (dest, sym);
2071 else
2072 return gen_got_load_tls_iesi (dest, sym);
2075 /* Add in the thread pointer for a TLS LE access. */
2077 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2079 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2080 if (Pmode == DImode)
2081 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2082 else
2083 return gen_tls_add_tp_lesi (dest, base, tp, sym);
2086 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2087 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2088 constant in that context and can be split into high and low parts.
2089 If so, and if LOW_OUT is nonnull, emit the high part and store the
2090 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2092 TEMP is as for riscv_force_temporary and is used to load the high
2093 part into a register.
2095 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2096 a legitimate SET_SRC for an .md pattern, otherwise the low part
2097 is guaranteed to be a legitimate address for mode MODE. */
2099 bool
2100 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2102 enum riscv_symbol_type symbol_type;
2104 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2105 || !riscv_symbolic_constant_p (addr, &symbol_type)
2106 || riscv_symbol_insns (symbol_type) == 0
2107 || !riscv_split_symbol_type (symbol_type))
2108 return false;
2110 if (low_out)
2111 switch (symbol_type)
2113 case SYMBOL_FORCE_TO_MEM:
2114 return false;
2116 case SYMBOL_ABSOLUTE:
2118 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2119 high = riscv_force_temporary (temp, high);
2120 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2122 break;
2124 case SYMBOL_PCREL:
2126 static unsigned seqno;
2127 char buf[32];
2128 rtx label;
2130 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2131 gcc_assert ((size_t) bytes < sizeof (buf));
2133 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2134 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2135 /* ??? Ugly hack to make weak symbols work. May need to change the
2136 RTL for the auipc and/or low patterns to get a better fix for
2137 this. */
2138 if (! nonzero_address_p (addr))
2139 SYMBOL_REF_WEAK (label) = 1;
2141 if (temp == NULL)
2142 temp = gen_reg_rtx (Pmode);
2144 if (Pmode == DImode)
2145 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2146 else
2147 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2149 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2151 seqno++;
2153 break;
2155 default:
2156 gcc_unreachable ();
2159 return true;
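/* The split forms roughly correspond to these sequences (a sketch):
     SYMBOL_ABSOLUTE:   lui   t0, %hi(sym)
			...   %lo(sym) used in the LO_SUM
     SYMBOL_PCREL: .LA0: auipc t0, %pcrel_hi(sym)
			...   %pcrel_lo(.LA0) used in the LO_SUM  */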
2162 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2163 riscv_force_temporary; it is only needed when OFFSET is not a
2164 SMALL_OPERAND. */
2166 static rtx
2167 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2169 if (!SMALL_OPERAND (offset))
2171 rtx high;
2173 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2174 The addition inside the macro CONST_HIGH_PART may cause an
2175 overflow, so we need to force a sign-extension check. */
2176 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2177 offset = CONST_LOW_PART (offset);
2178 high = riscv_force_temporary (temp, high);
2179 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2181 return plus_constant (Pmode, reg, offset);
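/* Worked example (a sketch): for REG + 0x12fff, CONST_HIGH_PART gives
   0x13000 (loaded with LUI and added to REG) and CONST_LOW_PART gives
   -1, because the low 12 bits are sign-extended; 0x13000 - 1
   reproduces 0x12fff.  */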
2184 /* The __tls_get_addr symbol. */
2185 static GTY(()) rtx riscv_tls_symbol;
2187 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2188 the TLS symbol we are referencing, which is accessed with the
2189 global-dynamic model via the GOT load above. RESULT is an RTX for
2190 the return value location. */
2192 static rtx_insn *
2193 riscv_call_tls_get_addr (rtx sym, rtx result)
2195 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2196 rtx_insn *insn;
2198 if (!riscv_tls_symbol)
2199 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2200 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2202 start_sequence ();
2204 emit_insn (riscv_got_load_tls_gd (a0, sym));
2205 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2206 gen_int_mode (RISCV_CC_BASE, SImode)));
2207 RTL_CONST_CALL_P (insn) = 1;
2208 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2209 insn = get_insns ();
2211 end_sequence ();
2213 return insn;
2216 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2217 its address. The return value will be both a valid address and a valid
2218 SET_SRC (either a REG or a LO_SUM). */
2220 static rtx
2221 riscv_legitimize_tls_address (rtx loc)
2223 rtx dest, tp, tmp;
2224 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2226 #if 0
2227 /* TLS copy relocs are now deprecated and should not be used. */
2228 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2229 if (!flag_pic)
2230 model = TLS_MODEL_LOCAL_EXEC;
2231 #endif
2233 switch (model)
2235 case TLS_MODEL_LOCAL_DYNAMIC:
2236 /* Rely on section anchors for the optimization that LDM TLS
2237 provides. The anchor's address is loaded with GD TLS. */
2238 case TLS_MODEL_GLOBAL_DYNAMIC:
2239 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2240 dest = gen_reg_rtx (Pmode);
2241 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, loc);
2242 break;
2244 case TLS_MODEL_INITIAL_EXEC:
2245 /* la.tls.ie; tp-relative add */
2246 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2247 tmp = gen_reg_rtx (Pmode);
2248 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2249 dest = gen_reg_rtx (Pmode);
2250 emit_insn (gen_add3_insn (dest, tmp, tp));
2251 break;
2253 case TLS_MODEL_LOCAL_EXEC:
2254 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2255 dest = gen_reg_rtx (Pmode);
2256 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2257 dest = gen_rtx_LO_SUM (Pmode, dest,
2258 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2259 break;
2261 default:
2262 gcc_unreachable ();
2264 return dest;
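/* Typical assembly for the three models (a sketch; the GD call is
   built by riscv_call_tls_get_addr above):
     GD:  la.tls.gd  a0, sym
	  call	     __tls_get_addr
     IE:  la.tls.ie  a5, sym
	  add	     a0, a5, tp
     LE:  lui	     a5, %tprel_hi(sym)
	  add	     a5, a5, tp, %tprel_add(sym)
	  addi	     a0, a5, %tprel_lo(sym)  */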
2267 /* If X is not a valid address for mode MODE, force it into a register. */
2269 static rtx
2270 riscv_force_address (rtx x, machine_mode mode)
2272 if (!riscv_legitimate_address_p (mode, x, false))
2274 if (can_create_pseudo_p ())
2275 return force_reg (Pmode, x);
2276 else
2278 /* It's only safe for the thunk function.
2279 Use ra as the temp register. */
2280 gcc_assert (riscv_in_thunk_func);
2281 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2282 riscv_emit_move (reg, x);
2283 return reg;
2287 return x;
2290 /* Modify base + offset so that offset fits within a compressed load/store insn
2291 and the excess is added to base. */
2293 static rtx
2294 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2296 rtx addr, high;
2297 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2298 into HIGH. */
2299 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2300 offset &= CSW_MAX_OFFSET;
2301 if (!SMALL_OPERAND (INTVAL (high)))
2302 high = force_reg (Pmode, high);
2303 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2304 addr = plus_constant (Pmode, base, offset);
2305 return addr;
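/* Worked example (a sketch, assuming CSW_MAX_OFFSET is 124, c.lw's
   unsigned 5-bit offset scaled by 4): BASE + 200 becomes
   NEW_BASE = BASE + 128 with the access at NEW_BASE + 72, since
   200 & ~124 == 128 and 200 & 124 == 72.  */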
2308 /* Helper for riscv_legitimize_address. Given X, return true if it
2309 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2311 These respectively represent canonical shift-add rtxs and scaled
2312 memory addresses. */
2313 static bool
2314 mem_shadd_or_shadd_rtx_p (rtx x)
2316 return ((GET_CODE (x) == ASHIFT
2317 || GET_CODE (x) == MULT)
2318 && CONST_INT_P (XEXP (x, 1))
2319 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2320 || (GET_CODE (x) == MULT
2321 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
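/* Matches, for example, (ashift (reg) (const_int 2)) and the
   equivalent (mult (reg) (const_int 4)); with Zba either form can
   become a single sh2add.  (Illustrative only.)  */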
2324 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2325 be legitimized in a way that the generic machinery might not expect,
2326 return a new address, otherwise return NULL. MODE is the mode of
2327 the memory being accessed. */
2329 static rtx
2330 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2331 machine_mode mode)
2333 rtx addr;
2335 if (riscv_tls_symbol_p (x))
2336 return riscv_legitimize_tls_address (x);
2338 /* See if the address can split into a high part and a LO_SUM. */
2339 if (riscv_split_symbol (NULL, x, mode, &addr))
2340 return riscv_force_address (addr, mode);
2342 /* Handle BASE + OFFSET. */
2343 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2344 && INTVAL (XEXP (x, 1)) != 0)
2346 rtx base = XEXP (x, 0);
2347 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2349 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2350 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2351 && SMALL_OPERAND (offset))
2353 rtx index = XEXP (base, 0);
2354 rtx fp = XEXP (base, 1);
2355 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2358 /* If we were given a MULT, we must fix the constant
2359 as we're going to create the ASHIFT form. */
2360 int shift_val = INTVAL (XEXP (index, 1));
2361 if (GET_CODE (index) == MULT)
2362 shift_val = exact_log2 (shift_val);
2364 rtx reg1 = gen_reg_rtx (Pmode);
2365 rtx reg2 = gen_reg_rtx (Pmode);
2366 rtx reg3 = gen_reg_rtx (Pmode);
2367 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2368 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2369 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2371 return reg3;
2375 if (!riscv_valid_base_register_p (base, mode, false))
2376 base = copy_to_mode_reg (Pmode, base);
2377 if (optimize_function_for_size_p (cfun)
2378 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2379 && mode == SImode)
2380 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2381 possible compressed load/store. */
2382 addr = riscv_shorten_lw_offset (base, offset);
2383 else
2384 addr = riscv_add_offset (NULL, base, offset);
2385 return riscv_force_address (addr, mode);
2388 return x;
2391 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2392 is the original src mode before promotion. */
2394 void
2395 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2396 machine_mode orig_mode)
2398 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2399 machine_mode mode;
2400 int i, num_ops;
2401 rtx x;
2403 mode = GET_MODE (dest);
2404 /* We use the original mode for the riscv_build_integer call, because HImode
2405 values are given special treatment. */
2406 num_ops = riscv_build_integer (codes, value, orig_mode);
2408 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2409 && num_ops >= riscv_split_integer_cost (value))
2410 x = riscv_split_integer (value, mode);
2411 else
2413 codes[0].value = trunc_int_for_mode (codes[0].value, mode);
2414 /* Apply each binary operation to X. */
2415 x = GEN_INT (codes[0].value);
2417 for (i = 1; i < num_ops; i++)
2419 if (!can_create_pseudo_p ())
2420 x = riscv_emit_set (temp, x);
2421 else
2422 x = force_reg (mode, x);
2423 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2424 x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
2428 riscv_emit_set (dest, x);
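/* Worked example (a sketch of the common two-insn case): moving
   0x12345678 builds codes = { 0x12345000, (PLUS 0x678) }, emitted as
     lui   dest, 0x12345
     addi  dest, dest, 0x678  */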
2431 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2432 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2433 move_operand. */
2435 static void
2436 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2438 rtx base, offset;
2440 /* Split moves of big integers into smaller pieces. */
2441 if (splittable_const_int_operand (src, mode))
2443 riscv_move_integer (dest, dest, INTVAL (src), mode);
2444 return;
2447 if (satisfies_constraint_zfli (src))
2449 riscv_emit_set (dest, src);
2450 return;
2453 /* Split moves of symbolic constants into high/low pairs. */
2454 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2456 riscv_emit_set (dest, src);
2457 return;
2460 /* Generate the appropriate access sequences for TLS symbols. */
2461 if (riscv_tls_symbol_p (src))
2463 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2464 return;
2467 /* If we have (const (plus symbol offset)), and that expression cannot
2468 be forced into memory, load the symbol first and add in the offset. Also
2469 prefer to do this even if the constant _can_ be forced into memory, as it
2470 usually produces better code. */
2471 split_const (src, &base, &offset);
2472 if (offset != const0_rtx
2473 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2475 base = riscv_force_temporary (dest, base);
2476 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2477 return;
2480 /* Handle below format.
2481 (const:DI
2482 (plus:DI
2483 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2484 (const_poly_int:DI [16, 16]) // <- op_1
2487 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2488 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2490 rtx dest_tmp = gen_reg_rtx (mode);
2491 rtx tmp = gen_reg_rtx (mode);
2493 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2494 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2496 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2497 return;
2500 src = force_const_mem (mode, src);
2502 /* When using explicit relocs, constant pool references are sometimes
2503 not legitimate addresses. */
2504 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2505 riscv_emit_move (dest, src);
2508 /* Report when we try to do something that requires vector when vector is
2509 disabled. This is an error of last resort and isn't very high-quality. It
2510 usually involves attempts to measure the vector length in some way. */
2512 static void
2513 riscv_report_v_required (void)
2515 static bool reported_p = false;
2517 /* Avoid reporting a slew of messages for a single oversight. */
2518 if (reported_p)
2519 return;
2521 error ("this operation requires the RVV ISA extension");
2522 inform (input_location, "you can enable RVV using the command-line"
2523 " option %<-march%>, or by using the %<target%>"
2524 " attribute or pragma");
2525 reported_p = true;
2528 /* Helper function to emit an operation for rtx_code CODE. */
2529 static void
2530 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2531 rtx op2)
2533 if (can_create_pseudo_p ())
2535 rtx result;
2536 if (GET_RTX_CLASS (code) == RTX_UNARY)
2537 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2538 else
2539 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2540 OPTAB_DIRECT);
2541 riscv_emit_move (op0, result);
2543 else
2545 rtx pat;
2546 /* The following implementation is for the prologue and epilogue.
2547 Because the prologue and epilogue cannot use pseudo registers,
2548 we can't use expand_simple_binop or expand_simple_unop. */
2549 if (GET_RTX_CLASS (code) == RTX_UNARY)
2550 pat = gen_rtx_fmt_e (code, mode, op1);
2551 else
2552 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2553 emit_insn (gen_rtx_SET (op0, pat));
2557 /* Expand a mult operation with a constant integer; the multiplicand is
2558 also used as a temporary register. */
2560 static void
2561 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2562 HOST_WIDE_INT multiplier)
2564 if (multiplier == 0)
2566 riscv_emit_move (dest, GEN_INT (0));
2567 return;
2570 bool neg_p = multiplier < 0;
2571 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier);
2573 if (multiplier_abs == 1)
2575 if (neg_p)
2576 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2577 else
2578 riscv_emit_move (dest, multiplicand);
2580 else
2582 if (pow2p_hwi (multiplier_abs))
2585 /* multiplicand = [BYTES_PER_RISCV_VECTOR].
2586 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2587 Sequence:
2588 csrr a5, vlenb
2589 slli a5, a5, 3
2590 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2591 Sequence:
2592 csrr a5, vlenb
2593 slli a5, a5, 3
2594 neg a5, a5 */
2596 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2597 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2598 if (neg_p)
2599 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2601 else if (pow2p_hwi (multiplier_abs + 1))
2604 /* multiplicand = [BYTES_PER_RISCV_VECTOR].
2605 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2606 Sequence:
2607 csrr a5, vlenb
2608 slli a4, a5, 3
2609 sub a5, a4, a5
2610 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2611 Sequence:
2612 csrr a5, vlenb
2613 slli a4, a5, 3
2614 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4 */
2616 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2617 gen_int_mode (exact_log2 (multiplier_abs + 1),
2618 QImode));
2619 if (neg_p)
2620 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
2621 else
2622 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
2624 else if (pow2p_hwi (multiplier_abs - 1))
2627 /* multiplicand = [BYTES_PER_RISCV_VECTOR].
2628 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
2629 Sequence:
2630 csrr a5, vlenb
2631 slli a4, a5, 3
2632 add a5, a4, a5
2633 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
2634 Sequence:
2635 csrr a5, vlenb
2636 slli a4, a5, 3
2637 add a5, a4, a5
2638 neg a5, a5 */
2640 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2641 gen_int_mode (exact_log2 (multiplier_abs - 1),
2642 QImode));
2643 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
2644 if (neg_p)
2645 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2647 else
2649 /* We use multiplication for remaining cases. */
2650 gcc_assert (
2651 TARGET_MUL
2652 && "M-extension must be enabled to calculate the poly_int "
2653 "size/offset.");
2654 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
2655 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
2660 /* Analyze src and emit const_poly_int mov sequence. */
2662 void
2663 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
2665 poly_int64 value = rtx_to_poly_int64 (src);
2666 /* Use HOST_WIDE_INT instead of int, since a 32-bit type is not enough
2667 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
2668 HOST_WIDE_INT offset = value.coeffs[0];
2669 HOST_WIDE_INT factor = value.coeffs[1];
2670 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
2671 int div_factor = 0;
2672 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
2673 For any (const_poly_int:MODE [m, n]), the calculation formula is as
2674 follows.
2675 constant = m - n.
2676 When minimum VLEN = 32, poly of VLENB = (4, 4).
2677 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
2678 When minimum VLEN > 32, poly of VLENB = (8, 8).
2679 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
2680 magn = (n, n) / base.
2681 (m, n) = base * magn + constant.
2682 This calculation doesn't need a div operation. */
2684 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
2685 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
2686 else
2688 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
2689 emit_move_insn (gen_lowpart (Pmode, tmp),
2690 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
2693 if (BYTES_PER_RISCV_VECTOR.is_constant ())
2695 gcc_assert (value.is_constant ());
2696 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
2697 return;
2699 else
2701 int max_power = exact_log2 (MAX_POLY_VARIANT);
2702 for (int i = 0; i <= max_power; i++)
2704 int possible_div_factor = 1 << i;
2705 if (factor % (vlenb / possible_div_factor) == 0)
2707 div_factor = possible_div_factor;
2708 break;
2711 gcc_assert (div_factor != 0);
2714 if (div_factor != 1)
2715 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
2716 gen_int_mode (exact_log2 (div_factor), QImode));
2718 riscv_expand_mult_with_const_int (mode, dest, tmp,
2719 factor / (vlenb / div_factor));
2720 HOST_WIDE_INT constant = offset - factor;
2722 if (constant == 0)
2723 return;
2724 else if (SMALL_OPERAND (constant))
2725 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2726 else
2728 /* Handle a constant value that does not fit in 12 bits. */
2729 rtx high;
2731 /* Leave CONSTANT as a 12-bit offset and put the excess in HIGH.
2732 The addition inside the macro CONST_HIGH_PART may cause an
2733 overflow, so we need to force a sign-extension check. */
2734 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
2735 constant = CONST_LOW_PART (constant);
2736 riscv_emit_move (tmp, high);
2737 riscv_expand_op (PLUS, mode, dest, tmp, dest);
2738 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
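/* Worked example (a sketch, minimum VLEN > 32 so VLENB = (8, 8)):
   for (const_poly_int:DI [16, 16]), factor = 16, div_factor = 1,
   the multiplier is 16 / 8 = 2 and constant = 16 - 16 = 0, giving
     csrr  tmp, vlenb
     slli  dest, tmp, 1
   For [20, 16] the same sequence is followed by addi dest, dest, 4,
   since constant = 20 - 16 = 4.  */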
2742 /* Adjust the scalable vector frame for the prologue and epilogue. */
2744 static void
2745 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
2747 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
2748 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
2749 rtx insn, dwarf, adjust_frame_rtx;
2751 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
2752 gen_int_mode (offset, Pmode));
2754 if (epilogue)
2755 insn = gen_add3_insn (target, target, adjust_size);
2756 else
2757 insn = gen_sub3_insn (target, target, adjust_size);
2759 insn = emit_insn (insn);
2761 RTX_FRAME_RELATED_P (insn) = 1;
2763 adjust_frame_rtx
2764 = gen_rtx_SET (target,
2765 plus_constant (Pmode, target, epilogue ? offset : -offset));
2767 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
2768 NULL_RTX);
2770 REG_NOTES (insn) = dwarf;
2773 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
2774 sequence that is valid. */
2776 bool
2777 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
2779 if (CONST_POLY_INT_P (src))
2782 /* Handle:
2783 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
2784 (const_int 96 [0x60])) [0 S1 A8])
2785 (const_poly_int:QI [8, 8]))
2786 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil)) */
2788 if (MEM_P (dest))
2790 emit_move_insn (dest, force_reg (mode, src));
2791 return true;
2793 poly_int64 value = rtx_to_poly_int64 (src);
2794 if (!value.is_constant () && !TARGET_VECTOR)
2796 riscv_report_v_required ();
2797 return false;
2800 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
2801 return false;
2803 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
2805 /* In RV32 system, handle (const_poly_int:QI [m, n])
2806 (const_poly_int:HI [m, n]).
2807 In RV64 system, handle (const_poly_int:QI [m, n])
2808 (const_poly_int:HI [m, n])
2809 (const_poly_int:SI [m, n]). */
2810 rtx tmp = gen_reg_rtx (Pmode);
2811 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
2812 src);
2814 else
2816 /* In RV32 system, handle (const_poly_int:SI [m, n])
2817 (const_poly_int:DI [m, n]).
2818 In RV64 system, handle (const_poly_int:DI [m, n]).
2819 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode,
2820 the offset should not exceed 4GiB in general. */
2821 rtx tmp = gen_reg_rtx (mode);
2822 riscv_legitimize_poly_move (mode, dest, tmp, src);
2824 return true;
2826 /* Expand
2827 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2828 Expand this data movement instead of simply forbid it since
2829 we can improve the code generation for this following scenario
2830 by RVV auto-vectorization:
2831 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
2832 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2833 Since RVV mode and scalar mode are in different REG_CLASS,
2834 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
2835 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
2837 machine_mode vmode = GET_MODE (SUBREG_REG (src));
2838 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
2839 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
2840 /* We should be able to handle both partial and paradoxical subreg. */
2841 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
2842 scalar_mode smode = as_a<scalar_mode> (mode);
2843 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
2844 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
2845 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
2846 bool need_int_reg_p = false;
2848 if (num == 2)
2850 /* If we want to extract a 64-bit value but ELEN < 64,
2851 we use an RVV vector mode with EEW = 32 to extract
2852 the high part and low part. */
2853 need_int_reg_p = smode == DFmode;
2854 smode = SImode;
2855 nunits = nunits * 2;
2858 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
2860 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
2861 rtx int_reg = dest;
2863 if (need_int_reg_p)
2865 int_reg = gen_reg_rtx (DImode);
2866 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
2869 for (unsigned int i = 0; i < num; i++)
2871 rtx result;
2872 if (num == 1)
2873 result = int_reg;
2874 else if (i == 0)
2875 result = gen_lowpart (smode, int_reg);
2876 else
2877 result = gen_reg_rtx (smode);
2879 riscv_vector::emit_vec_extract (result, v,
2880 gen_int_mode (index + i, Pmode));
2882 if (i == 1)
2884 if (UNITS_PER_WORD < mode_size)
2885 /* If Pmode = SImode and mode = DImode, we just need to
2886 extract the element at index 1 from the vector and move it
2887 into the high part of DEST, since DEST consists of 2
2888 scalar registers. */
2889 emit_move_insn (gen_highpart (smode, int_reg), result);
2890 else
2892 rtx tmp = expand_binop (Pmode, ashl_optab,
2893 gen_lowpart (Pmode, result),
2894 gen_int_mode (32, Pmode),
2895 NULL_RTX, 0, OPTAB_DIRECT);
2896 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
2897 NULL_RTX, 0, OPTAB_DIRECT);
2898 emit_move_insn (int_reg, tmp2);
2903 if (need_int_reg_p)
2904 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
2905 else
2906 emit_move_insn (dest, int_reg);
2908 else
2909 gcc_unreachable ();
2911 return true;
2913 /* Expand
2914 (set (reg:QI target) (mem:QI (address)))
2915 to
2916 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
2917 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
2918 with auto-sign/zero extend. */
2919 if (GET_MODE_CLASS (mode) == MODE_INT
2920 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
2921 && can_create_pseudo_p ()
2922 && MEM_P (src))
2924 rtx temp_reg;
2925 int zero_extend_p;
2927 temp_reg = gen_reg_rtx (word_mode);
2928 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
2929 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
2930 zero_extend_p));
2931 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
2932 return true;
2935 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
2937 rtx reg;
2939 if (GET_CODE (src) == CONST_INT)
2941 /* Apply the equivalent of PROMOTE_MODE here for constants to
2942 improve cse. */
2943 machine_mode promoted_mode = mode;
2944 if (GET_MODE_CLASS (mode) == MODE_INT
2945 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
2946 promoted_mode = word_mode;
2948 if (splittable_const_int_operand (src, mode))
2950 reg = gen_reg_rtx (promoted_mode);
2951 riscv_move_integer (reg, reg, INTVAL (src), mode);
2953 else
2954 reg = force_reg (promoted_mode, src);
2956 if (promoted_mode != mode)
2957 reg = gen_lowpart (mode, reg);
2959 else
2960 reg = force_reg (mode, src);
2961 riscv_emit_move (dest, reg);
2962 return true;
2965 /* In order to fit NaN boxing, expand
2966 (set FP_REG (reg:HF src))
2967 to
2968 (set (reg:SI/DI mask) (const_int -65536))
2969 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0)))
2970 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
2971 (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X)) */
2974 if (TARGET_HARD_FLOAT
2975 && !TARGET_ZFHMIN && mode == HFmode
2976 && REG_P (dest) && FP_REG_P (REGNO (dest))
2977 && REG_P (src) && !FP_REG_P (REGNO (src))
2978 && can_create_pseudo_p ())
2980 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
2981 rtx temp = gen_reg_rtx (word_mode);
2982 emit_insn (gen_extend_insn (temp,
2983 simplify_gen_subreg (HImode, src, mode, 0),
2984 word_mode, HImode, 1));
2985 if (word_mode == SImode)
2986 emit_insn (gen_iorsi3 (temp, mask, temp));
2987 else
2988 emit_insn (gen_iordi3 (temp, mask, temp));
2990 riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp),
2991 UNSPEC_FMV_SFP16_X));
2993 return true;
2996 /* We need to deal with constants that would be legitimate
2997 immediate_operands but aren't legitimate move_operands. */
2998 if (CONSTANT_P (src) && !move_operand (src, mode))
3000 riscv_legitimize_const_move (mode, dest, src);
3001 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3002 return true;
3005 /* RISC-V GCC may generate a non-legitimate address because we provide
3006 some patterns to optimize access to PIC local symbols, which can make
3007 GCC generate unrecognizable instructions while optimizing. */
3009 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
3010 reload_completed))
3012 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
3015 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
3016 reload_completed))
3018 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
3021 return false;
3024 /* Return true if there is an instruction that implements CODE and accepts
3025 X as an immediate operand. */
3027 static int
3028 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3030 switch (code)
3032 case ASHIFT:
3033 case ASHIFTRT:
3034 case LSHIFTRT:
3035 /* All shift counts are truncated to a valid constant. */
3036 return true;
3038 case AND:
3039 case IOR:
3040 case XOR:
3041 case PLUS:
3042 case LT:
3043 case LTU:
3044 /* These instructions take 12-bit signed immediates. */
3045 return SMALL_OPERAND (x);
3047 case LE:
3048 /* We add 1 to the immediate and use SLT. */
3049 return SMALL_OPERAND (x + 1);
3051 case LEU:
3052 /* Likewise SLTU, but reject the always-true case. */
3053 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3055 case GE:
3056 case GEU:
3057 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3058 return x == 1;
3060 default:
3061 /* By default assume that x0 can be used for 0. */
3062 return x == 0;
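/* For example (illustrative): (le x 5) is encodable because it is
   emitted as slti dest, x, 6, while an unsigned x <= all-ones is
   rejected because x + 1 wraps to 0 and the comparison would be
   always true.  */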
3066 /* Return the cost of binary operation X, given that the instruction
3067 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3068 instructions and that the sequence of a double-word operation takes
3069 DOUBLE_INSNS instructions. */
3071 static int
3072 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3074 if (!riscv_v_ext_mode_p (GET_MODE (x))
3075 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3076 return COSTS_N_INSNS (double_insns);
3077 return COSTS_N_INSNS (single_insns);
3080 /* Return the cost of sign- or zero-extending OP. */
3082 static int
3083 riscv_extend_cost (rtx op, bool unsigned_p)
3085 if (MEM_P (op))
3086 return 0;
3088 if (unsigned_p && GET_MODE (op) == QImode)
3089 /* We can use ANDI. */
3090 return COSTS_N_INSNS (1);
3092 /* ZBA provides zext.w. */
3093 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3094 return COSTS_N_INSNS (1);
3096 /* ZBB provides zext.h, sext.b and sext.h. */
3097 if (TARGET_ZBB)
3099 if (!unsigned_p && GET_MODE (op) == QImode)
3100 return COSTS_N_INSNS (1);
3102 if (GET_MODE (op) == HImode)
3103 return COSTS_N_INSNS (1);
3106 if (!unsigned_p && GET_MODE (op) == SImode)
3107 /* We can use SEXT.W. */
3108 return COSTS_N_INSNS (1);
3110 /* We need to use a shift left and a shift right. */
3111 return COSTS_N_INSNS (2);
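/* E.g. (a sketch): zero-extending QImode is a single
   andi a0, a0, 0xff; sign-extending HImode without Zbb needs the
   two-insn pair slli a0, a0, 48 ; srai a0, a0, 48 on RV64, hence
   the cost of 2 above.  */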
3114 /* Implement TARGET_RTX_COSTS. */
3116 #define SINGLE_SHIFT_COST 1
3118 static bool
3119 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3120 int *total, bool speed)
3122 /* TODO: We set the RVV instruction cost to 1 by default.
3123 The cost model needs to be analyzed and supported properly in the future. */
3124 if (riscv_v_ext_mode_p (mode))
3126 *total = COSTS_N_INSNS (1);
3127 return true;
3130 bool float_mode_p = FLOAT_MODE_P (mode);
3131 int cost;
3133 switch (GET_CODE (x))
3135 case SET:
3136 /* If we are called for an INSN that's a simple set of a register,
3137 then cost based on the SET_SRC alone. */
3138 if (outer_code == INSN
3139 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3141 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
3142 return true;
3145 /* Otherwise return FALSE indicating we should recurse into both the
3146 SET_DEST and SET_SRC combining the cost of both. */
3147 return false;
3149 case CONST_INT:
3150 /* Trivial constants are checked using OUTER_CODE in case they are
3151 encodable in the insn itself without needing additional insn(s). */
3152 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3154 *total = 0;
3155 return true;
3157 /* Fall through. */
3159 case SYMBOL_REF:
3160 case LABEL_REF:
3161 case CONST_DOUBLE:
3162 /* With TARGET_SUPPORTS_WIDE_INT const int can't be in CONST_DOUBLE
3163 rtl object. Weird recheck due to switch-case fall through above. */
3164 if (GET_CODE (x) == CONST_DOUBLE)
3165 gcc_assert (GET_MODE (x) != VOIDmode);
3166 /* Fall through. */
3168 case CONST:
3169 /* Non-trivial CONST_INT fall through: check whether multiple insns are needed. */
3170 if ((cost = riscv_const_insns (x)) > 0)
3172 /* 1. Hoist will GCSE constants only if the TOTAL returned is non-zero.
3173 2. For constants loaded more than once, the approach so far has
3174 been to duplicate the operation rather than to CSE the constant.
3175 3. TODO: make the cost more accurate, especially if riscv_const_insns
3176 returns > 1. */
3177 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3178 *total = COSTS_N_INSNS (1);
3180 else /* The instruction will be fetched from the constant pool. */
3181 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3182 return true;
3184 case MEM:
3185 /* If the address is legitimate, return the number of
3186 instructions it needs. */
3187 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3189 /* When optimizing for size, make uncompressible 32-bit addresses
3190 more expensive so that compressible 32-bit addresses are
3191 preferred. */
3192 if ((TARGET_RVC || TARGET_ZCA)
3193 && !speed && riscv_mshorten_memrefs && mode == SImode
3194 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3195 cost++;
3197 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3198 return true;
3200 /* Otherwise use the default handling. */
3201 return false;
3203 case IF_THEN_ELSE:
3204 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3205 && reg_or_0_operand (XEXP (x, 1), mode)
3206 && sfb_alu_operand (XEXP (x, 2), mode)
3207 && comparison_operator (XEXP (x, 0), VOIDmode))
3209 /* For predicated conditional-move operations we assume the cost
3210 of a single instruction even though there are actually two. */
3211 *total = COSTS_N_INSNS (1);
3212 return true;
3214 else if (TARGET_ZICOND_LIKE
3215 && outer_code == SET
3216 && ((GET_CODE (XEXP (x, 1)) == REG
3217 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3218 || (GET_CODE (XEXP (x, 2)) == REG
3219 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3220 || (GET_CODE (XEXP (x, 1)) == REG
3221 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3222 || (GET_CODE (XEXP (x, 1)) == REG
3223 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3225 *total = COSTS_N_INSNS (1);
3226 return true;
3228 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3230 if (equality_operator (XEXP (x, 0), mode)
3231 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3233 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3234 return true;
3236 if (ordered_comparison_operator (XEXP (x, 0), mode))
3238 *total = COSTS_N_INSNS (1);
3239 return true;
3242 return false;
3244 case NOT:
3245 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3246 return false;
3248 case AND:
3249 /* slli.uw pattern for zba. */
3250 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3251 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3253 rtx and_rhs = XEXP (x, 1);
3254 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3255 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3256 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3257 && CONST_INT_P (ashift_rhs)
3258 && CONST_INT_P (and_rhs)
3259 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3260 *total = COSTS_N_INSNS (1);
3261 return true;
3263 /* bclri pattern for zbs. */
3264 if (TARGET_ZBS
3265 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3267 *total = COSTS_N_INSNS (1);
3268 return true;
3270 /* bclr pattern for zbs. */
3271 if (TARGET_ZBS
3272 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3273 && GET_CODE (XEXP (x, 0)) == ROTATE
3274 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3275 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3277 *total = COSTS_N_INSNS (1);
3278 return true;
3281 gcc_fallthrough ();
3282 case IOR:
3283 case XOR:
3284 /* orn, andn and xorn pattern for zbb. */
3285 if (TARGET_ZBB
3286 && GET_CODE (XEXP (x, 0)) == NOT)
3288 *total = riscv_binary_cost (x, 1, 2);
3289 return true;
3292 /* bset[i] and binv[i] pattern for zbs. */
3293 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3294 && TARGET_ZBS
3295 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3296 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3297 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3299 *total = COSTS_N_INSNS (1);
3300 return true;
3303 /* Double-word operations use two single-word operations. */
3304 *total = riscv_binary_cost (x, 1, 2);
3305 return false;
3307 case ZERO_EXTRACT:
3308 /* This is an SImode shift. */
3309 if (outer_code == SET
3310 && CONST_INT_P (XEXP (x, 1))
3311 && CONST_INT_P (XEXP (x, 2))
3312 && (INTVAL (XEXP (x, 2)) > 0)
3313 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3315 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3316 return true;
3318 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3319 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3320 && GET_CODE (XEXP (x, 1)) == CONST_INT
3321 && INTVAL (XEXP (x, 1)) == 1)
3323 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3324 return true;
3326 gcc_fallthrough ();
3327 case SIGN_EXTRACT:
3328 if (TARGET_XTHEADBB && outer_code == SET
3329 && CONST_INT_P (XEXP (x, 1))
3330 && CONST_INT_P (XEXP (x, 2)))
3332 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3333 return true;
3335 return false;
3337 case ASHIFT:
3338 /* bset pattern for zbs. */
3339 if (TARGET_ZBS
3340 && CONST_INT_P (XEXP (x, 0))
3341 && INTVAL (XEXP (x, 0)) == 1)
3343 *total = COSTS_N_INSNS (1);
3344 return true;
3346 gcc_fallthrough ();
3347 case ASHIFTRT:
3348 case LSHIFTRT:
3349 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3350 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3351 return false;
3353 case ABS:
3354 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3355 return false;
3357 case LO_SUM:
3358 *total = set_src_cost (XEXP (x, 0), mode, speed);
3359 return true;
3361 case LT:
3362 /* This is an SImode shift. */
3363 if (outer_code == SET && GET_MODE (x) == DImode
3364 && GET_MODE (XEXP (x, 0)) == SImode)
3366 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3367 return true;
3369 /* Fall through. */
3370 case LTU:
3371 case LE:
3372 case LEU:
3373 case GT:
3374 case GTU:
3375 case GE:
3376 case GEU:
3377 case EQ:
3378 case NE:
3379 /* Branch comparisons have VOIDmode, so use the first operand's
3380 mode instead. */
3381 mode = GET_MODE (XEXP (x, 0));
3382 if (float_mode_p)
3383 *total = tune_param->fp_add[mode == DFmode];
3384 else
3385 *total = riscv_binary_cost (x, 1, 3);
3386 return false;
3388 case UNORDERED:
3389 case ORDERED:
3390 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3391 mode = GET_MODE (XEXP (x, 0));
3392 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3393 return false;
3395 case UNEQ:
3396 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3397 mode = GET_MODE (XEXP (x, 0));
3398 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3399 return false;
3401 case LTGT:
3402 /* (FLT(A, A) || FGT(B, B)). */
3403 mode = GET_MODE (XEXP (x, 0));
3404 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3405 return false;
3407 case UNGE:
3408 case UNGT:
3409 case UNLE:
3410 case UNLT:
3411 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3412 mode = GET_MODE (XEXP (x, 0));
3413 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3414 return false;
3416 case MINUS:
3417 if (float_mode_p)
3418 *total = tune_param->fp_add[mode == DFmode];
3419 else
3420 *total = riscv_binary_cost (x, 1, 4);
3421 return false;
3423 case PLUS:
3424 /* add.uw pattern for zba. */
3425 if (TARGET_ZBA
3426 && (TARGET_64BIT && (mode == DImode))
3427 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3428 && register_operand (XEXP (XEXP (x, 0), 0),
3429 GET_MODE (XEXP (XEXP (x, 0), 0)))
3430 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3432 *total = COSTS_N_INSNS (1);
3433 return true;
3435 /* shNadd pattern for zba. */
3436 if (TARGET_ZBA
3437 && ((!TARGET_64BIT && (mode == SImode)) ||
3438 (TARGET_64BIT && (mode == DImode)))
3439 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3440 && register_operand (XEXP (XEXP (x, 0), 0),
3441 GET_MODE (XEXP (XEXP (x, 0), 0)))
3442 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3443 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3445 *total = COSTS_N_INSNS (1);
3446 return true;
3448 /* Before strength-reduction, the shNadd can be expressed as the addition
3449 of a multiplication with a power-of-two. If this case is not handled,
3450 the strength-reduction in expmed.c will calculate an inflated cost. */
3451 if (TARGET_ZBA
3452 && mode == word_mode
3453 && GET_CODE (XEXP (x, 0)) == MULT
3454 && register_operand (XEXP (XEXP (x, 0), 0),
3455 GET_MODE (XEXP (XEXP (x, 0), 0)))
3456 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3457 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3458 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3460 *total = COSTS_N_INSNS (1);
3461 return true;
3463 /* shNadd.uw pattern for zba.
3464 [(set (match_operand:DI 0 "register_operand" "=r")
3465 (plus:DI
3466 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3467 (match_operand:QI 2 "immediate_operand" "I"))
3468 (match_operand 3 "immediate_operand" ""))
3469 (match_operand:DI 4 "register_operand" "r")))]
3470 "TARGET_64BIT && TARGET_ZBA
3471 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3472 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3474 if (TARGET_ZBA
3475 && (TARGET_64BIT && (mode == DImode))
3476 && (GET_CODE (XEXP (x, 0)) == AND)
3477 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
3479 do {
3480 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3481 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3482 if (GET_CODE (and_lhs) != ASHIFT)
3483 break;
3484 if (!CONST_INT_P (and_rhs))
3485 break;
3487 rtx ashift_rhs = XEXP (and_lhs, 1);
3489 if (!CONST_INT_P (ashift_rhs)
3490 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3491 break;
3493 if (CONST_INT_P (and_rhs)
3494 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3496 *total = COSTS_N_INSNS (1);
3497 return true;
3499 } while (false);
3502 if (float_mode_p)
3503 *total = tune_param->fp_add[mode == DFmode];
3504 else
3505 *total = riscv_binary_cost (x, 1, 4);
3506 return false;
3508 case NEG:
3510 rtx op = XEXP (x, 0);
3511 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3513 *total = (tune_param->fp_mul[mode == DFmode]
3514 + set_src_cost (XEXP (op, 0), mode, speed)
3515 + set_src_cost (XEXP (op, 1), mode, speed)
3516 + set_src_cost (XEXP (op, 2), mode, speed));
3517 return true;
3521 if (float_mode_p)
3522 *total = tune_param->fp_add[mode == DFmode];
3523 else
3524 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3525 return false;
3527 case MULT:
3528 if (float_mode_p)
3529 *total = tune_param->fp_mul[mode == DFmode];
3530 else if (!TARGET_MUL)
3531 /* Estimate the cost of a library call. */
3532 *total = COSTS_N_INSNS (speed ? 32 : 6);
3533 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3534 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3535 else if (!speed)
3536 *total = COSTS_N_INSNS (1);
3537 else
3538 *total = tune_param->int_mul[mode == DImode];
3539 return false;
3541 case DIV:
3542 case SQRT:
3543 case MOD:
3544 if (float_mode_p)
3546 *total = tune_param->fp_div[mode == DFmode];
3547 return false;
3549 /* Fall through. */
3551 case UDIV:
3552 case UMOD:
3553 if (!TARGET_DIV)
3554 /* Estimate the cost of a library call. */
3555 *total = COSTS_N_INSNS (speed ? 32 : 6);
3556 else if (speed)
3557 *total = tune_param->int_div[mode == DImode];
3558 else
3559 *total = COSTS_N_INSNS (1);
3560 return false;
3562 case ZERO_EXTEND:
3563 /* This is an SImode shift. */
3564 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3566 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3567 return true;
3569 /* Fall through. */
3570 case SIGN_EXTEND:
3571 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
3572 return false;
3574 case BSWAP:
3575 if (TARGET_ZBB)
3577 /* RISC-V only defines rev8 for XLEN, so we will need an extra
3578 shift-right instruction for smaller modes. */
3579 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
3580 return true;
3582 return false;
3584 case FLOAT:
3585 case UNSIGNED_FLOAT:
3586 case FIX:
3587 case FLOAT_EXTEND:
3588 case FLOAT_TRUNCATE:
3589 *total = tune_param->fp_add[mode == DFmode];
3590 return false;
3592 case FMA:
3593 *total = (tune_param->fp_mul[mode == DFmode]
3594 + set_src_cost (XEXP (x, 0), mode, speed)
3595 + set_src_cost (XEXP (x, 1), mode, speed)
3596 + set_src_cost (XEXP (x, 2), mode, speed));
3597 return true;
3599 case UNSPEC:
3600 if (XINT (x, 1) == UNSPEC_AUIPC)
3602 /* Make AUIPC cheap to avoid spilling its result to the stack. */
3603 *total = 1;
3604 return true;
3606 return false;
3608 default:
3609 return false;
3613 /* Implement TARGET_ADDRESS_COST. */
3615 static int
3616 riscv_address_cost (rtx addr, machine_mode mode,
3617 addr_space_t as ATTRIBUTE_UNUSED,
3618 bool speed ATTRIBUTE_UNUSED)
3620 /* When optimizing for size, make uncompressible 32-bit addresses more
3621 * expensive so that compressible 32-bit addresses are preferred. */
3622 if ((TARGET_RVC || TARGET_ZCA)
3623 && !speed && riscv_mshorten_memrefs && mode == SImode
3624 && !riscv_compressed_lw_address_p (addr))
3625 return riscv_address_insns (addr, mode, false) + 1;
3626 return riscv_address_insns (addr, mode, false);
3629 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
3630 calculation for conditional branches: one unit is considered the cost
3631 of microarchitecture-dependent actual branch execution and therefore
3632 multiplied by BRANCH_COST and any remaining units are considered fixed
3633 branch overhead. Branches on a floating-point condition incur an extra
3634 instruction cost as they will be split into an FCMP operation followed
3635 by a branch on an integer condition. */
3637 static int
3638 riscv_insn_cost (rtx_insn *insn, bool speed)
3640 rtx x = PATTERN (insn);
3641 int cost = pattern_cost (x, speed);
3643 if (JUMP_P (insn))
3645 if (GET_CODE (x) == PARALLEL)
3646 x = XVECEXP (x, 0, 0);
3647 if (GET_CODE (x) == SET
3648 && GET_CODE (SET_DEST (x)) == PC
3649 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
3651 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
3652 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
3653 cost += COSTS_N_INSNS (1);
3656 return cost;
3659 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
3660 but we consider cost units of branch instructions equal to cost units of
3661 other instructions. */
3663 static unsigned int
3664 riscv_max_noce_ifcvt_seq_cost (edge e)
3666 bool predictable_p = predictable_edge_p (e);
3668 if (predictable_p)
3670 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
3671 return param_max_rtl_if_conversion_predictable_cost;
3673 else
3675 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
3676 return param_max_rtl_if_conversion_unpredictable_cost;
3679 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
3682 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
3683 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
3684 by our actual conditional branch cost, observing that our branches test
3685 conditions directly, so there is no preparatory extra condition-set
3686 instruction. */
3688 static bool
3689 riscv_noce_conversion_profitable_p (rtx_insn *seq,
3690 struct noce_if_info *if_info)
3692 struct noce_if_info riscv_if_info = *if_info;
3694 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
3695 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
3697 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
3698 to emit a conditional set operation on DImode output it comes up
3699 with a sequence such as:
3701 (insn 26 0 27 (set (reg:SI 140)
3702 (eq:SI (reg/v:DI 137 [ c ])
3703 (const_int 0 [0]))) 302 {*seq_zero_disi}
3704 (nil))
3705 (insn 27 26 28 (set (reg:DI 139)
3706 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
3707 (nil))
3709 because our `cstore<mode>4' pattern expands to an insn that gives
3710 a SImode output. The output of a conditional set is a 0-or-1 boolean,
3711 so it is valid for input in any scalar integer mode and therefore
3712 combine later folds the zero extend operation into an equivalent
3713 conditional set operation that produces a DImode output, however
3714 this redundant zero extend operation counts towards the cost of
3715 the replacement sequence. Compensate for that by incrementing the
3716 cost of the original sequence as well as the maximum sequence cost
3717 accordingly. Likewise for sign extension. */
3718 rtx last_dest = NULL_RTX;
3719 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
3721 if (!NONDEBUG_INSN_P (insn))
3722 continue;
3724 rtx x = PATTERN (insn);
3725 if (NONJUMP_INSN_P (insn)
3726 && GET_CODE (x) == SET)
3728 rtx src = SET_SRC (x);
3729 enum rtx_code code = GET_CODE (src);
3730 if (last_dest != NULL_RTX
3731 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
3732 && REG_P (XEXP (src, 0))
3733 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
3735 riscv_if_info.original_cost += COSTS_N_INSNS (1);
3736 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
3738 last_dest = NULL_RTX;
3739 rtx dest = SET_DEST (x);
3740 if (COMPARISON_P (src)
3741 && REG_P (dest)
3742 && GET_MODE (dest) == SImode)
3743 last_dest = dest;
3745 else
3746 last_dest = NULL_RTX;
3749 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
3752 /* Return one word of double-word value OP. HIGH_P is true to select the
3753 high part or false to select the low part. */
3756 riscv_subword (rtx op, bool high_p)
3758 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
3759 machine_mode mode = GET_MODE (op);
3761 if (mode == VOIDmode)
3762 mode = TARGET_64BIT ? TImode : DImode;
3764 if (MEM_P (op))
3765 return adjust_address (op, word_mode, byte);
3767 if (REG_P (op))
3768 gcc_assert (!FP_REG_RTX_P (op));
3770 return simplify_gen_subreg (word_mode, op, mode, byte);
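/* E.g. (a sketch): on little-endian RV32, the high word of a DImode
   register pair is the word_mode subreg at byte offset 4, and for a
   MEM operand it is the address adjusted by 4 bytes.  */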
3773 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
3775 bool
3776 riscv_split_64bit_move_p (rtx dest, rtx src)
3778 if (TARGET_64BIT)
3779 return false;
3781 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
3782 if (satisfies_constraint_zfli (src))
3783 return false;
3785 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
3786 of zeroing an FPR with FCVT.D.W. */
3787 if (TARGET_DOUBLE_FLOAT
3788 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
3789 || (FP_REG_RTX_P (dest) && MEM_P (src))
3790 || (FP_REG_RTX_P (src) && MEM_P (dest))
3791 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
3792 return false;
3794 return true;
3797 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
3798 this function handles 64-bit moves for which riscv_split_64bit_move_p
3799 holds. For 64-bit targets, this function handles 128-bit moves. */
3801 void
3802 riscv_split_doubleword_move (rtx dest, rtx src)
3804 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
3805 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
3807 if (FP_REG_RTX_P (dest))
3809 rtx low_src = riscv_subword (src, false);
3810 rtx high_src = riscv_subword (src, true);
3812 if (TARGET_ZFA)
3813 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
3814 else
3815 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
3816 return;
3818 if (FP_REG_RTX_P (src))
3820 rtx low_dest = riscv_subword (dest, false);
3821 rtx high_dest = riscv_subword (dest, true);
3823 if (TARGET_ZFA)
3825 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
3826 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
3827 return;
3829 else
3831 emit_insn (gen_th_fmv_x_w (low_dest, src));
3832 emit_insn (gen_th_fmv_x_hw (high_dest, src));
3834 return;
3838 /* The operation can be split into two normal moves. Decide in
3839 which order to do them. */
3840 rtx low_dest = riscv_subword (dest, false);
3841 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
3843 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
3844 riscv_emit_move (low_dest, riscv_subword (src, false));
3846 else
3848 riscv_emit_move (low_dest, riscv_subword (src, false));
3849 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
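/* Illustrative sketch only (register choices are hypothetical): on rv32,
   a DImode load split by the code above becomes two word moves, with the
   high word moved first whenever the low destination overlaps the source
   address, e.g.:

	lw	a1,4(a0)
	lw	a0,0(a0)

   Moving the low word first here would clobber a0 before the second load
   could use it.  */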
3853 /* Return the appropriate instructions to move SRC into DEST. Assume
3854 that SRC is operand 1 and DEST is operand 0. */
3856 const char *
3857 riscv_output_move (rtx dest, rtx src)
3859 enum rtx_code dest_code, src_code;
3860 machine_mode mode;
3861 bool dbl_p;
3862 unsigned width;
3863 const char *insn;
3865 if ((insn = th_output_move (dest, src)))
3866 return insn;
3868 dest_code = GET_CODE (dest);
3869 src_code = GET_CODE (src);
3870 mode = GET_MODE (dest);
3871 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
3872 width = GET_MODE_SIZE (mode).to_constant ();
3874 if (dbl_p && riscv_split_64bit_move_p (dest, src))
3875 return "#";
3877 if (dest_code == REG && GP_REG_P (REGNO (dest)))
3879 if (src_code == REG && FP_REG_P (REGNO (src)))
3880 switch (width)
3882 case 2:
3883 if (TARGET_ZFHMIN)
3884 return "fmv.x.h\t%0,%1";
3885 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
3886 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
3887 case 4:
3888 return "fmv.x.s\t%0,%1";
3889 case 8:
3890 return "fmv.x.d\t%0,%1";
3893 if (src_code == MEM)
3894 switch (width)
3896 case 1: return "lbu\t%0,%1";
3897 case 2: return "lhu\t%0,%1";
3898 case 4: return "lw\t%0,%1";
3899 case 8: return "ld\t%0,%1";
3902 if (src_code == CONST_INT)
3904 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
3905 return "li\t%0,%1";
3907 if (TARGET_ZBS
3908 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
3909 return "bseti\t%0,zero,%S1";
3911 /* Should never reach here. */
3912 abort ();
3915 if (src_code == HIGH)
3916 return "lui\t%0,%h1";
3918 if (symbolic_operand (src, VOIDmode))
3919 switch (riscv_classify_symbolic_expression (src))
3921 case SYMBOL_GOT_DISP: return "la\t%0,%1";
3922 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
3923 case SYMBOL_PCREL: return "lla\t%0,%1";
3924 default: gcc_unreachable ();
3927 if ((src_code == REG && GP_REG_P (REGNO (src)))
3928 || (src == CONST0_RTX (mode)))
3930 if (dest_code == REG)
3932 if (GP_REG_P (REGNO (dest)))
3933 return "mv\t%0,%z1";
3935 if (FP_REG_P (REGNO (dest)))
3936 switch (width)
3938 case 2:
3939 if (TARGET_ZFHMIN)
3940 return "fmv.h.x\t%0,%z1";
3941 /* The high 16 bits should be all-1; otherwise HW will treat the
3942 value as an n-bit canonical NaN, but that doesn't matter for soft-float. */
3943 return "fmv.s.x\t%0,%1";
3944 case 4:
3945 return "fmv.s.x\t%0,%z1";
3946 case 8:
3947 if (TARGET_64BIT)
3948 return "fmv.d.x\t%0,%z1";
3949 /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w. */
3950 gcc_assert (src == CONST0_RTX (mode));
3951 return "fcvt.d.w\t%0,x0";
3954 if (dest_code == MEM)
3955 switch (width)
3957 case 1: return "sb\t%z1,%0";
3958 case 2: return "sh\t%z1,%0";
3959 case 4: return "sw\t%z1,%0";
3960 case 8: return "sd\t%z1,%0";
3963 if (src_code == REG && FP_REG_P (REGNO (src)))
3965 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3966 switch (width)
3968 case 2:
3969 if (TARGET_ZFH)
3970 return "fmv.h\t%0,%1";
3971 return "fmv.s\t%0,%1";
3972 case 4:
3973 return "fmv.s\t%0,%1";
3974 case 8:
3975 return "fmv.d\t%0,%1";
3978 if (dest_code == MEM)
3979 switch (width)
3981 case 2:
3982 return "fsh\t%1,%0";
3983 case 4:
3984 return "fsw\t%1,%0";
3985 case 8:
3986 return "fsd\t%1,%0";
3989 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3991 if (src_code == MEM)
3992 switch (width)
3994 case 2:
3995 return "flh\t%0,%1";
3996 case 4:
3997 return "flw\t%0,%1";
3998 case 8:
3999 return "fld\t%0,%1";
4002 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
4003 switch (width)
4005 case 2:
4006 return "fli.h\t%0,%1";
4007 case 4:
4008 return "fli.s\t%0,%1";
4009 case 8:
4010 return "fli.d\t%0,%1";
4013 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
4015 /* We only want a single full vector register VLEN read after reload. */
4016 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
4017 return "csrr\t%0,vlenb";
4019 gcc_unreachable ();
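/* For illustration only (operands assumed): the CONST_INT arm above emits
   "li a0,2047" for a SMALL_OPERAND constant, while with TARGET_ZBS a
   single-bit mask such as (HOST_WIDE_INT_1U << 44) becomes
   "bseti a0,zero,44" instead of a multi-instruction constant build.  */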
4022 const char *
4023 riscv_output_return ()
4025 if (cfun->machine->naked_p)
4026 return "";
4028 return "ret";
4032 /* Return true if CMP1 is a suitable second operand for integer ordering
4033 test CODE. See also the *sCC patterns in riscv.md. */
4035 static bool
4036 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4038 switch (code)
4040 case GT:
4041 case GTU:
4042 return reg_or_0_operand (cmp1, VOIDmode);
4044 case GE:
4045 case GEU:
4046 return cmp1 == const1_rtx;
4048 case LT:
4049 case LTU:
4050 return arith_operand (cmp1, VOIDmode);
4052 case LE:
4053 return sle_operand (cmp1, VOIDmode);
4055 case LEU:
4056 return sleu_operand (cmp1, VOIDmode);
4058 default:
4059 gcc_unreachable ();
4063 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4064 integer ordering test *CODE, or if an equivalent combination can
4065 be formed by adjusting *CODE and *CMP1. When returning true, update
4066 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4067 them alone. */
4069 static bool
4070 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4071 machine_mode mode)
4073 HOST_WIDE_INT plus_one;
4075 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4076 return true;
4078 if (CONST_INT_P (*cmp1))
4079 switch (*code)
4081 case LE:
4082 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4083 if (INTVAL (*cmp1) < plus_one)
4085 *code = LT;
4086 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4087 return true;
4089 break;
4091 case LEU:
4092 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4093 if (plus_one != 0)
4095 *code = LTU;
4096 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4097 return true;
4099 break;
4101 default:
4102 break;
4104 return false;
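/* Worked example (illustrative): "x <= 2047" cannot use slti directly,
   because the equivalent immediate 2048 is out of the sle_operand range;
   the LE case above rewrites it as "x < 2048" with 2048 forced into a
   register.  For LEU against an all-ones constant, PLUS_ONE wraps to
   zero and the function returns false instead.  */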
4107 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4108 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4109 is nonnull, it's OK to set TARGET to the inverse of the result and
4110 flip *INVERT_PTR instead. */
4112 static void
4113 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4114 rtx target, rtx cmp0, rtx cmp1)
4116 machine_mode mode;
4118 /* First see if there is a RISCV instruction that can do this operation.
4119 If not, try doing the same for the inverse operation. If that also
4120 fails, force CMP1 into a register and try again. */
4121 mode = GET_MODE (cmp0);
4122 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4123 riscv_emit_binary (code, target, cmp0, cmp1);
4124 else
4126 enum rtx_code inv_code = reverse_condition (code);
4127 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4129 cmp1 = force_reg (mode, cmp1);
4130 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4132 else if (invert_ptr == 0)
4134 rtx inv_target = riscv_force_binary (word_mode,
4135 inv_code, cmp0, cmp1);
4136 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4138 else
4140 *invert_ptr = !*invert_ptr;
4141 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4146 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4147 The register will have the same mode as CMP0. */
4149 static rtx
4150 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4152 if (cmp1 == const0_rtx)
4153 return cmp0;
4155 return expand_binop (GET_MODE (cmp0), sub_optab,
4156 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
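/* Sketch of the intended use (hypothetical registers): an equality test
   "a0 == a1" becomes "sub t0,a0,a1" followed by a test of t0 against
   zero (e.g. seqz or beqz), since the difference is zero iff the two
   operands are equal.  */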
4159 /* Helper function for riscv_extend_comparands to sign-extend OP.
4160 However, if OP is an SImode subreg promoted from an inner DImode, such as
4161 (subreg/s/v:SI (reg/v:DI) 0)
4162 just peel off the SUBREG to get DImode, avoiding an extraneous extension. */
4164 static void
4165 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4167 if (GET_CODE (*op) == SUBREG
4168 && SUBREG_PROMOTED_VAR_P (*op)
4169 && SUBREG_PROMOTED_SIGNED_P (*op)
4170 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4171 == GET_MODE_SIZE (word_mode)))
4172 *op = XEXP (*op, 0);
4173 else
4174 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4177 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4179 static void
4180 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4182 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4183 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4185 /* It is more profitable to zero-extend QImode values. But not if the
4186 first operand has already been sign-extended, and the second one
4187 is a constant or has already been sign-extended too. */
4188 if (unsigned_condition (code) == code
4189 && (GET_MODE (*op0) == QImode
4190 && ! (GET_CODE (*op0) == SUBREG
4191 && SUBREG_PROMOTED_VAR_P (*op0)
4192 && SUBREG_PROMOTED_SIGNED_P (*op0)
4193 && (CONST_INT_P (*op1)
4194 || (GET_CODE (*op1) == SUBREG
4195 && SUBREG_PROMOTED_VAR_P (*op1)
4196 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4198 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4199 if (CONST_INT_P (*op1))
4200 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4201 else
4202 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4204 else
4206 riscv_sign_extend_if_not_subreg_prom (op0);
4208 if (*op1 != const0_rtx)
4209 riscv_sign_extend_if_not_subreg_prom (op1);
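/* Illustrative example (assumed operands): an unsigned QImode test such
   as "(uint8_t) x < 10" is zero-extended above, typically via
   "andi a0,a0,0xff", whereas signed sub-word operands get a sign
   extension unless the promoted-subreg check lets us reuse the
   already-extended word_mode register.  */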
4214 /* Convert a comparison into something that can be used in a branch or
4215 conditional move. On entry, *OP0 and *OP1 are the values being
4216 compared and *CODE is the code used to compare them.
4218 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4219 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4220 emitted. */
4222 static void
4223 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4224 bool need_eq_ne_p = false)
4226 if (need_eq_ne_p)
4228 rtx cmp_op0 = *op0;
4229 rtx cmp_op1 = *op1;
4230 if (*code == EQ || *code == NE)
4232 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4233 *op1 = const0_rtx;
4234 return;
4236 gcc_unreachable ();
4239 if (splittable_const_int_operand (*op1, VOIDmode))
4241 HOST_WIDE_INT rhs = INTVAL (*op1);
4243 if (*code == EQ || *code == NE)
4245 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4246 if (SMALL_OPERAND (-rhs))
4248 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4249 GEN_INT (-rhs));
4250 *op1 = const0_rtx;
4253 else
4255 static const enum rtx_code mag_comparisons[][2] = {
4256 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4259 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4260 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4262 HOST_WIDE_INT new_rhs;
4263 bool increment = *code == mag_comparisons[i][0];
4264 bool decrement = *code == mag_comparisons[i][1];
4265 if (!increment && !decrement)
4266 continue;
4268 new_rhs = rhs + (increment ? 1 : -1);
4269 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4270 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4271 && (rhs < 0) == (new_rhs < 0))
4273 *op1 = GEN_INT (new_rhs);
4274 *code = mag_comparisons[i][increment];
4276 break;
4281 riscv_extend_comparands (*code, op0, op1);
4283 *op0 = force_reg (word_mode, *op0);
4284 if (*op1 != const0_rtx)
4285 *op1 = force_reg (word_mode, *op1);
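/* End-to-end sketch (register names assumed): "a0 == 2048" has no 12-bit
   immediate form, so the code above emits "addi t0,a0,-2048" and then
   compares t0 against zero (e.g. with beqz); likewise "a0 <= 0xFFF" is
   rewritten as "a0 < 0x1000", since 0x1000 is a single-LUI constant
   while 0xFFF would need two instructions to materialize.  */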
4288 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4290 static void
4291 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4292 bool *invert_ptr = nullptr)
4294 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4295 enum rtx_code fp_code = *code;
4296 *code = NE;
4298 switch (fp_code)
4300 case UNORDERED:
4301 *code = EQ;
4302 /* Fall through. */
4304 case ORDERED:
4305 /* a == a && b == b */
4306 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4307 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4308 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4309 *op1 = const0_rtx;
4310 break;
4312 case UNEQ:
4313 /* ordered(a, b) > (a == b) */
4314 *code = EQ;
4315 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4316 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4317 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4318 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4319 break;
4321 #define UNORDERED_COMPARISON(CODE, CMP) \
4322 case CODE: \
4323 *code = EQ; \
4324 *op0 = gen_reg_rtx (word_mode); \
4325 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4326 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4327 else if (GET_MODE (cmp_op0) == SFmode) \
4328 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4329 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4330 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4331 else if (GET_MODE (cmp_op0) == DFmode) \
4332 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4333 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4334 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4335 else if (GET_MODE (cmp_op0) == HFmode) \
4336 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4337 else \
4338 gcc_unreachable (); \
4339 *op1 = const0_rtx; \
4340 break;
4342 case UNLT:
4343 std::swap (cmp_op0, cmp_op1);
4344 gcc_fallthrough ();
4346 UNORDERED_COMPARISON(UNGT, le)
4348 case UNLE:
4349 std::swap (cmp_op0, cmp_op1);
4350 gcc_fallthrough ();
4352 UNORDERED_COMPARISON(UNGE, lt)
4353 #undef UNORDERED_COMPARISON
4355 case NE:
4356 fp_code = EQ;
4357 if (invert_ptr != nullptr)
4358 *invert_ptr = !*invert_ptr;
4359 else
4361 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4362 cmp_op1 = const0_rtx;
4364 gcc_fallthrough ();
4366 case EQ:
4367 case LE:
4368 case LT:
4369 case GE:
4370 case GT:
4371 /* We have instructions for these cases. */
4372 *code = fp_code;
4373 *op0 = cmp_op0;
4374 *op1 = cmp_op1;
4375 break;
4377 case LTGT:
4378 /* (a < b) | (a > b) */
4379 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4380 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4381 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4382 *op1 = const0_rtx;
4383 break;
4385 default:
4386 gcc_unreachable ();
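/* Illustrative expansion (assumed registers): an LTGT test on SFmode
   operands becomes

	flt.s	t0,fa0,fa1
	flt.s	t1,fa1,fa0
	or	t0,t0,t1

   after which the caller tests t0 != 0, matching the (a < b) | (a > b)
   decomposition above.  */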
4390 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4392 void
4393 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4395 riscv_extend_comparands (code, &op0, &op1);
4396 op0 = force_reg (word_mode, op0);
4398 if (code == EQ || code == NE)
4400 rtx zie = riscv_zero_if_equal (op0, op1);
4401 riscv_emit_binary (code, target, zie, const0_rtx);
4403 else
4404 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4407 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4409 void
4410 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4411 bool *invert_ptr)
4413 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4415 machine_mode mode = GET_MODE (target);
4416 if (mode != word_mode)
4418 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4419 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4421 else
4422 riscv_emit_binary (code, target, op0, op1);
4425 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4427 void
4428 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4430 if (FLOAT_MODE_P (GET_MODE (op1)))
4431 riscv_emit_float_compare (&code, &op0, &op1);
4432 else
4433 riscv_emit_int_compare (&code, &op0, &op1);
4435 if (FLOAT_MODE_P (GET_MODE (op0)))
4437 op0 = riscv_force_binary (word_mode, code, op0, op1);
4438 op1 = const0_rtx;
4439 code = NE;
4442 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4443 emit_jump_insn (gen_condjump (condition, label));
4446 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
4447 Return 0 if expansion failed. */
4449 bool
4450 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4452 machine_mode mode = GET_MODE (dest);
4453 rtx_code code = GET_CODE (op);
4454 rtx op0 = XEXP (op, 0);
4455 rtx op1 = XEXP (op, 1);
4457 if (((TARGET_ZICOND_LIKE
4458 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4459 && (GET_MODE_CLASS (mode) == MODE_INT))
4460 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4462 machine_mode mode0 = GET_MODE (op0);
4463 machine_mode mode1 = GET_MODE (op1);
4465 /* An integer comparison must be comparing WORD_MODE objects. We
4466 must enforce that so that we don't strip away a sign_extension
4467 thinking it is unnecessary. We might consider using
4468 riscv_extend_comparands if they are not already properly extended. */
4469 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4470 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4471 return false;
4473 /* In the fallback generic case use MODE rather than WORD_MODE for
4474 the output of the SCC instruction, to match the mode of the NEG
4475 operation below. The output of SCC is 0 or 1 boolean, so it is
4476 valid for input in any scalar integer mode. */
4477 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4478 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4479 ? word_mode : mode);
4480 bool invert = false;
4482 /* Canonicalize the comparison. It must be an equality comparison
4483 of integer operands, or with SFB it can be any comparison of
4484 integer operands. If it isn't, then emit an SCC instruction
4485 so that we can then use an equality comparison against zero. */
4486 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4487 || !INTEGRAL_MODE_P (mode0))
4489 bool *invert_ptr = nullptr;
4491 /* If riscv_expand_int_scc inverts the condition, then it will
4492 flip the value of INVERT. We need to know where so that
4493 we can adjust it for our needs. */
4494 if (code == LE || code == LEU || code == GE || code == GEU)
4495 invert_ptr = &invert;
4497 /* Emit an SCC-like instruction into a temporary so that we can
4498 use an EQ/NE comparison. We can support both FP and integer
4499 conditional moves. */
4500 if (INTEGRAL_MODE_P (mode0))
4501 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4502 else if (FLOAT_MODE_P (mode0)
4503 && fp_scc_comparison (op, GET_MODE (op)))
4504 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4505 else
4506 return false;
4508 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4510 /* We've generated a new comparison. Update the local variables. */
4511 code = GET_CODE (op);
4512 op0 = XEXP (op, 0);
4513 op1 = XEXP (op, 1);
4515 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4516 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4518 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4520 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4521 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4523 /* The expander is a bit loose in its specification of the true
4524 arm of the conditional move. That allows us to support more
4525 cases for extensions which are more general than SFB. But it
4526 does mean we need to force CONS into a register at this point. */
4527 cons = force_reg (mode, cons);
4528 /* With XTheadCondMov we need to force ALT into a register too. */
4529 alt = force_reg (mode, alt);
4530 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4531 cons, alt)));
4532 return true;
4534 else if (!TARGET_ZICOND_LIKE)
4536 if (invert)
4537 std::swap (cons, alt);
4539 rtx reg1 = gen_reg_rtx (mode);
4540 rtx reg2 = gen_reg_rtx (mode);
4541 rtx reg3 = gen_reg_rtx (mode);
4542 rtx reg4 = gen_reg_rtx (mode);
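/* Branchless select sketch: TMP holds a 0/1 truth value, so -TMP is
   either all ones or all zeros, and
   DEST = (-TMP & CONS) | (~(-TMP) & ALT)
   picks CONS when the condition holds and ALT otherwise.  */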
4544 riscv_emit_unary (NEG, reg1, tmp);
4545 riscv_emit_binary (AND, reg2, reg1, cons);
4546 riscv_emit_unary (NOT, reg3, reg1);
4547 riscv_emit_binary (AND, reg4, reg3, alt);
4548 riscv_emit_binary (IOR, dest, reg2, reg4);
4549 return true;
4551 /* 0, reg or 0, imm */
4552 else if (cons == CONST0_RTX (mode)
4553 && (REG_P (alt)
4554 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
4556 riscv_emit_int_compare (&code, &op0, &op1, true);
4557 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4558 alt = force_reg (mode, alt);
4559 emit_insn (gen_rtx_SET (dest,
4560 gen_rtx_IF_THEN_ELSE (mode, cond,
4561 cons, alt)));
4562 return true;
4564 /* imm, imm */
4565 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
4566 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4568 riscv_emit_int_compare (&code, &op0, &op1, true);
4569 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4570 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
4571 alt = force_reg (mode, gen_int_mode (t, mode));
4572 emit_insn (gen_rtx_SET (dest,
4573 gen_rtx_IF_THEN_ELSE (mode, cond,
4574 CONST0_RTX (mode),
4575 alt)));
4576 /* CONS might not fit into a signed 12 bit immediate suitable
4577 for an addi instruction. If that's the case, force it
4578 into a register. */
4579 if (!SMALL_OPERAND (INTVAL (cons)))
4580 cons = force_reg (mode, cons);
4581 riscv_emit_binary (PLUS, dest, dest, cons);
4582 return true;
4584 /* imm, reg */
4585 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
4587 /* Optimize for register value of 0. */
4588 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
4590 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4591 cons = force_reg (mode, cons);
4592 emit_insn (gen_rtx_SET (dest,
4593 gen_rtx_IF_THEN_ELSE (mode, cond,
4594 cons, alt)));
4595 return true;
4598 riscv_emit_int_compare (&code, &op0, &op1, true);
4599 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4601 rtx temp1 = gen_reg_rtx (mode);
4602 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
4604 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
4605 suitable for an addi instruction. If that's the case, force it
4606 into a register. */
4607 if (!SMALL_OPERAND (INTVAL (temp2)))
4608 temp2 = force_reg (mode, temp2);
4609 if (!SMALL_OPERAND (INTVAL (cons)))
4610 cons = force_reg (mode, cons);
4612 riscv_emit_binary (PLUS, temp1, alt, temp2);
4613 emit_insn (gen_rtx_SET (dest,
4614 gen_rtx_IF_THEN_ELSE (mode, cond,
4615 CONST0_RTX (mode),
4616 temp1)));
4617 riscv_emit_binary (PLUS, dest, dest, cons);
4618 return true;
4620 /* reg, 0 or imm, 0 */
4621 else if ((REG_P (cons)
4622 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
4623 && alt == CONST0_RTX (mode))
4625 riscv_emit_int_compare (&code, &op0, &op1, true);
4626 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4627 cons = force_reg (mode, cons);
4628 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4629 cons, alt)));
4630 return true;
4632 /* reg, imm */
4633 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4635 /* Optimize for register value of 0. */
4636 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
4638 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4639 alt = force_reg (mode, alt);
4640 emit_insn (gen_rtx_SET (dest,
4641 gen_rtx_IF_THEN_ELSE (mode, cond,
4642 cons, alt)));
4643 return true;
4646 riscv_emit_int_compare (&code, &op0, &op1, true);
4647 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4649 rtx temp1 = gen_reg_rtx (mode);
4650 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
4652 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
4653 suitable for an addi instruction. If that's the case, force it
4654 into a register. */
4655 if (!SMALL_OPERAND (INTVAL (temp2)))
4656 temp2 = force_reg (mode, temp2);
4657 if (!SMALL_OPERAND (INTVAL (alt)))
4658 alt = force_reg (mode, alt);
4660 riscv_emit_binary (PLUS, temp1, cons, temp2);
4661 emit_insn (gen_rtx_SET (dest,
4662 gen_rtx_IF_THEN_ELSE (mode, cond,
4663 temp1,
4664 CONST0_RTX (mode))));
4665 riscv_emit_binary (PLUS, dest, dest, alt);
4666 return true;
4668 /* reg, reg */
4669 else if (REG_P (cons) && REG_P (alt))
4671 if ((code == EQ && rtx_equal_p (cons, op0))
4672 || (code == NE && rtx_equal_p (alt, op0)))
4674 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4675 alt = force_reg (mode, alt);
4676 emit_insn (gen_rtx_SET (dest,
4677 gen_rtx_IF_THEN_ELSE (mode, cond,
4678 cons, alt)));
4679 return true;
4682 rtx reg1 = gen_reg_rtx (mode);
4683 rtx reg2 = gen_reg_rtx (mode);
4684 riscv_emit_int_compare (&code, &op0, &op1, true);
4685 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4686 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
4687 GET_MODE (op0), op0, op1);
4688 emit_insn (gen_rtx_SET (reg2,
4689 gen_rtx_IF_THEN_ELSE (mode, cond2,
4690 CONST0_RTX (mode),
4691 cons)));
4692 emit_insn (gen_rtx_SET (reg1,
4693 gen_rtx_IF_THEN_ELSE (mode, cond1,
4694 CONST0_RTX (mode),
4695 alt)));
4696 riscv_emit_binary (IOR, dest, reg1, reg2);
4697 return true;
4701 return false;
4704 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
4705 least PARM_BOUNDARY bits of alignment, but will be given anything up
4706 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
4708 static unsigned int
4709 riscv_function_arg_boundary (machine_mode mode, const_tree type)
4711 unsigned int alignment;
4713 /* Use natural alignment if the type is not aggregate data. */
4714 if (type && !AGGREGATE_TYPE_P (type))
4715 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
4716 else
4717 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
4719 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
4722 /* If MODE represents an argument that can be passed or returned in
4723 floating-point registers, return the number of registers, else 0. */
4725 static unsigned
4726 riscv_pass_mode_in_fpr_p (machine_mode mode)
4728 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
4730 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4731 return 1;
4733 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4734 return 2;
4737 return 0;
4740 typedef struct {
4741 const_tree type;
4742 HOST_WIDE_INT offset;
4743 } riscv_aggregate_field;
4745 /* Identify subfields of aggregates that are candidates for passing in
4746 floating-point registers. */
4748 static int
4749 riscv_flatten_aggregate_field (const_tree type,
4750 riscv_aggregate_field fields[2],
4751 int n, HOST_WIDE_INT offset,
4752 bool ignore_zero_width_bit_field_p)
4754 switch (TREE_CODE (type))
4756 case RECORD_TYPE:
4757 /* Can't handle incomplete types nor sizes that are not fixed. */
4758 if (!COMPLETE_TYPE_P (type)
4759 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4760 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
4761 return -1;
4763 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
4764 if (TREE_CODE (f) == FIELD_DECL)
4766 if (!TYPE_P (TREE_TYPE (f)))
4767 return -1;
4769 /* The C++ front end strips zero-length bit-fields from structs.
4770 So we need to ignore them in the C front end to make C code
4771 compatible with C++ code. */
4772 if (ignore_zero_width_bit_field_p
4773 && DECL_BIT_FIELD (f)
4774 && (DECL_SIZE (f) == NULL_TREE
4775 || integer_zerop (DECL_SIZE (f))))
4777 else
4779 HOST_WIDE_INT pos = offset + int_byte_position (f);
4780 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
4781 fields, n, pos,
4782 ignore_zero_width_bit_field_p);
4784 if (n < 0)
4785 return -1;
4787 return n;
4789 case ARRAY_TYPE:
4791 HOST_WIDE_INT n_elts;
4792 riscv_aggregate_field subfields[2];
4793 tree index = TYPE_DOMAIN (type);
4794 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
4795 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
4796 subfields, 0, offset,
4797 ignore_zero_width_bit_field_p);
4799 /* Can't handle incomplete types nor sizes that are not fixed. */
4800 if (n_subfields <= 0
4801 || !COMPLETE_TYPE_P (type)
4802 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4803 || !index
4804 || !TYPE_MAX_VALUE (index)
4805 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4806 || !TYPE_MIN_VALUE (index)
4807 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4808 || !tree_fits_uhwi_p (elt_size))
4809 return -1;
4811 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4812 - tree_to_uhwi (TYPE_MIN_VALUE (index));
4813 gcc_assert (n_elts >= 0);
4815 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
4816 for (int j = 0; j < n_subfields; j++)
4818 if (n >= 2)
4819 return -1;
4821 fields[n] = subfields[j];
4822 fields[n++].offset += i * tree_to_uhwi (elt_size);
4825 return n;
4828 case COMPLEX_TYPE:
4830 /* A complex type needs to consume 2 fields, so N must be 0. */
4831 if (n != 0)
4832 return -1;
4834 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
4836 if (elt_size <= UNITS_PER_FP_ARG)
4838 fields[0].type = TREE_TYPE (type);
4839 fields[0].offset = offset;
4840 fields[1].type = TREE_TYPE (type);
4841 fields[1].offset = offset + elt_size;
4843 return 2;
4846 return -1;
4849 default:
4850 if (n < 2
4851 && ((SCALAR_FLOAT_TYPE_P (type)
4852 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
4853 || (INTEGRAL_TYPE_P (type)
4854 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
4856 fields[n].type = type;
4857 fields[n].offset = offset;
4858 return n + 1;
4860 else
4861 return -1;
4865 /* Identify candidate aggregates for passing in floating-point registers.
4866 Candidates have at most two fields after flattening. */
4868 static int
4869 riscv_flatten_aggregate_argument (const_tree type,
4870 riscv_aggregate_field fields[2],
4871 bool ignore_zero_width_bit_field_p)
4873 if (!type || TREE_CODE (type) != RECORD_TYPE)
4874 return -1;
4876 return riscv_flatten_aggregate_field (type, fields, 0, 0,
4877 ignore_zero_width_bit_field_p);
4880 /* See whether TYPE is a record whose fields should be returned in one or
4881 two floating-point registers. If so, populate FIELDS accordingly. */
4883 static unsigned
4884 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
4885 riscv_aggregate_field fields[2])
4887 static int warned = 0;
4889 /* This is the old ABI, which differs for C++ and C. */
4890 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4891 for (int i = 0; i < n_old; i++)
4892 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4894 n_old = -1;
4895 break;
4898 /* This is the new ABI, which is the same for C++ and C. */
4899 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4900 for (int i = 0; i < n_new; i++)
4901 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4903 n_new = -1;
4904 break;
4907 if ((n_old != n_new) && (warned == 0))
4909 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4910 "bit-fields changed in GCC 10");
4911 warned = 1;
4914 return n_new > 0 ? n_new : 0;
4917 /* See whether TYPE is a record whose fields should be returned in one
4918 floating-point register and one integer register. If so, populate
4919 FIELDS accordingly. */
4921 static bool
4922 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
4923 riscv_aggregate_field fields[2])
4925 static int warned = 0;
4927 /* This is the old ABI, which differs for C++ and C. */
4928 unsigned num_int_old = 0, num_float_old = 0;
4929 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4930 for (int i = 0; i < n_old; i++)
4932 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
4933 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
4936 /* This is the new ABI, which is the same for C++ and C. */
4937 unsigned num_int_new = 0, num_float_new = 0;
4938 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4939 for (int i = 0; i < n_new; i++)
4941 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
4942 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
4945 if (((num_int_old == 1 && num_float_old == 1
4946 && (num_int_old != num_int_new || num_float_old != num_float_new))
4947 || (num_int_new == 1 && num_float_new == 1
4948 && (num_int_old != num_int_new || num_float_old != num_float_new)))
4949 && (warned == 0))
4951 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4952 "bit-fields changed in GCC 10");
4953 warned = 1;
4956 return num_int_new == 1 && num_float_new == 1;
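/* Example of the case handled above (illustrative, assuming the LP64D
   ABI): "struct s { float f; int i; };" flattens to one SFmode and one
   SImode field, so an argument of type s is passed with F in an FPR and
   I in a GPR rather than packed into a single GPR.  */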
4959 /* Return the representation of an argument passed or returned in an FPR
4960 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
4961 two modes may be different for structures like:
4963 struct __attribute__((packed)) foo { float f; }
4965 where the SFmode value "f" is passed in REGNO but the struct itself
4966 has mode BLKmode. */
4968 static rtx
4969 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
4970 machine_mode value_mode,
4971 HOST_WIDE_INT offset)
4973 rtx x = gen_rtx_REG (value_mode, regno);
4975 if (type_mode != value_mode)
4977 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
4978 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
4980 return x;
4983 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
4984 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
4985 byte offset for the first value, likewise MODE2 and OFFSET2 for the
4986 second value. */
4988 static rtx
4989 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
4990 machine_mode mode1, HOST_WIDE_INT offset1,
4991 unsigned regno2, machine_mode mode2,
4992 HOST_WIDE_INT offset2)
4994 return gen_rtx_PARALLEL
4995 (mode,
4996 gen_rtvec (2,
4997 gen_rtx_EXPR_LIST (VOIDmode,
4998 gen_rtx_REG (mode1, regno1),
4999 GEN_INT (offset1)),
5000 gen_rtx_EXPR_LIST (VOIDmode,
5001 gen_rtx_REG (mode2, regno2),
5002 GEN_INT (offset2))));
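/* For example (sketch only), a "struct { float a; float b; }" returned
   in an FPR pair would be described as:

     (parallel [(expr_list (reg:SF fa0) (const_int 0))
		(expr_list (reg:SF fa1) (const_int 4))])  */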
5005 static rtx
5006 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
5007 unsigned gpr_base)
5009 gcc_assert (riscv_v_ext_vls_mode_p (mode));
5011 unsigned count = 0;
5012 unsigned regnum = 0;
5013 machine_mode gpr_mode = VOIDmode;
5014 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
5015 unsigned gpr_size = GET_MODE_SIZE (Xmode);
5017 if (IN_RANGE (vls_size, 0, gpr_size * 2))
5019 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
5021 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5023 regnum = gpr_base + info->gpr_offset;
5024 info->num_gprs = count;
5025 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5029 if (!regnum)
5030 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5032 gcc_assert (gpr_mode != VOIDmode);
5034 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5035 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5037 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
5040 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5041 for a call to a function whose data type is FNTYPE.
5042 For a library call, FNTYPE is 0. */
5044 void
5045 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5047 memset (cum, 0, sizeof (*cum));
5049 if (fntype)
5050 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5051 else
5052 cum->variant_cc = RISCV_CC_BASE;
5055 /* Return true if TYPE is a vector type that can be passed in vector registers. */
5058 static bool
5059 riscv_vector_type_p (const_tree type)
5061 /* Currently, only the builtin scalable vector type is allowed; in the future,
5062 more vector types may be allowed, such as the GNU vector type, etc. */
5063 return riscv_vector::builtin_type_p (type);
5066 static unsigned int
5067 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5069 /* Subroutine of riscv_get_arg_info. */
5071 static rtx
5072 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5073 machine_mode mode, bool return_p)
5075 gcc_assert (riscv_v_ext_mode_p (mode));
5077 info->mr_offset = cum->num_mrs;
5078 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5080 /* For scalable mask return value. */
5081 if (return_p)
5082 return gen_rtx_REG (mode, V_REG_FIRST);
5084 /* For the first scalable mask argument. */
5085 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5087 info->num_mrs = 1;
5088 return gen_rtx_REG (mode, V_REG_FIRST);
5090 else
5092 /* The remaining scalable mask arguments are treated as scalable data
5093 arguments. */
5097 /* The number and alignment of vector registers needed for this scalable vector
5098 argument. When the mode size is less than a full vector, we use one vector
5099 register to pass it. Just call TARGET_HARD_REGNO_NREGS for the number
5100 information. */
5101 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5102 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5103 ? nregs / riscv_vector::get_nf (mode)
5104 : nregs;
5105 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5106 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5107 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5109 /* For scalable data and scalable tuple return value. */
5110 if (return_p)
5111 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5113 /* Iterate through the USED_VRS array to find vector register groups that have
5114 not been allocated and whose first register is aligned to LMUL. */
5115 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5117 /* The index in USED_VRS array. */
5118 int idx = i - arg_reg_start;
5119 /* Find the first register unused. */
5120 if (!cum->used_vrs[idx])
5122 bool find_set = true;
5123 /* Ensure there are NREGS continuous unused registers. */
5124 for (int j = 1; j < nregs; j++)
5125 if (cum->used_vrs[idx + j])
5127 find_set = false;
5128 /* Update I to the last aligned register which
5129 cannot be used and the next iteration will add
5130 LMUL step to I. */
5131 i += (j / LMUL) * LMUL;
5132 break;
5135 if (find_set)
5137 info->num_vrs = nregs;
5138 info->vr_offset = idx;
5139 return gen_rtx_REG (mode, i + V_REG_FIRST);
5144 return NULL_RTX;
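/* Allocation sketch (hypothetical state): an argument needing NREGS = 2
   with LMUL = 2 must start on an even-numbered vector register, so if
   v8 is already taken the scan above skips v9 and tries v10/v11 as the
   next aligned group.  */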
5147 /* Fill INFO with information about a single argument, and return an RTL
5148 pattern to pass or return the argument. Return NULL_RTX if the argument
5149 cannot be passed or returned in registers; in that case it may be passed by
5150 reference or through the stack. CUM is the cumulative state for earlier arguments.
5151 MODE is the mode of this argument and TYPE is its type (if known). NAMED is
5152 true if this is a named (fixed) argument rather than a variable one. RETURN_P
5153 is true if returning the argument, or false if passing the argument. */
5155 static rtx
5156 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5157 machine_mode mode, const_tree type, bool named,
5158 bool return_p)
5160 unsigned num_bytes, num_words;
5161 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5162 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5163 unsigned alignment = riscv_function_arg_boundary (mode, type);
5165 memset (info, 0, sizeof (*info));
5166 info->gpr_offset = cum->num_gprs;
5167 info->fpr_offset = cum->num_fprs;
5169 /* Passed by reference when the scalable vector argument is anonymous. */
5170 if (riscv_v_ext_mode_p (mode) && !named)
5171 return NULL_RTX;
5173 if (named)
5175 riscv_aggregate_field fields[2];
5176 unsigned fregno = fpr_base + info->fpr_offset;
5177 unsigned gregno = gpr_base + info->gpr_offset;
5179 /* Pass one- or two-element floating-point aggregates in FPRs. */
5180 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5181 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5182 switch (info->num_fprs)
5184 case 1:
5185 return riscv_pass_fpr_single (mode, fregno,
5186 TYPE_MODE (fields[0].type),
5187 fields[0].offset);
5189 case 2:
5190 return riscv_pass_fpr_pair (mode, fregno,
5191 TYPE_MODE (fields[0].type),
5192 fields[0].offset,
5193 fregno + 1,
5194 TYPE_MODE (fields[1].type),
5195 fields[1].offset);
5197 default:
5198 gcc_unreachable ();
5201 /* Pass real and complex floating-point numbers in FPRs. */
5202 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5203 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5204 switch (GET_MODE_CLASS (mode))
5206 case MODE_FLOAT:
5207 return gen_rtx_REG (mode, fregno);
5209 case MODE_COMPLEX_FLOAT:
5210 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5211 fregno + 1, GET_MODE_INNER (mode),
5212 GET_MODE_UNIT_SIZE (mode));
5214 default:
5215 gcc_unreachable ();
5218 /* Pass structs with one float and one integer in an FPR and a GPR. */
5219 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5220 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5221 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5223 info->num_gprs = 1;
5224 info->num_fprs = 1;
5226 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5227 std::swap (fregno, gregno);
5229 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5230 fields[0].offset,
5231 gregno, TYPE_MODE (fields[1].type),
5232 fields[1].offset);
5235 /* For scalable vector argument. */
5236 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5237 return riscv_get_vector_arg (info, cum, mode, return_p);
5239 /* For VLS mode aggregates passed in GPRs. */
5240 if (riscv_v_ext_vls_mode_p (mode))
5241 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5244 /* Work out the size of the argument. */
5245 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5246 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5248 /* Doubleword-aligned varargs start on an even register boundary. */
5249 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5250 info->gpr_offset += info->gpr_offset & 1;
5252 /* Partition the argument between registers and stack. */
5253 info->num_fprs = 0;
5254 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5255 info->stack_p = (num_words - info->num_gprs) != 0;
5257 if (info->num_gprs || return_p)
5258 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5260 return NULL_RTX;
5263 /* Implement TARGET_FUNCTION_ARG. */
5265 static rtx
5266 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5268 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5269 struct riscv_arg_info info;
5271 if (arg.end_marker_p ())
5272 /* Return the calling convention used by the current function. */
5273 return gen_int_mode (cum->variant_cc, SImode);
5275 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5278 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5280 static void
5281 riscv_function_arg_advance (cumulative_args_t cum_v,
5282 const function_arg_info &arg)
5284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5285 struct riscv_arg_info info;
5287 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5289 /* Set the corresponding register in USED_VRS to used status. */
5290 for (unsigned int i = 0; i < info.num_vrs; i++)
5292 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5293 cum->used_vrs[info.vr_offset + i] = true;
5296 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5298 error ("RVV type %qT cannot be passed to an unprototyped function",
5299 arg.type);
5300 /* Avoid repeating the message */
5301 cum->variant_cc = RISCV_CC_V;
5304 /* Advance the register count. This has the effect of setting
5305 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5306 argument required us to skip the final GPR and pass the whole
5307 argument on the stack. */
5308 cum->num_fprs = info.fpr_offset + info.num_fprs;
5309 cum->num_gprs = info.gpr_offset + info.num_gprs;
5310 cum->num_mrs = info.mr_offset + info.num_mrs;
5313 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5315 static int
5316 riscv_arg_partial_bytes (cumulative_args_t cum,
5317 const function_arg_info &generic_arg)
5319 struct riscv_arg_info arg;
5321 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5322 generic_arg.type, generic_arg.named, false);
5323 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
5326 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5327 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5328 VALTYPE is null and MODE is the mode of the return value. */
5330 rtx
5331 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5333 struct riscv_arg_info info;
5334 CUMULATIVE_ARGS args;
5336 if (type)
5338 int unsigned_p = TYPE_UNSIGNED (type);
5340 mode = TYPE_MODE (type);
5342 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5343 return values, promote the mode here too. */
5344 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5347 memset (&args, 0, sizeof args);
5349 return riscv_get_arg_info (&info, &args, mode, type, true, true);
5352 /* Implement TARGET_PASS_BY_REFERENCE. */
5354 static bool
5355 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5357 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5358 struct riscv_arg_info info;
5359 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5361 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5362 never pass variadic arguments in floating-point and vector registers,
5363 so we can avoid the call to riscv_get_arg_info in this case. */
5364 if (cum != NULL)
5366 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5368 /* Don't pass by reference if we can use a floating-point register. */
5369 if (info.num_fprs)
5370 return false;
5372 /* Don't pass by reference if we can use general register(s) for vls. */
5373 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
5374 return false;
5376 /* Don't pass by reference if we can use vector register groups. */
5377 if (info.num_vrs > 0 || info.num_mrs > 0)
5378 return false;
5381 /* Passed by reference when:
5382 1. The scalable vector argument is anonymous.
5383 2. Args cannot be passed through vector registers. */
5384 if (riscv_v_ext_mode_p (arg.mode))
5385 return true;
5387 /* Pass by reference if the data do not fit in two integer registers. */
5388 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
5391 /* Implement TARGET_RETURN_IN_MEMORY. */
5393 static bool
5394 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5396 CUMULATIVE_ARGS args;
5397 cumulative_args_t cum = pack_cumulative_args (&args);
5399 /* The rules for returning in memory are the same as for passing the
5400 first named argument by reference. */
5401 memset (&args, 0, sizeof args);
5402 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5403 return riscv_pass_by_reference (cum, arg);
5406 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5408 static void
5409 riscv_setup_incoming_varargs (cumulative_args_t cum,
5410 const function_arg_info &arg,
5411 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5413 CUMULATIVE_ARGS local_cum;
5414 int gp_saved;
5416 /* The caller has advanced CUM up to, but not beyond, the last named
5417 argument. Advance a local copy of CUM past the last "real" named
5418 argument, to find out how many registers are left over. */
5419 local_cum = *get_cumulative_args (cum);
5420 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
5421 || arg.type != NULL_TREE)
5422 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5424 /* Find out how many registers we need to save. */
5425 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5427 if (!no_rtl && gp_saved > 0)
5429 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5430 REG_PARM_STACK_SPACE (cfun->decl)
5431 - gp_saved * UNITS_PER_WORD);
5432 rtx mem = gen_frame_mem (BLKmode, ptr);
5433 set_mem_alias_set (mem, get_varargs_alias_set ());
5435 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5436 mem, gp_saved);
5438 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5439 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
5442 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5444 static const predefined_function_abi &
5445 riscv_v_abi ()
5447 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5448 if (!v_abi.initialized_p ())
5450 HARD_REG_SET full_reg_clobbers
5451 = default_function_abi.full_reg_clobbers ();
5452 /* Callee-saved vector registers: v1-v7, v24-v31. */
5453 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5454 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5455 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5456 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5457 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5459 return v_abi;
5462 /* Return true if a function with type FNTYPE returns its value in
5463 RISC-V V registers. */
5465 static bool
5466 riscv_return_value_is_vector_type_p (const_tree fntype)
5468 tree return_type = TREE_TYPE (fntype);
5470 if (riscv_vector_type_p (return_type))
5472 if (!TARGET_VECTOR)
5473 error_at (input_location,
5474 "return type %qT requires the V ISA extension", return_type);
5475 return true;
5477 else
5478 return false;
5481 /* Return true if a function with type FNTYPE takes arguments in
5482 RISC-V V registers. */
5484 static bool
5485 riscv_arguments_is_vector_type_p (const_tree fntype)
5487 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
5488 chain = TREE_CHAIN (chain))
5490 tree arg_type = TREE_VALUE (chain);
5491 if (riscv_vector_type_p (arg_type))
5493 if (!TARGET_VECTOR)
5494 error_at (input_location,
5495 "argument type %qT requires the V ISA extension",
5496 arg_type);
5497 return true;
5501 return false;
5504 /* Return true if FUNC is a riscv_vector_cc function.
5505 For more details please refer to the link below.
5506 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
5507 static bool
5508 riscv_vector_cc_function_p (const_tree fntype)
5510 tree attr = TYPE_ATTRIBUTES (fntype);
5511 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
5512 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
5514 if (vector_cc_p && !TARGET_VECTOR)
5515 error_at (input_location,
5516 "function attribute %qs requires the V ISA extension",
5517 "riscv_vector_cc");
5519 return vector_cc_p;
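/* Usage sketch for the attribute recognized above (assumes the V
   extension is enabled):

     vint32m1_t f (vint32m1_t) __attribute__ ((riscv_vector_cc));

   which gives F the variant calling convention where v1-v7 and v24-v31
   are treated as callee-saved.  */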
5522 /* Implement TARGET_FNTYPE_ABI. */
5524 static const predefined_function_abi &
5525 riscv_fntype_abi (const_tree fntype)
5527 /* Implement the vector calling convention. For more details please
5528 refer to the link below.
5529 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
5530 if (riscv_return_value_is_vector_type_p (fntype)
5531 || riscv_arguments_is_vector_type_p (fntype)
5532 || riscv_vector_cc_function_p (fntype))
5533 return riscv_v_abi ();
5535 return default_function_abi;
5538 /* Return the RISC-V calling convention of a call insn. */
5539 riscv_cc
5540 get_riscv_cc (const rtx use)
5542 gcc_assert (GET_CODE (use) == USE);
5543 rtx unspec = XEXP (use, 0);
5544 gcc_assert (GET_CODE (unspec) == UNSPEC
5545 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
5546 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
5547 gcc_assert (cc < RISCV_CC_UNKNOWN);
5548 return cc;
5551 /* Implement TARGET_INSN_CALLEE_ABI. */
5553 const predefined_function_abi &
5554 riscv_insn_callee_abi (const rtx_insn *insn)
5556 rtx pat = PATTERN (insn);
5557 gcc_assert (GET_CODE (pat) == PARALLEL);
5558 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
5559 return function_abis[cc];
5562 /* Handle an attribute requiring a FUNCTION_DECL;
5563 arguments as in struct attribute_spec.handler. */
5564 static tree
5565 riscv_handle_fndecl_attribute (tree *node, tree name,
5566 tree args ATTRIBUTE_UNUSED,
5567 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5569 if (TREE_CODE (*node) != FUNCTION_DECL)
5571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5572 name);
5573 *no_add_attrs = true;
5576 return NULL_TREE;
5579 /* Verify type-based attributes. NODE is what the attribute is being
5580 applied to. NAME is the attribute name. ARGS are the attribute args.
5581 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
5582 the attribute should be ignored. */
5584 static tree
5585 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5586 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5588 /* Check for an argument. */
5589 if (is_attribute_p ("interrupt", name))
5591 if (args)
5593 tree cst = TREE_VALUE (args);
5594 const char *string;
5596 if (TREE_CODE (cst) != STRING_CST)
5598 warning (OPT_Wattributes,
5599 "%qE attribute requires a string argument",
5600 name);
5601 *no_add_attrs = true;
5602 return NULL_TREE;
5605 string = TREE_STRING_POINTER (cst);
5606 if (strcmp (string, "user") && strcmp (string, "supervisor")
5607 && strcmp (string, "machine"))
5609 warning (OPT_Wattributes,
5610 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
5611 "or %<\"machine\"%>", name);
5612 *no_add_attrs = true;
5617 return NULL_TREE;
5620 static tree
5621 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
5622 ATTRIBUTE_UNUSED int flags,
5623 bool *no_add_attrs)
5625 if (!is_attribute_p ("riscv_rvv_vector_bits", name))
5626 return NULL_TREE;
5628 *no_add_attrs = true;
5630 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL)
5632 error (
5633 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified",
5634 "riscv_rvv_vector_bits");
5635 return NULL_TREE;
5638 tree type = *node;
5640 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type))
5642 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type);
5643 return NULL_TREE;
5646 tree size = TREE_VALUE (args);
5648 if (TREE_CODE (size) != INTEGER_CST)
5650 error ("%qs requires an integer constant", "riscv_rvv_vector_bits");
5651 return NULL_TREE;
5654 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size);
5655 unsigned HOST_WIDE_INT type_mode_bits
5656 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant ();
5658 if (args_in_bits != type_mode_bits)
5660 error ("invalid RVV vector size %qd, "
5661 "expected size is %qd based on LMUL of type and %qs",
5662 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl");
5663 return NULL_TREE;
5666 type = build_distinct_type_copy (type);
5667 TYPE_ATTRIBUTES (type)
5668 = remove_attribute ("RVV sizeless type",
5669 copy_list (TYPE_ATTRIBUTES (type)));
5671 /* Operations like ALU/compare on vbool*_t are not well defined;
5672 continue to treat vbool*_t as indivisible. */
5673 if (!VECTOR_BOOLEAN_TYPE_P (type))
5674 TYPE_INDIVISIBLE_P (type) = 0;
5676 *node = type;
5678 return NULL_TREE;
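/* Usage sketch (hedged; assumes -mrvv-vector-bits=zvl and that the
   requested size matches the type's LMUL-scaled width):

     typedef vint32m1_t fixed_vint32m1_t
       __attribute__ ((riscv_rvv_vector_bits (__riscv_v_fixed_vlen)));

   which yields a fixed-length, sizeof-able copy of the RVV type.  */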
5681 /* Return true if function TYPE is an interrupt function. */
5682 static bool
5683 riscv_interrupt_type_p (tree type)
5685 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
5688 /* Return true if FUNC is a naked function. */
5689 static bool
5690 riscv_naked_function_p (tree func)
5692 tree func_decl = func;
5693 if (func == NULL_TREE)
5694 func_decl = current_function_decl;
5695 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
5698 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
5699 static bool
5700 riscv_allocate_stack_slots_for_args ()
5702 /* Naked functions should not allocate stack slots for arguments. */
5703 return !riscv_naked_function_p (current_function_decl);
5706 /* Implement TARGET_WARN_FUNC_RETURN. */
5707 static bool
5708 riscv_warn_func_return (tree decl)
5710 /* Naked functions are implemented entirely in assembly, including the
5711 return sequence, so suppress warnings about this. */
5712 return !riscv_naked_function_p (decl);
5715 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5717 static void
5718 riscv_va_start (tree valist, rtx nextarg)
5720 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5721 std_expand_builtin_va_start (valist, nextarg);
5724 /* Make ADDR suitable for use as a call or sibcall target. */
5726 rtx
5727 riscv_legitimize_call_address (rtx addr)
5729 if (!call_insn_operand (addr, VOIDmode))
5731 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
5732 riscv_emit_move (reg, addr);
5733 return reg;
5735 return addr;
5738 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
5739 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
5741 static void
5742 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
5744 const char *reloc;
5746 switch (riscv_classify_symbolic_expression (op))
5748 case SYMBOL_ABSOLUTE:
5749 reloc = hi_reloc ? "%hi" : "%lo";
5750 break;
5752 case SYMBOL_PCREL:
5753 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
5754 break;
5756 case SYMBOL_TLS_LE:
5757 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
5758 break;
5760 default:
5761 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
5762 return;
5765 fprintf (file, "%s(", reloc);
5766 output_addr_const (file, riscv_strip_unspec_address (op));
5767 fputc (')', file);
5770 /* Return the memory model that encapsulates both given models. */
5772 enum memmodel
5773 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
5775 model1 = memmodel_base (model1);
5776 model2 = memmodel_base (model2);
5778 enum memmodel weaker = model1 <= model2 ? model1: model2;
5779 enum memmodel stronger = model1 > model2 ? model1: model2;
5781 switch (stronger)
5783 case MEMMODEL_SEQ_CST:
5784 case MEMMODEL_ACQ_REL:
5785 return stronger;
5786 case MEMMODEL_RELEASE:
5787 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
5788 return MEMMODEL_ACQ_REL;
5789 else
5790 return stronger;
5791 case MEMMODEL_ACQUIRE:
5792 case MEMMODEL_CONSUME:
5793 case MEMMODEL_RELAXED:
5794 return stronger;
5795 default:
5796 gcc_unreachable ();
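/* Worked example (illustrative): unioning MEMMODEL_RELEASE with
   MEMMODEL_ACQUIRE makes RELEASE the stronger model, and since the
   weaker one is ACQUIRE the result is rounded up to MEMMODEL_ACQ_REL;
   unioning MEMMODEL_RELAXED with MEMMODEL_ACQUIRE simply yields
   MEMMODEL_ACQUIRE.  */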
5800 /* Return true if the .AQ suffix should be added to an AMO to implement the
5801 acquire portion of memory model MODEL. */
5803 static bool
5804 riscv_memmodel_needs_amo_acquire (enum memmodel model)
5806 /* ZTSO amo mappings require no annotations. */
5807 if (TARGET_ZTSO)
5808 return false;
5810 switch (model)
5812 case MEMMODEL_ACQ_REL:
5813 case MEMMODEL_SEQ_CST:
5814 case MEMMODEL_ACQUIRE:
5815 case MEMMODEL_CONSUME:
5816 return true;
5818 case MEMMODEL_RELEASE:
5819 case MEMMODEL_RELAXED:
5820 return false;
5822 default:
5823 gcc_unreachable ();
5827 /* Return true if the .RL suffix should be added to an AMO to implement the
5828 release portion of memory model MODEL. */
5830 static bool
5831 riscv_memmodel_needs_amo_release (enum memmodel model)
5833 /* ZTSO amo mappings require no annotations. */
5834 if (TARGET_ZTSO)
5835 return false;
5837 switch (model)
5839 case MEMMODEL_ACQ_REL:
5840 case MEMMODEL_SEQ_CST:
5841 case MEMMODEL_RELEASE:
5842 return true;
5844 case MEMMODEL_ACQUIRE:
5845 case MEMMODEL_CONSUME:
5846 case MEMMODEL_RELAXED:
5847 return false;
5849 default:
5850 gcc_unreachable ();
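/* For instance (illustrative, under the default WMO mapping): a
   SEQ_CST atomic fetch-and-add needs both suffixes and comes out as
   "amoadd.w.aqrl", a RELEASE one as "amoadd.w.rl" and a RELAXED one as
   plain "amoadd.w"; with Ztso all three need no annotation, as the two
   predicates above return false.  */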
5854 /* Get the REGNO alignment of a vector mode.
5855 The alignment = LMUL when LMUL >= 1.
5856 Otherwise, alignment = 1. */
5857 int
5858 riscv_get_v_regno_alignment (machine_mode mode)
5860 /* Per RVV spec 3.3.2, for LMUL = 2,4,8 the register number must be a
5861 multiple of 2,4,8; but for mask vector registers it can be any number. */
5862 int lmul = 1;
5863 machine_mode rvv_mode = mode;
5864 if (riscv_v_ext_vls_mode_p (rvv_mode))
5866 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
5867 if (size < TARGET_MIN_VLEN)
5868 return 1;
5869 else
5870 return size / TARGET_MIN_VLEN;
5872 if (riscv_v_ext_tuple_mode_p (rvv_mode))
5873 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
5874 poly_int64 size = GET_MODE_SIZE (rvv_mode);
5875 if (known_gt (size, UNITS_PER_V_REG))
5876 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
5877 return lmul;
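/* Illustrative examples (assuming TARGET_MIN_VLEN == 128): an LMUL=4
   mode such as RVVM4SImode spans four V registers, so this returns 4
   and only v0, v4, v8, ... are usable for it; a 64-bit VLS mode is
   smaller than one register and returns 1, while a 512-bit VLS mode
   returns 512 / 128 = 4.  */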
5880 /* Define ASM_OUTPUT_OPCODE to do anything special before
5881 emitting an opcode. */
5882 const char *
5883 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
5885 if (TARGET_XTHEADVECTOR)
5886 return th_asm_output_opcode (asm_out_file, p);
5888 return p;
5891 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
5893 'h' Print the high-part relocation associated with OP, after stripping
5894 any outermost HIGH.
5895 'R' Print the low-part relocation associated with OP.
5896 'C' Print the integer branch condition for comparison OP.
5897 'N' Print the inverse of the integer branch condition for comparison OP.
5898 'A' Print the atomic operation suffix for memory model OP.
5899 'I' Print the LR suffix for memory model OP.
5900 'J' Print the SC suffix for memory model OP.
5901 'z' Print x0 if OP is zero, otherwise print OP normally.
5902 'i' Print i if the operand is not a register.
5903 'S' Print shift-index of single-bit mask OP.
5904 'T' Print shift-index of inverted single-bit mask OP.
5905 '~' Print w if TARGET_64BIT is true; otherwise print nothing.
5907 Note: please keep this list and the list in riscv.md in sync. */
5909 static void
5910 riscv_print_operand (FILE *file, rtx op, int letter)
5912 /* `~` does not take an operand, so op will be null.
5913 Check for it before accessing op. */
5915 if (letter == '~')
5917 if (TARGET_64BIT)
5918 fputc('w', file);
5919 return;
5921 machine_mode mode = GET_MODE (op);
5922 enum rtx_code code = GET_CODE (op);
5924 switch (letter)
5926 case 'o': {
5927 /* Print 'OP' variant for RVV instructions.
5928 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
5929 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
5930 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
5931 if (riscv_v_ext_mode_p (mode))
5933 if (REG_P (op))
5934 asm_fprintf (file, "v");
5935 else if (CONST_VECTOR_P (op))
5936 asm_fprintf (file, "i");
5937 else
5938 output_operand_lossage ("invalid vector operand");
5940 else
5942 if (CONST_INT_P (op))
5943 asm_fprintf (file, "i");
5944 else
5945 asm_fprintf (file, "x");
5947 break;
5949 case 'v': {
5950 rtx elt;
5952 if (REG_P (op))
5953 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
5954 else
5956 if (!const_vec_duplicate_p (op, &elt))
5957 output_operand_lossage ("invalid vector constant");
5958 else if (satisfies_constraint_Wc0 (op))
5959 asm_fprintf (file, "0");
5960 else if (satisfies_constraint_vi (op)
5961 || satisfies_constraint_vj (op)
5962 || satisfies_constraint_vk (op))
5963 asm_fprintf (file, "%wd", INTVAL (elt));
5964 else
5965 output_operand_lossage ("invalid vector constant");
5967 break;
5969 case 'V': {
5970 rtx elt;
5971 if (!const_vec_duplicate_p (op, &elt))
5972 output_operand_lossage ("invalid vector constant");
5973 else if (satisfies_constraint_vj (op))
5974 asm_fprintf (file, "%wd", -INTVAL (elt));
5975 else
5976 output_operand_lossage ("invalid vector constant");
5977 break;
5979 case 'm': {
5980 if (riscv_v_ext_mode_p (mode))
5982 /* Calculate lmul according to mode and print the value. */
5983 int lmul = riscv_get_v_regno_alignment (mode);
5984 asm_fprintf (file, "%d", lmul);
5986 else if (code == CONST_INT)
5988 /* If it is a const_int value, it denotes the VLMUL field enum. */
5989 unsigned int vlmul = UINTVAL (op);
5990 switch (vlmul)
5992 case riscv_vector::LMUL_1:
5993 asm_fprintf (file, "%s", "m1");
5994 break;
5995 case riscv_vector::LMUL_2:
5996 asm_fprintf (file, "%s", "m2");
5997 break;
5998 case riscv_vector::LMUL_4:
5999 asm_fprintf (file, "%s", "m4");
6000 break;
6001 case riscv_vector::LMUL_8:
6002 asm_fprintf (file, "%s", "m8");
6003 break;
6004 case riscv_vector::LMUL_F8:
6005 asm_fprintf (file, "%s", "mf8");
6006 break;
6007 case riscv_vector::LMUL_F4:
6008 asm_fprintf (file, "%s", "mf4");
6009 break;
6010 case riscv_vector::LMUL_F2:
6011 asm_fprintf (file, "%s", "mf2");
6012 break;
6013 default:
6014 gcc_unreachable ();
6017 else
6018 output_operand_lossage ("invalid vector constant");
6019 break;
6021 case 'p': {
6022 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
6024 /* Print for RVV mask operand.
6025 If op is reg, print ",v0.t".
6026 Otherwise, don't print anything. */
6027 if (code == REG)
6028 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
6030 else if (code == CONST_INT)
6032 /* Tail && Mask policy. */
6033 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
6035 else
6036 output_operand_lossage ("invalid vector constant");
6037 break;
6039 case 'h':
6040 if (code == HIGH)
6041 op = XEXP (op, 0);
6042 riscv_print_operand_reloc (file, op, true);
6043 break;
6045 case 'R':
6046 riscv_print_operand_reloc (file, op, false);
6047 break;
6049 case 'C':
6050 /* The RTL names match the instruction names. */
6051 fputs (GET_RTX_NAME (code), file);
6052 break;
6054 case 'N':
6055 /* The RTL names match the instruction names. */
6056 fputs (GET_RTX_NAME (reverse_condition (code)), file);
6057 break;
6059 case 'A': {
6060 const enum memmodel model = memmodel_base (INTVAL (op));
6061 if (riscv_memmodel_needs_amo_acquire (model)
6062 && riscv_memmodel_needs_amo_release (model))
6063 fputs (".aqrl", file);
6064 else if (riscv_memmodel_needs_amo_acquire (model))
6065 fputs (".aq", file);
6066 else if (riscv_memmodel_needs_amo_release (model))
6067 fputs (".rl", file);
6068 break;
6071 case 'I': {
6072 const enum memmodel model = memmodel_base (INTVAL (op));
6073 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
6074 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
6075 break;
6076 else if (model == MEMMODEL_SEQ_CST)
6077 fputs (".aqrl", file);
6078 else if (riscv_memmodel_needs_amo_acquire (model))
6079 fputs (".aq", file);
6080 break;
6083 case 'J': {
6084 const enum memmodel model = memmodel_base (INTVAL (op));
6085 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
6086 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
6087 fputs (".rl", file);
6088 else if (TARGET_ZTSO)
6089 break;
6090 else if (riscv_memmodel_needs_amo_release (model))
6091 fputs (".rl", file);
6092 break;
6095 case 'i':
6096 if (code != REG)
6097 fputs ("i", file);
6098 break;
6100 case 'B':
6101 fputs (GET_RTX_NAME (code), file);
6102 break;
6104 case 'S':
6106 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6107 output_addr_const (file, newop);
6108 break;
6110 case 'T':
6112 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6113 output_addr_const (file, newop);
6114 break;
6116 case 'X':
6118 int ival = INTVAL (op) + 1;
6119 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6120 output_addr_const (file, newop);
6121 break;
6123 case 'Y':
6125 unsigned int imm = (UINTVAL (op) & 63);
6126 gcc_assert (imm <= 63);
6127 rtx newop = GEN_INT (imm);
6128 output_addr_const (file, newop);
6129 break;
6131 default:
6132 switch (code)
6134 case REG:
6135 if (letter && letter != 'z')
6136 output_operand_lossage ("invalid use of '%%%c'", letter);
6137 fprintf (file, "%s", reg_names[REGNO (op)]);
6138 break;
6140 case MEM:
6141 if (letter && letter != 'z')
6142 output_operand_lossage ("invalid use of '%%%c'", letter);
6143 else
6144 output_address (mode, XEXP (op, 0));
6145 break;
6147 case CONST_DOUBLE:
6149 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6151 fputs (reg_names[GP_REG_FIRST], file);
6152 break;
6155 int fli_index = riscv_float_const_rtx_index_for_fli (op);
6156 if (fli_index == -1 || fli_index > 31)
6158 output_operand_lossage ("invalid use of '%%%c'", letter);
6159 break;
6161 asm_fprintf (file, "%s", fli_value_print[fli_index]);
6162 break;
6165 default:
6166 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6167 fputs (reg_names[GP_REG_FIRST], file);
6168 else if (letter && letter != 'z')
6169 output_operand_lossage ("invalid use of '%%%c'", letter);
6170 else
6171 output_addr_const (file, riscv_strip_unspec_address (op));
6172 break;
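/* Illustrative use of the codes above in a hypothetical output
   template "vadd.v%o2\t%0,%1,%2%p3": '%o2' prints "v" for a vector
   register operand, "i" for a constant vector and "x" for a scalar
   register, while '%p3' appends ",v0.t" only when operand 3 is a mask
   register.  */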
6177 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
6178 static bool
6179 riscv_print_operand_punct_valid_p (unsigned char code)
6181 return (code == '~');
6184 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6186 static void
6187 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6189 struct riscv_address_info addr;
6191 if (th_print_operand_address (file, mode, x))
6192 return;
6194 if (riscv_classify_address (&addr, x, word_mode, true))
6195 switch (addr.type)
6197 case ADDRESS_REG:
6198 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
6199 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6200 return;
6202 case ADDRESS_LO_SUM:
6203 riscv_print_operand_reloc (file, addr.offset, false);
6204 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6205 return;
6207 case ADDRESS_CONST_INT:
6208 output_addr_const (file, x);
6209 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
6210 return;
6212 case ADDRESS_SYMBOLIC:
6213 output_addr_const (file, riscv_strip_unspec_address (x));
6214 return;
6216 default:
6217 gcc_unreachable ();
6220 gcc_unreachable ();
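/* Examples of the cases above (illustrative): an ADDRESS_REG operand
   prints as "16(sp)", an ADDRESS_LO_SUM as "%lo(sym)(a5)", and an
   ADDRESS_CONST_INT as an offset from x0 such as "64(zero)".  */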
6223 static bool
6224 riscv_size_ok_for_small_data_p (int size)
6226 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
6229 /* Return true if EXP should be placed in the small data section. */
6231 static bool
6232 riscv_in_small_data_p (const_tree x)
6234 /* default_use_anchors_for_symbol_p doesn't gather small data, so the
6235 anchor symbol cannot be used to address nearby objects there. In the
6236 large code model we get better results from the anchor optimization. */
6237 if (riscv_cmodel == CM_LARGE)
6238 return false;
6240 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
6241 return false;
6243 if (VAR_P (x) && DECL_SECTION_NAME (x))
6245 const char *sec = DECL_SECTION_NAME (x);
6246 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
6249 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
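/* For example (illustrative): with the default -msmall-data-limit=8,
   a 4-byte "int counter;" lands in .sbss, where the linker can relax
   its address computation into a single gp-relative access, while a
   64-byte array falls back to regular .bss.  */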
6252 /* Switch to the appropriate section for output of DECL. */
6254 static section *
6255 riscv_select_section (tree decl, int reloc,
6256 unsigned HOST_WIDE_INT align)
6258 switch (categorize_decl_for_section (decl, reloc))
6260 case SECCAT_SRODATA:
6261 return get_named_section (decl, ".srodata", reloc);
6263 default:
6264 return default_elf_select_section (decl, reloc, align);
6268 /* Switch to the appropriate section for output of DECL. */
6270 static void
6271 riscv_unique_section (tree decl, int reloc)
6273 const char *prefix = NULL;
6274 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6276 switch (categorize_decl_for_section (decl, reloc))
6278 case SECCAT_SRODATA:
6279 prefix = one_only ? ".sr" : ".srodata";
6280 break;
6282 default:
6283 break;
6285 if (prefix)
6287 const char *name, *linkonce;
6288 char *string;
6290 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6291 name = targetm.strip_name_encoding (name);
6293 /* If we're using one_only, then there needs to be a .gnu.linkonce
6294 prefix to the section name. */
6295 linkonce = one_only ? ".gnu.linkonce" : "";
6297 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6299 set_decl_section_name (decl, string);
6300 return;
6302 default_unique_section (decl, reloc);
6305 /* Constant pools are per-function when in large code model. */
6307 static inline bool
6308 riscv_can_use_per_function_literal_pools_p (void)
6310 return riscv_cmodel == CM_LARGE;
6313 static bool
6314 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6316 /* We can't use blocks for constants when we're using a per-function
6317 constant pool. */
6318 return !riscv_can_use_per_function_literal_pools_p ();
6321 /* Return a section for X, handling small data. */
6323 static section *
6324 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6325 unsigned HOST_WIDE_INT align)
6327 /* The literal pool stays with the function. */
6328 if (riscv_can_use_per_function_literal_pools_p ())
6329 return function_section (current_function_decl);
6331 section *s = default_elf_select_rtx_section (mode, x, align);
6333 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6335 if (startswith (s->named.name, ".rodata.cst"))
6337 /* Rename .rodata.cst* to .srodata.cst*. */
6338 char *name = (char *) alloca (strlen (s->named.name) + 2);
6339 sprintf (name, ".s%s", s->named.name + 1);
6340 return get_section (name, s->named.common.flags, NULL);
6343 if (s == data_section)
6344 return sdata_section;
6347 return s;
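/* Illustrative effect: an 8-byte constant that qualifies as small data
   would normally go to .rodata.cst8; the renaming above sends it to
   .srodata.cst8 instead, keeping it addressable via gp like the rest
   of the small data area.  */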
6350 /* Make the last instruction frame-related and note that it performs
6351 the operation described by FRAME_PATTERN. */
6353 static void
6354 riscv_set_frame_expr (rtx frame_pattern)
6356 rtx insn;
6358 insn = get_last_insn ();
6359 RTX_FRAME_RELATED_P (insn) = 1;
6360 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6361 frame_pattern,
6362 REG_NOTES (insn));
6365 /* Return a frame-related rtx that stores REG at MEM.
6366 REG must be a single register. */
6368 static rtx
6369 riscv_frame_set (rtx mem, rtx reg)
6371 rtx set = gen_rtx_SET (mem, reg);
6372 RTX_FRAME_RELATED_P (set) = 1;
6373 return set;
6376 /* Returns true if the current function might contain a far jump. */
6378 static bool
6379 riscv_far_jump_used_p ()
6381 size_t func_size = 0;
6383 if (cfun->machine->far_jump_used)
6384 return true;
6386 /* We can't change far_jump_used during or after reload, as there is
6387 no chance to change stack frame layout. So we must rely on the
6388 conservative heuristic below having done the right thing. */
6389 if (reload_in_progress || reload_completed)
6390 return false;
6392 /* Estimate the function length. */
6393 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6394 func_size += get_attr_length (insn);
6396 /* Conservatively determine whether some jump might exceed 1 MiB
6397 displacement. */
6398 if (func_size * 2 >= 0x100000)
6399 cfun->machine->far_jump_used = true;
6401 return cfun->machine->far_jump_used;
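/* When this triggers, a jump target may be outside the +/-1 MiB range
   of a single JAL, so the jump must be synthesized through a register
   sequence such as auipc+jalr that clobbers ra (an illustrative sketch
   of the motivation); riscv_save_return_addr_reg_p below therefore
   forces ra to be saved.  */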
6404 /* Return true, if the current function must save the incoming return
6405 address. */
6407 static bool
6408 riscv_save_return_addr_reg_p (void)
6410 /* The $ra register is call-clobbered: if this is not a leaf function,
6411 save it. */
6412 if (!crtl->is_leaf)
6413 return true;
6415 /* We need to save the incoming return address if __builtin_eh_return
6416 is being used to set a different return address. */
6417 if (crtl->calls_eh_return)
6418 return true;
6420 /* Far jumps/branches use $ra as a temporary to set up the target jump
6421 location (clobbering the incoming return address). */
6422 if (riscv_far_jump_used_p ())
6423 return true;
6425 /* We need to save it if anything has used it. */
6426 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
6427 return true;
6429 /* When the frame pointer is needed in a leaf function and has not been
6430 omitted via -momit-leaf-frame-pointer, ra must be saved too. */
6431 if (frame_pointer_needed && crtl->is_leaf
6432 && !TARGET_OMIT_LEAF_FRAME_POINTER)
6433 return true;
6435 return false;
6438 /* Return true if the current function must save register REGNO. */
6440 static bool
6441 riscv_save_reg_p (unsigned int regno)
6443 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
6444 bool might_clobber = crtl->saves_all_registers
6445 || df_regs_ever_live_p (regno);
6447 if (call_saved && might_clobber)
6448 return true;
6450 /* Save callee-saved V registers. */
6451 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
6452 && might_clobber)
6453 return true;
6455 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
6456 return true;
6458 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
6459 return true;
6461 /* If this is an interrupt handler, then we must save extra registers. */
6462 if (cfun->machine->interrupt_handler_p)
6464 /* zero register is always zero. */
6465 if (regno == GP_REG_FIRST)
6466 return false;
6468 /* The function will return the stack pointer to its original value. */
6469 if (regno == STACK_POINTER_REGNUM)
6470 return false;
6472 /* By convention, we assume that gp and tp are safe. */
6473 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
6474 return false;
6476 /* We must save every register used in this function. If this is not a
6477 leaf function, then we must save all temporary registers. */
6478 if (df_regs_ever_live_p (regno)
6479 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
6480 return true;
6483 return false;
6486 /* Return TRUE if Zcmp push and pop insns should be
6487 avoided, FALSE otherwise.
6488 Only use multi push & pop if all masked GPRs can be covered,
6489 stack access is SP-based,
6490 the GPRs are at the top of the stack frame,
6491 and stack allocation does not conflict with other features. */
6492 static bool
6493 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
6495 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
6496 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
6497 || crtl->args.pretend_args_size != 0
6498 || (use_shrink_wrapping_separate ()
6499 && !riscv_avoid_shrink_wrapping_separate ())
6500 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
6501 return true;
6503 return false;
6506 /* Determine whether to use multi push insn. */
6507 static bool
6508 riscv_use_multi_push (const struct riscv_frame_info *frame)
6510 if (riscv_avoid_multi_push (frame))
6511 return false;
6513 return (frame->multi_push_adj_base != 0);
6516 /* Return TRUE if a libcall to save/restore GPRs should be
6517 avoided. FALSE otherwise. */
6518 static bool
6519 riscv_avoid_save_libcall (void)
6521 if (!TARGET_SAVE_RESTORE
6522 || crtl->calls_eh_return
6523 || frame_pointer_needed
6524 || cfun->machine->interrupt_handler_p
6525 || cfun->machine->varargs_size != 0
6526 || crtl->args.pretend_args_size != 0)
6527 return true;
6529 return false;
6532 /* Determine whether to call GPR save/restore routines. */
6533 static bool
6534 riscv_use_save_libcall (const struct riscv_frame_info *frame)
6536 if (riscv_avoid_save_libcall ())
6537 return false;
6539 return frame->save_libcall_adjustment != 0;
6542 /* Determine which GPR save/restore routine to call. */
6544 static unsigned
6545 riscv_save_libcall_count (unsigned mask)
6547 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
6548 if (BITSET_P (mask, n))
6549 return CALLEE_SAVED_REG_NUMBER (n) + 1;
6550 abort ();
6553 /* Calculate the number of s registers in multi push and pop.
6554 Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
6555 static unsigned
6556 riscv_multi_push_sregs_count (unsigned mask)
6558 unsigned num = riscv_save_libcall_count (mask);
6559 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
6560 : num;
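/* Worked example (illustrative): if the highest callee-saved GPR in
   MASK is s2, riscv_save_libcall_count returns 3 (for {s0, s1, s2});
   for Zcmp a count of 11 would imply the invalid register list
   {s0-s10}, so it is rounded up to 12, i.e. {s0-s11}.  */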
6563 /* Calculate the number of regs (ra, s0-sx) in multi push and pop. */
6564 static unsigned
6565 riscv_multi_push_regs_count (unsigned mask)
6567 /* 1 is for ra */
6568 return riscv_multi_push_sregs_count (mask) + 1;
6571 /* Handle 16-byte alignment for poly_int. */
6572 static poly_int64
6573 riscv_16bytes_align (poly_int64 value)
6575 return aligned_upper_bound (value, 16);
6578 static HOST_WIDE_INT
6579 riscv_16bytes_align (HOST_WIDE_INT value)
6581 return ROUND_UP (value, 16);
6584 /* Handle stack alignment for poly_int. */
6585 static poly_int64
6586 riscv_stack_align (poly_int64 value)
6588 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
6591 static HOST_WIDE_INT
6592 riscv_stack_align (HOST_WIDE_INT value)
6594 return RISCV_STACK_ALIGN (value);
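/* Quick examples (illustrative, with the usual 128-bit preferred stack
   boundary): riscv_stack_align (20) == 32 and riscv_16bytes_align (40)
   == 48; the poly_int64 overloads round scalable (VLA) frame sizes up
   the same way.  */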
6597 /* Populate the current function's riscv_frame_info structure.
6599 RISC-V stack frames grow downward. High addresses are at the top.
6601 +-------------------------------+
6603 | incoming stack arguments |
6605 +-------------------------------+ <-- incoming stack pointer
6607 | callee-allocated save area |
6608 | for arguments that are |
6609 | split between registers and |
6610 | the stack |
6612 +-------------------------------+ <-- arg_pointer_rtx
6614 | callee-allocated save area |
6615 | for register varargs |
6617 +-------------------------------+ <-- hard_frame_pointer_rtx;
6618 | | stack_pointer_rtx + gp_sp_offset
6619 | GPR save area | + UNITS_PER_WORD
6621 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
6622 | | + UNITS_PER_FP_REG
6623 | FPR save area |
6625 +-------------------------------+ <-- stack_pointer_rtx
6626 | | + v_sp_offset_top
6627 | Vector Registers save area |
6629 | ----------------------------- | <-- stack_pointer_rtx
6630 | padding | + v_sp_offset_bottom
6631 +-------------------------------+ <-- frame_pointer_rtx (virtual)
6633 | local variables |
6635 P +-------------------------------+
6637 | outgoing stack arguments |
6639 +-------------------------------+ <-- stack_pointer_rtx
6641 Dynamic stack allocations such as alloca insert data at point P.
6642 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
6643 hard_frame_pointer_rtx unchanged. */
6645 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
6647 static void
6648 riscv_compute_frame_info (void)
6650 struct riscv_frame_info *frame;
6651 poly_int64 offset;
6652 bool interrupt_save_prologue_temp = false;
6653 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
6654 unsigned int num_v_saved = 0;
6656 frame = &cfun->machine->frame;
6658 /* In an interrupt function, there are two cases in which t0 needs to be used:
6659 1. If we have a large frame, we need to save/restore t0; we check for
6660 this before clearing the frame struct.
6661 2. We need to save and restore some CSRs in the frame. */
6662 if (cfun->machine->interrupt_handler_p)
6664 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
6665 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
6666 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
6667 interrupt_save_prologue_temp = true;
6670 frame->reset();
6672 if (!cfun->machine->naked_p)
6674 /* Find out which GPRs we need to save. */
6675 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
6676 if (riscv_save_reg_p (regno)
6677 || (interrupt_save_prologue_temp
6678 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
6679 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6681 /* If this function calls eh_return, we must also save and restore the
6682 EH data registers. */
6683 if (crtl->calls_eh_return)
6684 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6685 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6687 /* Find out which FPRs we need to save. This loop must iterate over
6688 the same space as its companion in riscv_for_each_saved_reg. */
6689 if (TARGET_HARD_FLOAT)
6690 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6691 if (riscv_save_reg_p (regno))
6692 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
6694 /* Find out which V registers we need to save. */
6695 if (TARGET_VECTOR)
6696 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6697 if (riscv_save_reg_p (regno))
6699 frame->vmask |= 1 << (regno - V_REG_FIRST);
6700 num_v_saved++;
6704 if (frame->mask)
6706 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
6708 /* 1 is for ra */
6709 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
6710 /* Only use save/restore routines if they don't alter the stack size. */
6711 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
6712 && !riscv_avoid_save_libcall ())
6714 /* The libcall saves/restores 3 registers at once, so we need to
6715 allocate 12 bytes for the callee-saved registers. */
6716 if (TARGET_RVE)
6717 x_save_size = 3 * UNITS_PER_WORD;
6719 frame->save_libcall_adjustment = x_save_size;
6722 if (!riscv_avoid_multi_push (frame))
6724 /* Number of regs (ra, s0-sx). */
6725 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
6726 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
6727 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
6731 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
6732 if (cfun->machine->interrupt_handler_p
6733 && ((TARGET_HARD_FLOAT && frame->fmask)
6734 || (TARGET_ZFINX
6735 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
6736 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6737 /* Save and restore FCSR. */
6738 /* TODO: When P or V extensions support interrupts, some of their CSRs
6739 may also need to be saved and restored. */
6740 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
6742 /* At the bottom of the frame are any outgoing stack arguments. */
6743 offset = riscv_stack_align (crtl->outgoing_args_size);
6744 /* Next are local stack variables. */
6745 offset += riscv_stack_align (get_frame_size ());
6746 /* The virtual frame pointer points above the local variables. */
6747 frame->frame_pointer_offset = offset;
6748 /* Next are the callee-saved VRs. */
6749 if (frame->vmask)
6750 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
6751 frame->v_sp_offset_top = offset;
6752 frame->v_sp_offset_bottom
6753 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
6754 /* Next are the callee-saved FPRs. */
6755 if (frame->fmask)
6756 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
6757 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
6758 /* Next are the callee-saved GPRs. */
6759 if (frame->mask)
6761 offset += x_save_size;
6762 /* Align to 16 bytes and pad the GPR part to honor both stack
6763 alignment and the Zcmp push/pop size alignment. */
6764 if (riscv_use_multi_push (frame)
6765 && known_lt (offset, frame->multi_push_adj_base
6766 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
6767 offset = riscv_16bytes_align (offset);
6769 frame->gp_sp_offset = offset - UNITS_PER_WORD;
6770 /* The hard frame pointer points above the callee-saved GPRs. */
6771 frame->hard_frame_pointer_offset = offset;
6772 /* Above the hard frame pointer is the callee-allocated varargs save area. */
6773 offset += riscv_stack_align (cfun->machine->varargs_size);
6774 /* Next is the callee-allocated area for pretend stack arguments. */
6775 offset += riscv_stack_align (crtl->args.pretend_args_size);
6776 /* Arg pointer must be below pretend args, but must be above alignment
6777 padding. */
6778 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
6779 frame->total_size = offset;
6781 /* Above this are the incoming stack pointer and any incoming arguments. */
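/* Worked example (illustrative, rv64, no FPR/VR state): a function
   saving only ra and s0, with 16 bytes of locals and no outgoing
   arguments, gets frame_pointer_offset = 16, a 16-byte GPR save area
   (so gp_sp_offset = 32 - 8 = 24), hard_frame_pointer_offset = 32 and
   total_size = 32.  */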
6784 /* Make sure that we're not trying to eliminate to the wrong hard frame
6785 pointer. */
6787 static bool
6788 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6790 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6793 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
6794 or argument pointer. TO is either the stack pointer or hard frame
6795 pointer. */
6797 poly_int64
6798 riscv_initial_elimination_offset (int from, int to)
6800 poly_int64 src, dest;
6802 riscv_compute_frame_info ();
6804 if (to == HARD_FRAME_POINTER_REGNUM)
6805 dest = cfun->machine->frame.hard_frame_pointer_offset;
6806 else if (to == STACK_POINTER_REGNUM)
6807 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
6808 else
6809 gcc_unreachable ();
6811 if (from == FRAME_POINTER_REGNUM)
6812 src = cfun->machine->frame.frame_pointer_offset;
6813 else if (from == ARG_POINTER_REGNUM)
6814 src = cfun->machine->frame.arg_pointer_offset;
6815 else
6816 gcc_unreachable ();
6818 return src - dest;
6821 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6822 previous frame. */
6824 rtx
6825 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6827 if (count != 0)
6828 return const0_rtx;
6830 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6833 /* Emit code to change the current function's return address to
6834 ADDRESS. SCRATCH is available as a scratch register, if needed.
6835 ADDRESS and SCRATCH are both word-mode GPRs. */
6837 void
6838 riscv_set_return_address (rtx address, rtx scratch)
6840 rtx slot_address;
6842 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
6843 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
6844 cfun->machine->frame.gp_sp_offset.to_constant());
6845 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
6848 /* Save register REG to MEM. Make the instruction frame-related. */
6850 static void
6851 riscv_save_reg (rtx reg, rtx mem)
6853 riscv_emit_move (mem, reg);
6854 riscv_set_frame_expr (riscv_frame_set (mem, reg));
6857 /* Restore register REG from MEM. */
6859 static void
6860 riscv_restore_reg (rtx reg, rtx mem)
6862 rtx insn = riscv_emit_move (reg, mem);
6863 rtx dwarf = NULL_RTX;
6864 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
6866 if (known_gt (epilogue_cfa_sp_offset, 0)
6867 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
6869 rtx cfa_adjust_rtx
6870 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
6871 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
6872 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
6875 REG_NOTES (insn) = dwarf;
6876 RTX_FRAME_RELATED_P (insn) = 1;
6879 /* A function to save or store a register. The first argument is the
6880 register and the second is the stack slot. */
6881 typedef void (*riscv_save_restore_fn) (rtx, rtx);
6883 /* Use FN to save or restore register REGNO. MODE is the register's
6884 mode and OFFSET is the offset of its save slot from the current
6885 stack pointer. */
6887 static void
6888 riscv_save_restore_reg (machine_mode mode, int regno,
6889 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
6891 rtx mem;
6893 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
6894 fn (gen_rtx_REG (mode, regno), mem);
6897 /* Return the next register up from REGNO up to LIMIT for the callee
6898 to save or restore. OFFSET will be adjusted accordingly.
6899 If INC is set, then REGNO will be incremented first.
6900 Returns INVALID_REGNUM if there is no such next register. */
6902 static unsigned int
6903 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
6904 HOST_WIDE_INT *offset, bool inc = true)
6906 if (inc)
6907 regno++;
6909 while (regno <= limit)
6911 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
6913 *offset = *offset - UNITS_PER_WORD;
6914 return regno;
6917 regno++;
6919 return INVALID_REGNUM;
6922 /* Return TRUE if provided REGNO is eh return data register. */
6924 static bool
6925 riscv_is_eh_return_data_register (unsigned int regno)
6927 unsigned int i, regnum;
6929 if (!crtl->calls_eh_return)
6930 return false;
6932 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6933 if (regno == regnum)
6935 return true;
6938 return false;
6941 /* Call FN for each register that is saved by the current function.
6942 SP_OFFSET is the offset of the current stack pointer from the start
6943 of the frame. */
6945 static void
6946 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
6947 bool epilogue, bool maybe_eh_return)
6949 HOST_WIDE_INT offset, first_fp_offset;
6950 unsigned int regno, num_masked_fp = 0;
6951 unsigned int start = GP_REG_FIRST;
6952 unsigned int limit = GP_REG_LAST;
6954 /* Save the link register and s-registers. */
6955 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
6956 + UNITS_PER_WORD;
6957 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
6958 regno != INVALID_REGNUM;
6959 regno = riscv_next_saved_reg (regno, limit, &offset))
6961 if (cfun->machine->reg_is_wrapped_separately[regno])
6962 continue;
6964 /* If this is a normal return in a function that calls the eh_return
6965 builtin, then do not restore the eh return data registers as that
6966 would clobber the return value. But we do still need to save them
6967 in the prologue, and restore them for an exception return, so we
6968 need special handling here. */
6969 if (epilogue && !maybe_eh_return
6970 && riscv_is_eh_return_data_register (regno))
6971 continue;
6973 /* In an interrupt function, save and restore some necessary CSRs on the
6974 stack, so that their values are not changed by the handler. */
6975 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
6976 && cfun->machine->interrupt_handler_p
6977 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
6978 || (TARGET_ZFINX
6979 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6981 /* Always assume FCSR occupies UNITS_PER_WORD to prevent the stack
6982 offset from becoming misaligned later. */
6983 unsigned int fcsr_size = UNITS_PER_WORD;
6984 if (!epilogue)
6986 riscv_save_restore_reg (word_mode, regno, offset, fn);
6987 offset -= fcsr_size;
6988 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
6989 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6990 offset, riscv_save_reg);
6992 else
6994 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6995 offset - fcsr_size, riscv_restore_reg);
6996 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
6997 riscv_save_restore_reg (word_mode, regno, offset, fn);
6998 offset -= fcsr_size;
7000 continue;
7003 if (TARGET_XTHEADMEMPAIR)
7005 /* Get the next reg/offset pair. */
7006 HOST_WIDE_INT offset2 = offset;
7007 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
7009 /* Validate everything before emitting a mempair instruction. */
7010 if (regno2 != INVALID_REGNUM
7011 && !cfun->machine->reg_is_wrapped_separately[regno2]
7012 && !(epilogue && !maybe_eh_return
7013 && riscv_is_eh_return_data_register (regno2)))
7015 bool load_p = (fn == riscv_restore_reg);
7016 rtx operands[4];
7017 th_mempair_prepare_save_restore_operands (operands,
7018 load_p, word_mode,
7019 regno, offset,
7020 regno2, offset2);
7022 /* If the operands fit into a mempair insn, then emit one. */
7023 if (th_mempair_operands_p (operands, load_p, word_mode))
7025 th_mempair_save_restore_regs (operands, load_p, word_mode);
7026 offset = offset2;
7027 regno = regno2;
7028 continue;
7033 riscv_save_restore_reg (word_mode, regno, offset, fn);
7036 /* This loop must iterate over the same space as its companion in
7037 riscv_compute_frame_info. */
7038 first_fp_offset
7039 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
7040 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7041 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7043 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7044 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7045 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
7046 ? CALLEE_SAVED_FREG_NUMBER (regno)
7047 : num_masked_fp;
7048 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
7049 if (handle_reg)
7050 riscv_save_restore_reg (mode, regno, offset, fn);
7051 num_masked_fp++;
7055 /* Call FN for each V register that is saved by the current function. */
7057 static void
7058 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
7059 riscv_save_restore_fn fn, bool prologue)
7061 rtx vlen = NULL_RTX;
7062 if (cfun->machine->frame.vmask != 0)
7064 if (UNITS_PER_V_REG.is_constant ()
7065 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
7066 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
7067 else
7069 vlen = RISCV_PROLOGUE_TEMP (Pmode);
7070 rtx insn
7071 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
7072 RTX_FRAME_RELATED_P (insn) = 1;
7076 /* Select the mode where LMUL is 1 and SEW is largest. */
7077 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
7079 if (prologue)
7081 /* This loop must iterate over the same space as its companion in
7082 riscv_compute_frame_info. */
7083 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7084 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7086 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7087 if (handle_reg)
7089 rtx insn = NULL_RTX;
7090 if (CONST_INT_P (vlen))
7092 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
7093 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7094 stack_pointer_rtx,
7095 GEN_INT (-INTVAL (vlen))));
7097 else
7098 insn = emit_insn (
7099 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7100 gcc_assert (insn != NULL_RTX);
7101 RTX_FRAME_RELATED_P (insn) = 1;
7102 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7103 remaining_size -= UNITS_PER_V_REG;
7107 else
7109 /* This loop must iterate over the same space as its companion in
7110 riscv_compute_frame_info. */
7111 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
7112 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7114 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7115 if (handle_reg)
7117 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7118 rtx insn = emit_insn (
7119 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7120 gcc_assert (insn != NULL_RTX);
7121 RTX_FRAME_RELATED_P (insn) = 1;
7122 remaining_size -= UNITS_PER_V_REG;
7128 /* For stack frames that can't be allocated with a single ADDI instruction,
7129 compute the best value to initially allocate. It must at a minimum
7130 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
7131 try to pick a value that will allow compression of the register saves
7132 without adding extra instructions. */
7134 static HOST_WIDE_INT
7135 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
7137 HOST_WIDE_INT remaining_const_size;
7138 if (!remaining_size.is_constant ())
7139 remaining_const_size
7140 = riscv_stack_align (remaining_size.coeffs[0])
7141 - riscv_stack_align (remaining_size.coeffs[1]);
7142 else
7143 remaining_const_size = remaining_size.to_constant ();
7145 /* The first step must reach the top of the vector register save area
7146 if any vector registers need to be preserved. */
7147 if (frame->vmask != 0)
7148 return (remaining_size - frame->v_sp_offset_top).to_constant ();
7150 if (SMALL_OPERAND (remaining_const_size))
7151 return remaining_const_size;
7153 poly_int64 callee_saved_first_step =
7154 remaining_size - frame->frame_pointer_offset;
7155 gcc_assert(callee_saved_first_step.is_constant ());
7156 HOST_WIDE_INT min_first_step =
7157 riscv_stack_align (callee_saved_first_step.to_constant ());
7158 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
7159 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
7160 gcc_assert (min_first_step <= max_first_step);
7162 /* As an optimization, use the least-significant bits of the total frame
7163 size, so that the second adjustment step is just LUI + ADD. */
7164 if (!SMALL_OPERAND (min_second_step)
7165 && remaining_const_size % IMM_REACH <= max_first_step
7166 && remaining_const_size % IMM_REACH >= min_first_step)
7167 return remaining_const_size % IMM_REACH;
7169 if (TARGET_RVC || TARGET_ZCA)
7171 /* If we need two subtracts, and one is small enough to allow compressed
7172 loads and stores, then put that one first. */
7173 if (IN_RANGE (min_second_step, 0,
7174 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
7175 return MAX (min_second_step, min_first_step);
7177 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
7178 with the minimum first step, so that we can get compressed loads and
7179 stores. */
7180 else if (!SMALL_OPERAND (min_second_step))
7181 return min_first_step;
7184 return max_first_step;
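/* Worked example (illustrative, rv64): for a constant frame of 4112
   bytes whose callee-save area fits in the low 16 bytes,
   max_first_step = IMM_REACH / 2 - 16 = 2032 and min_second_step =
   4112 - 2032 = 2080, which does not fit a signed 12-bit immediate;
   but 4112 % IMM_REACH == 16, so 16 is returned and the remaining
   4096-byte adjustment becomes a single LUI + ADD.  */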
7187 static rtx
7188 riscv_adjust_libcall_cfi_prologue ()
7190 rtx dwarf = NULL_RTX;
7191 rtx adjust_sp_rtx, reg, mem, insn;
7192 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7193 int offset;
7195 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7196 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7198 /* The save order is ra, s0, s1, s2 to s11. */
7199 if (regno == RETURN_ADDR_REGNUM)
7200 offset = saved_size - UNITS_PER_WORD;
7201 else if (regno == S0_REGNUM)
7202 offset = saved_size - UNITS_PER_WORD * 2;
7203 else if (regno == S1_REGNUM)
7204 offset = saved_size - UNITS_PER_WORD * 3;
7205 else
7206 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
7208 reg = gen_rtx_REG (Pmode, regno);
7209 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
7210 stack_pointer_rtx,
7211 offset));
7213 insn = gen_rtx_SET (mem, reg);
7214 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7217 /* Debug info for adjust sp. */
7218 adjust_sp_rtx =
7219 gen_rtx_SET (stack_pointer_rtx,
7220 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
7221 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7222 dwarf);
7223 return dwarf;
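/* Example of the save order above (illustrative, rv32, saved_size == 16,
   saving {ra, s0, s1, s2}): the CFI records ra at sp+12, s0 at sp+8,
   s1 at sp+4 and s2 at sp+0, matching the layout used by the
   __riscv_save_* and __riscv_restore_* routines.  */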
7226 static rtx
7227 riscv_adjust_multi_push_cfi_prologue (int saved_size)
7229 rtx dwarf = NULL_RTX;
7230 rtx adjust_sp_rtx, reg, mem, insn;
7231 unsigned int mask = cfun->machine->frame.mask;
7232 int offset;
7233 int saved_cnt = 0;
7235 if (mask & S10_MASK)
7236 mask |= S11_MASK;
7238 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
7239 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
7241 /* The save order is s11-s0, ra
7242 from high to low addr. */
7243 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
7245 reg = gen_rtx_REG (Pmode, regno);
7246 mem = gen_frame_mem (Pmode,
7247 plus_constant (Pmode, stack_pointer_rtx, offset));
7249 insn = gen_rtx_SET (mem, reg);
7250 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7253 /* Debug info for adjust sp. */
7254 adjust_sp_rtx
7255 = gen_rtx_SET (stack_pointer_rtx,
7256 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
7257 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7258 return dwarf;
7261 static void
7262 riscv_emit_stack_tie (void)
7264 if (Pmode == SImode)
7265 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7266 else
7267 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
7270 /* Zcmp multi push and pop code_for_push_pop function pointer array. */
7271 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7272 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7273 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7274 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7275 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7276 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7277 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7278 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7279 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7280 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7281 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7282 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7283 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7284 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7285 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7286 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7287 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7288 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7289 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7290 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7291 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7292 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7293 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7294 {nullptr, nullptr, nullptr, nullptr},
7295 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7296 code_for_gpr_multi_popret_up_to_s11,
7297 code_for_gpr_multi_popretz_up_to_s11}};
7299 static rtx
7300 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7301 unsigned int regs_num)
7303 gcc_assert (op < ZCMP_OP_NUM);
7304 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
7305 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra. */
7306 rtx stack_adj = GEN_INT (adj_size);
7307 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
7310 static unsigned
7311 get_multi_push_fpr_mask (unsigned max_fprs_push)
7313 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7314 for (unsigned regno = FP_REG_FIRST;
7315 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7316 if (riscv_save_reg_p (regno))
7317 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7318 return mask_fprs_push;
7321 /* Expand the "prologue" pattern. */
7323 void
7324 riscv_expand_prologue (void)
7326 struct riscv_frame_info *frame = &cfun->machine->frame;
7327 poly_int64 remaining_size = frame->total_size;
7328 unsigned mask = frame->mask;
7329 unsigned fmask = frame->fmask;
7330 int spimm, multi_push_additional, stack_adj;
7331 rtx insn, dwarf = NULL_RTX;
7332 unsigned th_int_mask = 0;
7334 if (flag_stack_usage_info)
7335 current_function_static_stack_size = constant_lower_bound (remaining_size);
7337 if (cfun->machine->naked_p)
7338 return;
7340 /* Prefer multi-push to the save-restore libcall. */
7341 if (riscv_use_multi_push (frame))
7343 remaining_size -= frame->multi_push_adj_base;
7344 /* If there are vector registers that need to be saved, the first
7345 adjustment can reach at most the frame->v_sp_offset_top position,
7346 since the vector registers are saved one by one afterwards by
7347 decreasing the SP. */
7348 poly_int64 remaining_size_above_varea
7349 = frame->vmask != 0
7350 ? remaining_size - frame->v_sp_offset_top
7351 : remaining_size;
7353 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7354 spimm = 3;
7355 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7356 spimm = 2;
7357 else if (known_gt (remaining_size_above_varea, 0))
7358 spimm = 1;
7359 else
7360 spimm = 0;
7361 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7362 frame->multi_push_adj_addi = multi_push_additional;
7363 remaining_size -= multi_push_additional;
7365 /* Emit the multi push insn and its dwarf info. */
7366 stack_adj = frame->multi_push_adj_base + multi_push_additional;
7367 insn = emit_insn (riscv_gen_multi_push_pop_insn (
7368 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
7369 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
7370 RTX_FRAME_RELATED_P (insn) = 1;
7371 REG_NOTES (insn) = dwarf;
7373 /* Temporarily fib that we need not save GPRs. */
7374 frame->mask = 0;
7376 /* Push FPRs into the additional space reserved by cm.push. */
7377 if (fmask)
7379 unsigned mask_fprs_push
7380 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
7381 frame->fmask &= mask_fprs_push;
7382 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
7383 false);
7384 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs. */
7387 /* When optimizing for size, call a subroutine to save the registers. */
7388 else if (riscv_use_save_libcall (frame))
7390 rtx dwarf = NULL_RTX;
7391 dwarf = riscv_adjust_libcall_cfi_prologue ();
7393 remaining_size -= frame->save_libcall_adjustment;
7394 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
7395 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
7397 RTX_FRAME_RELATED_P (insn) = 1;
7398 REG_NOTES (insn) = dwarf;
7401 th_int_mask = th_int_get_mask (frame->mask);
7402 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7404 frame->mask &= ~th_int_mask;
7406 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for
7407 interrupts, such as fcsr. */
7408 if ((TARGET_HARD_FLOAT && frame->fmask)
7409 || (TARGET_ZFINX && frame->mask))
7410 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7412 unsigned save_adjustment = th_int_get_save_adjustment ();
7413 frame->gp_sp_offset -= save_adjustment;
7414 remaining_size -= save_adjustment;
7416 insn = emit_insn (gen_th_int_push ());
7418 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
7419 RTX_FRAME_RELATED_P (insn) = 1;
7420 REG_NOTES (insn) = dwarf;
7423 /* Save the GP, FP registers. */
7424 if ((frame->mask | frame->fmask) != 0)
7426 if (known_gt (remaining_size, frame->frame_pointer_offset))
7428 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
7429 remaining_size -= step1;
7430 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7431 GEN_INT (-step1));
7432 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7434 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
7437 /* Undo the above fib. */
7438 frame->mask = mask;
7439 frame->fmask = fmask;
7441 /* Set up the frame pointer, if we're using one. */
7442 if (frame_pointer_needed)
7444 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
7445 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
7446 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7448 riscv_emit_stack_tie ();
7451 /* Save the V registers. */
7452 if (frame->vmask != 0)
7453 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
7455 /* Allocate the rest of the frame. */
7456 if (known_gt (remaining_size, 0))
7458 /* Two-step adjustment:
7459 1. scalable frame; 2. constant frame. */
7460 poly_int64 scalable_frame (0, 0);
7461 if (!remaining_size.is_constant ())
7463 /* First for scalable frame. */
7464 poly_int64 scalable_frame = remaining_size;
7465 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
7466 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
7467 remaining_size -= scalable_frame;
7470 /* Second step for constant frame. */
7471 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
7472 if (constant_frame == 0)
7474 /* We must have allocated stack space for the scalable frame.
7475 Emit a stack tie if we have a frame pointer so that the
7476 allocation is ordered WRT fp setup and subsequent writes
7477 into the frame. */
7478 if (frame_pointer_needed)
7479 riscv_emit_stack_tie ();
7480 return;
7483 if (SMALL_OPERAND (-constant_frame))
7485 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7486 GEN_INT (-constant_frame));
7487 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7489 else
7491 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
7492 emit_insn (gen_add3_insn (stack_pointer_rtx,
7493 stack_pointer_rtx,
7494 RISCV_PROLOGUE_TEMP (Pmode)));
7496 /* Describe the effect of the previous instructions. */
7497 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
7498 insn = gen_rtx_SET (stack_pointer_rtx, insn);
7499 riscv_set_frame_expr (insn);
7502 /* We must have allocated the remainder of the stack frame.
7503 Emit a stack tie if we have a frame pointer so that the
7504 allocation is ordered WRT fp setup and subsequent writes
7505 into the frame. */
7506 if (frame_pointer_needed)
7507 riscv_emit_stack_tie ();
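/* Putting the steps above together (illustrative, rv64, frame pointer
   enabled, a 32-byte constant frame saving ra and s0), the expanded
   prologue corresponds roughly to:

       addi  sp, sp, -32
       sd    ra, 24(sp)
       sd    s0, 16(sp)
       addi  s0, sp, 32

   Larger or scalable frames are split into the multi-step adjustments
   handled above.  */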
7511 static rtx
7512 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
7514 rtx dwarf = NULL_RTX;
7515 rtx adjust_sp_rtx, reg;
7516 unsigned int mask = cfun->machine->frame.mask;
7518 if (mask & S10_MASK)
7519 mask |= S11_MASK;
7521 /* Debug info for adjust sp. */
7522 adjust_sp_rtx
7523 = gen_rtx_SET (stack_pointer_rtx,
7524 plus_constant (Pmode, stack_pointer_rtx, saved_size));
7525 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7527 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7528 if (BITSET_P (mask, regno - GP_REG_FIRST))
7530 reg = gen_rtx_REG (Pmode, regno);
7531 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7534 return dwarf;
7537 static rtx
7538 riscv_adjust_libcall_cfi_epilogue ()
7540 rtx dwarf = NULL_RTX;
7541 rtx adjust_sp_rtx, reg;
7542 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7544 /* Debug info for adjust sp. */
7545 adjust_sp_rtx =
7546 gen_rtx_SET (stack_pointer_rtx,
7547 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
7548 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7549 dwarf);
7551 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7552 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7554 reg = gen_rtx_REG (Pmode, regno);
7555 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7558 return dwarf;
7561 /* Return the insn that clears a0 if the popretz pattern can be matched,
7562 i.e. the sequence: set (reg 10 a0) (const_int 0)
7563 use (reg 10 a0)
7564 NOTE_INSN_EPILOGUE_BEG */
7565 static rtx_insn *
7566 riscv_zcmp_can_use_popretz (void)
7568 rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
7570 /* Sequence stack for NOTE_INSN_EPILOGUE_BEG. */
7571 struct sequence_stack *outer_seq = get_current_sequence ()->next;
7572 if (!outer_seq)
7573 return NULL;
7574 insn = outer_seq->first;
7575 if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
7576 return NULL;
7578 /* Sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG. */
7579 outer_seq = outer_seq->next;
7580 if (outer_seq)
7581 insn = outer_seq->last;
7583 /* Skip notes.  */
7584 while (insn && NOTE_P (insn))
7586 insn = PREV_INSN (insn);
7588 use = insn;
7590 /* Match use (reg 10 a0).  */
7591 if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
7592 || !REG_P (XEXP (PATTERN (use), 0))
7593 || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
7594 return NULL;
7596 /* Match set (reg 10 a0) (const_int 0 [0]).  */
7597 clear = PREV_INSN (use);
7598 if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
7599 && REG_P (SET_DEST (PATTERN (clear)))
7600 && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
7601 && SET_SRC (PATTERN (clear)) == const0_rtx)
7602 return clear;
7604 return NULL;
7607 static void
7608 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
7609 unsigned multipop_size)
7611 rtx insn;
7612 unsigned regs_count = riscv_multi_push_regs_count (mask);
7614 if (!use_multi_pop_normal)
7615 insn = emit_insn (
7616 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
7617 else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
7619 delete_insn (NEXT_INSN (clear_a0_insn));
7620 delete_insn (clear_a0_insn);
7621 insn = emit_jump_insn (
7622 riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
7624 else
7625 insn = emit_jump_insn (
7626 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
7628 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
7629 RTX_FRAME_RELATED_P (insn) = 1;
7630 REG_NOTES (insn) = dwarf;
7633 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
7634 style says which. */
7636 void
7637 riscv_expand_epilogue (int style)
7639 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
7640 deallocate before restoring the registers. STEP2 is the amount we
7641 should deallocate afterwards including the callee saved regs. STEP3
7642 is the amount deallocated by save-restore libcall.
7644 Start off by assuming that no registers need to be restored. */
7645 struct riscv_frame_info *frame = &cfun->machine->frame;
7646 unsigned mask = frame->mask;
7647 unsigned fmask = frame->fmask;
7648 unsigned mask_fprs_push = 0;
7649 poly_int64 step2 = 0;
7650 bool use_multi_pop_normal
7651 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
7652 bool use_multi_pop_sibcall
7653 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
7654 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
7656 bool use_restore_libcall
7657 = !use_multi_pop
7658 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
7659 unsigned libcall_size = use_restore_libcall && !use_multi_pop
7660 ? frame->save_libcall_adjustment
7661 : 0;
7662 unsigned multipop_size
7663 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
7664 : 0;
7665 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7666 unsigned th_int_mask = 0;
7667 rtx insn;
7669 /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
7670 bool need_barrier_p = known_ne (get_frame_size ()
7671 + cfun->machine->frame.arg_pointer_offset, 0);
7673 if (cfun->machine->naked_p)
7675 gcc_assert (style == NORMAL_RETURN);
7677 emit_jump_insn (gen_return ());
7679 return;
7682 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
7684 emit_jump_insn (gen_return ());
7685 return;
7688 /* Reset the epilogue cfa info before starting to emit the epilogue. */
7689 epilogue_cfa_sp_offset = 0;
7691 /* Move past any dynamic stack allocations. */
7692 if (cfun->calls_alloca)
7694 /* Emit a barrier to prevent loads from a deallocated stack. */
7695 riscv_emit_stack_tie ();
7696 need_barrier_p = false;
7698 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
7699 rtx adjust = NULL_RTX;
7701 if (!adjust_offset.is_constant ())
7703 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
7704 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
7705 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
7706 gen_int_mode (adjust_offset, Pmode));
7707 adjust = tmp1;
7709 else
7711 if (!SMALL_OPERAND (adjust_offset.to_constant ()))
7713 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
7714 GEN_INT (adjust_offset.to_constant ()));
7715 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7717 else
7718 adjust = GEN_INT (adjust_offset.to_constant ());
7721 insn = emit_insn (
7722 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
7723 adjust));
7725 rtx dwarf = NULL_RTX;
7726 rtx cfa_adjust_value = gen_rtx_PLUS (
7727 Pmode, hard_frame_pointer_rtx,
7728 gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
7729 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
7730 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
7731 RTX_FRAME_RELATED_P (insn) = 1;
7733 REG_NOTES (insn) = dwarf;
7736 if (use_restore_libcall || use_multi_pop)
7737 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7739 /* If we need to restore registers, deallocate as much stack as
7740 possible in the second step without going out of range. */
7741 if (use_multi_pop)
7743 if (frame->fmask
7744 && known_gt (frame->total_size - multipop_size,
7745 frame->frame_pointer_offset))
7746 step2
7747 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
7749 else if ((frame->mask | frame->fmask) != 0)
7750 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
7752 if (use_restore_libcall || use_multi_pop)
7753 frame->mask = mask; /* Undo the above fib. */
7755 poly_int64 step1;
7756 /* STEP1 must be set to the bottom of the vector register save area if
7757 any vector registers need to be preserved.  */
7758 if (frame->vmask != 0)
7760 step1 = frame->v_sp_offset_bottom;
7761 step2 = frame->total_size - step1 - libcall_size - multipop_size;
7763 else
7764 step1 = frame->total_size - step2 - libcall_size - multipop_size;
7766 /* Set TARGET to BASE + STEP1. */
7767 if (known_gt (step1, 0))
7769 /* Emit a barrier to prevent loads from a deallocated stack. */
7770 riscv_emit_stack_tie ();
7771 need_barrier_p = false;
7773 /* Restore the scalable frame allocated in the prologue.  */
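/* As in the prologue, split the poly_int64 [c0, c1] into the
   VLEN-dependent part [c1, c1], restored via a vlenb-scaled
   adjustment, and a constant remainder handled below.  */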
7774 if (!step1.is_constant ())
7776 poly_int64 scalable_frame = step1;
7777 scalable_frame.coeffs[0] = step1.coeffs[1];
7778 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
7779 true);
7780 step1 -= scalable_frame;
7783 /* Get an rtx for STEP1 that we can add to BASE.
7784 Skip if adjust equal to zero. */
7785 if (step1.to_constant () != 0)
7787 rtx adjust = GEN_INT (step1.to_constant ());
7788 if (!SMALL_OPERAND (step1.to_constant ()))
7790 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
7791 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7794 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7795 stack_pointer_rtx,
7796 adjust));
7797 rtx dwarf = NULL_RTX;
7798 rtx cfa_adjust_rtx
7799 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7800 gen_int_mode (step2 + libcall_size + multipop_size,
7801 Pmode));
7803 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7804 RTX_FRAME_RELATED_P (insn) = 1;
7806 REG_NOTES (insn) = dwarf;
7809 else if (frame_pointer_needed)
7811 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
7812 old value of FP. */
7813 epilogue_cfa_sp_offset = step2;
7816 if (use_multi_pop)
7818 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7819 if (fmask)
7821 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
7822 / UNITS_PER_WORD);
7823 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push */
7826 else if (use_restore_libcall)
7827 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7829 th_int_mask = th_int_get_mask (frame->mask);
7830 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7832 frame->mask &= ~th_int_mask;
7834 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for
7835 interrupts, such as fcsr. */
7836 if ((TARGET_HARD_FLOAT && frame->fmask)
7837 || (TARGET_ZFINX && frame->mask))
7838 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
7841 /* Restore the registers. */
7842 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
7843 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
7844 - multipop_size,
7845 riscv_restore_reg, true, style == EXCEPTION_RETURN);
7847 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7849 frame->mask = mask; /* Undo the above fib. */
7850 unsigned save_adjustment = th_int_get_save_adjustment ();
7851 gcc_assert (step2.to_constant () >= save_adjustment);
7852 step2 -= save_adjustment;
7855 if (use_restore_libcall)
7856 frame->mask = mask; /* Undo the above fib. */
7858 if (need_barrier_p)
7859 riscv_emit_stack_tie ();
7861 /* Deallocate the final bit of the frame. */
7862 if (step2.to_constant () > 0)
7864 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7865 GEN_INT (step2.to_constant ())));
7867 rtx dwarf = NULL_RTX;
7868 rtx cfa_adjust_rtx
7869 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7870 GEN_INT (libcall_size + multipop_size));
7871 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7872 RTX_FRAME_RELATED_P (insn) = 1;
7874 REG_NOTES (insn) = dwarf;
7877 if (use_multi_pop)
7880 /* Restore the FPRs pushed by cm.push.  */
7880 frame->fmask = fmask & mask_fprs_push;
7881 if (frame->fmask)
7882 riscv_for_each_saved_reg (frame->total_size - libcall_size
7883 - multipop_size,
7884 riscv_restore_reg, true,
7885 style == EXCEPTION_RETURN);
7886 /* Undo the above fib. */
7887 frame->mask = mask;
7888 frame->fmask = fmask;
7889 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
7890 multipop_size);
7891 if (use_multi_pop_normal)
7892 return;
7894 else if (use_restore_libcall)
7896 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
7897 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
7898 RTX_FRAME_RELATED_P (insn) = 1;
7899 REG_NOTES (insn) = dwarf;
7901 emit_jump_insn (gen_gpr_restore_return (ra));
7902 return;
7905 /* Add in the __builtin_eh_return stack adjustment. */
7906 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
7907 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7908 EH_RETURN_STACKADJ_RTX));
7910 /* Return from interrupt. */
7911 if (cfun->machine->interrupt_handler_p)
7913 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
7915 gcc_assert (mode != UNKNOWN_MODE);
7917 if (th_int_mask && TH_INT_INTERRUPT (cfun))
7918 emit_jump_insn (gen_th_int_pop ());
7919 else if (mode == MACHINE_MODE)
7920 emit_jump_insn (gen_riscv_mret ());
7921 else if (mode == SUPERVISOR_MODE)
7922 emit_jump_insn (gen_riscv_sret ());
7923 else
7924 emit_jump_insn (gen_riscv_uret ());
7926 else if (style != SIBCALL_RETURN)
7927 emit_jump_insn (gen_simple_return_internal (ra));
7930 /* Implement EPILOGUE_USES. */
7932 bool
7933 riscv_epilogue_uses (unsigned int regno)
7935 if (regno == RETURN_ADDR_REGNUM)
7936 return true;
7938 if (epilogue_completed && cfun->machine->interrupt_handler_p)
7940 /* An interrupt function restores temp regs, so we must indicate that
7941 they are live at function end. */
7942 if (df_regs_ever_live_p (regno)
7943 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7944 return true;
7947 return false;
7950 static bool
7951 riscv_avoid_shrink_wrapping_separate ()
7953 if (riscv_use_save_libcall (&cfun->machine->frame)
7954 || cfun->machine->interrupt_handler_p
7955 || !cfun->machine->frame.gp_sp_offset.is_constant ())
7956 return true;
7958 return false;
7961 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
7963 static sbitmap
7964 riscv_get_separate_components (void)
7966 HOST_WIDE_INT offset;
7967 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7968 bitmap_clear (components);
7970 if (riscv_avoid_shrink_wrapping_separate ())
7971 return components;
7973 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
7974 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7975 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7977 /* We can only wrap registers that have small operand offsets.
7978 For large offsets a pseudo register might be needed which
7979 cannot be created during the shrink wrapping pass. */
7980 if (SMALL_OPERAND (offset))
7981 bitmap_set_bit (components, regno);
7983 offset -= UNITS_PER_WORD;
7986 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
7987 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7988 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7990 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7992 /* We can only wrap registers that have small operand offsets.
7993 For large offsets a pseudo register might be needed which
7994 cannot be created during the shrink wrapping pass. */
7995 if (SMALL_OPERAND (offset))
7996 bitmap_set_bit (components, regno);
7998 offset -= GET_MODE_SIZE (mode).to_constant ();
8001 /* Don't mess with the hard frame pointer. */
8002 if (frame_pointer_needed)
8003 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
8005 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
8007 return components;
8010 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
8012 static sbitmap
8013 riscv_components_for_bb (basic_block bb)
8015 bitmap in = DF_LIVE_IN (bb);
8016 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
8017 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
8019 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8020 bitmap_clear (components);
8022 function_abi_aggregator callee_abis;
8023 rtx_insn *insn;
8024 FOR_BB_INSNS (bb, insn)
8025 if (CALL_P (insn))
8026 callee_abis.note_callee_abi (insn_callee_abi (insn));
8027 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
8029 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
8030 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8031 if (!fixed_regs[regno]
8032 && !crtl->abi->clobbers_full_reg_p (regno)
8033 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8034 || bitmap_bit_p (in, regno)
8035 || bitmap_bit_p (gen, regno)
8036 || bitmap_bit_p (kill, regno)))
8037 bitmap_set_bit (components, regno);
8039 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8040 if (!fixed_regs[regno]
8041 && !crtl->abi->clobbers_full_reg_p (regno)
8042 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8043 || bitmap_bit_p (in, regno)
8044 || bitmap_bit_p (gen, regno)
8045 || bitmap_bit_p (kill, regno)))
8046 bitmap_set_bit (components, regno);
8048 return components;
8051 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
8053 static void
8054 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
8056 /* Nothing to do for riscv. */
8059 static void
8060 riscv_process_components (sbitmap components, bool prologue_p)
8062 HOST_WIDE_INT offset;
8063 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
8065 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8066 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8067 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8069 if (bitmap_bit_p (components, regno))
8070 riscv_save_restore_reg (word_mode, regno, offset, fn);
8072 offset -= UNITS_PER_WORD;
8075 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8076 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8077 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8079 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8081 if (bitmap_bit_p (components, regno))
8082 riscv_save_restore_reg (mode, regno, offset, fn);
8084 offset -= GET_MODE_SIZE (mode).to_constant ();
8088 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
8090 static void
8091 riscv_emit_prologue_components (sbitmap components)
8093 riscv_process_components (components, true);
8096 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
8098 static void
8099 riscv_emit_epilogue_components (sbitmap components)
8101 riscv_process_components (components, false);
8104 static void
8105 riscv_set_handled_components (sbitmap components)
8107 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8108 if (bitmap_bit_p (components, regno))
8109 cfun->machine->reg_is_wrapped_separately[regno] = true;
8111 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8112 if (bitmap_bit_p (components, regno))
8113 cfun->machine->reg_is_wrapped_separately[regno] = true;
8116 /* Return nonzero if this function is known to have a null epilogue.
8117 This allows the optimizer to omit jumps to jumps if no stack
8118 was created. */
8120 bool
8121 riscv_can_use_return_insn (void)
8123 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
8124 && ! cfun->machine->interrupt_handler_p);
8127 /* Given that there exists at least one variable that is set (produced)
8128 by OUT_INSN and read (consumed) by IN_INSN, return true iff
8129 IN_INSN represents one or more memory store operations and none of
8130 the variables set by OUT_INSN is used by IN_INSN as the address of a
8131 store operation. If either IN_INSN or OUT_INSN does not represent
8132 a "single" RTL SET expression (as loosely defined by the
8133 implementation of the single_set function) or a PARALLEL with only
8134 SETs, CLOBBERs, and USEs inside, this function returns false.
8136 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
8137 conditions that result in assertion failures in the generic
8138 store_data_bypass_p function and returns FALSE in such cases.
8140 This is required to make -msave-restore work with the sifive-7
8141 pipeline description. */
8143 bool
8144 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
8146 rtx out_set, in_set;
8147 rtx out_pat, in_pat;
8148 rtx out_exp, in_exp;
8149 int i, j;
8151 in_set = single_set (in_insn);
8152 if (in_set)
8154 if (MEM_P (SET_DEST (in_set)))
8156 out_set = single_set (out_insn);
8157 if (!out_set)
8159 out_pat = PATTERN (out_insn);
8160 if (GET_CODE (out_pat) == PARALLEL)
8162 for (i = 0; i < XVECLEN (out_pat, 0); i++)
8164 out_exp = XVECEXP (out_pat, 0, i);
8165 if ((GET_CODE (out_exp) == CLOBBER)
8166 || (GET_CODE (out_exp) == USE))
8167 continue;
8168 else if (GET_CODE (out_exp) != SET)
8169 return false;
8175 else
8177 in_pat = PATTERN (in_insn);
8178 if (GET_CODE (in_pat) != PARALLEL)
8179 return false;
8181 for (i = 0; i < XVECLEN (in_pat, 0); i++)
8183 in_exp = XVECEXP (in_pat, 0, i);
8184 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
8185 continue;
8186 else if (GET_CODE (in_exp) != SET)
8187 return false;
8189 if (MEM_P (SET_DEST (in_exp)))
8191 out_set = single_set (out_insn);
8192 if (!out_set)
8194 out_pat = PATTERN (out_insn);
8195 if (GET_CODE (out_pat) != PARALLEL)
8196 return false;
8197 for (j = 0; j < XVECLEN (out_pat, 0); j++)
8199 out_exp = XVECEXP (out_pat, 0, j);
8200 if ((GET_CODE (out_exp) == CLOBBER)
8201 || (GET_CODE (out_exp) == USE))
8202 continue;
8203 else if (GET_CODE (out_exp) != SET)
8204 return false;
8211 return store_data_bypass_p (out_insn, in_insn);
8214 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
8216 When floating-point registers are wider than integer ones, moves between
8217 them must go through memory. */
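/* E.g. with -march=rv32imafdc a DFmode value fits in a single 64-bit
   FPR but needs a pair of 32-bit GPRs; unless XTheadFmv or Zfa supply
   half-word move instructions (th.fmv.x.hw, fmvh.x.d and friends),
   the upper half can only travel through memory.  */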
8219 static bool
8220 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
8221 reg_class_t class2)
8223 return (!riscv_v_ext_mode_p (mode)
8224 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
8225 && (class1 == FP_REGS) != (class2 == FP_REGS)
8226 && !TARGET_XTHEADFMV
8227 && !TARGET_ZFA);
8230 /* Implement TARGET_REGISTER_MOVE_COST. */
8232 static int
8233 riscv_register_move_cost (machine_mode mode,
8234 reg_class_t from, reg_class_t to)
8236 if ((from == FP_REGS && to == GR_REGS)
8237 || (from == GR_REGS && to == FP_REGS))
8238 return tune_param->fmv_cost;
8240 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
8243 /* Implement TARGET_HARD_REGNO_NREGS. */
8245 static unsigned int
8246 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
8248 if (riscv_v_ext_vector_mode_p (mode))
8250 /* Handle fractional LMUL: the value occupies only part of a vector
8251 register but still needs one whole register to hold it.  */
8252 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
8253 return 1;
8255 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
8258 /* For tuple modes, the number of registers is NF * LMUL.  */
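/* E.g. a segment-load tuple with NF == 3 whose parts are LMUL == 2
   vectors occupies 3 * 2 == 6 vector registers.  */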
8259 if (riscv_v_ext_tuple_mode_p (mode))
8261 unsigned int nf = riscv_vector::get_nf (mode);
8262 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
8263 poly_int64 size = GET_MODE_SIZE (subpart_mode);
8264 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
8265 if (maybe_lt (size, UNITS_PER_V_REG))
8266 return nf;
8267 else
8269 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
8270 return nf * lmul;
8274 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
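/* E.g. with TARGET_MIN_VLEN == 128, a 256-bit VLS mode such as V32QI
   occupies 256 / 128 == 2 vector registers, while anything narrower
   than 128 bits still takes a whole register.  */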
8275 if (riscv_v_ext_vls_mode_p (mode))
8277 int size = GET_MODE_BITSIZE (mode).to_constant ();
8278 if (size < TARGET_MIN_VLEN)
8279 return 1;
8280 else
8281 return size / TARGET_MIN_VLEN;
8284 /* Modes for VL or VTYPE are just markers, not holding values,
8285 so they always consume exactly one register.  */
8286 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8287 || FRM_REG_P (regno))
8288 return 1;
8290 /* Assume every valid non-vector mode fits in one vector register. */
8291 if (V_REG_P (regno))
8292 return 1;
8294 if (FP_REG_P (regno))
8295 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
8297 /* All other registers are word-sized. */
8298 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8301 /* Implement TARGET_HARD_REGNO_MODE_OK. */
8303 static bool
8304 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8306 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
8308 if (GP_REG_P (regno))
8310 if (riscv_v_ext_mode_p (mode))
8311 return false;
8313 if (!GP_REG_P (regno + nregs - 1))
8314 return false;
8316 else if (FP_REG_P (regno))
8318 if (riscv_v_ext_mode_p (mode))
8319 return false;
8321 if (!FP_REG_P (regno + nregs - 1))
8322 return false;
8324 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8325 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8326 return false;
8328 /* Only use callee-saved registers if a potential callee is guaranteed
8329 to spill the requisite width. */
8330 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8331 || (!call_used_or_fixed_reg_p (regno)
8332 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8333 return false;
8335 else if (V_REG_P (regno))
8337 if (!riscv_v_ext_mode_p (mode))
8338 return false;
8340 if (!V_REG_P (regno + nregs - 1))
8341 return false;
8343 int regno_alignment = riscv_get_v_regno_alignment (mode);
8344 if (regno_alignment != 1)
8345 return ((regno % regno_alignment) == 0);
8347 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8348 || FRM_REG_P (regno))
8349 return true;
8350 else
8351 return false;
8353 /* Require same callee-savedness for all registers. */
8354 for (unsigned i = 1; i < nregs; i++)
8355 if (call_used_or_fixed_reg_p (regno)
8356 != call_used_or_fixed_reg_p (regno + i))
8357 return false;
8359 /* In RV32 Zdinx a double occupies an even/odd register pair, so only allow even-numbered registers.  */
8360 if (!TARGET_64BIT && TARGET_ZDINX)
8361 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8362 && GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
8363 return !(regno & 1);
8366 return true;
8369 /* Implement TARGET_MODES_TIEABLE_P.
8371 Don't allow floating-point modes to be tied, since type punning of
8372 single-precision and double-precision is implementation defined. */
8374 static bool
8375 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
8377 /* We don't allow modes of different register classes to be tied,
8378 since that would cause an ICE in register allocation (RA).
8379 E.g. V2SI and DI are not tieable.  */
8380 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
8381 return false;
8382 return (mode1 == mode2
8383 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
8384 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
8387 /* Implement CLASS_MAX_NREGS. */
8389 static unsigned char
8390 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
8392 if (reg_class_subset_p (rclass, FP_REGS))
8393 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
8395 if (reg_class_subset_p (rclass, GR_REGS))
8396 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
8398 if (reg_class_subset_p (rclass, V_REGS))
8399 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
8401 return 0;
8404 /* Implement TARGET_MEMORY_MOVE_COST. */
8406 static int
8407 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
8409 return (tune_param->memory_cost
8410 + memory_move_secondary_cost (mode, rclass, in));
8413 /* Return the number of instructions that can be issued per cycle. */
8415 static int
8416 riscv_issue_rate (void)
8418 return tune_param->issue_rate;
8421 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
8422 static int
8423 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
8425 if (DEBUG_INSN_P (insn))
8426 return more;
8428 rtx_code code = GET_CODE (PATTERN (insn));
8429 if (code == USE || code == CLOBBER)
8430 return more;
8432 /* GHOST insns are used for blockage and similar cases which
8433 effectively end a cycle. */
8434 if (get_attr_type (insn) == TYPE_GHOST)
8435 return 0;
8437 /* If we ever encounter an insn with an unknown type, trip
8438 an assert so we can find and fix this problem. */
8439 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
8441 /* If we ever encounter an insn without an insn reservation, trip
8442 an assert so we can find and fix this problem. */
8443 gcc_assert (insn_has_dfa_reservation_p (insn));
8445 return more - 1;
8448 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
8449 instruction fusion of some sort. */
8451 static bool
8452 riscv_macro_fusion_p (void)
8454 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
8457 /* Return true iff the instruction fusion described by OP is enabled. */
8459 static bool
8460 riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
8462 return tune_param->fusible_ops & op;
8465 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
8466 should be kept together during scheduling. */
8468 static bool
8469 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
8471 rtx prev_set = single_set (prev);
8472 rtx curr_set = single_set (curr);
8473 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
8474 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
8476 if (!riscv_macro_fusion_p ())
8477 return false;
8479 if (simple_sets_p
8480 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
8481 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
8483 /* We are trying to match the following:
8484 prev (slli) == (set (reg:DI rD)
8485 (ashift:DI (reg:DI rS) (const_int 32)))
8486 curr (srli) == (set (reg:DI rD)
8487 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
8488 with <shift> being either 32 for FUSE_ZEXTW, or
8489 less than 32 for FUSE_ZEXTWS.  */
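/* I.e. the canonical zero-extend-word idiom:
     slli rD, rS, 32
     srli rD, rD, 32  */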
8491 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8492 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8493 && REG_P (SET_DEST (prev_set))
8494 && REG_P (SET_DEST (curr_set))
8495 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8496 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8497 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8498 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8499 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
8500 && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
8501 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
8502 || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
8503 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
8504 return true;
8507 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
8509 /* We are trying to match the following:
8510 prev (slli) == (set (reg:DI rD)
8511 (ashift:DI (reg:DI rS) (const_int 48)))
8512 curr (srli) == (set (reg:DI rD)
8513 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
8515 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8516 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8517 && REG_P (SET_DEST (prev_set))
8518 && REG_P (SET_DEST (curr_set))
8519 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8520 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8521 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8522 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8523 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
8524 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
8525 return true;
8528 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
8530 /* We are trying to match the following:
8531 prev (add) == (set (reg:DI rD)
8532 (plus:DI (reg:DI rS1) (reg:DI rS2))
8533 curr (ld) == (set (reg:DI rD)
8534 (mem:DI (reg:DI rD))) */
8536 if (MEM_P (SET_SRC (curr_set))
8537 && REG_P (XEXP (SET_SRC (curr_set), 0))
8538 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8539 && GET_CODE (SET_SRC (prev_set)) == PLUS
8540 && REG_P (XEXP (SET_SRC (prev_set), 0))
8541 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8542 return true;
8544 /* We are trying to match the following:
8545 prev (add) == (set (reg:DI rD)
8546 (plus:DI (reg:DI rS1) (reg:DI rS2)))
8547 curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
8549 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8550 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
8551 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8552 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
8553 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
8554 && GET_CODE (SET_SRC (prev_set)) == PLUS
8555 && REG_P (XEXP (SET_SRC (prev_set), 0))
8556 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8557 return true;
8560 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
8562 /* We are trying to match the following:
8563 prev (add) == (set (reg:DI rS)
8564 (plus:DI (reg:DI rS) (const_int))
8565 curr (ld) == (set (reg:DI rD)
8566 (mem:DI (reg:DI rS))) */
8568 if (MEM_P (SET_SRC (curr_set))
8569 && REG_P (XEXP (SET_SRC (curr_set), 0))
8570 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8571 && GET_CODE (SET_SRC (prev_set)) == PLUS
8572 && REG_P (XEXP (SET_SRC (prev_set), 0))
8573 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
8574 return true;
8577 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
8579 /* We are trying to match the following:
8580 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8581 curr (addi) == (set (reg:DI rD)
8582 (plus:DI (reg:DI rD) (const_int IMM12))) */
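/* E.g. materializing the constant 0x12345678:
     lui  rD, 0x12345
     addi rD, rD, 0x678  */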
8584 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
8585 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8586 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8587 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
8588 && (GET_CODE (SET_SRC (prev_set)) == HIGH
8589 || (CONST_INT_P (SET_SRC (prev_set))
8590 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
8591 return true;
8594 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
8596 /* We are trying to match the following:
8597 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8598 curr (addi) == (set (reg:DI rD)
8599 (plus:DI (reg:DI rD) (const_int IMM12)))
8601 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8602 curr (addi) == (set (reg:DI rD)
8603 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
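/* E.g. a PC-relative address computation:
     label: auipc rD, %pcrel_hi(sym)
            addi  rD, rD, %pcrel_lo(label)  */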
8605 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8606 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8607 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
8608 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8609 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
8611 return true;
8614 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
8616 /* We are trying to match the following:
8617 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8618 curr (ld) == (set (reg:DI rD)
8619 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
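/* E.g. an absolute load:
     lui rD, %hi(sym)
     ld  rD, %lo(sym)(rD)  */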
8621 if (CONST_INT_P (SET_SRC (prev_set))
8622 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
8623 && MEM_P (SET_SRC (curr_set))
8624 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8625 return true;
8627 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8628 && MEM_P (SET_SRC (curr_set))
8629 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
8630 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
8631 return true;
8633 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8634 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8635 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
8636 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8637 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
8638 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
8639 return true;
8642 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
8644 /* We are trying to match the following:
8645 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8646 curr (ld) == (set (reg:DI rD)
8647 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
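/* E.g. a PC-relative load:
     label: auipc rD, %pcrel_hi(sym)
            ld    rD, %pcrel_lo(label)(rD)  */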
8649 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
8650 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8651 && MEM_P (SET_SRC (curr_set))
8652 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8653 return true;
8656 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
8658 /* We are trying to match the following:
8659 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8660 (reg rS1))
8661 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8662 (reg rS2)) */
8664 if (MEM_P (SET_DEST (prev_set))
8665 && MEM_P (SET_DEST (curr_set))
8666 /* We can probably relax this condition. The documentation is a bit
8667 unclear about sub-word cases. So we just model DImode for now. */
8668 && GET_MODE (SET_DEST (curr_set)) == DImode
8669 && GET_MODE (SET_DEST (prev_set)) == DImode)
8671 rtx base_prev, base_curr, offset_prev, offset_curr;
8673 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
8674 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
8676 /* The two stores must be contained within opposite halves of the same
8677 16 byte aligned block of memory. We know that the stack pointer and
8678 the frame pointer have suitable alignment. So we just need to check
8679 the offsets of the two stores for suitable alignment.
8681 Originally the thought was to check MEM_ALIGN, but that was reporting
8682 incorrect alignments, even for SP/FP accesses, so we gave up on that
8683 approach. */
8684 if (base_prev != NULL_RTX
8685 && base_curr != NULL_RTX
8686 && REG_P (base_prev)
8687 && REG_P (base_curr)
8688 && REGNO (base_prev) == REGNO (base_curr)
8689 && (REGNO (base_prev) == STACK_POINTER_REGNUM
8690 || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
8691 && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
8692 && (INTVAL (offset_prev) % 16) == 0)
8693 || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
8694 && (INTVAL (offset_curr) % 16) == 0)))
8695 return true;
8699 return false;
8702 /* Adjust the cost/latency of instructions for scheduling.
8703 For now this is just used to change the latency of vector instructions
8704 according to their LMUL. We assume that an insn with LMUL == 8 requires
8705 eight times more execution cycles than the same insn with LMUL == 1.
8706 As this may cause very high latencies which lead to scheduling artifacts
8707 we currently only perform the adjustment when -madjust-lmul-cost is given.  */
8709 static int
8710 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
8711 unsigned int)
8713 /* Only do adjustments for the generic out-of-order scheduling model. */
8714 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
8715 return cost;
8717 if (recog_memoized (insn) < 0)
8718 return cost;
8720 enum attr_type type = get_attr_type (insn);
8722 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
8724 /* TODO: For ordered reductions scale the base cost relative to the
8725 number of units. */
8729 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
8730 if (!TARGET_ADJUST_LMUL_COST)
8731 return cost;
8733 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
8734 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
8735 return cost;
8737 enum riscv_vector::vlmul_type lmul =
8738 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
8740 double factor = 1;
8741 switch (lmul)
8743 case riscv_vector::LMUL_2:
8744 factor = 2;
8745 break;
8746 case riscv_vector::LMUL_4:
8747 factor = 4;
8748 break;
8749 case riscv_vector::LMUL_8:
8750 factor = 8;
8751 break;
8752 case riscv_vector::LMUL_F2:
8753 factor = 0.5;
8754 break;
8755 case riscv_vector::LMUL_F4:
8756 factor = 0.25;
8757 break;
8758 case riscv_vector::LMUL_F8:
8759 factor = 0.125;
8760 break;
8761 default:
8762 factor = 1;
8765 /* If the latency was nonzero, keep it that way. */
8766 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
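/* E.g. an LMUL == 4 insn with a base latency of 2 now reports a
   latency of 8, while an LMUL == 1/2 insn with base latency 1 still
   reports 1 rather than being rounded down to 0.  */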
8768 return new_cost;
8771 /* Auxiliary function to emit RISC-V ELF attribute. */
8772 static void
8773 riscv_emit_attribute ()
8775 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
8776 riscv_arch_str ().c_str ());
8778 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
8779 TARGET_STRICT_ALIGN ? 0 : 1);
8781 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
8782 riscv_stack_boundary / 8);
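/* E.g. for -march=rv64gc with 16-byte stack alignment this emits
   something like:
     .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_..."
     .attribute unaligned_access, 0
     .attribute stack_align, 16  */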
8785 /* Output .variant_cc for a function symbol that follows the vector
8786 calling convention.  */
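/* E.g. a function whose ABI is RISCV_CC_V is announced to the linker
   as:
     .variant_cc	foo
   marking it as not following the standard calling convention.  */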
8788 static void
8789 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
8791 if (TREE_CODE (decl) == FUNCTION_DECL)
8793 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
8794 if (cc == RISCV_CC_V)
8796 fprintf (stream, "\t.variant_cc\t");
8797 assemble_name (stream, name);
8798 fprintf (stream, "\n");
8803 /* Implement ASM_DECLARE_FUNCTION_NAME. */
8805 void
8806 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
8808 riscv_asm_output_variant_cc (stream, fndecl, name);
8809 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
8810 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
8811 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8813 fprintf (stream, "\t.option push\n");
8815 std::string *target_name = riscv_func_target_get (fndecl);
8816 std::string isa = target_name != NULL
8817 ? *target_name
8818 : riscv_cmdline_subset_list ()->to_string (true);
8819 fprintf (stream, "\t.option arch, %s\n", isa.c_str ());
8820 riscv_func_target_remove_and_destory (fndecl);
8822 struct cl_target_option *local_cl_target =
8823 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
8824 struct cl_target_option *global_cl_target =
8825 TREE_TARGET_OPTION (target_option_default_node);
8826 const char *local_tune_str = get_tune_str (local_cl_target);
8827 const char *global_tune_str = get_tune_str (global_cl_target);
8828 if (strcmp (local_tune_str, global_tune_str) != 0)
8829 fprintf (stream, "\t# tune = %s\n", local_tune_str);
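/* E.g. a function carrying __attribute__ ((target ("arch=+v"))) in an
   rv64gc compilation unit is bracketed by something like:
     .option push
     .option arch, rv64i2p1_..._v1p0_...
     ...
     .option pop
   (the pop is emitted by riscv_declare_function_size below), so the
   assembler accepts vector instructions for just this function.  */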
8833 void
8834 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
8836 if (!flag_inhibit_size_directive)
8837 ASM_OUTPUT_MEASURED_SIZE (stream, name);
8839 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8841 fprintf (stream, "\t.option pop\n");
8845 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
8847 void
8848 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
8850 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8851 const char *value = IDENTIFIER_POINTER (target);
8852 riscv_asm_output_variant_cc (stream, decl, name);
8853 ASM_OUTPUT_DEF (stream, name, value);
8856 /* Implement ASM_OUTPUT_EXTERNAL. */
8858 void
8859 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
8861 default_elf_asm_output_external (stream, decl, name);
8862 riscv_asm_output_variant_cc (stream, decl, name);
8865 /* Implement TARGET_ASM_FILE_START. */
8867 static void
8868 riscv_file_start (void)
8870 default_file_start ();
8872 /* Instruct GAS to generate position-[in]dependent code. */
8873 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
8875 /* If the user specifies "-mno-relax" on the command line then disable linker
8876 relaxation in the assembler. */
8877 if (! riscv_mrelax)
8878 fprintf (asm_out_file, "\t.option norelax\n");
8880 /* If the user specifies "-mcsr-check" on the command line then enable csr
8881 check in the assembler. */
8882 if (riscv_mcsr_check)
8883 fprintf (asm_out_file, "\t.option csr-check\n");
8885 if (riscv_emit_attribute_p)
8886 riscv_emit_attribute ();
8889 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
8890 in order to avoid duplicating too much logic from elsewhere. */
8892 static void
8893 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8894 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8895 tree function)
8897 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8898 rtx this_rtx, temp1, temp2, fnaddr;
8899 rtx_insn *insn;
8901 riscv_in_thunk_func = true;
8903 /* Pretend to be a post-reload pass while generating rtl. */
8904 reload_completed = 1;
8906 /* Mark the end of the (empty) prologue. */
8907 emit_note (NOTE_INSN_PROLOGUE_END);
8909 /* Determine if we can use a sibcall to call FUNCTION directly. */
8910 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
8912 /* We need two temporary registers in some cases. */
8913 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
8914 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
8916 /* Find out which register contains the "this" pointer. */
8917 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8918 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
8919 else
8920 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
8922 /* Add DELTA to THIS_RTX. */
8923 if (delta != 0)
8925 rtx offset = GEN_INT (delta);
8926 if (!SMALL_OPERAND (delta))
8928 riscv_emit_move (temp1, offset);
8929 offset = temp1;
8931 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
8934 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
8935 if (vcall_offset != 0)
8937 rtx addr;
8939 /* Set TEMP1 to *THIS_RTX. */
8940 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
8942 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
8943 addr = riscv_add_offset (temp2, temp1, vcall_offset);
8945 /* Load the offset and add it to THIS_RTX. */
8946 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
8947 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
8950 /* Jump to the target function. */
8951 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
8952 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
8953 SIBLING_CALL_P (insn) = 1;
8955 /* Run just enough of rest_of_compilation. This sequence was
8956 "borrowed" from alpha.cc. */
8957 insn = get_insns ();
8958 split_all_insns_noflow ();
8959 shorten_branches (insn);
8960 assemble_start_function (thunk_fndecl, fnname);
8961 final_start_function (insn, file, 1);
8962 final (insn, file, 1);
8963 final_end_function ();
8964 assemble_end_function (thunk_fndecl, fnname);
8966 /* Clean up the vars set above. Note that final_end_function resets
8967 the global pointer for us. */
8968 reload_completed = 0;
8969 riscv_in_thunk_func = false;
8972 /* Allocate a chunk of memory for per-function machine-dependent data. */
8974 static struct machine_function *
8975 riscv_init_machine_status (void)
8977 return ggc_cleared_alloc<machine_function> ();
8980 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
8981 TODO: So far we only support the length-agnostic value. */
8982 static poly_uint16
8983 riscv_convert_vector_chunks (struct gcc_options *opts)
8985 int chunk_num;
8986 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
8987 if (min_vlen > 32)
8989 /* When targeting a minimum VLEN > 32, we should use a 64-bit chunk size.
8990 Otherwise we cannot include SEW = 64 bits.
8991 Runtime invariant: the single indeterminate represents the
8992 number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
8993 Thus the number of bytes in a vector is 8 + 8 * x1, which is
8994 riscv_vector_chunks * 8 = poly_int (8, 8).  */
8995 riscv_bytes_per_vector_chunk = 8;
8996 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
8997 - TARGET_MIN_VLEN = 64bit: [8,8]
8998 - TARGET_MIN_VLEN = 128bit: [16,16]
8999 - TARGET_MIN_VLEN = 256bit: [32,32]
9000 - TARGET_MIN_VLEN = 512bit: [64,64]
9001 - TARGET_MIN_VLEN = 1024bit: [128,128]
9002 - TARGET_MIN_VLEN = 2048bit: [256,256]
9003 - TARGET_MIN_VLEN = 4096bit: [512,512]
9004 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
9005 chunk_num = min_vlen / 64;
9007 else
9009 /* When targeting a minimum VLEN = 32, we should use a 32-bit
9010 chunk size.  Runtime invariant: the single indeterminate represents the
9011 number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
9012 Thus the number of bytes in a vector is 4 + 4 * x1, which is
9013 riscv_vector_chunks * 4 = poly_int (4, 4).  */
9014 riscv_bytes_per_vector_chunk = 4;
9015 chunk_num = 1;
9018 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR
9019 is enabled. Set riscv_vector_chunks as 1 compile-time constant if
9020 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
9021 to set RVV mode size. The RVV machine modes size are run-time constant if
9022 TARGET_VECTOR is enabled. The RVV machine modes size remains default
9023 compile-time constant if TARGET_VECTOR is disabled. */
9024 if (TARGET_VECTOR_OPTS_P (opts))
9026 switch (opts->x_rvv_vector_bits)
9028 case RVV_VECTOR_BITS_SCALABLE:
9029 return poly_uint16 (chunk_num, chunk_num);
9030 case RVV_VECTOR_BITS_ZVL:
9031 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
9032 default:
9033 gcc_unreachable ();
9036 else
9037 return 1;
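/* E.g. for -march=rv64gcv_zvl256b, min_vlen == 256 and the chunk size
   is 8 bytes: -mrvv-vector-bits=zvl yields the compile-time constant
   256 / (8 * 8) == 4 chunks (a fixed 32-byte vector), while the
   default scalable setting yields the runtime constant
   poly_uint16 (4, 4).  */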
9040 /* 'Unpack' the internal tuning structs and update the options
9041 in OPTS. The caller must have set up selected_tune and selected_arch
9042 as all the other target-specific codegen decisions are
9043 derived from them. */
9044 void
9045 riscv_override_options_internal (struct gcc_options *opts)
9047 const struct riscv_tune_info *cpu;
9049 /* The presence of the M extension implies that division instructions
9050 are present, so include them unless explicitly disabled. */
9051 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
9052 opts->x_target_flags |= MASK_DIV;
9053 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
9054 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
9056 /* Likewise floating-point division and square root. */
9057 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
9058 && ((target_flags_explicit & MASK_FDIV) == 0))
9059 opts->x_target_flags |= MASK_FDIV;
9061 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
9062 if both -mtune and -mcpu are not given. */
9063 const char *tune_string = get_tune_str (opts);
9064 cpu = riscv_parse_tune (tune_string, false);
9065 riscv_microarchitecture = cpu->microarchitecture;
9066 tune_param = opts->x_optimize_size
9067 ? &optimize_size_tune_info
9068 : cpu->tune_param;
9070 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
9071 for size. For architectures that trap and emulate unaligned accesses,
9072 the performance cost is too great, even for -Os. Similarly, if
9073 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
9074 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
9075 || TARGET_STRICT_ALIGN);
9077 /* Make a note if the user explicitly passed -mstrict-align for later
9078 builtin macro generation.  Can't use target_flags_explicit since
9079 it is set even for -mno-strict-align. */
9080 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
9082 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
9083 && cpu->tune_param->slow_unaligned_access)
9084 opts->x_target_flags |= MASK_STRICT_ALIGN;
9086 /* If the user hasn't specified a branch cost, use the processor's
9087 default. */
9088 if (opts->x_riscv_branch_cost == 0)
9089 opts->x_riscv_branch_cost = tune_param->branch_cost;
9091 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
9092 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
9094 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
9095 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
9096 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
9097 "'V' Extension");
9099 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
9100 big-endian after finishing full coverage testing. */
9101 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
9102 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
9104 /* Convert -march and -mrvv-vector-bits to a chunks count. */
9105 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
9108 /* Implement TARGET_OPTION_OVERRIDE. */
9110 static void
9111 riscv_option_override (void)
9113 #ifdef SUBTARGET_OVERRIDE_OPTIONS
9114 SUBTARGET_OVERRIDE_OPTIONS;
9115 #endif
9117 flag_pcc_struct_return = 0;
9119 if (flag_pic)
9120 g_switch_value = 0;
9122 /* Prefer medlow for RV32, since it can reach the full 32-bit address
9123 space; demote the large code model accordingly.  */
9124 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
9125 riscv_cmodel = CM_MEDLOW;
9127 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
9128 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
9130 if (riscv_cmodel == CM_LARGE && flag_pic)
9131 sorry ("code model %qs with %qs", "large",
9132 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
9134 if (flag_pic)
9135 riscv_cmodel = CM_PIC;
9137 /* With -fno-omit-frame-pointer combined with -momit-leaf-frame-pointer
9138 we want to save fp and ra in non-leaf functions and neither in leaf
9139 functions.  x_flag_omit_frame_pointer has first priority in
9140 determining whether the frame pointer is needed, so if we did not
9141 override it here, fp and ra would also be stored for leaf functions,
9142 which is not what we want.  */
9143 riscv_save_frame_pointer = false;
9144 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
9146 if (!global_options.x_flag_omit_frame_pointer)
9147 riscv_save_frame_pointer = true;
9149 global_options.x_flag_omit_frame_pointer = 1;
9152 /* We get better code with explicit relocs for CM_MEDLOW, but
9153 worse code for the others (for now). Pick the best default. */
9154 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
9155 if (riscv_cmodel == CM_MEDLOW)
9156 target_flags |= MASK_EXPLICIT_RELOCS;
9158 /* Require that the ISA supports the requested floating-point ABI. */
9159 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
9160 error ("requested ABI requires %<-march%> to subsume the %qc extension",
9161 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
9163 /* RVE requires specific ABI. */
9164 if (TARGET_RVE)
9166 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
9167 error ("rv32e requires ilp32e ABI");
9168 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
9169 error ("rv64e requires lp64e ABI");
9172 /* Zfinx requires the ilp32, ilp32e, lp64 or lp64e ABI.  */
9173 if (TARGET_ZFINX
9174 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
9175 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
9176 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
9178 /* We do not yet support ILP32 on RV64. */
9179 if (BITS_PER_WORD != POINTER_SIZE)
9180 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
9182 /* Validate -mpreferred-stack-boundary= value. */
9183 riscv_stack_boundary = ABI_STACK_BOUNDARY;
9184 if (riscv_preferred_stack_boundary_arg)
9186 int min = ctz_hwi (STACK_BOUNDARY / 8);
9187 int max = 8;
9189 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
9190 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
9191 riscv_preferred_stack_boundary_arg, min, max);
9193 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
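/* E.g. -mpreferred-stack-boundary=4 yields 8 << 4 == 128 bits,
   i.e. 16-byte stack alignment.  */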
9196 if (riscv_emit_attribute_p < 0)
9197 #ifdef HAVE_AS_RISCV_ATTRIBUTE
9198 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
9199 #else
9200 riscv_emit_attribute_p = 0;
9202 if (riscv_emit_attribute_p)
9203 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
9204 " [%<-mriscv-attribute%>]");
9205 #endif
9207 if (riscv_stack_protector_guard == SSP_GLOBAL
9208 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9210 error ("incompatible options %<-mstack-protector-guard=global%> and "
9211 "%<-mstack-protector-guard-offset=%s%>",
9212 riscv_stack_protector_guard_offset_str);
9215 if (riscv_stack_protector_guard == SSP_TLS
9216 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
9217 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
9219 error ("both %<-mstack-protector-guard-offset%> and "
9220 "%<-mstack-protector-guard-reg%> must be used "
9221 "with %<-mstack-protector-guard=sysreg%>");
9224 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
9226 const char *str = riscv_stack_protector_guard_reg_str;
9227 int reg = decode_reg_name (str);
9229 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
9230 error ("%qs is not a valid base register in %qs", str,
9231 "-mstack-protector-guard-reg=");
9233 riscv_stack_protector_guard_reg = reg;
9236 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9238 char *end;
9239 const char *str = riscv_stack_protector_guard_offset_str;
9240 errno = 0;
9241 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
9243 if (!*str || *end || errno)
9244 error ("%qs is not a valid number in %qs", str,
9245 "-mstack-protector-guard-offset=");
9247 if (!SMALL_OPERAND (offs))
9248 error ("%qs is not a valid offset in %qs", str,
9249 "-mstack-protector-guard-offset=");
9251 riscv_stack_protector_guard_offset = offs;
9254 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
9255 param_sched_pressure_algorithm,
9256 SCHED_PRESSURE_MODEL);
9258 /* Function to allocate machine-dependent function status. */
9259 init_machine_status = &riscv_init_machine_status;
9261 riscv_override_options_internal (&global_options);
9263 /* Save these options as the default ones in case we push and pop them later
9264 while processing functions with potential target attributes. */
9265 target_option_default_node = target_option_current_node
9266 = build_target_option_node (&global_options, &global_options_set);
9269 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9270 Used by riscv_set_current_function to
9271 make sure optab availability predicates are recomputed when necessary. */
9273 void
9274 riscv_save_restore_target_globals (tree new_tree)
9276 if (TREE_TARGET_GLOBALS (new_tree))
9277 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9278 else if (new_tree == target_option_default_node)
9279 restore_target_globals (&default_target_globals);
9280 else
9281 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9284 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9285 using the information saved in PTR. */
9287 static void
9288 riscv_option_restore (struct gcc_options *opts,
9289 struct gcc_options * /* opts_set */,
9290 struct cl_target_option * /* ptr */)
9292 riscv_override_options_internal (opts);
9295 static GTY (()) tree riscv_previous_fndecl;
9297 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9299 static void
9300 riscv_conditional_register_usage (void)
9302 /* We have only x0~x15 on RV32E/RV64E. */
9303 if (TARGET_RVE)
9305 for (int r = 16; r <= 31; r++)
9306 fixed_regs[r] = 1;
9309 if (riscv_abi == ABI_ILP32E)
9311 for (int r = 16; r <= 31; r++)
9312 call_used_regs[r] = 1;
9315 if (!TARGET_HARD_FLOAT)
9317 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9318 fixed_regs[regno] = call_used_regs[regno] = 1;
9321 /* In the soft-float ABI, there are no callee-saved FP registers. */
9322 if (UNITS_PER_FP_ARG == 0)
9324 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9325 call_used_regs[regno] = 1;
9328 if (!TARGET_VECTOR)
9330 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
9331 fixed_regs[regno] = call_used_regs[regno] = 1;
9333 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
9334 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
9335 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9336 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
9340 /* Return a register priority for hard reg REGNO. */
9342 static int
9343 riscv_register_priority (int regno)
9345 /* Favor compressed registers to improve the odds of RVC instruction
9346 selection. */
9347 if (riscv_compressed_reg_p (regno))
9348 return 1;
9350 return 0;
9353 /* Implement TARGET_TRAMPOLINE_INIT. */
9355 static void
9356 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9358 rtx addr, end_addr, mem;
9359 uint32_t trampoline[4];
9360 unsigned int i;
9361 HOST_WIDE_INT static_chain_offset, target_function_offset;
9363 /* Work out the offsets of the pointers from the start of the
9364 trampoline code. */
9365 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
9367 /* Get pointers to the beginning and end of the code block. */
9368 addr = force_reg (Pmode, XEXP (m_tramp, 0));
9369 end_addr = riscv_force_binary (Pmode, PLUS, addr,
9370 GEN_INT (TRAMPOLINE_CODE_SIZE));
9373 if (Pmode == SImode)
9375 chain_value = force_reg (Pmode, chain_value);
9377 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9378 /* lui t2, hi(chain)
9379 lui t0, hi(func)
9380 addi t2, t2, lo(chain)
9381 jr t0, lo(func) */
9383 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
9384 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
9386 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
9388 /* 0xfff. */
9389 rtx imm12_mask = gen_reg_rtx (SImode);
9390 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
9392 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
9394 /* Gen lui t2, hi(chain). */
9395 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
9396 fixup_value);
9397 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
9398 uimm_mask);
9399 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9400 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
9401 gen_int_mode (lui_hi_chain_code, SImode));
9403 mem = adjust_address (m_tramp, SImode, 0);
9404 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
9406 /* Gen lui t0, hi(func). */
9407 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
9408 fixup_value);
9409 hi_func = riscv_force_binary (SImode, AND, hi_func,
9410 uimm_mask);
9411 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
9412 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
9413 gen_int_mode (lui_hi_func_code, SImode));
9415 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
9416 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
9418 /* Gen addi t2, t2, lo(chain). */
9419 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
9420 imm12_mask);
9421 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
9423 lo_chain_code = OPCODE_ADDI
9424 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9425 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
9427 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
9428 force_reg (SImode, GEN_INT (lo_chain_code)));
9430 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
9431 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
9433 /* Gen jr t0, lo(func). */
9434 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
9435 imm12_mask);
9436 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
9438 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9440 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
9441 force_reg (SImode, GEN_INT (lo_func_code)));
9443 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
9444 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
9446 else
9448 static_chain_offset = TRAMPOLINE_CODE_SIZE;
9449 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
9451 /* auipc t2, 0
9452 l[wd] t0, target_function_offset(t2)
9453 l[wd] t2, static_chain_offset(t2)
9454 jr t0 */
9456 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9457 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9458 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
9459 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9460 | (target_function_offset << SHIFT_IMM);
9461 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9462 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9463 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9464 | (static_chain_offset << SHIFT_IMM);
9465 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9467 /* Copy the trampoline code. */
9468 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
9470 if (BYTES_BIG_ENDIAN)
9471 trampoline[i] = __builtin_bswap32(trampoline[i]);
9472 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
9473 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
9476 /* Set up the static chain pointer field. */
9477 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
9478 riscv_emit_move (mem, chain_value);
9480 /* Set up the target function field. */
9481 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
9482 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
9485 /* Flush the code part of the trampoline. */
9486 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
9487 emit_insn (gen_clear_cache (addr, end_addr));
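/* For reference, a minimal C sketch (illustrative only, using the GNU
   nested-function extension) that makes GCC emit such a trampoline:
   taking the address of a nested function that refers to its enclosing
   frame requires a static chain, e.g.

     int outer (int x)
     {
       int inner (int y) { return x + y; }  // uses outer's frame
       int (*fp) (int) = inner;             // address taken -> trampoline
       return fp (1);
     }

   The trampoline built above loads t2 (static chain) and t0 (target
   address) and jumps via t0.  */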
9490 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
9492 static bool
9493 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
9494 tree exp ATTRIBUTE_UNUSED)
9496 /* Don't use sibcalls when using the save-restore routines. */
9497 if (TARGET_SAVE_RESTORE)
9498 return false;
9500 /* Don't use sibcall for naked functions. */
9501 if (cfun->machine->naked_p)
9502 return false;
9504 /* Don't use sibcall for interrupt functions. */
9505 if (cfun->machine->interrupt_handler_p)
9506 return false;
9508 /* Don't use sibcalls in the large code model, because sibcall
9509 expansion and epilogue expansion both use the RISCV_PROLOGUE_TEMP
9510 register. */
9511 if (riscv_cmodel == CM_LARGE)
9512 return false;
9514 return true;
9517 /* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
9518 interrupt function. */
9519 static enum riscv_privilege_levels
9520 riscv_get_interrupt_type (tree decl)
9522 gcc_assert (decl != NULL_TREE);
9524 if ((TREE_CODE (decl) != FUNCTION_DECL)
9525 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
9526 return UNKNOWN_MODE;
9528 tree attr_args
9529 = TREE_VALUE (lookup_attribute ("interrupt",
9530 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
9532 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
9534 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
9536 if (!strcmp (string, "user"))
9537 return USER_MODE;
9538 else if (!strcmp (string, "supervisor"))
9539 return SUPERVISOR_MODE;
9540 else /* Must be "machine". */
9541 return MACHINE_MODE;
9543 else
9544 /* Interrupt attributes are machine mode by default. */
9545 return MACHINE_MODE;
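/* Example declarations that this function classifies (standard usage of
   the RISC-V "interrupt" attribute, shown here for illustration):

     void __attribute__ ((interrupt)) m_isr (void);                // MACHINE_MODE
     void __attribute__ ((interrupt ("supervisor"))) s_isr (void); // SUPERVISOR_MODE
     void __attribute__ ((interrupt ("user"))) u_isr (void);       // USER_MODE
*/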
9548 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
9549 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9550 of the function, if one exists. This function may be called multiple
9551 times on a single function so use riscv_previous_fndecl to avoid
9552 setting up identical state. */
9554 /* Sanity checking for the above function attributes. */
9555 static void
9556 riscv_set_current_function (tree decl)
9558 if (decl == NULL_TREE
9559 || current_function_decl == NULL_TREE
9560 || current_function_decl == error_mark_node
9561 || ! cfun->machine)
9562 return;
9564 if (!cfun->machine->attributes_checked_p)
9566 cfun->machine->naked_p = riscv_naked_function_p (decl);
9567 cfun->machine->interrupt_handler_p
9568 = riscv_interrupt_type_p (TREE_TYPE (decl));
9570 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
9571 error ("function attributes %qs and %qs are mutually exclusive",
9572 "interrupt", "naked");
9574 if (cfun->machine->interrupt_handler_p)
9576 tree ret = TREE_TYPE (TREE_TYPE (decl));
9577 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
9579 if (TREE_CODE (ret) != VOID_TYPE)
9580 error ("%qs function cannot return a value", "interrupt");
9582 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
9583 error ("%qs function cannot have arguments", "interrupt");
9585 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
9587 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
9590 /* Don't print the above diagnostics more than once. */
9591 cfun->machine->attributes_checked_p = 1;
9594 if (!decl || decl == riscv_previous_fndecl)
9595 return;
9597 tree old_tree = (riscv_previous_fndecl
9598 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
9599 : NULL_TREE);
9601 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
9603 /* If the current function has no attributes but the previous one did,
9604 use the default node. */
9605 if (!new_tree && old_tree)
9606 new_tree = target_option_default_node;
9608 /* If there is nothing to do, return. A #pragma GCC target reset or pop to
9609 the default has already been handled by riscv_save_restore_target_globals
9610 when the pragma was parsed. */
9611 if (old_tree == new_tree)
9612 return;
9614 riscv_previous_fndecl = decl;
9616 /* First set the target options. */
9617 cl_target_option_restore (&global_options, &global_options_set,
9618 TREE_TARGET_OPTION (new_tree));
9620 /* The ISA extensions can vary per function via attributes such as target.
9621 Thus, make sure that the machine modes are reflected correctly here. */
9622 init_adjust_machine_modes ();
9624 riscv_save_restore_target_globals (new_tree);
9627 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
9628 static tree
9629 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
9631 tree combined_attrs;
9633 enum riscv_privilege_levels old_interrupt_type
9634 = riscv_get_interrupt_type (olddecl);
9635 enum riscv_privilege_levels new_interrupt_type
9636 = riscv_get_interrupt_type (newdecl);
9638 /* Check that the old and new declarations have the same interrupt type. */
9639 if ((old_interrupt_type != UNKNOWN_MODE)
9640 && (new_interrupt_type != UNKNOWN_MODE)
9641 && (old_interrupt_type != new_interrupt_type))
9642 error ("%qs function cannot have different interrupt type", "interrupt");
9644 /* Create combined attributes. */
9645 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
9646 DECL_ATTRIBUTES (newdecl));
9648 return combined_attrs;
9651 /* Implement TARGET_CANNOT_COPY_INSN_P. */
9653 static bool
9654 riscv_cannot_copy_insn_p (rtx_insn *insn)
9656 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
9659 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
9661 static bool
9662 riscv_slow_unaligned_access (machine_mode, unsigned int)
9664 return riscv_slow_unaligned_access_p;
9667 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9669 static bool
9670 riscv_can_change_mode_class (machine_mode from, machine_mode to,
9671 reg_class_t rclass)
9673 /* RVV VLS modes and VLA modes share the same REG_CLASS.
9674 In the 'cprop_hardreg' pass, we try to do hard-reg copy propagation
9675 between a wider mode (FROM) and a narrower mode (TO).
9677 E.g. we should not allow copy propagation
9678 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
9679 since we cannot order their sizes, which would cause an ICE in regcprop.
9681 TODO: Even though they have different sizes, they always change
9682 the whole register. We may enhance this case in regcprop to optimize
9683 it in the future. */
9684 if (reg_classes_intersect_p (V_REGS, rclass)
9685 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
9686 return false;
9687 return !reg_classes_intersect_p (FP_REGS, rclass);
9690 /* Implement TARGET_CONSTANT_ALIGNMENT. */
9692 static HOST_WIDE_INT
9693 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
9695 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
9696 && (riscv_align_data_type == riscv_align_data_type_xlen))
9697 return MAX (align, BITS_PER_WORD);
9698 return align;
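/* Illustrative effect (assuming -malign-data=xlen, which selects
   riscv_align_data_type_xlen): on rv64 a string constant such as

     const char msg[] = "hi";   // naturally 1-byte aligned

   is placed with at least BITS_PER_WORD (64-bit) alignment, which can
   help word-sized accesses to short literals.  */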
9701 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
9703 /* This function is equivalent to default_promote_function_mode_always_promote
9704 except that it returns a promoted mode even if type is NULL_TREE. This is
9705 needed by libcalls which have no type (only a mode) such as fixed conversion
9706 routines that take a signed or unsigned char/short/int argument and convert
9707 it to a fixed type. */
9709 static machine_mode
9710 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9711 machine_mode mode,
9712 int *punsignedp ATTRIBUTE_UNUSED,
9713 const_tree fntype ATTRIBUTE_UNUSED,
9714 int for_return ATTRIBUTE_UNUSED)
9716 int unsignedp;
9718 if (type != NULL_TREE)
9719 return promote_mode (type, mode, punsignedp);
9721 unsignedp = *punsignedp;
9722 scalar_mode smode = as_a <scalar_mode> (mode);
9723 PROMOTE_MODE (smode, unsignedp, type);
9724 *punsignedp = unsignedp;
9725 return smode;
9728 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
9730 static void
9731 riscv_reorg (void)
9733 /* Do nothing unless we have -msave-restore. */
9734 if (TARGET_SAVE_RESTORE)
9735 riscv_remove_unneeded_save_restore_calls ();
9738 /* Return nonzero if register FROM_REGNO can be renamed to register
9739 TO_REGNO. */
9741 bool
9742 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
9743 unsigned to_regno)
9745 /* Interrupt functions can only use registers that have already been
9746 saved by the prologue, even if they would normally be
9747 call-clobbered. */
9748 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
9751 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
9753 bool
9754 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
9756 /* Prefer old address if it is less expensive. */
9757 addr_space_t as = MEM_ADDR_SPACE (memref);
9758 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
9759 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
9760 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
9761 return new_cost <= old_cost;
9764 /* Helper function for generating the gpr_save pattern. */
9766 rtx
9767 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
9769 unsigned count = riscv_save_libcall_count (frame->mask);
9770 /* 1 for the unspec, 2 for the t0/t1 clobbers, and 1 for ra. */
9771 unsigned veclen = 1 + 2 + 1 + count;
9772 rtvec vec = rtvec_alloc (veclen);
9774 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
9776 RTVEC_ELT (vec, 0) =
9777 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
9778 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
9780 for (unsigned i = 1; i < veclen; ++i)
9782 unsigned regno = gpr_save_reg_order[i];
9783 rtx reg = gen_rtx_REG (Pmode, regno);
9784 rtx elt;
9786 /* t0 and t1 are CLOBBERs, others are USEs. */
9787 if (i < 3)
9788 elt = gen_rtx_CLOBBER (Pmode, reg);
9789 else
9790 elt = gen_rtx_USE (Pmode, reg);
9792 RTVEC_ELT (vec, i) = elt;
9795 /* The largest-numbered caller-saved register must be set in the mask
9796 unless we are using __riscv_save_0. */
9797 gcc_assert ((count == 0) ||
9798 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
9800 return gen_rtx_PARALLEL (VOIDmode, vec);
9803 static HOST_WIDE_INT
9804 zcmp_base_adj (int regs_num)
9806 return riscv_16bytes_align ((regs_num) *GET_MODE_SIZE (word_mode));
9809 static HOST_WIDE_INT
9810 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
9812 return total - zcmp_base_adj (regs_num);
9815 bool
9816 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
9818 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
9819 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
9820 || additional_bytes == 2 * ZCMP_SP_INC_STEP
9821 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
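/* Worked example (assuming ZCMP_SP_INC_STEP is 16 and ZCMP_MAX_SPIMM
   is 3, matching the Zcmp spimm encoding): on rv64, saving 3 registers
   gives zcmp_base_adj = align16 (3 * 8) = 32, so the valid total stack
   adjustments are 32, 32 + 16, 32 + 32 and 32 + 48 bytes.  */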
9824 /* Return true if OP is a valid gpr_save pattern. */
9826 bool
9827 riscv_gpr_save_operation_p (rtx op)
9829 unsigned len = XVECLEN (op, 0);
9831 if (len > ARRAY_SIZE (gpr_save_reg_order))
9832 return false;
9834 for (unsigned i = 0; i < len; i++)
9836 rtx elt = XVECEXP (op, 0, i);
9837 if (i == 0)
9839 /* First element in parallel is unspec. */
9840 if (GET_CODE (elt) != UNSPEC_VOLATILE
9841 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
9842 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
9843 return false;
9845 else
9847 /* Two CLOBBERs followed by USEs; the order must be checked. */
9848 unsigned expect_code = i < 3 ? CLOBBER : USE;
9849 if (GET_CODE (elt) != expect_code
9850 || !REG_P (XEXP (elt, 0))
9851 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
9852 return false;
9856 return true;
9859 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
9861 static unsigned HOST_WIDE_INT
9862 riscv_asan_shadow_offset (void)
9864 /* We only have libsanitizer support for RV64 at present.
9866 This number must match ASAN_SHADOW_OFFSET_CONST in the file
9867 libsanitizer/asan/asan_mapping.h. */
9868 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
9871 /* Implement TARGET_MANGLE_TYPE. */
9873 static const char *
9874 riscv_mangle_type (const_tree type)
9876 /* The half-precision float type _Float16 mangles as "DF16_". */
9877 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
9878 return "DF16_";
9880 /* Mangle all vector types for the vector extension. */
9881 /* The mangled name follows the RVV LLVM rule, which is
9882 "u" + strlen (abi_name) + abi_name. */
9883 if (TYPE_NAME (type) != NULL)
9885 const char *res = riscv_vector::mangle_builtin_type (type);
9886 if (res)
9887 return res;
9890 /* Use the default mangling. */
9891 return NULL;
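/* Illustrative instance of the "u" + strlen (abi_name) + abi_name rule
   above: the RVV type vint32m1_t has the ABI name "__rvv_int32m1_t"
   (15 characters), so it mangles as "u15__rvv_int32m1_t".  */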
9894 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
9896 static bool
9897 riscv_scalar_mode_supported_p (scalar_mode mode)
9899 if (mode == HFmode)
9900 return true;
9901 else
9902 return default_scalar_mode_supported_p (mode);
9905 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
9906 if MODE is HFmode, and punt to the generic implementation otherwise. */
9908 static bool
9909 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
9911 if (mode == HFmode)
9912 return true;
9913 else
9914 return default_libgcc_floating_mode_supported_p (mode);
9917 /* Set the value of FLT_EVAL_METHOD.
9918 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
9920 0: evaluate all operations and constants, whose semantic type has at
9921 most the range and precision of type float, to the range and
9922 precision of float; evaluate all other operations and constants to
9923 the range and precision of the semantic type;
9925 N, where _FloatN is a supported interchange floating type
9926 evaluate all operations and constants, whose semantic type has at
9927 most the range and precision of _FloatN type, to the range and
9928 precision of the _FloatN type; evaluate all other operations and
9929 constants to the range and precision of the semantic type;
9931 If we have the zfh/zhinx/zvfh extensions then we support _Float16
9932 in native precision, so we should set this to 16. */
9933 static enum flt_eval_method
9934 riscv_excess_precision (enum excess_precision_type type)
9936 switch (type)
9938 case EXCESS_PRECISION_TYPE_FAST:
9939 case EXCESS_PRECISION_TYPE_STANDARD:
9940 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
9941 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
9942 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
9943 case EXCESS_PRECISION_TYPE_IMPLICIT:
9944 case EXCESS_PRECISION_TYPE_FLOAT16:
9945 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
9946 default:
9947 gcc_unreachable ();
9949 return FLT_EVAL_METHOD_UNPREDICTABLE;
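/* Example of the effect on C code (a sketch; exact codegen depends on
   the enabled extensions): with zfh/zhinx/zvfh, FLT_EVAL_METHOD is 16,
   so in

     _Float16 a, b, c;
     c = a + b;   // evaluated directly in _Float16 precision

   whereas without those extensions the operands are promoted to float
   and the result is truncated back on assignment.  */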
9952 /* Implement TARGET_FLOATN_MODE. */
9953 static opt_scalar_float_mode
9954 riscv_floatn_mode (int n, bool extended)
9956 if (!extended && n == 16)
9957 return HFmode;
9959 return default_floatn_mode (n, extended);
9962 static void
9963 riscv_init_libfuncs (void)
9965 /* Half-precision float operations. The compiler handles all operations
9966 with NULL libfuncs by converting to SFmode. */
9968 /* Arithmetic. */
9969 set_optab_libfunc (add_optab, HFmode, NULL);
9970 set_optab_libfunc (sdiv_optab, HFmode, NULL);
9971 set_optab_libfunc (smul_optab, HFmode, NULL);
9972 set_optab_libfunc (neg_optab, HFmode, NULL);
9973 set_optab_libfunc (sub_optab, HFmode, NULL);
9975 /* Comparisons. */
9976 set_optab_libfunc (eq_optab, HFmode, NULL);
9977 set_optab_libfunc (ne_optab, HFmode, NULL);
9978 set_optab_libfunc (lt_optab, HFmode, NULL);
9979 set_optab_libfunc (le_optab, HFmode, NULL);
9980 set_optab_libfunc (ge_optab, HFmode, NULL);
9981 set_optab_libfunc (gt_optab, HFmode, NULL);
9982 set_optab_libfunc (unord_optab, HFmode, NULL);
9985 #if CHECKING_P
9986 void
9987 riscv_reinit (void)
9989 riscv_option_override ();
9990 init_adjust_machine_modes ();
9991 init_derived_machine_modes ();
9992 reinit_regs ();
9993 init_optabs ();
9995 #endif
9997 #if CHECKING_P
9998 #undef TARGET_RUN_TARGET_SELFTESTS
9999 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
10000 #endif /* #if CHECKING_P */
10002 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
10004 static bool
10005 riscv_vector_mode_supported_p (machine_mode mode)
10007 if (TARGET_VECTOR)
10008 return riscv_v_ext_mode_p (mode);
10010 return false;
10013 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
10015 static bool
10016 riscv_verify_type_context (location_t loc, type_context_kind context,
10017 const_tree type, bool silent_p)
10019 return riscv_vector::verify_type_context (loc, context, type, silent_p);
10022 /* Implement TARGET_VECTOR_ALIGNMENT. */
10024 static HOST_WIDE_INT
10025 riscv_vector_alignment (const_tree type)
10027 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
10028 be set for non-predicate vectors of booleans. Modes are the most
10029 direct way we have of identifying real RVV predicate types. */
10030 /* FIXME: The RVV spec does not specify the alignment of bool vectors;
10031 we use one-byte alignment. */
10032 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
10033 return 8;
10035 widest_int min_size
10036 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
10037 return wi::umin (min_size, 128).to_uhwi ();
10040 /* Implement REGMODE_NATURAL_SIZE. */
10042 poly_uint64
10043 riscv_regmode_natural_size (machine_mode mode)
10045 /* The natural size for RVV data modes is one RVV data vector,
10046 and similarly for predicates. We can't independently modify
10047 anything smaller than that. */
10048 /* ??? For now, only do this for variable-width RVV registers.
10049 Doing it for constant-sized registers breaks lower-subreg.c. */
10051 if (riscv_v_ext_mode_p (mode))
10053 poly_uint64 size = GET_MODE_SIZE (mode);
10054 if (riscv_v_ext_tuple_mode_p (mode))
10056 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
10057 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
10058 return size;
10060 else if (riscv_v_ext_vector_mode_p (mode))
10062 /* RVV mask modes always consume a single register. */
10063 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
10064 return BYTES_PER_RISCV_VECTOR;
10066 if (!size.is_constant ())
10067 return BYTES_PER_RISCV_VECTOR;
10068 else if (!riscv_v_ext_vls_mode_p (mode))
10069 /* For -march=rv64gc_zve32f, the natural vector register size
10070 is 32 bits, which is smaller than the scalar register size, so we
10071 return the minimum of the vector register size and the scalar
10072 register size. */
10073 return MIN (size.to_constant (), UNITS_PER_WORD);
10075 return UNITS_PER_WORD;
10078 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
10080 static unsigned int
10081 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
10082 int *offset)
10084 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
10085 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
10086 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1. */
10088 gcc_assert (i == 1);
10089 *factor = riscv_bytes_per_vector_chunk;
10090 *offset = 1;
10091 return RISCV_DWARF_VLENB;
10094 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
10096 static HOST_WIDE_INT
10097 riscv_estimated_poly_value (poly_int64 val,
10098 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
10100 if (TARGET_VECTOR)
10101 return riscv_vector::estimated_poly_value (val, kind);
10102 return default_estimated_poly_value (val, kind);
10105 /* Return true if the vector misalignment factor is supported by the
10106 target. */
10107 bool
10108 riscv_support_vector_misalignment (machine_mode mode,
10109 const_tree type ATTRIBUTE_UNUSED,
10110 int misalignment,
10111 bool is_packed ATTRIBUTE_UNUSED)
10113 /* This depends on the movmisalign pattern. */
10114 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10115 is_packed);
10118 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
10120 static opt_machine_mode
10121 riscv_get_mask_mode (machine_mode mode)
10123 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
10124 return riscv_vector::get_mask_mode (mode);
10126 return default_get_mask_mode (mode);
10129 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
10130 it isn't worth branching around empty masked ops (including masked
10131 stores). */
10133 static bool
10134 riscv_empty_mask_is_expensive (unsigned)
10136 return false;
10139 /* Return true if a shift-amount matches the trailing cleared bits on
10140 a bitmask. */
10142 bool
10143 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
10145 return shamt == ctz_hwi (mask);
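/* Worked example: for mask 0xffffff00, ctz_hwi returns 8, so only
   shamt == 8 matches; i.e. (x << 8) leaves exactly the low bits clear
   that the mask also clears, which is what the md patterns using this
   helper rely on.  */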
10148 static HARD_REG_SET
10149 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10151 HARD_REG_SET zeroed_hardregs;
10152 CLEAR_HARD_REG_SET (zeroed_hardregs);
10154 /* Find a register to hold vl. */
10155 unsigned vl_regno = INVALID_REGNUM;
10156 /* Skip the first GPR (x0): a vsetvl whose rd and rs1 are both x0
10157 keeps the existing vl instead of setting it to VLMAX. */
10158 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
10160 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10162 vl_regno = regno;
10163 break;
10167 if (vl_regno > GP_REG_LAST)
10168 sorry ("cannot allocate vl register for %qs on this target",
10169 "-fzero-call-used-regs");
10171 /* Vector configurations need not be saved and restored here. The
10172 -fzero-call-used-regs=* option will zero all vector registers and
10173 return, so there are no vector operations between them. */
10175 bool emitted_vlmax_vsetvl = false;
10176 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
10177 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
10179 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10181 rtx target = regno_reg_rtx[regno];
10182 machine_mode mode = GET_MODE (target);
10184 if (!emitted_vlmax_vsetvl)
10186 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
10187 emitted_vlmax_vsetvl = true;
10190 rtx ops[] = {target, CONST0_RTX (mode)};
10191 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
10192 riscv_vector::UNARY_OP, ops, vl);
10194 SET_HARD_REG_BIT (zeroed_hardregs, regno);
10198 return zeroed_hardregs;
10201 /* Generate a sequence of instructions that zero registers specified by
10202 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
10203 zeroed. */
10204 HARD_REG_SET
10205 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10207 HARD_REG_SET zeroed_hardregs;
10208 CLEAR_HARD_REG_SET (zeroed_hardregs);
10210 if (TARGET_VECTOR)
10211 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
10213 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
10214 & ~zeroed_hardregs);
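/* Illustrative trigger for this hook (standard GCC option and
   attribute, not RISC-V specific):

     gcc -fzero-call-used-regs=all ...
     int __attribute__ ((zero_call_used_regs ("used"))) f (void);

   With TARGET_VECTOR, the vector registers in the set are cleared via
   one VLMAX vsetvl followed by a zeroing move per register, as done in
   vector_zero_call_used_regs above.  */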
10217 /* Implement target hook TARGET_ARRAY_MODE. */
10219 static opt_machine_mode
10220 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
10222 machine_mode vmode;
10223 if (TARGET_VECTOR
10224 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
10225 return vmode;
10227 return opt_machine_mode ();
10230 /* Given memory reference MEM, expand code to compute the aligned
10231 memory address, shift and mask values and store them into
10232 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
10234 void
10235 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
10236 rtx *not_mask)
10238 /* Align the memory address to a word. */
10239 rtx addr = force_reg (Pmode, XEXP (mem, 0));
10241 rtx addr_mask = gen_int_mode (-4, Pmode);
10243 rtx aligned_addr = gen_reg_rtx (Pmode);
10244 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
10246 *aligned_mem = change_address (mem, SImode, aligned_addr);
10248 /* Calculate the shift amount. */
10249 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
10250 gen_int_mode (3, SImode)));
10251 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
10252 gen_int_mode (3, SImode)));
10254 /* Calculate the mask. */
10255 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
10257 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
10259 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
10260 gen_lowpart (QImode, *shift)));
10262 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
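/* Worked example (little-endian, QImode access, so GET_MODE_MASK is
   0xff) for an address of 0x1007:

     aligned_addr = 0x1007 & -4       = 0x1004
     *shift       = (0x1007 & 3) << 3 = 24
     *mask        = 0xff << 24        = 0xff000000
     *not_mask    =                     0x00ffffff

   i.e. the byte occupies bits 24..31 of the aligned SImode word.  */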
10265 /* Left-shift a subword within an SImode register. */
10267 void
10268 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
10269 rtx *shifted_value)
10271 rtx value_reg = gen_reg_rtx (SImode);
10272 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
10273 mode, 0));
10275 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
10276 gen_lowpart (QImode, shift)));
10279 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
10280 allows the behavior to be tuned for specific implementations as well as
10281 when optimizing for size. */
10283 bool
10284 riscv_use_divmod_expander (void)
10286 return tune_param->use_divmod_expansion;
10289 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
10291 static machine_mode
10292 riscv_preferred_simd_mode (scalar_mode mode)
10294 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10295 return riscv_vector::preferred_simd_mode (mode);
10297 return word_mode;
10300 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
10302 static poly_uint64
10303 riscv_vectorize_preferred_vector_alignment (const_tree type)
10305 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
10306 return TYPE_ALIGN (TREE_TYPE (type));
10307 return TYPE_ALIGN (type);
10310 /* Return true if MODE is a static FRM rounding mode. */
10312 static bool
10313 riscv_static_frm_mode_p (int mode)
10315 switch (mode)
10317 case riscv_vector::FRM_RDN:
10318 case riscv_vector::FRM_RUP:
10319 case riscv_vector::FRM_RTZ:
10320 case riscv_vector::FRM_RMM:
10321 case riscv_vector::FRM_RNE:
10322 return true;
10323 default:
10324 return false;
10327 gcc_unreachable ();
10330 /* Implement mode switching for the floating-point rounding mode (frm). */
10332 static void
10333 riscv_emit_frm_mode_set (int mode, int prev_mode)
10335 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
10337 if (prev_mode == riscv_vector::FRM_DYN_CALL)
10338 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
10340 if (mode != prev_mode)
10342 rtx frm = gen_int_mode (mode, SImode);
10344 if (mode == riscv_vector::FRM_DYN_CALL
10345 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
10346 /* No need to emit when prev mode is DYN already. */
10347 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10348 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
10349 && prev_mode != riscv_vector::FRM_DYN
10350 && prev_mode != riscv_vector::FRM_DYN_CALL)
10351 /* No need to emit when prev mode is DYN or DYN_CALL already. */
10352 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10353 else if (mode == riscv_vector::FRM_DYN
10354 && prev_mode != riscv_vector::FRM_DYN_CALL)
10355 /* Restore frm value from backup when switch to DYN mode. */
10356 emit_insn (gen_fsrmsi_restore (backup_reg));
10357 else if (riscv_static_frm_mode_p (mode))
10358 /* Set frm value when switch to static mode. */
10359 emit_insn (gen_fsrmsi_restore (frm));
10363 /* Implement Mode switching. */
10365 static void
10366 riscv_emit_mode_set (int entity, int mode, int prev_mode,
10367 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
10369 switch (entity)
10371 case RISCV_VXRM:
10372 if (mode != VXRM_MODE_NONE && mode != prev_mode)
10373 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
10374 break;
10375 case RISCV_FRM:
10376 riscv_emit_frm_mode_set (mode, prev_mode);
10377 break;
10378 default:
10379 gcc_unreachable ();
10383 /* Adjust an insn's FRM_NONE mode to FRM_DYN after a call, for the
10384 underlying mode-set emit. */
10386 static int
10387 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
10389 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
10391 if (insn && CALL_P (insn))
10392 return riscv_vector::FRM_DYN;
10394 return mode;
10397 /* Insert the frm backup insn at the end of the bb if and only if the call
10398 is the last insn of this bb. */
10400 static void
10401 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
10403 edge eg;
10404 bool abnormal_edge_p = false;
10405 edge_iterator eg_iterator;
10406 basic_block bb = BLOCK_FOR_INSN (cur_insn);
10408 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
10410 if (eg->flags & EDGE_ABNORMAL)
10411 abnormal_edge_p = true;
10412 else
10414 start_sequence ();
10415 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10416 rtx_insn *backup_insn = get_insns ();
10417 end_sequence ();
10419 insert_insn_on_edge (backup_insn, eg);
10423 if (abnormal_edge_p)
10425 start_sequence ();
10426 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10427 rtx_insn *backup_insn = get_insns ();
10428 end_sequence ();
10430 insert_insn_end_basic_block (backup_insn, bb);
10433 commit_edge_insertions ();
10436 /* Return the mode that frm must be switched into
10437 prior to the execution of INSN. */
10439 static int
10440 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
10442 if (!DYNAMIC_FRM_RTL (cfun))
10444 /* The dynamic frm will be initialized only once per function. */
10445 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
10446 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10449 if (CALL_P (cur_insn))
10451 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
10453 if (!insn)
10454 riscv_frm_emit_after_bb_end (cur_insn);
10456 return riscv_vector::FRM_DYN_CALL;
10459 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
10461 if (mode == riscv_vector::FRM_NONE)
10462 /* After a call, we need to back up the frm because it may have been
10463 updated during the call. Here, for each insn, we check whether
10464 the previous insn is a call. When the previous insn is a call,
10465 there are 2 cases for the emitted mode set.
10467 1. The current insn is not MODE_NONE: the mode-switching framework
10468 will do the switch from MODE_CALL to MODE_NONE natively.
10469 2. The current insn is MODE_NONE: we need to adjust MODE_NONE to
10470 MODE_DYN, and leave the mode switch itself to perform
10471 the emitted mode set. */
10473 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
10475 return mode;
10478 /* Return the mode that ENTITY must be switched into
10479 prior to the execution of INSN. */
10481 static int
10482 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
10484 int code = recog_memoized (insn);
10486 switch (entity)
10488 case RISCV_VXRM:
10489 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
10490 case RISCV_FRM:
10491 return riscv_frm_mode_needed (insn, code);
10492 default:
10493 gcc_unreachable ();
10497 /* Return TRUE if INSN is an asm insn. */
10499 static bool
10500 asm_insn_p (rtx_insn *insn)
10502 extract_insn (insn);
10504 return recog_data.is_asm;
10507 /* Return TRUE if INSN leaves VXRM in an unknown state. */
10509 static bool
10510 vxrm_unknown_p (rtx_insn *insn)
10512 /* Return true if there is a definition of VXRM. */
10513 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
10514 return true;
10516 /* A CALL function may contain an instruction that modifies the VXRM,
10517 return true in this situation. */
10518 if (CALL_P (insn))
10519 return true;
10521 /* Return true for all asm statements, since users may hardcode assembly
10522 like this: asm volatile ("csrwi vxrm, 0"). */
10523 if (asm_insn_p (insn))
10524 return true;
10526 return false;
10529 /* Return TRUE if INSN leaves FRM in an unknown dynamic state. */
10531 static bool
10532 frm_unknown_dynamic_p (rtx_insn *insn)
10534 /* Return true if there is a definition of FRM. */
10535 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
10536 return true;
10538 return false;
10541 /* Return the mode that an insn results in for VXRM. */
10543 static int
10544 riscv_vxrm_mode_after (rtx_insn *insn, int mode)
10546 if (vxrm_unknown_p (insn))
10547 return VXRM_MODE_NONE;
10549 if (recog_memoized (insn) < 0)
10550 return mode;
10552 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
10553 return get_attr_vxrm_mode (insn);
10554 else
10555 return mode;
10558 /* Return the mode that an insn results in for FRM. */
10560 static int
10561 riscv_frm_mode_after (rtx_insn *insn, int mode)
10563 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);
10565 if (CALL_P (insn))
10566 return mode;
10568 if (frm_unknown_dynamic_p (insn))
10569 return riscv_vector::FRM_DYN;
10571 if (recog_memoized (insn) < 0)
10572 return mode;
10574 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
10575 return get_attr_frm_mode (insn);
10576 else
10577 return mode;
10580 /* Return the mode that an insn results in. */
10582 static int
10583 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
10585 switch (entity)
10587 case RISCV_VXRM:
10588 return riscv_vxrm_mode_after (insn, mode);
10589 case RISCV_FRM:
10590 return riscv_frm_mode_after (insn, mode);
10591 default:
10592 gcc_unreachable ();
10596 /* Return a mode that ENTITY is assumed to be
10597 switched to at function entry. */
10599 static int
10600 riscv_mode_entry (int entity)
10602 switch (entity)
10604 case RISCV_VXRM:
10605 return VXRM_MODE_NONE;
10606 case RISCV_FRM:
10608 /* According to the RVV 1.0 spec, all vector floating-point operations use
10609 the dynamic rounding mode in the frm register. The same applies in other
10610 similar places. */
10611 return riscv_vector::FRM_DYN;
10613 default:
10614 gcc_unreachable ();
10618 /* Return a mode that ENTITY is assumed to be
10619 switched to at function exit. */
10621 static int
10622 riscv_mode_exit (int entity)
10624 switch (entity)
10626 case RISCV_VXRM:
10627 return VXRM_MODE_NONE;
10628 case RISCV_FRM:
10629 return riscv_vector::FRM_DYN_EXIT;
10630 default:
10631 gcc_unreachable ();
10635 static int
10636 riscv_mode_priority (int, int n)
10638 return n;
10641 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
10642 unsigned int
10643 riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
10645 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10646 return riscv_vector::autovectorize_vector_modes (modes, all);
10648 return default_autovectorize_vector_modes (modes, all);
10651 /* Implement TARGET_VECTORIZE_RELATED_MODE. */
10652 opt_machine_mode
10653 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
10654 poly_uint64 nunits)
10656 if (TARGET_VECTOR)
10657 return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
10658 nunits);
10659 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
10662 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
10664 static bool
10665 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
10666 rtx target, rtx op0, rtx op1,
10667 const vec_perm_indices &sel)
10669 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
10670 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
10671 op1, sel);
10673 return false;
10676 static bool
10677 riscv_frame_pointer_required (void)
10679 return riscv_save_frame_pointer && !crtl->is_leaf;
10682 /* Return the appropriate common costs from COSTS according to VECTYPE. */
10683 static const common_vector_cost *
10684 get_common_costs (const cpu_vector_cost *costs, tree vectype)
10686 gcc_assert (costs);
10688 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
10689 return costs->vls;
10690 return costs->vla;
10693 /* Return the CPU vector costs according to -mtune if the tune info has a
10694 non-NULL vector cost. Otherwise, return the default generic vector costs. */
10695 const cpu_vector_cost *
10696 get_vector_costs ()
10698 const cpu_vector_cost *costs = tune_param->vec_costs;
10699 if (!costs)
10700 return &generic_vector_cost;
10701 return costs;
10704 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10706 static int
10707 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10708 tree vectype, int misalign ATTRIBUTE_UNUSED)
10710 const cpu_vector_cost *costs = get_vector_costs ();
10711 bool fp = false;
10713 if (vectype != NULL)
10714 fp = FLOAT_TYPE_P (vectype);
10716 const common_vector_cost *common_costs = get_common_costs (costs, vectype);
10717 gcc_assert (common_costs != NULL);
10718 switch (type_of_cost)
10720 case scalar_stmt:
10721 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
10723 case scalar_load:
10724 return costs->scalar_load_cost;
10726 case scalar_store:
10727 return costs->scalar_store_cost;
10729 case vector_stmt:
10730 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10732 case vector_load:
10733 return common_costs->align_load_cost;
10735 case vector_store:
10736 return common_costs->align_store_cost;
10738 case vec_to_scalar:
10739 return common_costs->vec_to_scalar_cost;
10741 case scalar_to_vec:
10742 return common_costs->scalar_to_vec_cost;
10744 case unaligned_load:
10745 return common_costs->unalign_load_cost;
10746 case vector_gather_load:
10747 return common_costs->gather_load_cost;
10749 case unaligned_store:
10750 return common_costs->unalign_store_cost;
10751 case vector_scatter_store:
10752 return common_costs->scatter_store_cost;
10754 case cond_branch_taken:
10755 return costs->cond_taken_branch_cost;
10757 case cond_branch_not_taken:
10758 return costs->cond_not_taken_branch_cost;
10760 case vec_perm:
10761 return common_costs->permute_cost;
10763 case vec_promote_demote:
10764 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10766 case vec_construct:
10767 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
10769 default:
10770 gcc_unreachable ();
10773 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
10776 /* Implement targetm.vectorize.create_costs. */
10778 static vector_costs *
10779 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
10781 if (TARGET_VECTOR)
10782 return new riscv_vector::costs (vinfo, costing_for_scalar);
10783 /* Default vector costs. */
10784 return new vector_costs (vinfo, costing_for_scalar);
10787 /* Implement TARGET_PREFERRED_ELSE_VALUE. */
10789 static tree
10790 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
10791 tree *ops)
10793 if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
10794 return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
10796 return default_preferred_else_value (ifn, vectype, nops, ops);
10799 /* If MEM is in the form of "base+offset", extract the two parts
10800 of the address into BASE and OFFSET; otherwise return false
10801 after clearing BASE and OFFSET. */
10803 bool
10804 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10806 rtx addr;
10808 gcc_assert (MEM_P (mem));
10810 addr = XEXP (mem, 0);
10812 if (REG_P (addr))
10814 *base = addr;
10815 *offset = const0_rtx;
10816 return true;
10819 if (GET_CODE (addr) == PLUS
10820 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10822 *base = XEXP (addr, 0);
10823 *offset = XEXP (addr, 1);
10824 return true;
10827 *base = NULL_RTX;
10828 *offset = NULL_RTX;
10830 return false;
10833 /* Implements target hook vector_mode_supported_any_target_p. */
10835 static bool
10836 riscv_vector_mode_supported_any_target_p (machine_mode)
10838 if (TARGET_XTHEADVECTOR)
10839 return false;
10840 return true;
10843 /* Initialize the GCC target structure. */
10844 #undef TARGET_ASM_ALIGNED_HI_OP
10845 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
10846 #undef TARGET_ASM_ALIGNED_SI_OP
10847 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10848 #undef TARGET_ASM_ALIGNED_DI_OP
10849 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
10851 #undef TARGET_OPTION_OVERRIDE
10852 #define TARGET_OPTION_OVERRIDE riscv_option_override
10854 #undef TARGET_OPTION_RESTORE
10855 #define TARGET_OPTION_RESTORE riscv_option_restore
10857 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
10858 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
10860 #undef TARGET_LEGITIMIZE_ADDRESS
10861 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
10863 #undef TARGET_SCHED_ISSUE_RATE
10864 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
10865 #undef TARGET_SCHED_MACRO_FUSION_P
10866 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
10867 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
10868 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
10870 #undef TARGET_SCHED_VARIABLE_ISSUE
10871 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
10873 #undef TARGET_SCHED_ADJUST_COST
10874 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
10876 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10877 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
10879 #undef TARGET_SET_CURRENT_FUNCTION
10880 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function
10882 #undef TARGET_REGISTER_MOVE_COST
10883 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
10884 #undef TARGET_MEMORY_MOVE_COST
10885 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
10886 #undef TARGET_RTX_COSTS
10887 #define TARGET_RTX_COSTS riscv_rtx_costs
10888 #undef TARGET_ADDRESS_COST
10889 #define TARGET_ADDRESS_COST riscv_address_cost
10890 #undef TARGET_INSN_COST
10891 #define TARGET_INSN_COST riscv_insn_cost
10893 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
10894 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
10895 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
10896 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p
10898 #undef TARGET_ASM_FILE_START
10899 #define TARGET_ASM_FILE_START riscv_file_start
10900 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
10901 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
10902 #undef TARGET_ASM_FILE_END
10903 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
10905 #undef TARGET_EXPAND_BUILTIN_VA_START
10906 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
10908 #undef TARGET_PROMOTE_FUNCTION_MODE
10909 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode
10911 #undef TARGET_RETURN_IN_MEMORY
10912 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
10914 #undef TARGET_ASM_OUTPUT_MI_THUNK
10915 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
10916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10919 #undef TARGET_PRINT_OPERAND
10920 #define TARGET_PRINT_OPERAND riscv_print_operand
10921 #undef TARGET_PRINT_OPERAND_ADDRESS
10922 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
10923 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10924 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
10926 #undef TARGET_SETUP_INCOMING_VARARGS
10927 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
10928 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
10929 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
10930 #undef TARGET_STRICT_ARGUMENT_NAMING
10931 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10932 #undef TARGET_MUST_PASS_IN_STACK
10933 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10934 #undef TARGET_PASS_BY_REFERENCE
10935 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
10936 #undef TARGET_ARG_PARTIAL_BYTES
10937 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
10938 #undef TARGET_FUNCTION_ARG
10939 #define TARGET_FUNCTION_ARG riscv_function_arg
10940 #undef TARGET_FUNCTION_ARG_ADVANCE
10941 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
10942 #undef TARGET_FUNCTION_ARG_BOUNDARY
10943 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
10944 #undef TARGET_FNTYPE_ABI
10945 #define TARGET_FNTYPE_ABI riscv_fntype_abi
10946 #undef TARGET_INSN_CALLEE_ABI
10947 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi
10949 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
10950 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
10951 riscv_get_separate_components
10953 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
10954 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
10955 riscv_components_for_bb
10957 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
10958 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
10959 riscv_disqualify_components
10961 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
10962 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
10963 riscv_emit_prologue_components
10965 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
10966 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
10967 riscv_emit_epilogue_components
10969 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
10970 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
10971 riscv_set_handled_components
10973 /* The generic ELF target does not always have TLS support. */
10974 #ifdef HAVE_AS_TLS
10975 #undef TARGET_HAVE_TLS
10976 #define TARGET_HAVE_TLS true
10977 #endif
10979 #undef TARGET_CANNOT_FORCE_CONST_MEM
10980 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem
10982 #undef TARGET_LEGITIMATE_CONSTANT_P
10983 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p
10985 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10986 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p
10988 #undef TARGET_LEGITIMATE_ADDRESS_P
10989 #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p
10991 #undef TARGET_CAN_ELIMINATE
10992 #define TARGET_CAN_ELIMINATE riscv_can_eliminate
10994 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10995 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage
10997 #undef TARGET_CLASS_MAX_NREGS
10998 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs
11000 #undef TARGET_TRAMPOLINE_INIT
11001 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init
11003 #undef TARGET_IN_SMALL_DATA_P
11004 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
11006 #undef TARGET_HAVE_SRODATA_SECTION
11007 #define TARGET_HAVE_SRODATA_SECTION true
11009 #undef TARGET_ASM_SELECT_SECTION
11010 #define TARGET_ASM_SELECT_SECTION riscv_select_section
11012 #undef TARGET_ASM_UNIQUE_SECTION
11013 #define TARGET_ASM_UNIQUE_SECTION riscv_unique_section
11015 #undef TARGET_ASM_SELECT_RTX_SECTION
11016 #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section
11018 #undef TARGET_MIN_ANCHOR_OFFSET
11019 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
11021 #undef TARGET_MAX_ANCHOR_OFFSET
11022 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
11024 #undef TARGET_REGISTER_PRIORITY
11025 #define TARGET_REGISTER_PRIORITY riscv_register_priority
11027 #undef TARGET_CANNOT_COPY_INSN_P
11028 #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p
11030 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11031 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv
11033 #undef TARGET_INIT_BUILTINS
11034 #define TARGET_INIT_BUILTINS riscv_init_builtins
11036 #undef TARGET_BUILTIN_DECL
11037 #define TARGET_BUILTIN_DECL riscv_builtin_decl
11039 #undef TARGET_GIMPLE_FOLD_BUILTIN
11040 #define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin
11042 #undef TARGET_EXPAND_BUILTIN
11043 #define TARGET_EXPAND_BUILTIN riscv_expand_builtin
11045 #undef TARGET_HARD_REGNO_NREGS
11046 #define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
11047 #undef TARGET_HARD_REGNO_MODE_OK
11048 #define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok
11050 #undef TARGET_MODES_TIEABLE_P
11051 #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p
11053 #undef TARGET_SLOW_UNALIGNED_ACCESS
11054 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
11056 #undef TARGET_SECONDARY_MEMORY_NEEDED
11057 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
11059 #undef TARGET_CAN_CHANGE_MODE_CLASS
11060 #define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class
11062 #undef TARGET_CONSTANT_ALIGNMENT
11063 #define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment
11065 #undef TARGET_MERGE_DECL_ATTRIBUTES
11066 #define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes
11068 #undef TARGET_ATTRIBUTE_TABLE
11069 #define TARGET_ATTRIBUTE_TABLE riscv_attribute_table
11071 #undef TARGET_WARN_FUNC_RETURN
11072 #define TARGET_WARN_FUNC_RETURN riscv_warn_func_return
11074 /* The low bit is ignored by jump instructions so it is safe to use. */
11075 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
11076 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
11078 #undef TARGET_MACHINE_DEPENDENT_REORG
11079 #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg
11081 #undef TARGET_NEW_ADDRESS_PROFITABLE_P
11082 #define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p
11084 #undef TARGET_MANGLE_TYPE
11085 #define TARGET_MANGLE_TYPE riscv_mangle_type
11087 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11088 #define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p
11090 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
11091 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
11092 riscv_libgcc_floating_mode_supported_p
11094 #undef TARGET_INIT_LIBFUNCS
11095 #define TARGET_INIT_LIBFUNCS riscv_init_libfuncs
11097 #undef TARGET_C_EXCESS_PRECISION
11098 #define TARGET_C_EXCESS_PRECISION riscv_excess_precision
11100 #undef TARGET_FLOATN_MODE
11101 #define TARGET_FLOATN_MODE riscv_floatn_mode
11103 #undef TARGET_ASAN_SHADOW_OFFSET
11104 #define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset
11106 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11107 #undef TARGET_DEFAULT_TARGET_FLAGS
11108 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
11109 #endif
11111 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11112 #define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p
11114 #undef TARGET_VERIFY_TYPE_CONTEXT
11115 #define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context
11117 #undef TARGET_ESTIMATED_POLY_VALUE
11118 #define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value
11120 #undef TARGET_VECTORIZE_GET_MASK_MODE
11121 #define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode
11123 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
11124 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive
11126 #undef TARGET_VECTOR_ALIGNMENT
11127 #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
11129 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
11130 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment
11132 #undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
11133 #define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
11135 #undef TARGET_ZERO_CALL_USED_REGS
11136 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs
11138 #undef TARGET_ARRAY_MODE
11139 #define TARGET_ARRAY_MODE riscv_array_mode
11141 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11142 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
11144 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
11145 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
11146 riscv_vectorize_preferred_vector_alignment
11148 /* Mode switching hooks. */
11150 #undef TARGET_MODE_EMIT
11151 #define TARGET_MODE_EMIT riscv_emit_mode_set
11152 #undef TARGET_MODE_NEEDED
11153 #define TARGET_MODE_NEEDED riscv_mode_needed
11154 #undef TARGET_MODE_AFTER
11155 #define TARGET_MODE_AFTER riscv_mode_after
11156 #undef TARGET_MODE_ENTRY
11157 #define TARGET_MODE_ENTRY riscv_mode_entry
11158 #undef TARGET_MODE_EXIT
11159 #define TARGET_MODE_EXIT riscv_mode_exit
11160 #undef TARGET_MODE_PRIORITY
11161 #define TARGET_MODE_PRIORITY riscv_mode_priority
11163 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
11164 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
11165 riscv_autovectorize_vector_modes
11167 #undef TARGET_VECTORIZE_RELATED_MODE
11168 #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode
11170 #undef TARGET_VECTORIZE_VEC_PERM_CONST
11171 #define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const
11173 #undef TARGET_FRAME_POINTER_REQUIRED
11174 #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
11176 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11177 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11178 riscv_builtin_vectorization_cost
11180 #undef TARGET_VECTORIZE_CREATE_COSTS
11181 #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
11183 #undef TARGET_PREFERRED_ELSE_VALUE
11184 #define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value
11186 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
11187 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
11189 struct gcc_target targetm = TARGET_INITIALIZER;
11191 #include "gt-riscv.h"