gcc/config/riscv/riscv.cc
/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
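
/* Illustrative note (not from the original source): such a wrapper has
   the rtl shape

     (unspec [(symbol_ref "sym")] UNSPEC_ADDRESS_FIRST + <symbol-type>)

   so UNSPEC_ADDRESS below recovers the wrapped symbol and
   UNSPEC_ADDRESS_TYPE recovers the riscv_symbol_type encoded in the
   unspec number.  */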

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))

/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching has static frm, false otherwise.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)

/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)

/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, clearing all fields to zero.  */
  void reset (void);
};

enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* Indicates whether there is at least one static rounding mode
     instruction in the function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};

struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  bool use_uw;
  bool save_temporary;
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};
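
/* Illustrative example (not from the original source): loading
   0x12345678 takes two stages,

     codes[0] = { UNKNOWN, 0x12345000 }   ->  lui  rd, 0x12345
     codes[1] = { PLUS,    0x678 }        ->  addi rd, rd, 0x678

   following the usual LUI/ADDI split of a 32-bit constant.  */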

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8

enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};

/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool vector_unaligned_access;
  bool use_divmod_expansion;
  bool overlap_op_by_pieces;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};

/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether misaligned vector accesses are supported (i.e. do not
   throw an exception).  */
bool riscv_vector_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
  GR_REGS,	GR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  JALR_REGS,	JALR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FRAME_REGS,	FRAME_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  VM_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
};

/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* segment_permute (2) */
  1, /* segment_permute (3) */
  1, /* segment_permute (4) */
  1, /* segment_permute (5) */
  1, /* segment_permute (6) */
  1, /* segment_permute (7) */
  1, /* segment_permute (8) */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};

/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* segment_permute (2) */
    1, /* segment_permute (3) */
    1, /* segment_permute (4) */
    1, /* segment_permute (5) */
    1, /* segment_permute (6) */
    1, /* segment_permute (7) */
    1, /* segment_permute (8) */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};

/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};

/* Generic costs for vector insn classes.  It is supposed to be the vector cost
   models used by default if no other cost model was specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1,			/* scalar_int_stmt_cost  */
  1,			/* scalar_fp_stmt_cost  */
  1,			/* scalar_load_cost  */
  1,			/* scalar_store_cost  */
  3,			/* cond_taken_branch_cost  */
  1,			/* cond_not_taken_branch_cost  */
  &rvv_vls_vector_cost, /* vls  */
  &rvv_vla_vector_cost, /* vla  */
  &rvv_regmove_vector_cost, /* regmove  */
};

/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  2,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  3,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  4,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,	/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  6,						/* issue_rate */
  3,						/* branch_cost */
  3,						/* memory_cost */
  3,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,		/* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)},	/* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)},	/* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  4,						/* memory_cost */
  4,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  true,						/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  true,						/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* vector_unaligned_access */
  false,					/* use_divmod_expansion */
  false,					/* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING,				/* fusible_ops */
  NULL,						/* vector cost */
};

static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);

/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};

static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};

/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};

/* Global variable to distinguish whether we should save and restore s0/fp for
   function.  */
static bool riscv_save_frame_pointer;

typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

typedef insn_code (*code_for_push_pop_t) (machine_mode);

void riscv_frame_info::reset (void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

/* Get the arch string from an options object.  */

template <class T>
static const char *
get_arch_str (const T *opts)
{
  return opts->x_riscv_arch_string;
}

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}

/* Return the riscv_tune_info entry for the given name string.  If it is
   not found, return nullptr when NULL_P is true, otherwise report an
   error and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}

/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
  int upper_trailing_ones = ctz_hwi (~value >> 32);
  int lower_leading_ones = clz_hwi (~value << 32);

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;
      return 1;
    }
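
  /* Illustrative examples (not from the original source): value 100 is
     SMALL_OPERAND and becomes a single "addi rd, x0, 100"; value
     0x12345000 is LUI_OPERAND and becomes a single "lui rd, 0x12345".  */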

  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      bool use_uw = false;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS
	  && !SMALL_OPERAND (x)
	  && (LUI_OPERAND (x << IMM_BITS)
	      || (TARGET_64BIT
		  && TARGET_ZBA
		  && LUI_OPERAND ((x << IMM_BITS)
				  & ~HOST_WIDE_INT_C (0x80000000)))))
	shift -= IMM_BITS, x <<= IMM_BITS;

      /* If X has bits 32..63 clear and bit 31 set, then go ahead and mark
	 it as desiring a "uw" operation for the shift.  That way we can have
	 LUI+ADDI to generate the constant, then shift it into position
	 clearing out the undesirable bits.  */
      if (!LUI_OPERAND (x)
	  && TARGET_64BIT
	  && TARGET_ZBA
	  && clz_hwi (x) == 32)
	{
	  x = sext_hwi (x, 32);
	  use_uw = true;
	}

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = use_uw;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
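
  /* Illustrative example (not from the original source): for
     0x123450000 there are 16 trailing zeros, but only 4 are stripped
     because LUI applies to the low 12 bits (0x123450000 =
     0x12345000 << 4), giving "lui rd, 0x12345" then "slli rd, rd, 4".  */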

  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}
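      /* Illustrative example (not from the original source): for
	 0xFFFFFFFFFFFFF00F there are 52 leading ones and 4 trailing
	 ones, so the 8 zero bits fit in an 11-bit window; the sequence
	 is "li rd, -256" followed by "rori rd, rd, 60".  */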
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else if (upper_trailing_ones < 32 && lower_leading_ones < 32
	       && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = ((value << (32 - upper_trailing_ones))
			    | ((unsigned HOST_WIDE_INT) value
			       >> (32 + upper_trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 32 - upper_trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}
      /* If LUI/ADDI are going to set bits 32..63 and we need a small
	 number of them cleared, we might be able to use bclri profitably.

	 Note we may allow clearing of bit 31 using bclri.  There's a class
	 of constants with that bit clear where this helps.  */
      else if (TARGET_64BIT
	       && TARGET_ZBS
	       && (32 - popcount_hwi (value & HOST_WIDE_INT_C (0xffffffff80000000))) + 1 < cost)
	{
	  /* Turn on all those upper bits and synthesize the result.  */
	  HOST_WIDE_INT nval = value | HOST_WIDE_INT_C (0xffffffff80000000);
	  alt_cost = riscv_build_integer_1 (alt_codes, nval, mode);

	  /* Now iterate over the bits we want to clear until the cost is
	     too high or we're done.  */
	  nval = value ^ HOST_WIDE_INT_C (-1);
	  nval &= HOST_WIDE_INT_C (~0x7fffffff);
	  while (nval && alt_cost < cost)
	    {
	      HOST_WIDE_INT bit = ctz_hwi (nval);
	      alt_codes[alt_cost].code = AND;
	      alt_codes[alt_cost].value = ~(1UL << bit);
	      alt_codes[alt_cost].use_uw = false;
	      alt_codes[alt_cost].save_temporary = false;
	      alt_cost++;
	      nval &= ~(1UL << bit);
	    }

	  if (nval == 0 && alt_cost <= cost)
	    {
	      memcpy (codes, alt_codes, sizeof (alt_codes));
	      cost = alt_cost;
	    }
	}
    }

  if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
    {
      if ((value % 9) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 9, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 9;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 5) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 5, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 5;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 3) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 3, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 3;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }
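
  /* Illustrative example (not from the original source): the FMA stages
     above map onto Zba shift-add instructions; e.g. 45 = 9 * 5 can be
     built as "li rd, 5" followed by "sh3add rd, rd, rd"
     (rd = rd * 8 + rd).  */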

  /* We might be able to generate a constant close to our target
     then a final ADDI to get the desired constant.  */
  if (cost > 2
      && (value & 0xfff) != 0
      && (value & 0x1800) == 0x1000)
    {
      HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
      alt_cost = 1 + riscv_build_integer_1 (alt_codes,
					    value - adjustment, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = PLUS;
	  alt_codes[alt_cost - 1].value = adjustment;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Final cases, particularly focused on bseti.  */
  if (cost > 2 && TARGET_ZBS)
    {
      int i = 0;

      /* First handle any bits set by LUI.  Be careful of the
	 SImode sign bit!  */
      if (value & 0x7ffff000)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = value & 0x7ffff000;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ffff000;
	  i++;
	}

      /* Next, any bits we can handle with addi.  */
      if (value & 0x7ff)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
	  alt_codes[i].value = value & 0x7ff;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ff;
	  i++;
	}

      /* And any residuals with bseti.  */
      while (i < cost && value)
	{
	  HOST_WIDE_INT bit = ctz_hwi (value);
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = 1UL << bit;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~(1ULL << bit);
	  i++;
	}

      /* If LUI+ADDI+BSETI resulted in a more efficient
	 sequence, then use it.  */
      if (value == 0 && i < cost)
	{
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = i;
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}

/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* See if we can generate the inverted constant, then use
     not to get the desired constant.

     This can't be in riscv_build_integer_1 as it'll mutually
     recurse with another case in there.  And it has to recurse
     into riscv_build_integer so we get the trailing 0s case
     above.  */
  if (cost > 2 && value < 0)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost;

      HOST_WIDE_INT nval = ~value;
      alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = XOR;
	  alt_codes[alt_cost - 1].value = -1;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  /* With pack we can generate a 64 bit constant with the same high
     and low 32 bits trivially.  */
  if (cost > 3 && TARGET_64BIT && TARGET_ZBKB)
    {
      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
      if (hival == loval)
	{
	  cost = 1 + riscv_build_integer_1 (codes, sext_hwi (loval, 32), mode);
	  codes[cost - 1].code = CONCAT;
	  codes[cost - 1].value = 0;
	  codes[cost - 1].use_uw = false;
	  codes[cost - 1].save_temporary = false;
	}
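
      /* Illustrative example (not from the original source): for
	 0x1234567812345678 the low half is synthesized once and
	 "pack rd, rs, rs" concatenates the low 32 bits of rs with
	 themselves to form the full 64-bit constant.  */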

      /* An arbitrary 64 bit constant can be synthesized in 5 instructions
	 using zbkb.  We may do better than that if the upper or lower half
	 can be synthesized with a single LUI, ADDI or BSET.  Regardless the
	 basic steps are the same.  */
      if (cost > 3 && can_create_pseudo_p ())
	{
	  struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];
	  struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
	  int hi_cost, lo_cost;

	  /* Synthesize and get cost for each half.  */
	  lo_cost
	    = riscv_build_integer_1 (lo_codes, sext_hwi (loval, 32), mode);
	  hi_cost
	    = riscv_build_integer_1 (hi_codes, sext_hwi (hival, 32), mode);

	  /* If profitable, finish synthesis using zbkb.  */
	  if (cost > hi_cost + lo_cost + 1)
	    {
	      /* We need the low half independent of the high half.  So
		 mark it as creating a temporary we'll use later.  */
	      memcpy (codes, lo_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      codes[lo_cost - 1].save_temporary = true;

	      /* Now the high half synthesis.  */
	      memcpy (codes + lo_cost, hi_codes,
		      hi_cost * sizeof (struct riscv_integer_op));

	      /* Adjust the cost.  */
	      cost = hi_cost + lo_cost + 1;

	      /* And finally (ab)use VEC_MERGE to indicate we want to
		 merge the two parts together.  */
	      codes[cost - 1].code = VEC_MERGE;
	      codes[cost - 1].value = 0;
	      codes[cost - 1].use_uw = false;
	      codes[cost - 1].save_temporary = false;
	    }
	}
    }

  return cost;
}

/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}

/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}
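
/* Illustrative note (not from the original source): for a value such as
   0x5555555555555555 the two 32-bit halves are identical, so the code
   above reuses LO for the high part and the result is simply
   (LO << 32) + LO.  */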

/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_TLSDESC: return 6; /* 4-instruction call + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}
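
/* Illustrative note (not from the original source): e.g. a
   SYMBOL_ABSOLUTE word load expands to

     lui  a0, %hi(sym)
     lw   a0, %lo(sym)(a0)

   while SYMBOL_PCREL uses auipc with %pcrel_hi/%pcrel_lo relocations
   instead.  */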

/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the latest RISC-V ISA
   Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling, ensuring that entries 29 and 30 of HF are
     the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};

/* Display floating-point values at the assembly level, which is consistent
   with the zfa extension of llvm:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};

/* Return index of the FLI instruction table if rtx X is an immediate constant that can
   be moved using a single FLI instruction in zfa extension.  Return -1 if not found.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P(x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* When the lower 32 bits are not all 0, it is impossible to be in
	 the table.  */
      if (ival & (unsigned HOST_WIDE_INT)0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;
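
  /* Illustrative note (not from the original source): entries 0 and 1
     (-1.0 and the minimum normal) do not follow the ascending order of
     the remaining entries, which is why they are checked directly and
     the binary search below starts at index 2.  */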

  /* Perform a binary search to find target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;

  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m + 1;
      else
	r = m - 1;
    }

  return -1;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}
1538 static bool
1539 riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1541 enum riscv_symbol_type type;
1542 rtx base, offset;
1544 /* There's no way to calculate VL-based values using relocations. */
1545 subrtx_iterator::array_type array;
1546 FOR_EACH_SUBRTX (iter, array, x, ALL)
1547 if (GET_CODE (*iter) == CONST_POLY_INT)
1548 return true;
1550 /* There is no assembler syntax for expressing an address-sized
1551 high part. */
1552 if (GET_CODE (x) == HIGH)
1553 return true;
1555 if (satisfies_constraint_zfli (x))
1556 return true;
1558 split_const (x, &base, &offset);
1559 if (riscv_symbolic_constant_p (base, &type))
1561 if (type == SYMBOL_FORCE_TO_MEM)
1562 return false;
1564 /* As an optimization, don't spill symbolic constants that are as
1565 cheap to rematerialize as to access in the constant pool. */
1566 if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
1567 return true;
1569 /* As an optimization, avoid needlessly generate dynamic relocations. */
1570 if (flag_pic)
1571 return true;
1574 /* TLS symbols must be computed by riscv_legitimize_move. */
1575 if (tls_referenced_p (x))
1576 return true;
1578 return false;

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Get valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}

/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
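
/* Illustrative note (not from the original source): "split in two"
   means the address is expanded as a HIGH/LO_SUM pair, e.g. for
   SYMBOL_ABSOLUTE:

     (set (reg) (high (symbol_ref "sym")))
     (set (reg) (lo_sum (reg) (symbol_ref "sym")))

   which assembles to lui/addi with %hi and %lo relocations.  */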

/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
		      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting a
     decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
	 so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
	return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
	       ? DECL_ALIGN (SYMBOL_REF_DECL (x))
	       : 1);
      size = (SYMBOL_REF_DECL (x)
	      && DECL_SIZE (SYMBOL_REF_DECL (x))
	      && tree_fits_uhwi_p (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
	      : 2 * BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}

/* Return true if mode is the RVV enabled mode.
   For example: 'RVVMF2SI' mode is disabled,
   whereas 'RVVM1SI' mode is enabled if MIN_VLEN == 32.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...)	\
  case MODE##mode:			\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is the RVV enabled tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...)	\
  case MODE##mode:				\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if mode is the RVV enabled vls mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT)	\
  case MODE##mode:			\
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if it is either of below modes.
   1. RVV vector mode.
   2. RVV tuple mode.
   3. RVV vls mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
	 || riscv_v_ext_vls_mode_p (mode);
}

static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
				      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size < scalar_unit_size)
    return 1;

  /* Ensure the vls mode is exact_div by scalar_unit_size.  */
  gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

  return vls_unit_size / scalar_unit_size;
}

static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  switch (vls_mode_size)
    {
    case 16:
      return TImode;
    case 8:
      return DImode;
    case 4:
      return SImode;
    case 2:
      return HImode;
    case 1:
      return QImode;
    default:
      gcc_unreachable ();
    }
}
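
/* Illustrative note (not from the original source): e.g. a 16-byte VLS
   vector with 8-byte GPRs gives an aggregate count of 16 / 8 = 2
   registers, and riscv_v_vls_to_gpr_mode maps the 16-byte size to
   TImode.  */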
1838 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
1839 NUNITS size for corresponding machine_mode. */
1841 poly_int64
1842 riscv_v_adjust_nunits (machine_mode mode, int scale)
1844 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
1845 if (riscv_v_ext_mode_p (mode))
1847 if (TARGET_MIN_VLEN == 32)
1848 scale = scale / 2;
1849 return riscv_vector_chunks * scale;
1851 return scale;
1854 /* Called from ADJUST_NUNITS in riscv-modes.def. Return the correct
1855 NUNITS size for the corresponding machine_mode. */
1857 poly_int64
1858 riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
1860 if (riscv_v_ext_mode_p (mode))
1862 scalar_mode smode = GET_MODE_INNER (mode);
1863 int size = GET_MODE_SIZE (smode);
1864 int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
1865 if (fractional_p)
1866 return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
1867 else
1868 return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
1870 /* Set the size of disabled RVV modes to 1 by default. */
1871 return 1;
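/* Worked example (illustrative, assuming MIN_VLEN > 32 so that
   riscv_bytes_per_vector_chunk == 8): for an SImode element (size 4)
   we get nunits_per_chunk == 2; with fractional_p == false, lmul == 2
   and nf == 1 the result is 2 * 2 * riscv_vector_chunks, a poly_int
   that scales with the runtime vector length.  */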
1874 /* Called from ADJUST_BYTESIZE in riscv-modes.def. Return the correct
1875 BYTE size for the corresponding machine_mode. */
1877 poly_int64
1878 riscv_v_adjust_bytesize (machine_mode mode, int scale)
1880 if (riscv_v_ext_vector_mode_p (mode))
1882 if (TARGET_XTHEADVECTOR)
1883 return BYTES_PER_RISCV_VECTOR;
1885 poly_int64 nunits = GET_MODE_NUNITS (mode);
1887 if (nunits.coeffs[0] > 8)
1888 return exact_div (nunits, 8);
1889 else if (nunits.is_constant ())
1890 return 1;
1891 else
1892 return poly_int64 (1, 1);
1895 return scale;
1898 /* Called from ADJUST_PRECISION in riscv-modes.def. Return the correct
1899 PRECISION size for the corresponding machine_mode. */
1901 poly_int64
1902 riscv_v_adjust_precision (machine_mode mode, int scale)
1904 return riscv_v_adjust_nunits (mode, scale);
1907 /* Return true if X is a valid address for machine mode MODE. If it is,
1908 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
1909 effect. */
1911 static bool
1912 riscv_classify_address (struct riscv_address_info *info, rtx x,
1913 machine_mode mode, bool strict_p)
1915 if (th_classify_address (info, x, mode, strict_p))
1916 return true;
1918 switch (GET_CODE (x))
1920 case REG:
1921 case SUBREG:
1922 info->type = ADDRESS_REG;
1923 info->reg = x;
1924 info->offset = const0_rtx;
1925 return riscv_valid_base_register_p (info->reg, mode, strict_p);
1927 case PLUS:
1928 /* RVV load/store instructions disallow any offset. */
1929 if (riscv_v_ext_mode_p (mode))
1930 return false;
1932 info->type = ADDRESS_REG;
1933 info->reg = XEXP (x, 0);
1934 info->offset = XEXP (x, 1);
1935 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
1936 && riscv_valid_offset_p (info->offset, mode));
1938 case LO_SUM:
1939 /* RVV load/store instructions disallow LO_SUM. */
1940 if (riscv_v_ext_mode_p (mode))
1941 return false;
1943 info->type = ADDRESS_LO_SUM;
1944 info->reg = XEXP (x, 0);
1945 info->offset = XEXP (x, 1);
1946 /* We have to trust the creator of the LO_SUM to do something vaguely
1947 sane. Target-independent code that creates a LO_SUM should also
1948 create and verify the matching HIGH. Target-independent code that
1949 adds an offset to a LO_SUM must prove that the offset will not
1950 induce a carry. Failure to do either of these things would be
1951 a bug, and we are not required to check for it here. The RISC-V
1952 backend itself should only create LO_SUMs for valid symbolic
1953 constants, with the high part being either a HIGH or a copy
1954 of _gp. */
1955 info->symbol_type
1956 = riscv_classify_symbolic_expression (info->offset);
1957 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
1958 && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));
1960 case CONST_INT:
1961 /* We only allow the const0_rtx for the RVV load/store. For example:
1962 +----------------------------------------------------------+
1963 | li a5,0 |
1964 | vsetvli zero,a1,e32,m1,ta,ma |
1965 | vle32.v v24,0(a5) <- propagate the const 0 to a5 here. |
1966 | vs1r.v v24,0(a0) |
1967 +----------------------------------------------------------+
1968 It can be folded to:
1969 +----------------------------------------------------------+
1970 | vsetvli zero,a1,e32,m1,ta,ma |
1971 | vle32.v v24,0(zero) |
1972 | vs1r.v v24,0(a0) |
1973 +----------------------------------------------------------+
1974 This behavior benefits the underlying RVV auto-vectorization. */
1975 if (riscv_v_ext_mode_p (mode))
1976 return x == const0_rtx;
1978 /* Small-integer addresses don't occur very often, but they
1979 are legitimate if x0 is a valid base register. */
1980 info->type = ADDRESS_CONST_INT;
1981 return SMALL_OPERAND (INTVAL (x));
1983 default:
1984 return false;
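/* Classification examples (an illustrative sketch, register names
   used informally):
     (reg:DI a0)                            -> ADDRESS_REG, offset 0
     (plus:DI (reg:DI a0) (const_int 12))   -> ADDRESS_REG, offset 12
     (lo_sum:DI (reg:DI a5) (symbol_ref:DI ("x"))) -> ADDRESS_LO_SUM
     (const_int 4)                          -> ADDRESS_CONST_INT
   RVV modes accept only the plain register form and const0_rtx.  */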
1988 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1990 static bool
1991 riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
1992 code_helper = ERROR_MARK)
1994 /* Disallow RVV modes as base addresses.
1995 E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0)). */
1996 if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
1997 return false;
1998 struct riscv_address_info addr;
2000 return riscv_classify_address (&addr, x, mode, strict_p);
2003 /* Return true if hard reg REGNO can be used in compressed instructions. */
2005 static bool
2006 riscv_compressed_reg_p (int regno)
2008 /* x8-x15/f8-f15 are compressible registers. */
2009 return ((TARGET_RVC || TARGET_ZCA)
2010 && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
2011 || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
2014 /* Return true if x is an unsigned 5-bit immediate scaled by 4. */
2016 static bool
2017 riscv_compressed_lw_offset_p (rtx x)
2019 return (CONST_INT_P (x)
2020 && (INTVAL (x) & 3) == 0
2021 && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
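/* For illustration (assuming the usual 5-bit, scaled-by-4 C.LW/C.SW
   offset encoding, i.e. CSW_MAX_OFFSET == 124): offsets 0, 4, ...,
   124 are accepted, 126 fails the alignment check, and 128 fails the
   range check.  */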
2024 /* Return true if load/store from/to address x can be compressed. */
2026 static bool
2027 riscv_compressed_lw_address_p (rtx x)
2029 struct riscv_address_info addr;
2030 bool result = riscv_classify_address (&addr, x, GET_MODE (x),
2031 reload_completed);
2033 /* Return false if address is not compressed_reg + small_offset. */
2034 if (!result
2035 || addr.type != ADDRESS_REG
2036 /* Before reload, assume all registers are OK. */
2037 || (reload_completed
2038 && !riscv_compressed_reg_p (REGNO (addr.reg))
2039 && addr.reg != stack_pointer_rtx)
2040 || !riscv_compressed_lw_offset_p (addr.offset))
2041 return false;
2043 return result;
2046 /* Return the number of instructions needed to load or store a value
2047 of mode MODE at address X. Return 0 if X isn't valid for MODE.
2048 Assume that multiword moves may need to be split into word moves
2049 if MIGHT_SPLIT_P, otherwise assume that a single load or store is
2050 enough. */
2053 riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
2055 struct riscv_address_info addr = {};
2056 int n = 1;
2058 if (!riscv_classify_address (&addr, x, mode, false))
2060 /* This could be a pattern from the pic.md file, in which case we want
2061 this address to always have a cost of 3 to make it as expensive as the
2062 most expensive symbol. This prevents constant propagation from
2063 preferring symbols over register plus offset. */
2064 return 3;
2067 /* BLKmode is used for single unaligned loads and stores and should
2068 not count as a multiword mode. */
2069 if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
2070 n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2072 if (addr.type == ADDRESS_LO_SUM)
2073 n += riscv_symbol_insns (addr.symbol_type) - 1;
2075 return n;
2078 /* Return the number of instructions needed to load constant X.
2079 Return 0 if X isn't a valid constant. */
2082 riscv_const_insns (rtx x)
2084 enum riscv_symbol_type symbol_type;
2085 rtx offset;
2087 switch (GET_CODE (x))
2089 case HIGH:
2090 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
2091 || !riscv_split_symbol_type (symbol_type))
2092 return 0;
2094 /* This is simply an LUI. */
2095 return 1;
2097 case CONST_INT:
2099 int cost = riscv_integer_cost (INTVAL (x));
2100 /* Force complicated constants to memory. */
2101 return cost < 4 ? cost : 0;
2104 case CONST_DOUBLE:
2105 /* See if we can use FMV directly. */
2106 if (satisfies_constraint_zfli (x))
2107 return 1;
2109 /* We can use x0 to load floating-point zero. */
2110 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2111 case CONST_VECTOR:
2113 /* TODO: This is not accurate, we will need to
2114 adapt the COST of CONST_VECTOR in the future
2115 for the following cases:
2117 - 1. const duplicate vector with element value
2118 in range of [-16, 15].
2119 - 2. const duplicate vector with element value
2120 outside the range of [-16, 15].
2121 - 3. const series vector.
2122 ...etc. */
2123 if (riscv_v_ext_mode_p (GET_MODE (x)))
2125 /* const series vector. */
2126 rtx base, step;
2127 if (const_vec_series_p (x, &base, &step))
2129 /* This is not accurate; we will need to adapt the COST
2130 according to BASE and STEP. */
2131 return 1;
2134 rtx elt;
2135 if (const_vec_duplicate_p (x, &elt))
2137 /* We don't allow a CONST_VECTOR of DI elements on an RV32
2138 system, since the ELT constant value cannot be held
2139 within a single register; this prevents reload from turning
2140 a DI register vec_duplicate into vmv.v.x. */
2141 scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
2142 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
2143 && !immediate_operand (elt, Pmode))
2144 return 0;
2145 /* Constants from -16 to 15 can be loaded with vmv.v.i.
2146 The Wc0, Wc1 constraints are already covered by the
2147 vi constraint so we do not need to check them here
2148 separately. */
2149 if (satisfies_constraint_vi (x))
2150 return 1;
2152 /* Any int/FP constants can always be broadcast from a
2153 scalar register. Loading of a floating-point
2154 constant incurs a literal-pool access. Allow this in
2155 order to increase vectorization possibilities. */
2156 int n = riscv_const_insns (elt);
2157 if (CONST_DOUBLE_P (elt))
2158 return 1 + 4; /* vfmv.v.f + memory access. */
2159 else
2161 /* We need as many insns as it takes to load the constant
2162 into a GPR and one vmv.v.x. */
2163 if (n != 0)
2164 return 1 + n;
2165 else
2166 return 1 + 4; /* vmv.v.x + memory access. */
2171 /* TODO: We may support more const vectors in the future. */
2172 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2175 case CONST:
2176 /* See if we can refer to X directly. */
2177 if (riscv_symbolic_constant_p (x, &symbol_type))
2178 return riscv_symbol_insns (symbol_type);
2180 /* Otherwise try splitting the constant into a base and offset. */
2181 split_const (x, &x, &offset);
2182 if (offset != 0)
2184 int n = riscv_const_insns (x);
2185 if (n != 0)
2186 return n + riscv_integer_cost (INTVAL (offset));
2188 return 0;
2190 case SYMBOL_REF:
2191 case LABEL_REF:
2192 return riscv_symbol_insns (riscv_classify_symbol (x));
2194 /* TODO: In RVV, we get a CONST_POLY_INT by using a csrr VLENB
2195 instruction and several scalar shift or mult instructions, so the
2196 exact cost is so far unknown. We set it to 4 temporarily. */
2197 case CONST_POLY_INT:
2198 return 4;
2200 default:
2201 return 0;
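/* For illustration (a sketch): the CONST_INT 0x12345678 needs two
   instructions,
     lui  a0, 0x12345
     addi a0, a0, 0x678
   so riscv_const_insns returns 2, while a constant whose sequence
   would take four or more instructions returns 0 and is forced to
   memory by the caller.  */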
2205 /* X is a doubleword constant that can be handled by splitting it into
2206 two words and loading each word separately. Return the number of
2207 instructions required to do this. */
2210 riscv_split_const_insns (rtx x)
2212 unsigned int low, high;
2214 low = riscv_const_insns (riscv_subword (x, false));
2215 high = riscv_const_insns (riscv_subword (x, true));
2216 gcc_assert (low > 0 && high > 0);
2217 return low + high;
2220 /* Return the number of instructions needed to implement INSN,
2221 given that it loads from or stores to MEM. */
2224 riscv_load_store_insns (rtx mem, rtx_insn *insn)
2226 machine_mode mode;
2227 bool might_split_p;
2228 rtx set;
2230 gcc_assert (MEM_P (mem));
2231 mode = GET_MODE (mem);
2233 /* Try to prove that INSN does not need to be split. */
2234 might_split_p = true;
2235 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
2236 might_split_p = false;
2237 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
2239 set = single_set (insn);
2240 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
2241 might_split_p = false;
2244 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
2247 /* Emit a move from SRC to DEST. Assume that the move expanders can
2248 handle all moves if !can_create_pseudo_p (). The distinction is
2249 important because, unlike emit_move_insn, the move expanders know
2250 how to force Pmode objects into the constant pool even when the
2251 constant pool address is not itself legitimate. */
2254 riscv_emit_move (rtx dest, rtx src)
2256 return (can_create_pseudo_p ()
2257 ? emit_move_insn (dest, src)
2258 : emit_move_insn_1 (dest, src));
2261 /* Emit an instruction of the form (set TARGET SRC). */
2263 static rtx
2264 riscv_emit_set (rtx target, rtx src)
2266 emit_insn (gen_rtx_SET (target, src));
2267 return target;
2270 /* Emit an instruction of the form (set DEST (CODE X)). */
2273 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
2275 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
2278 /* Emit an instruction of the form (set DEST (CODE X Y)). */
2281 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
2283 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
2286 /* Compute (CODE X Y) and store the result in a new register
2287 of mode MODE. Return that new register. */
2289 static rtx
2290 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
2292 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
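/* Usage sketch (illustrative): riscv_emit_binary (PLUS, dest, x, y)
   emits (set dest (plus x y)) and returns DEST, while
   riscv_force_binary (word_mode, PLUS, x, y) emits the same
   operation into a fresh pseudo and returns that register.  */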
2295 static rtx
2296 riscv_swap_instruction (rtx inst)
2298 gcc_assert (GET_MODE (inst) == SImode);
2299 if (BYTES_BIG_ENDIAN)
2300 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
2301 return inst;
2304 /* Copy VALUE to a register and return that register. If new pseudos
2305 are allowed, copy it into a new register, otherwise use DEST. */
2307 static rtx
2308 riscv_force_temporary (rtx dest, rtx value)
2310 if (can_create_pseudo_p ())
2311 return force_reg (Pmode, value);
2312 else
2314 riscv_emit_move (dest, value);
2315 return dest;
2319 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2320 then add CONST_INT OFFSET to the result. */
2322 static rtx
2323 riscv_unspec_address_offset (rtx base, rtx offset,
2324 enum riscv_symbol_type symbol_type)
2326 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2327 UNSPEC_ADDRESS_FIRST + symbol_type);
2328 if (offset != const0_rtx)
2329 base = gen_rtx_PLUS (Pmode, base, offset);
2330 return gen_rtx_CONST (Pmode, base);
2333 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2334 type SYMBOL_TYPE. */
2337 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2339 rtx base, offset;
2341 split_const (address, &base, &offset);
2342 return riscv_unspec_address_offset (base, offset, symbol_type);
2345 /* If OP is an UNSPEC address, return the address to which it refers,
2346 otherwise return OP itself. */
2348 static rtx
2349 riscv_strip_unspec_address (rtx op)
2351 rtx base, offset;
2353 split_const (op, &base, &offset);
2354 if (UNSPEC_ADDRESS_P (base))
2355 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2356 return op;
2359 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
2360 high part to BASE and return the result. Just return BASE otherwise.
2361 TEMP is as for riscv_force_temporary.
2363 The returned expression can be used as the first operand to a LO_SUM. */
2365 static rtx
2366 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2368 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2369 return riscv_force_temporary (temp, addr);
2372 /* Load an entry from the GOT for a TLS GD access. */
2374 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2376 if (Pmode == DImode)
2377 return gen_got_load_tls_gddi (dest, sym);
2378 else
2379 return gen_got_load_tls_gdsi (dest, sym);
2382 /* Load an entry from the GOT for a TLS IE access. */
2384 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2386 if (Pmode == DImode)
2387 return gen_got_load_tls_iedi (dest, sym);
2388 else
2389 return gen_got_load_tls_iesi (dest, sym);
2392 /* Add in the thread pointer for a TLS LE access. */
2394 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2396 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2397 if (Pmode == DImode)
2398 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2399 else
2400 return gen_tls_add_tp_lesi (dest, base, tp, sym);
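/* For illustration (the typical local-exec sequence; a sketch, not
   taken from this file's comments):
     lui  a5, %tprel_hi(sym)
     add  a5, a5, tp, %tprel_add(sym)
     lw   a0, %tprel_lo(sym)(a5)
   where %tprel_add only annotates the add for linker relaxation.  */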
2403 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2404 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2405 constant in that context and can be split into high and low parts.
2406 If so, and if LOW_OUT is nonnull, emit the high part and store the
2407 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2409 TEMP is as for riscv_force_temporary and is used to load the high
2410 part into a register.
2412 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2413 a legitimate SET_SRC for an .md pattern, otherwise the low part
2414 is guaranteed to be a legitimate address for mode MODE. */
2416 bool
2417 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2419 enum riscv_symbol_type symbol_type;
2421 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2422 || !riscv_symbolic_constant_p (addr, &symbol_type)
2423 || riscv_symbol_insns (symbol_type) == 0
2424 || !riscv_split_symbol_type (symbol_type))
2425 return false;
2427 if (low_out)
2428 switch (symbol_type)
2430 case SYMBOL_FORCE_TO_MEM:
2431 return false;
2433 case SYMBOL_ABSOLUTE:
2435 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2436 high = riscv_force_temporary (temp, high);
2437 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2439 break;
2441 case SYMBOL_PCREL:
2443 static unsigned seqno;
2444 char buf[32];
2445 rtx label;
2447 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2448 gcc_assert ((size_t) bytes < sizeof (buf));
2450 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2451 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2452 /* ??? Ugly hack to make weak symbols work. May need to change the
2453 RTL for the auipc and/or low patterns to get a better fix for
2454 this. */
2455 if (! nonzero_address_p (addr))
2456 SYMBOL_REF_WEAK (label) = 1;
2458 if (temp == NULL)
2459 temp = gen_reg_rtx (Pmode);
2461 if (Pmode == DImode)
2462 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2463 else
2464 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2466 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2468 seqno++;
2470 break;
2472 default:
2473 gcc_unreachable ();
2476 return true;
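/* For illustration (typical output; a sketch rather than a quote):
   SYMBOL_ABSOLUTE splits into
     lui   a5, %hi(sym)          # the emitted HIGH part
     ...   %lo(sym)(a5)          # the LO_SUM stored in *LOW_OUT
   while SYMBOL_PCREL emits a local label so %pcrel_lo can find the
   matching auipc:
     .LA0: auipc a5, %pcrel_hi(sym)
     ...   %pcrel_lo(.LA0)(a5)  */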
2479 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2480 riscv_force_temporary; it is only needed when OFFSET is not a
2481 SMALL_OPERAND. */
2483 static rtx
2484 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2486 if (!SMALL_OPERAND (offset))
2488 rtx high;
2490 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2491 The addition inside the macro CONST_HIGH_PART may cause an
2492 overflow, so we need to force a sign-extension check. */
2493 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2494 offset = CONST_LOW_PART (offset);
2495 high = riscv_force_temporary (temp, high);
2496 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2498 return plus_constant (Pmode, reg, offset);
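/* Worked example (illustrative): for OFFSET == 0x12345,
   CONST_HIGH_PART gives 0x12000 and CONST_LOW_PART gives 0x345, so we
   emit roughly
     lui  t, 0x12
     add  t, t, reg
   and return (plus t 0x345); the rounding in CONST_HIGH_PART keeps
   the low part within the signed 12-bit ADDI range.  */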
2501 /* The __tls_get_addr symbol. */
2502 static GTY(()) rtx riscv_tls_symbol;
2504 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2505 the TLS symbol we are referencing, accessed via either the global
2506 dynamic or local dynamic model. RESULT is an RTX for the
2507 return value location. */
2509 static rtx_insn *
2510 riscv_call_tls_get_addr (rtx sym, rtx result)
2512 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2513 rtx_insn *insn;
2515 if (!riscv_tls_symbol)
2516 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2517 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2519 start_sequence ();
2521 emit_insn (riscv_got_load_tls_gd (a0, sym));
2522 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2523 gen_int_mode (RISCV_CC_BASE, SImode)));
2524 RTL_CONST_CALL_P (insn) = 1;
2525 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2526 insn = get_insns ();
2528 end_sequence ();
2530 return insn;
2533 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2534 its address. The return value will be both a valid address and a valid
2535 SET_SRC (either a REG or a LO_SUM). */
2537 static rtx
2538 riscv_legitimize_tls_address (rtx loc)
2540 rtx dest, tp, tmp, a0;
2541 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2543 #if 0
2544 /* TLS copy relocs are now deprecated and should not be used. */
2545 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2546 if (!flag_pic)
2547 model = TLS_MODEL_LOCAL_EXEC;
2548 #endif
2550 switch (model)
2552 case TLS_MODEL_LOCAL_DYNAMIC:
2553 /* Rely on section anchors for the optimization that LDM TLS
2554 provides. The anchor's address is loaded with GD TLS. */
2555 case TLS_MODEL_GLOBAL_DYNAMIC:
2556 if (TARGET_TLSDESC)
2558 static unsigned seqno;
2559 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2560 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2561 dest = gen_reg_rtx (Pmode);
2563 emit_insn (gen_tlsdesc (Pmode, loc, GEN_INT (seqno)));
2564 emit_insn (gen_add3_insn (dest, a0, tp));
2565 seqno++;
2567 else
2569 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2570 dest = gen_reg_rtx (Pmode);
2571 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp,
2572 loc);
2574 break;
2576 case TLS_MODEL_INITIAL_EXEC:
2577 /* la.tls.ie; tp-relative add */
2578 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2579 tmp = gen_reg_rtx (Pmode);
2580 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2581 dest = gen_reg_rtx (Pmode);
2582 emit_insn (gen_add3_insn (dest, tmp, tp));
2583 break;
2585 case TLS_MODEL_LOCAL_EXEC:
2586 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2587 dest = gen_reg_rtx (Pmode);
2588 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2589 dest = gen_rtx_LO_SUM (Pmode, dest,
2590 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2591 break;
2593 default:
2594 gcc_unreachable ();
2596 return dest;
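/* For illustration (typical sequences; a sketch, not from this
   file's comments):
   global dynamic:
     la.tls.gd a0, sym
     call      __tls_get_addr
   initial exec:
     la.tls.ie a5, sym
     add       a5, a5, tp  */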
2599 /* If X is not a valid address for mode MODE, force it into a register. */
2601 static rtx
2602 riscv_force_address (rtx x, machine_mode mode)
2604 if (!riscv_legitimate_address_p (mode, x, false))
2606 if (can_create_pseudo_p ())
2607 return force_reg (Pmode, x);
2608 else
2610 /* It's only safe for the thunk function.
2611 Use ra as the temp register. */
2612 gcc_assert (riscv_in_thunk_func);
2613 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2614 riscv_emit_move (reg, x);
2615 return reg;
2619 return x;
2622 /* Modify base + offset so that offset fits within a compressed load/store insn
2623 and the excess is added to base. */
2625 static rtx
2626 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2628 rtx addr, high;
2629 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2630 into HIGH. */
2631 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2632 offset &= CSW_MAX_OFFSET;
2633 if (!SMALL_OPERAND (INTVAL (high)))
2634 high = force_reg (Pmode, high);
2635 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2636 addr = plus_constant (Pmode, base, offset);
2637 return addr;
2640 /* Helper for riscv_legitimize_address. Given X, return true if it
2641 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2643 These respectively represent canonical shift-add rtxs or scaled
2644 memory addresses. */
2645 static bool
2646 mem_shadd_or_shadd_rtx_p (rtx x)
2648 return ((GET_CODE (x) == ASHIFT
2649 || GET_CODE (x) == MULT)
2650 && register_operand (XEXP (x, 0), GET_MODE (x))
2651 && CONST_INT_P (XEXP (x, 1))
2652 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2653 || (GET_CODE (x) == MULT
2654 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
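/* Examples (illustrative): both (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) satisfy this predicate; with ZBA such a
   subexpression can become a single sh2add in an address
   calculation.  */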
2657 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2658 be legitimized in a way that the generic machinery might not expect,
2659 return a new address, otherwise return NULL. MODE is the mode of
2660 the memory being accessed. */
2662 static rtx
2663 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2664 machine_mode mode)
2666 rtx addr;
2668 if (riscv_tls_symbol_p (x))
2669 return riscv_legitimize_tls_address (x);
2671 /* See if the address can be split into a high part and a LO_SUM. */
2672 if (riscv_split_symbol (NULL, x, mode, &addr))
2673 return riscv_force_address (addr, mode);
2675 /* Handle BASE + OFFSET. */
2676 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2677 && INTVAL (XEXP (x, 1)) != 0)
2679 rtx base = XEXP (x, 0);
2680 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2682 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2683 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2684 && SMALL_OPERAND (offset))
2686 rtx index = XEXP (base, 0);
2687 rtx fp = XEXP (base, 1);
2688 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2691 /* If we were given a MULT, we must fix the constant
2692 as we're going to create the ASHIFT form. */
2693 int shift_val = INTVAL (XEXP (index, 1));
2694 if (GET_CODE (index) == MULT)
2695 shift_val = exact_log2 (shift_val);
2697 rtx reg1 = gen_reg_rtx (Pmode);
2698 rtx reg2 = gen_reg_rtx (Pmode);
2699 rtx reg3 = gen_reg_rtx (Pmode);
2700 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2701 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2702 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2704 return reg3;
2708 if (!riscv_valid_base_register_p (base, mode, false))
2709 base = copy_to_mode_reg (Pmode, base);
2710 if (optimize_function_for_size_p (cfun)
2711 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2712 && mode == SImode)
2713 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2714 possible compressed load/store. */
2715 addr = riscv_shorten_lw_offset (base, offset);
2716 else
2717 addr = riscv_add_offset (NULL, base, offset);
2718 return riscv_force_address (addr, mode);
2721 return x;
2724 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2725 is the original src mode before promotion. */
2727 void
2728 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2729 machine_mode orig_mode)
2731 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2732 machine_mode mode;
2733 int i, num_ops;
2734 rtx x = NULL_RTX;
2736 mode = GET_MODE (dest);
2737 /* We use the original mode for the riscv_build_integer call, because HImode
2738 values are given special treatment. */
2739 num_ops = riscv_build_integer (codes, value, orig_mode);
2741 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2742 && num_ops >= riscv_split_integer_cost (value))
2743 x = riscv_split_integer (value, mode);
2744 else
2746 rtx old_value = NULL_RTX;
2747 for (i = 0; i < num_ops; i++)
2749 if (i != 0 && !can_create_pseudo_p ())
2750 x = riscv_emit_set (temp, x);
2751 else if (i != 0)
2752 x = force_reg (mode, x);
2753 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2754 if (codes[i].code == UNKNOWN)
2756 /* UNKNOWN means load the constant value into X. */
2757 x = GEN_INT (codes[i].value);
2759 else if (codes[i].use_uw)
2761 /* If the sequence requires using a "uw" form of an insn, we're
2762 going to have to construct the RTL ourselves and put it in
2763 a register to keep force_reg/force_operand from mucking
2764 things up. */
2765 gcc_assert (TARGET_64BIT || TARGET_ZBA);
2766 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2768 /* Create the proper mask for the slli.uw instruction. */
2769 unsigned HOST_WIDE_INT value = 0xffffffff;
2770 value <<= codes[i].value;
2772 /* Right now the only "uw" form we use is slli, we may add more
2773 in the future. */
2774 x = gen_rtx_fmt_ee (codes[i].code, mode,
2775 x, GEN_INT (codes[i].value));
2776 x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
2777 x = riscv_emit_set (t, x);
2779 else if (codes[i].code == FMA)
2781 HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
2782 rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
2783 x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
2784 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2785 x = riscv_emit_set (t, x);
2787 else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
2789 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2790 rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
2791 gcc_assert (t2);
2792 t2 = gen_lowpart (SImode, t2);
2793 emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
2794 x = t;
2796 else
2797 x = gen_rtx_fmt_ee (codes[i].code, mode,
2798 x, GEN_INT (codes[i].value));
2800 /* If this entry in the code table indicates we should save away
2801 the temporary holding the current value of X, then do so. */
2802 if (codes[i].save_temporary)
2804 gcc_assert (old_value == NULL_RTX);
2805 x = force_reg (mode, x);
2806 old_value = x;
2811 riscv_emit_set (dest, x);
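/* For illustration (a sketch): a step whose code is FMA with value 9
   rewrites the running value X as (X << 3) + X, which maps onto
   ZBA's sh3add and avoids a separate multiply while synthesizing the
   constant.  */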
2814 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2815 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2816 move_operand. */
2818 static void
2819 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2821 rtx base, offset;
2823 /* Split moves of big integers into smaller pieces. */
2824 if (splittable_const_int_operand (src, mode))
2826 riscv_move_integer (dest, dest, INTVAL (src), mode);
2827 return;
2830 if (satisfies_constraint_zfli (src))
2832 riscv_emit_set (dest, src);
2833 return;
2836 /* Split moves of symbolic constants into high/low pairs. */
2837 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2839 riscv_emit_set (dest, src);
2840 return;
2843 /* Generate the appropriate access sequences for TLS symbols. */
2844 if (riscv_tls_symbol_p (src))
2846 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2847 return;
2850 /* If we have (const (plus symbol offset)), and that expression cannot
2851 be forced into memory, load the symbol first and add in the offset. Also
2852 prefer to do this even if the constant _can_ be forced into memory, as it
2853 usually produces better code. */
2854 split_const (src, &base, &offset);
2855 if (offset != const0_rtx
2856 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2858 base = riscv_force_temporary (dest, base);
2859 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2860 return;
2863 /* Handle the format below.
2864 (const:DI
2865 (plus:DI
2866 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2867 (const_poly_int:DI [16, 16]) // <- op_1
2870 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2871 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2873 rtx dest_tmp = gen_reg_rtx (mode);
2874 rtx tmp = gen_reg_rtx (mode);
2876 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2877 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2879 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2880 return;
2883 src = force_const_mem (mode, src);
2885 /* When using explicit relocs, constant pool references are sometimes
2886 not legitimate addresses. */
2887 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2888 riscv_emit_move (dest, src);
2891 /* Report when we try to do something that requires vector when vector is
2892 disabled. This is an error of last resort and isn't very high-quality. It
2893 usually involves attempts to measure the vector length in some way. */
2895 static void
2896 riscv_report_v_required (void)
2898 static bool reported_p = false;
2900 /* Avoid reporting a slew of messages for a single oversight. */
2901 if (reported_p)
2902 return;
2904 error ("this operation requires the RVV ISA extension");
2905 inform (input_location, "you can enable RVV using the command-line"
2906 " option %<-march%>, or by using the %<target%>"
2907 " attribute or pragma");
2908 reported_p = true;
2911 /* Helper function to emit an operation for rtx_code CODE. */
2912 static void
2913 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2914 rtx op2)
2916 if (can_create_pseudo_p ())
2918 rtx result;
2919 if (GET_RTX_CLASS (code) == RTX_UNARY)
2920 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2921 else
2922 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2923 OPTAB_DIRECT);
2924 riscv_emit_move (op0, result);
2926 else
2928 rtx pat;
2929 /* The following implementation is for the prologue and epilogue.
2930 Because the prologue and epilogue cannot use pseudo registers,
2931 we can't use expand_simple_binop or expand_simple_unop. */
2932 if (GET_RTX_CLASS (code) == RTX_UNARY)
2933 pat = gen_rtx_fmt_e (code, mode, op1);
2934 else
2935 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2936 emit_insn (gen_rtx_SET (op0, pat));
2940 /* Expand a mult operation with a constant integer; the multiplicand
2941 is also used as a temporary register. */
2943 static void
2944 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2945 HOST_WIDE_INT multiplier)
2947 if (multiplier == 0)
2949 riscv_emit_move (dest, GEN_INT (0));
2950 return;
2953 bool neg_p = multiplier < 0;
2954 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier);
2956 if (multiplier_abs == 1)
2958 if (neg_p)
2959 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2960 else
2961 riscv_emit_move (dest, multiplicand);
2963 else
2965 if (pow2p_hwi (multiplier_abs))
2968 multiplicand = [BYTES_PER_RISCV_VECTOR].
2969 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2970 Sequence:
2971 csrr a5, vlenb
2972 slli a5, a5, 3
2973 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2974 Sequence:
2975 csrr a5, vlenb
2976 slli a5, a5, 3
2977 neg a5, a5
2979 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2980 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2981 if (neg_p)
2982 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2984 else if (pow2p_hwi (multiplier_abs + 1))
2987 multiplicand = [BYTES_PER_RISCV_VECTOR].
2988 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2989 Sequence:
2990 csrr a5, vlenb
2991 slli a4, a5, 3
2992 sub a5, a4, a5
2993 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2994 Sequence:
2995 csrr a5, vlenb
2996 slli a4, a5, 3
2997 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
2999 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3000 gen_int_mode (exact_log2 (multiplier_abs + 1),
3001 QImode));
3002 if (neg_p)
3003 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
3004 else
3005 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
3007 else if (pow2p_hwi (multiplier_abs - 1))
3010 multiplicand = [BYTES_PER_RISCV_VECTOR].
3011 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
3012 Sequence:
3013 csrr a5, vlenb
3014 slli a4, a5, 3
3015 add a5, a4, a5
3016 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
3017 Sequence:
3018 csrr a5, vlenb
3019 slli a4, a5, 3
3020 add a5, a4, a5
3021 neg a5, a5
3023 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3024 gen_int_mode (exact_log2 (multiplier_abs - 1),
3025 QImode));
3026 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
3027 if (neg_p)
3028 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
3030 else
3032 /* We use multiplication for remaining cases. */
3033 gcc_assert (
3034 TARGET_MUL
3035 && "M-extension must be enabled to calculate the poly_int "
3036 "size/offset.");
3037 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
3038 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
3043 /* Analyze src and emit const_poly_int mov sequence. */
3045 void
3046 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
3048 poly_int64 value = rtx_to_poly_int64 (src);
3049 /* It uses HOST_WIDE_INT instead of int since a 32-bit type is not enough
3050 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
3051 HOST_WIDE_INT offset = value.coeffs[0];
3052 HOST_WIDE_INT factor = value.coeffs[1];
3053 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
3054 int div_factor = 0;
3055 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
3056 For any (const_poly_int:MODE [m, n]), the calculation formula is as
3057 follows.
3058 constant = m - n.
3059 When minimum VLEN = 32, poly of VLENB = (4, 4).
3060 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
3061 When minimum VLEN > 32, poly of VLENB = (8, 8).
3062 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
3063 magn = (n, n) / base.
3064 (m, n) = base * magn + constant.
3065 This calculation doesn't need div operation. */
3067 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
3068 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
3069 else
3071 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
3072 emit_move_insn (gen_lowpart (Pmode, tmp),
3073 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
3076 if (BYTES_PER_RISCV_VECTOR.is_constant ())
3078 gcc_assert (value.is_constant ());
3079 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
3080 return;
3082 else
3084 int max_power = exact_log2 (MAX_POLY_VARIANT);
3085 for (int i = 0; i <= max_power; i++)
3087 int possible_div_factor = 1 << i;
3088 if (factor % (vlenb / possible_div_factor) == 0)
3090 div_factor = possible_div_factor;
3091 break;
3094 gcc_assert (div_factor != 0);
3097 if (div_factor != 1)
3098 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
3099 gen_int_mode (exact_log2 (div_factor), QImode));
3101 riscv_expand_mult_with_const_int (mode, dest, tmp,
3102 factor / (vlenb / div_factor));
3103 HOST_WIDE_INT constant = offset - factor;
3105 if (constant == 0)
3106 return;
3107 else if (SMALL_OPERAND (constant))
3108 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
3109 else
3111 /* Handle a constant value that does not fit in a 12-bit immediate. */
3112 rtx high;
3114 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
3115 The addition inside the macro CONST_HIGH_PART may cause an
3116 overflow, so we need to force a sign-extension check. */
3117 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
3118 constant = CONST_LOW_PART (constant);
3119 riscv_emit_move (tmp, high);
3120 riscv_expand_op (PLUS, mode, dest, tmp, dest);
3121 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
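/* Worked example (an illustrative sketch, assuming MIN_VLEN > 32 so
   VLENB is the poly_int (8, 8)): for (const_poly_int:DI [16, 16]),
   offset == 16 and factor == 16; factor is divisible by vlenb, so
   div_factor == 1, the multiplier is 16 / 8 == 2, and constant ==
   offset - factor == 0.  The emitted sequence is just
     csrr a5, vlenb
     slli a5, a5, 1  */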
3125 /* Adjust the scalable vector frame for the prologue and epilogue. */
3127 static void
3128 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
3130 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
3131 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
3132 rtx insn, dwarf, adjust_frame_rtx;
3134 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
3135 gen_int_mode (offset, Pmode));
3137 if (epilogue)
3138 insn = gen_add3_insn (target, target, adjust_size);
3139 else
3140 insn = gen_sub3_insn (target, target, adjust_size);
3142 insn = emit_insn (insn);
3144 RTX_FRAME_RELATED_P (insn) = 1;
3146 adjust_frame_rtx
3147 = gen_rtx_SET (target,
3148 plus_constant (Pmode, target, epilogue ? offset : -offset));
3150 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
3151 NULL_RTX);
3153 REG_NOTES (insn) = dwarf;
3156 /* Handle the subreg const_poly_int move below:
3158 1. (set (subreg:DI (reg:TI 237) 8)
3159 (subreg:DI (const_poly_int:TI [4, 2]) 8))
3160 =>
3161 (set (subreg:DI (reg:TI 237) 8)
3162 (const_int 0)) */
3164 static bool
3165 riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src)
3167 gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)));
3168 gcc_assert (SUBREG_BYTE (src).is_constant ());
3170 int byte_offset = SUBREG_BYTE (src).to_constant ();
3171 rtx const_poly = SUBREG_REG (src);
3172 machine_mode subreg_mode = GET_MODE (const_poly);
3174 if (subreg_mode != TImode) /* Only TImode is needed for now. */
3175 return false;
3177 if (byte_offset == 8)
3179 /* The const_poly_int cannot exceed int64, just set zero here. */
3180 emit_move_insn (dest, CONST0_RTX (mode));
3181 return true;
3184 /* The transform below will be covered somewhere else;
3185 thus, ignore it here.
3186 (set (subreg:DI (reg:TI 237) 0)
3187 (subreg:DI (const_poly_int:TI [4, 2]) 0))
3188 =>
3189 (set (subreg:DI (reg:TI 237) 0)
3190 (const_poly_int:DI [4, 2])) */
3192 return false;
3195 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
3196 sequence that is valid. */
3198 bool
3199 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
3201 if (CONST_POLY_INT_P (src))
3204 Handle:
3205 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
3206 (const_int 96 [0x60])) [0 S1 A8])
3207 (const_poly_int:QI [8, 8]))
3208 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
3210 if (MEM_P (dest))
3212 emit_move_insn (dest, force_reg (mode, src));
3213 return true;
3215 poly_int64 value = rtx_to_poly_int64 (src);
3216 if (!value.is_constant () && !TARGET_VECTOR)
3218 riscv_report_v_required ();
3219 return false;
3222 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
3223 return false;
3225 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
3227 /* In RV32 system, handle (const_poly_int:QI [m, n])
3228 (const_poly_int:HI [m, n]).
3229 In RV64 system, handle (const_poly_int:QI [m, n])
3230 (const_poly_int:HI [m, n])
3231 (const_poly_int:SI [m, n]). */
3232 rtx tmp = gen_reg_rtx (Pmode);
3233 rtx tmp2 = gen_reg_rtx (Pmode);
3234 riscv_legitimize_poly_move (Pmode, tmp2, tmp, src);
3235 emit_move_insn (dest, gen_lowpart (mode, tmp2));
3237 else
3239 /* In RV32 system, handle (const_poly_int:SI [m, n])
3240 (const_poly_int:DI [m, n]).
3241 In RV64 system, handle (const_poly_int:DI [m, n]).
3242 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to
3243 DImode; the offset should not exceed 4GiB in general. */
3244 rtx tmp = gen_reg_rtx (mode);
3245 riscv_legitimize_poly_move (mode, dest, tmp, src);
3247 return true;
3250 if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))
3251 && riscv_legitimize_subreg_const_poly_move (mode, dest, src))
3252 return true;
3254 /* Expand
3255 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3256 Expand this data movement instead of simply forbidding it, since
3257 we can improve the code generation for the following scenario
3258 by RVV auto-vectorization:
3259 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
3260 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3261 Since RVV mode and scalar mode are in different REG_CLASS,
3262 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
3263 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
3265 machine_mode vmode = GET_MODE (SUBREG_REG (src));
3266 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
3267 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
3268 /* We should be able to handle both partial and paradoxical subreg. */
3269 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
3270 scalar_mode smode = as_a<scalar_mode> (mode);
3271 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
3272 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
3273 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
3274 bool need_int_reg_p = false;
3276 if (num == 2)
3278 /* If we want to extract a 64-bit value but ELEN < 64,
3279 we use RVV vector mode with EEW = 32 to extract
3280 the highpart and lowpart. */
3281 need_int_reg_p = smode == DFmode;
3282 smode = SImode;
3283 nunits = nunits * 2;
3286 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
3288 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
3289 rtx int_reg = dest;
3291 if (need_int_reg_p)
3293 int_reg = gen_reg_rtx (DImode);
3294 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
3297 for (unsigned int i = 0; i < num; i++)
3299 rtx result;
3300 if (num == 1)
3301 result = int_reg;
3302 else if (i == 0)
3303 result = gen_lowpart (smode, int_reg);
3304 else
3305 result = gen_reg_rtx (smode);
3307 riscv_vector::emit_vec_extract (result, v,
3308 gen_int_mode (index + i, Pmode));
3310 if (i == 1)
3312 if (UNITS_PER_WORD < mode_size)
3313 /* If Pmode = SImode and mode = DImode, we just need to
3314 extract element of index = 1 from the vector and move it
3315 into the highpart of the DEST since DEST consists of 2
3316 scalar registers. */
3317 emit_move_insn (gen_highpart (smode, int_reg), result);
3318 else
3320 rtx tmp = expand_binop (Pmode, ashl_optab,
3321 gen_lowpart (Pmode, result),
3322 gen_int_mode (32, Pmode),
3323 NULL_RTX, 0, OPTAB_DIRECT);
3324 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
3325 NULL_RTX, 0, OPTAB_DIRECT);
3326 emit_move_insn (int_reg, tmp2);
3331 if (need_int_reg_p)
3332 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
3333 else
3334 emit_move_insn (dest, int_reg);
3336 else
3337 gcc_unreachable ();
3339 return true;
3341 /* Expand
3342 (set (reg:QI target) (mem:QI (address)))
3343 to
3344 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
3345 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
3346 with auto-sign/zero extend. */
3347 if (GET_MODE_CLASS (mode) == MODE_INT
3348 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
3349 && can_create_pseudo_p ()
3350 && MEM_P (src))
3352 rtx temp_reg;
3353 int zero_extend_p;
3355 temp_reg = gen_reg_rtx (word_mode);
3356 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
3357 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
3358 zero_extend_p));
3359 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
3360 return true;
3363 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
3365 rtx reg;
3367 if (GET_CODE (src) == CONST_INT)
3369 /* Apply the equivalent of PROMOTE_MODE here for constants to
3370 improve cse. */
3371 machine_mode promoted_mode = mode;
3372 if (GET_MODE_CLASS (mode) == MODE_INT
3373 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
3374 promoted_mode = word_mode;
3376 if (splittable_const_int_operand (src, mode))
3378 reg = gen_reg_rtx (promoted_mode);
3379 riscv_move_integer (reg, reg, INTVAL (src), mode);
3381 else
3382 reg = force_reg (promoted_mode, src);
3384 if (promoted_mode != mode)
3385 reg = gen_lowpart (mode, reg);
3387 else
3388 reg = force_reg (mode, src);
3389 riscv_emit_move (dest, reg);
3390 return true;
3393 /* In order to fit NaN boxing, expand
3394 (set FP_REG (reg:HF/BF src))
3395 to
3396 (set (reg:SI/DI mask) (const_int -65536)
3397 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
3398 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
3399 (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ] UNSPEC_FMV_FP16_X))
3402 if (TARGET_HARD_FLOAT
3403 && !TARGET_ZFHMIN
3404 && (mode == HFmode || mode == BFmode)
3405 && REG_P (dest) && FP_REG_P (REGNO (dest))
3406 && REG_P (src) && !FP_REG_P (REGNO (src))
3407 && can_create_pseudo_p ())
3409 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
3410 rtx temp = gen_reg_rtx (word_mode);
3411 emit_insn (gen_extend_insn (temp,
3412 simplify_gen_subreg (HImode, src, mode, 0),
3413 word_mode, HImode, 1));
3414 if (word_mode == SImode)
3415 emit_insn (gen_iorsi3 (temp, mask, temp));
3416 else
3417 emit_insn (gen_iordi3 (temp, mask, temp));
3419 riscv_emit_move (dest, gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
3420 UNSPEC_FMV_FP16_X));
3422 return true;
3425 /* We need to deal with constants that would be legitimate
3426 immediate_operands but aren't legitimate move_operands. */
3427 if (CONSTANT_P (src) && !move_operand (src, mode))
3429 riscv_legitimize_const_move (mode, dest, src);
3430 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3431 return true;
3434 /* RISC-V GCC may generate non-legitimate addresses because we provide
3435 some patterns to optimize access to PIC local symbols, which can make
3436 GCC generate unrecognizable instructions during optimization. */
3438 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
3439 reload_completed))
3441 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
3444 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
3445 reload_completed))
3447 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
3450 return false;
3453 /* Return true if there is an instruction that implements CODE and accepts
3454 X as an immediate operand. */
3456 static int
3457 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3459 switch (code)
3461 case ASHIFT:
3462 case ASHIFTRT:
3463 case LSHIFTRT:
3464 /* All shift counts are truncated to a valid constant. */
3465 return true;
3467 case AND:
3468 case IOR:
3469 case XOR:
3470 case PLUS:
3471 case LT:
3472 case LTU:
3473 /* These instructions take 12-bit signed immediates. */
3474 return SMALL_OPERAND (x);
3476 case LE:
3477 /* We add 1 to the immediate and use SLT. */
3478 return SMALL_OPERAND (x + 1);
3480 case LEU:
3481 /* Likewise SLTU, but reject the always-true case. */
3482 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3484 case GE:
3485 case GEU:
3486 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3487 return x == 1;
3489 default:
3490 /* By default assume that x0 can be used for 0. */
3491 return x == 0;
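/* Examples (illustrative): for LE, x <= 100 is encoded as x < 101,
   so SMALL_OPERAND (101) must hold and the comparison can be emitted
   as
     slti t0, x, 101
   while for LEU a comparison against the all-ones value is rejected
   because it is always true.  */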
3495 /* Return the cost of binary operation X, given that the instruction
3496 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3497 instructions and that the sequence of a double-word operation takes
3498 DOUBLE_INSNS instructions. */
3500 static int
3501 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3503 if (!riscv_v_ext_mode_p (GET_MODE (x))
3504 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3505 return COSTS_N_INSNS (double_insns);
3506 return COSTS_N_INSNS (single_insns);
3509 /* Return the cost of sign- or zero-extending OP. */
3511 static int
3512 riscv_extend_cost (rtx op, bool unsigned_p)
3514 if (MEM_P (op))
3515 return 0;
3517 if (unsigned_p && GET_MODE (op) == QImode)
3518 /* We can use ANDI. */
3519 return COSTS_N_INSNS (1);
3521 /* ZBA provides zext.w. */
3522 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3523 return COSTS_N_INSNS (1);
3525 /* ZBB provides zext.h, sext.b and sext.h. */
3526 if (TARGET_ZBB)
3528 if (!unsigned_p && GET_MODE (op) == QImode)
3529 return COSTS_N_INSNS (1);
3531 if (GET_MODE (op) == HImode)
3532 return COSTS_N_INSNS (1);
3535 if (!unsigned_p && GET_MODE (op) == SImode)
3536 /* We can use SEXT.W. */
3537 return COSTS_N_INSNS (1);
3539 /* We need to use a shift left and a shift right. */
3540 return COSTS_N_INSNS (2);
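/* For illustration (a sketch): zero-extending a QImode value is a
   single
     andi a0, a0, 0xff
   while sign-extending an HImode value without ZBB needs the
   two-instruction pair
     slli a0, a0, 48
     srai a0, a0, 48
   on rv64 (shift amount 16 on rv32), matching the costs above.  */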
3543 /* Implement TARGET_RTX_COSTS. */
3545 #define SINGLE_SHIFT_COST 1
3547 static bool
3548 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3549 int *total, bool speed)
3551 /* TODO: We set RVV instruction cost as 1 by default.
3552 The cost model needs to be properly analyzed and supported in the future. */
3553 if (riscv_v_ext_mode_p (mode))
3555 *total = COSTS_N_INSNS (1);
3556 return true;
3559 bool float_mode_p = FLOAT_MODE_P (mode);
3560 int cost;
3562 switch (GET_CODE (x))
3564 case SET:
3565 /* If we are called for an INSN that's a simple set of a register,
3566 then cost based on the SET_SRC alone. */
3567 if (outer_code == INSN
3568 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3570 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
3571 return true;
3574 /* Otherwise return FALSE indicating we should recurse into both the
3575 SET_DEST and SET_SRC combining the cost of both. */
3576 return false;
3578 case CONST_INT:
3579 /* Trivial constants are checked using OUTER_CODE in case they are
3580 encodable in the insn itself without needing additional insn(s). */
3581 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3583 *total = 0;
3584 return true;
3586 /* Fall through. */
3588 case SYMBOL_REF:
3589 case LABEL_REF:
3590 case CONST_DOUBLE:
3591 /* With TARGET_SUPPORTS_WIDE_INT const int can't be in CONST_DOUBLE
3592 rtl object. Weird recheck due to switch-case fall through above. */
3593 if (GET_CODE (x) == CONST_DOUBLE)
3594 gcc_assert (GET_MODE (x) != VOIDmode);
3595 /* Fall through. */
3597 case CONST:
3598 /* Non-trivial CONST_INT fall-through: check whether multiple insns are needed. */
3599 if ((cost = riscv_const_insns (x)) > 0)
3601 /* 1. Hoist will GCSE constants only if TOTAL returned is non-zero.
3602 2. For constants loaded more than once, the approach so far has
3603 been to duplicate the operation rather than to CSE the constant.
3604 3. TODO: make cost more accurate, especially if riscv_const_insns
3605 returns > 1. */
3606 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3607 *total = COSTS_N_INSNS (1);
3609 else /* The instruction will be fetched from the constant pool. */
3610 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3611 return true;
3613 case MEM:
3614 /* If the address is legitimate, return the number of
3615 instructions it needs. */
3616 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3618 /* When optimizing for size, make uncompressible 32-bit addresses
3619 more expensive so that compressible 32-bit addresses are
3620 preferred. */
3621 if ((TARGET_RVC || TARGET_ZCA)
3622 && !speed && riscv_mshorten_memrefs && mode == SImode
3623 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3624 cost++;
3626 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3627 return true;
3629 /* Otherwise use the default handling. */
3630 return false;
3632 case IF_THEN_ELSE:
3633 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3634 && reg_or_0_operand (XEXP (x, 1), mode)
3635 && sfb_alu_operand (XEXP (x, 2), mode)
3636 && comparison_operator (XEXP (x, 0), VOIDmode))
3638 /* For predicated conditional-move operations we assume the cost
3639 of a single instruction even though there are actually two. */
3640 *total = COSTS_N_INSNS (1);
3641 return true;
3643 else if (TARGET_ZICOND_LIKE
3644 && outer_code == SET
3645 && ((GET_CODE (XEXP (x, 1)) == REG
3646 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3647 || (GET_CODE (XEXP (x, 2)) == REG
3648 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3649 || (GET_CODE (XEXP (x, 1)) == REG
3650 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3651 || (GET_CODE (XEXP (x, 1)) == REG
3652 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3654 *total = COSTS_N_INSNS (1);
3655 return true;
3657 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3659 if (equality_operator (XEXP (x, 0), mode)
3660 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3662 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3663 return true;
3665 if (ordered_comparison_operator (XEXP (x, 0), mode))
3667 *total = COSTS_N_INSNS (1);
3668 return true;
3671 return false;
3673 case NOT:
3674 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3675 return false;
3677 case AND:
3678 /* slli.uw pattern for zba. */
3679 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3680 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3682 rtx and_rhs = XEXP (x, 1);
3683 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3684 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3685 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3686 && CONST_INT_P (ashift_rhs)
3687 && CONST_INT_P (and_rhs)
3688 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3689 *total = COSTS_N_INSNS (1);
3690 return true;
3692 /* bclri pattern for zbs. */
3693 if (TARGET_ZBS
3694 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3696 *total = COSTS_N_INSNS (1);
3697 return true;
3699 /* bclr pattern for zbs. */
3700 if (TARGET_ZBS
3701 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3702 && GET_CODE (XEXP (x, 0)) == ROTATE
3703 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3704 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3706 *total = COSTS_N_INSNS (1);
3707 return true;
3710 gcc_fallthrough ();
3711 case IOR:
3712 case XOR:
3713 /* orn, andn and xnor patterns for zbb. */
3714 if (TARGET_ZBB
3715 && GET_CODE (XEXP (x, 0)) == NOT)
3717 *total = riscv_binary_cost (x, 1, 2);
3718 return true;
3721 /* bset[i] and binv[i] pattern for zbs. */
3722 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3723 && TARGET_ZBS
3724 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3725 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3726 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3728 *total = COSTS_N_INSNS (1);
3729 return true;
3732 /* Double-word operations use two single-word operations. */
3733 *total = riscv_binary_cost (x, 1, 2);
3734 return false;
3736 case ZERO_EXTRACT:
3737 /* This is an SImode shift. */
3738 if (outer_code == SET
3739 && CONST_INT_P (XEXP (x, 1))
3740 && CONST_INT_P (XEXP (x, 2))
3741 && (INTVAL (XEXP (x, 2)) > 0)
3742 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3744 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3745 return true;
3747 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3748 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3749 && GET_CODE (XEXP (x, 1)) == CONST_INT
3750 && INTVAL (XEXP (x, 1)) == 1)
3752 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3753 return true;
3755 gcc_fallthrough ();
3756 case SIGN_EXTRACT:
3757 if (TARGET_XTHEADBB && outer_code == SET
3758 && CONST_INT_P (XEXP (x, 1))
3759 && CONST_INT_P (XEXP (x, 2)))
3761 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3762 return true;
3764 return false;
3766 case ASHIFT:
3767 /* bset pattern for zbs. */
3768 if (TARGET_ZBS
3769 && CONST_INT_P (XEXP (x, 0))
3770 && INTVAL (XEXP (x, 0)) == 1)
3772 *total = COSTS_N_INSNS (1);
3773 return true;
3775 gcc_fallthrough ();
3776 case ASHIFTRT:
3777 case LSHIFTRT:
3778 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3779 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3780 return false;
3782 case ABS:
3783 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3784 return false;
3786 case LO_SUM:
3787 *total = set_src_cost (XEXP (x, 0), mode, speed);
3788 return true;
3790 case LT:
3791 /* This is an SImode shift. */
3792 if (outer_code == SET && GET_MODE (x) == DImode
3793 && GET_MODE (XEXP (x, 0)) == SImode)
3795 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3796 return true;
3798 /* Fall through. */
3799 case LTU:
3800 case LE:
3801 case LEU:
3802 case GT:
3803 case GTU:
3804 case GE:
3805 case GEU:
3806 case EQ:
3807 case NE:
3808 /* Branch comparisons have VOIDmode, so use the first operand's
3809 mode instead. */
3810 mode = GET_MODE (XEXP (x, 0));
3811 if (float_mode_p)
3812 *total = tune_param->fp_add[mode == DFmode];
3813 else
3814 *total = riscv_binary_cost (x, 1, 3);
3815 return false;
3817 case UNORDERED:
3818 case ORDERED:
3819 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3820 mode = GET_MODE (XEXP (x, 0));
3821 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3822 return false;
3824 case UNEQ:
3825 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3826 mode = GET_MODE (XEXP (x, 0));
3827 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3828 return false;
3830 case LTGT:
3831 /* (FLT(A, B) || FGT(A, B)). */
3832 mode = GET_MODE (XEXP (x, 0));
3833 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3834 return false;
3836 case UNGE:
3837 case UNGT:
3838 case UNLE:
3839 case UNLT:
3840 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3841 mode = GET_MODE (XEXP (x, 0));
3842 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3843 return false;
3845 case MINUS:
3846 if (float_mode_p)
3847 *total = tune_param->fp_add[mode == DFmode];
3848 else
3849 *total = riscv_binary_cost (x, 1, 4);
3850 return false;
3852 case PLUS:
3853 /* add.uw pattern for zba. */
3854 if (TARGET_ZBA
3855 && (TARGET_64BIT && (mode == DImode))
3856 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3857 && register_operand (XEXP (XEXP (x, 0), 0),
3858 GET_MODE (XEXP (XEXP (x, 0), 0)))
3859 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3861 *total = COSTS_N_INSNS (1);
3862 return true;
3864 /* shNadd pattern for zba. */
3865 if (TARGET_ZBA
3866 && ((!TARGET_64BIT && (mode == SImode)) ||
3867 (TARGET_64BIT && (mode == DImode)))
3868 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3869 && register_operand (XEXP (XEXP (x, 0), 0),
3870 GET_MODE (XEXP (XEXP (x, 0), 0)))
3871 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3872 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3874 *total = COSTS_N_INSNS (1);
3875 return true;
3877 /* Before strength-reduction, the shNadd can be expressed as the addition
3878 of a multiplication with a power-of-two. If this case is not handled,
3879 the strength-reduction in expmed.c will calculate an inflated cost. */
3880 if (TARGET_ZBA
3881 && mode == word_mode
3882 && GET_CODE (XEXP (x, 0)) == MULT
3883 && register_operand (XEXP (XEXP (x, 0), 0),
3884 GET_MODE (XEXP (XEXP (x, 0), 0)))
3885 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3886 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3887 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3889 *total = COSTS_N_INSNS (1);
3890 return true;
3892 /* shNadd.uw pattern for zba.
3893 [(set (match_operand:DI 0 "register_operand" "=r")
3894 (plus:DI
3895 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3896 (match_operand:QI 2 "immediate_operand" "I"))
3897 (match_operand 3 "immediate_operand" ""))
3898 (match_operand:DI 4 "register_operand" "r")))]
3899 "TARGET_64BIT && TARGET_ZBA
3900 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3901 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3903 if (TARGET_ZBA
3904 && (TARGET_64BIT && (mode == DImode))
3905 && (GET_CODE (XEXP (x, 0)) == AND)
3906 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
3908 do {
3909 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3910 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3911 if (GET_CODE (and_lhs) != ASHIFT)
3912 break;
3913 if (!CONST_INT_P (and_rhs))
3914 break;
3916 rtx ashift_rhs = XEXP (and_lhs, 1);
3918 if (!CONST_INT_P (ashift_rhs)
3919 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3920 break;
3922 if (CONST_INT_P (and_rhs)
3923 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3925 *total = COSTS_N_INSNS (1);
3926 return true;
3928 } while (false);
3931 if (float_mode_p)
3932 *total = tune_param->fp_add[mode == DFmode];
3933 else
3934 *total = riscv_binary_cost (x, 1, 4);
3935 return false;
3937 case NEG:
3939 rtx op = XEXP (x, 0);
3940 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3942 *total = (tune_param->fp_mul[mode == DFmode]
3943 + set_src_cost (XEXP (op, 0), mode, speed)
3944 + set_src_cost (XEXP (op, 1), mode, speed)
3945 + set_src_cost (XEXP (op, 2), mode, speed));
3946 return true;
3950 if (float_mode_p)
3951 *total = tune_param->fp_add[mode == DFmode];
3952 else
3953 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3954 return false;
3956 case MULT:
3957 if (float_mode_p)
3958 *total = tune_param->fp_mul[mode == DFmode];
3959 else if (!(TARGET_MUL || TARGET_ZMMUL))
3960 /* Estimate the cost of a library call. */
3961 *total = COSTS_N_INSNS (speed ? 32 : 6);
3962 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3963 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3964 else if (!speed)
3965 *total = COSTS_N_INSNS (1);
3966 else
3967 *total = tune_param->int_mul[mode == DImode];
3968 return false;
3970 case DIV:
3971 case SQRT:
3972 case MOD:
3973 if (float_mode_p)
3975 *total = tune_param->fp_div[mode == DFmode];
3976 return false;
3978 /* Fall through. */
3980 case UDIV:
3981 case UMOD:
3982 if (!TARGET_DIV)
3983 /* Estimate the cost of a library call. */
3984 *total = COSTS_N_INSNS (speed ? 32 : 6);
3985 else if (speed)
3986 *total = tune_param->int_div[mode == DImode];
3987 else
3988 *total = COSTS_N_INSNS (1);
3989 return false;
3991 case ZERO_EXTEND:
3992 /* This is an SImode shift. */
3993 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3995 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3996 return true;
3998 /* Fall through. */
3999 case SIGN_EXTEND:
4000 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
4001 return false;
4003 case BSWAP:
4004 if (TARGET_ZBB)
4006 /* RISC-V only defines rev8 for XLEN, so we will need an extra
4007 shift-right instruction for smaller modes. */
4008 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
4009 return true;
4011 return false;
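/* For example, with Zbb on RV64 a word_mode bswap is the single
   instruction "rev8 a0,a0", while an SImode bswap needs a follow-up
   "srai a0,a0,32" to bring the reversed bytes back down, matching the
   cost of 2 used above (register names illustrative).  */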
4013 case FLOAT:
4014 case UNSIGNED_FLOAT:
4015 case FIX:
4016 case FLOAT_EXTEND:
4017 case FLOAT_TRUNCATE:
4018 *total = tune_param->fp_add[mode == DFmode];
4019 return false;
4021 case FMA:
4022 *total = (tune_param->fp_mul[mode == DFmode]
4023 + set_src_cost (XEXP (x, 0), mode, speed)
4024 + set_src_cost (XEXP (x, 1), mode, speed)
4025 + set_src_cost (XEXP (x, 2), mode, speed));
4026 return true;
4028 case UNSPEC:
4029 if (XINT (x, 1) == UNSPEC_AUIPC)
4031 /* Make AUIPC cheap to avoid spilling its result to the stack. */
4032 *total = 1;
4033 return true;
4035 return false;
4037 default:
4038 return false;
4042 /* Implement TARGET_ADDRESS_COST. */
4044 static int
4045 riscv_address_cost (rtx addr, machine_mode mode,
4046 addr_space_t as ATTRIBUTE_UNUSED,
4047 bool speed ATTRIBUTE_UNUSED)
4049 /* When optimizing for size, make uncompressible 32-bit addresses more
4050 * expensive so that compressible 32-bit addresses are preferred. */
4051 if ((TARGET_RVC || TARGET_ZCA)
4052 && !speed && riscv_mshorten_memrefs && mode == SImode
4053 && !riscv_compressed_lw_address_p (addr))
4054 return riscv_address_insns (addr, mode, false) + 1;
4055 return riscv_address_insns (addr, mode, false);
4058 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
4059 calculation for conditional branches: one unit is considered the cost
4060 of microarchitecture-dependent actual branch execution and therefore
4061 multiplied by BRANCH_COST and any remaining units are considered fixed
4062 branch overhead. Branches on a floating-point condition incur an extra
4063 instruction cost as they will be split into an FCMP operation followed
4064 by a branch on an integer condition. */
4066 static int
4067 riscv_insn_cost (rtx_insn *insn, bool speed)
4069 rtx x = PATTERN (insn);
4070 int cost = pattern_cost (x, speed);
4072 if (JUMP_P (insn))
4074 if (GET_CODE (x) == PARALLEL)
4075 x = XVECEXP (x, 0, 0);
4076 if (GET_CODE (x) == SET
4077 && GET_CODE (SET_DEST (x)) == PC
4078 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
4080 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
4081 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
4082 cost += COSTS_N_INSNS (1);
4085 return cost;
4088 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
4089 but we consider cost units of branch instructions equal to cost units of
4090 other instructions. */
4092 static unsigned int
4093 riscv_max_noce_ifcvt_seq_cost (edge e)
4095 bool predictable_p = predictable_edge_p (e);
4097 if (predictable_p)
4099 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
4100 return param_max_rtl_if_conversion_predictable_cost;
4102 else
4104 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
4105 return param_max_rtl_if_conversion_unpredictable_cost;
4108 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
4111 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
4112 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
4113 by our actual conditional branch cost, observing that our branches test
4114 conditions directly, so there is no preparatory extra condition-set
4115 instruction. */
4117 static bool
4118 riscv_noce_conversion_profitable_p (rtx_insn *seq,
4119 struct noce_if_info *if_info)
4121 struct noce_if_info riscv_if_info = *if_info;
4123 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
4124 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
4126 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
4127 to emit a conditional set operation on DImode output it comes up
4128 with a sequence such as:
4130 (insn 26 0 27 (set (reg:SI 140)
4131 (eq:SI (reg/v:DI 137 [ c ])
4132 (const_int 0 [0]))) 302 {*seq_zero_disi}
4133 (nil))
4134 (insn 27 26 28 (set (reg:DI 139)
4135 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
4136 (nil))
4138 because our `cstore<mode>4' pattern expands to an insn that gives
4139 an SImode output. The output of a conditional set is a boolean 0 or 1,
4140 so it is valid as input in any scalar integer mode, and therefore
4141 combine later folds the zero extend operation into an equivalent
4142 conditional set operation that produces a DImode output, however
4143 this redundant zero extend operation counts towards the cost of
4144 the replacement sequence. Compensate for that by incrementing the
4145 cost of the original sequence as well as the maximum sequence cost
4146 accordingly. Likewise for sign extension. */
4147 rtx last_dest = NULL_RTX;
4148 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
4150 if (!NONDEBUG_INSN_P (insn))
4151 continue;
4153 rtx x = PATTERN (insn);
4154 if (NONJUMP_INSN_P (insn)
4155 && GET_CODE (x) == SET)
4157 rtx src = SET_SRC (x);
4158 enum rtx_code code = GET_CODE (src);
4159 if (last_dest != NULL_RTX
4160 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
4161 && REG_P (XEXP (src, 0))
4162 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
4164 riscv_if_info.original_cost += COSTS_N_INSNS (1);
4165 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
4167 last_dest = NULL_RTX;
4168 rtx dest = SET_DEST (x);
4169 if (COMPARISON_P (src)
4170 && REG_P (dest)
4171 && GET_MODE (dest) == SImode)
4172 last_dest = dest;
4174 else
4175 last_dest = NULL_RTX;
4178 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
4181 /* Return one word of double-word value OP. HIGH_P is true to select the
4182 high part or false to select the low part. */
4184 rtx
4185 riscv_subword (rtx op, bool high_p)
4187 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
4188 machine_mode mode = GET_MODE (op);
4190 if (mode == VOIDmode)
4191 mode = TARGET_64BIT ? TImode : DImode;
4193 if (MEM_P (op))
4194 return adjust_address (op, word_mode, byte);
4196 if (REG_P (op))
4197 gcc_assert (!FP_REG_RTX_P (op));
4199 return simplify_gen_subreg (word_mode, op, mode, byte);
4202 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
4204 bool
4205 riscv_split_64bit_move_p (rtx dest, rtx src)
4207 if (TARGET_64BIT)
4208 return false;
4210 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
4211 if (satisfies_constraint_zfli (src))
4212 return false;
4214 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
4215 of zeroing an FPR with FCVT.D.W. */
4216 if (TARGET_DOUBLE_FLOAT
4217 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4218 || (FP_REG_RTX_P (dest) && MEM_P (src))
4219 || (FP_REG_RTX_P (src) && MEM_P (dest))
4220 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
4221 return false;
4223 return true;
4226 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
4227 this function handles 64-bit moves for which riscv_split_64bit_move_p
4228 holds. For 64-bit targets, this function handles 128-bit moves. */
4230 void
4231 riscv_split_doubleword_move (rtx dest, rtx src)
4233 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
4234 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
4236 if (FP_REG_RTX_P (dest))
4238 rtx low_src = riscv_subword (src, false);
4239 rtx high_src = riscv_subword (src, true);
4241 if (TARGET_ZFA)
4242 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
4243 else
4244 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
4245 return;
4247 if (FP_REG_RTX_P (src))
4249 rtx low_dest = riscv_subword (dest, false);
4250 rtx high_dest = riscv_subword (dest, true);
4252 if (TARGET_ZFA)
4254 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
4255 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
4256 return;
4258 else
4260 emit_insn (gen_th_fmv_x_w (low_dest, src));
4261 emit_insn (gen_th_fmv_x_hw (high_dest, src));
4263 return;
4267 /* The operation can be split into two normal moves. Decide in
4268 which order to do them. */
4269 rtx low_dest = riscv_subword (dest, false);
4270 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
4272 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
4273 riscv_emit_move (low_dest, riscv_subword (src, false));
4275 else
4277 riscv_emit_move (low_dest, riscv_subword (src, false));
4278 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
4282 /* Constant VAL is known to be the sum of two S12 constants. Break it
4283 into its comprising BASE and OFF.
4284 Numerically, S12 is -2048 to 2047; however, we use the more conservative
4285 range -2048 to 2032, as the offsets pertain to stack-related registers. */
4287 void
4288 riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
4289 HOST_WIDE_INT *off)
4291 if (SUM_OF_TWO_S12_N (val))
4293 *base = -2048;
4294 *off = val - (-2048);
4296 else if (SUM_OF_TWO_S12_P_ALGN (val))
4298 *base = 2032;
4299 *off = val - 2032;
4301 else
4303 gcc_unreachable ();
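/* A worked example of the split above: VAL = 3000 satisfies
   SUM_OF_TWO_S12_P_ALGN and yields BASE = 2032, OFF = 968, while
   VAL = -3000 satisfies SUM_OF_TWO_S12_N and yields BASE = -2048,
   OFF = -952; in both cases each part fits a signed 12-bit addi
   immediate.  */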
4308 /* Return the appropriate instructions to move SRC into DEST. Assume
4309 that SRC is operand 1 and DEST is operand 0. */
4311 const char *
4312 riscv_output_move (rtx dest, rtx src)
4314 enum rtx_code dest_code, src_code;
4315 machine_mode mode;
4316 bool dbl_p;
4317 unsigned width;
4318 const char *insn;
4320 if ((insn = th_output_move (dest, src)))
4321 return insn;
4323 dest_code = GET_CODE (dest);
4324 src_code = GET_CODE (src);
4325 mode = GET_MODE (dest);
4326 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
4327 width = GET_MODE_SIZE (mode).to_constant ();
4329 if (dbl_p && riscv_split_64bit_move_p (dest, src))
4330 return "#";
4332 if (dest_code == REG && GP_REG_P (REGNO (dest)))
4334 if (src_code == REG && FP_REG_P (REGNO (src)))
4335 switch (width)
4337 case 2:
4338 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4339 return "fmv.x.h\t%0,%1";
4340 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
4341 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
4342 case 4:
4343 return "fmv.x.s\t%0,%1";
4344 case 8:
4345 return "fmv.x.d\t%0,%1";
4348 if (src_code == MEM)
4349 switch (width)
4351 case 1: return "lbu\t%0,%1";
4352 case 2: return "lhu\t%0,%1";
4353 case 4: return "lw\t%0,%1";
4354 case 8: return "ld\t%0,%1";
4357 if (src_code == CONST_INT)
4359 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
4360 return "li\t%0,%1";
4362 if (TARGET_ZBS
4363 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
4364 return "bseti\t%0,zero,%S1";
4366 /* Should never reach here. */
4367 abort ();
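/* For example, loading the constant 1 << 42 on RV64 with Zbs uses the
   single "bseti a0,zero,42" returned above, where an li/slli sequence
   would need several instructions (register name illustrative).  */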
4370 if (src_code == HIGH)
4371 return "lui\t%0,%h1";
4373 if (symbolic_operand (src, VOIDmode))
4374 switch (riscv_classify_symbolic_expression (src))
4376 case SYMBOL_GOT_DISP: return "la\t%0,%1";
4377 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
4378 case SYMBOL_PCREL: return "lla\t%0,%1";
4379 default: gcc_unreachable ();
4382 if ((src_code == REG && GP_REG_P (REGNO (src)))
4383 || (src == CONST0_RTX (mode)))
4385 if (dest_code == REG)
4387 if (GP_REG_P (REGNO (dest)))
4388 return "mv\t%0,%z1";
4390 if (FP_REG_P (REGNO (dest)))
4391 switch (width)
4393 case 2:
4394 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4395 return "fmv.h.x\t%0,%z1";
4396 /* The high 16 bits should be all ones; otherwise the hardware will
4397 treat the value as a canonical NaN, but that does not matter for softfloat. */
4398 return "fmv.s.x\t%0,%1";
4399 case 4:
4400 return "fmv.s.x\t%0,%z1";
4401 case 8:
4402 if (TARGET_64BIT)
4403 return "fmv.d.x\t%0,%z1";
4404 /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w. */
4405 gcc_assert (src == CONST0_RTX (mode));
4406 return "fcvt.d.w\t%0,x0";
4409 if (dest_code == MEM)
4410 switch (width)
4412 case 1: return "sb\t%z1,%0";
4413 case 2: return "sh\t%z1,%0";
4414 case 4: return "sw\t%z1,%0";
4415 case 8: return "sd\t%z1,%0";
4418 if (src_code == REG && FP_REG_P (REGNO (src)))
4420 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4421 switch (width)
4423 case 2:
4424 if (TARGET_ZFH)
4425 return "fmv.h\t%0,%1";
4426 return "fmv.s\t%0,%1";
4427 case 4:
4428 return "fmv.s\t%0,%1";
4429 case 8:
4430 return "fmv.d\t%0,%1";
4433 if (dest_code == MEM)
4434 switch (width)
4436 case 2:
4437 return "fsh\t%1,%0";
4438 case 4:
4439 return "fsw\t%1,%0";
4440 case 8:
4441 return "fsd\t%1,%0";
4444 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4446 if (src_code == MEM)
4447 switch (width)
4449 case 2:
4450 return "flh\t%0,%1";
4451 case 4:
4452 return "flw\t%0,%1";
4453 case 8:
4454 return "fld\t%0,%1";
4457 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
4458 switch (width)
4460 case 2:
4461 return "fli.h\t%0,%1";
4462 case 4:
4463 return "fli.s\t%0,%1";
4464 case 8:
4465 return "fli.d\t%0,%1";
4468 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
4470 /* We only want a single full vector register VLEN read after reload. */
4471 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
4472 return "csrr\t%0,vlenb";
4474 gcc_unreachable ();
4477 const char *
4478 riscv_output_return ()
4480 if (cfun->machine->naked_p)
4481 return "";
4483 return "ret";
4487 /* Return true if CMP1 is a suitable second operand for integer ordering
4488 test CODE. See also the *sCC patterns in riscv.md. */
4490 static bool
4491 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4493 switch (code)
4495 case GT:
4496 case GTU:
4497 return reg_or_0_operand (cmp1, VOIDmode);
4499 case GE:
4500 case GEU:
4501 return cmp1 == const1_rtx;
4503 case LT:
4504 case LTU:
4505 return arith_operand (cmp1, VOIDmode);
4507 case LE:
4508 return sle_operand (cmp1, VOIDmode);
4510 case LEU:
4511 return sleu_operand (cmp1, VOIDmode);
4513 default:
4514 gcc_unreachable ();
4518 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4519 integer ordering test *CODE, or if an equivalent combination can
4520 be formed by adjusting *CODE and *CMP1. When returning true, update
4521 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4522 them alone. */
4524 static bool
4525 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4526 machine_mode mode)
4528 HOST_WIDE_INT plus_one;
4530 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4531 return true;
4533 if (CONST_INT_P (*cmp1))
4534 switch (*code)
4536 case LE:
4537 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4538 if (INTVAL (*cmp1) < plus_one)
4540 *code = LT;
4541 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4542 return true;
4544 break;
4546 case LEU:
4547 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4548 if (plus_one != 0)
4550 *code = LTU;
4551 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4552 return true;
4554 break;
4556 default:
4557 break;
4559 return false;
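/* For example, (LE x 4) has no direct instruction, so the code above
   rewrites it as (LT x 5), and (LEU x 7) as (LTU x 8).  The plus-one
   adjustment is rejected only on overflow, i.e. LE against the maximum
   signed value or LEU against an all-ones value.  */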
4562 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4563 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4564 is nonnull, it's OK to set TARGET to the inverse of the result and
4565 flip *INVERT_PTR instead. */
4567 static void
4568 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4569 rtx target, rtx cmp0, rtx cmp1)
4571 machine_mode mode;
4573 /* First see if there is a RISCV instruction that can do this operation.
4574 If not, try doing the same for the inverse operation. If that also
4575 fails, force CMP1 into a register and try again. */
4576 mode = GET_MODE (cmp0);
4577 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4578 riscv_emit_binary (code, target, cmp0, cmp1);
4579 else
4581 enum rtx_code inv_code = reverse_condition (code);
4582 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4584 cmp1 = force_reg (mode, cmp1);
4585 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4587 else if (invert_ptr == 0)
4589 rtx inv_target = riscv_force_binary (word_mode,
4590 inv_code, cmp0, cmp1);
4591 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4593 else
4595 *invert_ptr = !*invert_ptr;
4596 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4601 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4602 The register will have the same mode as CMP0. */
4604 static rtx
4605 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4607 if (cmp1 == const0_rtx)
4608 return cmp0;
4610 return expand_binop (GET_MODE (cmp0), sub_optab,
4611 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
4614 /* Helper function for riscv_extend_comparands to sign-extend OP.
4615 However, if OP is an SI subreg promoted from an inner DI, such as
4616 (subreg/s/v:SI (reg/v:DI) 0)
4617 just peel off the SUBREG to get the DI, avoiding an extraneous extension. */
4619 static void
4620 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4622 if (GET_CODE (*op) == SUBREG
4623 && SUBREG_PROMOTED_VAR_P (*op)
4624 && SUBREG_PROMOTED_SIGNED_P (*op)
4625 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4626 == GET_MODE_SIZE (word_mode)))
4627 *op = XEXP (*op, 0);
4628 else
4629 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4632 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4634 static void
4635 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4637 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4638 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4640 /* It is more profitable to zero-extend QImode values. But not if the
4641 first operand has already been sign-extended, and the second one is
4642 a constant or has already been sign-extended also. */
4643 if (unsigned_condition (code) == code
4644 && (GET_MODE (*op0) == QImode
4645 && ! (GET_CODE (*op0) == SUBREG
4646 && SUBREG_PROMOTED_VAR_P (*op0)
4647 && SUBREG_PROMOTED_SIGNED_P (*op0)
4648 && (CONST_INT_P (*op1)
4649 || (GET_CODE (*op1) == SUBREG
4650 && SUBREG_PROMOTED_VAR_P (*op1)
4651 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4653 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4654 if (CONST_INT_P (*op1))
4655 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4656 else
4657 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4659 else
4661 riscv_sign_extend_if_not_subreg_prom (op0);
4663 if (*op1 != const0_rtx)
4664 riscv_sign_extend_if_not_subreg_prom (op1);
4669 /* Convert a comparison into something that can be used in a branch or
4670 conditional move. On entry, *OP0 and *OP1 are the values being
4671 compared and *CODE is the code used to compare them.
4673 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4674 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4675 emitted. */
4677 static void
4678 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4679 bool need_eq_ne_p = false)
4681 if (need_eq_ne_p)
4683 rtx cmp_op0 = *op0;
4684 rtx cmp_op1 = *op1;
4685 if (*code == EQ || *code == NE)
4687 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4688 *op1 = const0_rtx;
4689 return;
4691 gcc_unreachable ();
4694 if (splittable_const_int_operand (*op1, VOIDmode))
4696 HOST_WIDE_INT rhs = INTVAL (*op1);
4698 if (*code == EQ || *code == NE)
4700 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4701 if (SMALL_OPERAND (-rhs))
4703 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4704 GEN_INT (-rhs));
4705 *op1 = const0_rtx;
4708 else
4710 static const enum rtx_code mag_comparisons[][2] = {
4711 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4714 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4715 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4717 HOST_WIDE_INT new_rhs;
4718 bool increment = *code == mag_comparisons[i][0];
4719 bool decrement = *code == mag_comparisons[i][1];
4720 if (!increment && !decrement)
4721 continue;
4723 new_rhs = rhs + (increment ? 1 : -1);
4724 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4725 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4726 && (rhs < 0) == (new_rhs < 0))
4728 *op1 = GEN_INT (new_rhs);
4729 *code = mag_comparisons[i][increment];
4731 break;
4736 riscv_extend_comparands (*code, op0, op1);
4738 *op0 = force_reg (word_mode, *op0);
4739 if (*op1 != const0_rtx)
4740 *op1 = force_reg (word_mode, *op1);
4743 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4745 static void
4746 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4747 bool *invert_ptr = nullptr)
4749 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4750 enum rtx_code fp_code = *code;
4751 *code = NE;
4753 switch (fp_code)
4755 case UNORDERED:
4756 *code = EQ;
4757 /* Fall through. */
4759 case ORDERED:
4760 /* a == a && b == b */
4761 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4762 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4763 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4764 *op1 = const0_rtx;
4765 break;
4767 case UNEQ:
4768 /* ordered(a, b) > (a == b) */
4769 *code = EQ;
4770 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4771 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4772 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4773 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4774 break;
4776 #define UNORDERED_COMPARISON(CODE, CMP) \
4777 case CODE: \
4778 *code = EQ; \
4779 *op0 = gen_reg_rtx (word_mode); \
4780 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4781 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4782 else if (GET_MODE (cmp_op0) == SFmode) \
4783 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4784 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4785 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4786 else if (GET_MODE (cmp_op0) == DFmode) \
4787 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4788 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4789 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4790 else if (GET_MODE (cmp_op0) == HFmode) \
4791 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4792 else \
4793 gcc_unreachable (); \
4794 *op1 = const0_rtx; \
4795 break;
4797 case UNLT:
4798 std::swap (cmp_op0, cmp_op1);
4799 gcc_fallthrough ();
4801 UNORDERED_COMPARISON(UNGT, le)
4803 case UNLE:
4804 std::swap (cmp_op0, cmp_op1);
4805 gcc_fallthrough ();
4807 UNORDERED_COMPARISON(UNGE, lt)
4808 #undef UNORDERED_COMPARISON
4810 case NE:
4811 fp_code = EQ;
4812 if (invert_ptr != nullptr)
4813 *invert_ptr = !*invert_ptr;
4814 else
4816 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4817 cmp_op1 = const0_rtx;
4819 gcc_fallthrough ();
4821 case EQ:
4822 case LE:
4823 case LT:
4824 case GE:
4825 case GT:
4826 /* We have instructions for these cases. */
4827 *code = fp_code;
4828 *op0 = cmp_op0;
4829 *op1 = cmp_op1;
4830 break;
4832 case LTGT:
4833 /* (a < b) | (a > b) */
4834 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4835 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4836 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4837 *op1 = const0_rtx;
4838 break;
4840 default:
4841 gcc_unreachable ();
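/* For example, the ORDERED/UNORDERED handling above materializes
     tmp0 = (a == a); tmp1 = (b == b); result = tmp0 & tmp1;
   and tests RESULT against zero; feq is a quiet comparison, so the
   sequence raises no exception for quiet-NaN inputs.  */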
4845 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4847 void
4848 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4850 riscv_extend_comparands (code, &op0, &op1);
4851 op0 = force_reg (word_mode, op0);
4853 if (code == EQ || code == NE)
4855 rtx zie = riscv_zero_if_equal (op0, op1);
4856 riscv_emit_binary (code, target, zie, const0_rtx);
4858 else
4859 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4862 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4864 void
4865 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4866 bool *invert_ptr)
4868 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4870 machine_mode mode = GET_MODE (target);
4871 if (mode != word_mode)
4873 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4874 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4876 else
4877 riscv_emit_binary (code, target, op0, op1);
4880 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4882 void
4883 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4885 if (FLOAT_MODE_P (GET_MODE (op1)))
4886 riscv_emit_float_compare (&code, &op0, &op1);
4887 else
4888 riscv_emit_int_compare (&code, &op0, &op1);
4890 if (FLOAT_MODE_P (GET_MODE (op0)))
4892 op0 = riscv_force_binary (word_mode, code, op0, op1);
4893 op1 = const0_rtx;
4894 code = NE;
4897 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4898 emit_jump_insn (gen_condjump (condition, label));
4901 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
4902 Return false if expansion failed. */
4904 bool
4905 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4907 machine_mode mode = GET_MODE (dest);
4908 rtx_code code = GET_CODE (op);
4909 rtx op0 = XEXP (op, 0);
4910 rtx op1 = XEXP (op, 1);
4912 if (((TARGET_ZICOND_LIKE
4913 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4914 && (GET_MODE_CLASS (mode) == MODE_INT))
4915 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4917 machine_mode mode0 = GET_MODE (op0);
4918 machine_mode mode1 = GET_MODE (op1);
4920 /* An integer comparison must be comparing WORD_MODE objects. We
4921 must enforce that so that we don't strip away a sign_extension
4922 thinking it is unnecessary. We might consider using
4923 riscv_extend_comparands if they are not already properly extended. */
4924 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4925 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4926 return false;
4928 /* In the fallback generic case use MODE rather than WORD_MODE for
4929 the output of the SCC instruction, to match the mode of the NEG
4930 operation below. The output of SCC is a boolean 0 or 1, so it is
4931 valid for input in any scalar integer mode. */
4932 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4933 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4934 ? word_mode : mode);
4935 bool invert = false;
4937 /* Canonicalize the comparison. It must be an equality comparison
4938 of integer operands, or with SFB it can be any comparison of
4939 integer operands. If it isn't, then emit an SCC instruction
4940 so that we can then use an equality comparison against zero. */
4941 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4942 || !INTEGRAL_MODE_P (mode0))
4944 bool *invert_ptr = nullptr;
4946 /* If riscv_expand_int_scc inverts the condition, then it will
4947 flip the value of INVERT. We need to know where so that
4948 we can adjust it for our needs. */
4949 if (code == LE || code == LEU || code == GE || code == GEU)
4950 invert_ptr = &invert;
4952 /* Emit an SCC-like instruction into a temporary so that we can
4953 use an EQ/NE comparison. We can support both FP and integer
4954 conditional moves. */
4955 if (INTEGRAL_MODE_P (mode0))
4956 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4957 else if (FLOAT_MODE_P (mode0)
4958 && fp_scc_comparison (op, GET_MODE (op)))
4959 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4960 else
4961 return false;
4963 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4965 /* We've generated a new comparison. Update the local variables. */
4966 code = GET_CODE (op);
4967 op0 = XEXP (op, 0);
4968 op1 = XEXP (op, 1);
4970 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4971 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4973 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4975 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4976 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4978 /* The expander is a bit loose in its specification of the true
4979 arm of the conditional move. That allows us to support more
4980 cases for extensions which are more general than SFB. But it
4981 does mean we need to force CONS into a register at this point. */
4982 cons = force_reg (mode, cons);
4983 /* With XTheadCondMov we need to force ALT into a register too. */
4984 alt = force_reg (mode, alt);
4985 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4986 cons, alt)));
4987 return true;
4989 else if (!TARGET_ZICOND_LIKE)
4991 if (invert)
4992 std::swap (cons, alt);
4994 rtx reg1 = gen_reg_rtx (mode);
4995 rtx reg2 = gen_reg_rtx (mode);
4996 rtx reg3 = gen_reg_rtx (mode);
4997 rtx reg4 = gen_reg_rtx (mode);
4999 riscv_emit_unary (NEG, reg1, tmp);
5000 riscv_emit_binary (AND, reg2, reg1, cons);
5001 riscv_emit_unary (NOT, reg3, reg1);
5002 riscv_emit_binary (AND, reg4, reg3, alt);
5003 riscv_emit_binary (IOR, dest, reg2, reg4);
5004 return true;
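/* For the 0/1 flag in TMP, the branchless fallback above computes
   (mnemonics for illustration):
     neg reg1, tmp       # reg1 = tmp ? -1 : 0
     and reg2, reg1, cons
     not reg3, reg1
     and reg4, reg3, alt
     or  dest, reg2, reg4
   i.e. DEST = (CONS & -TMP) | (ALT & ~(-TMP)).  */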
5006 /* 0, reg or 0, imm */
5007 else if (cons == CONST0_RTX (mode)
5008 && (REG_P (alt)
5009 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
5011 riscv_emit_int_compare (&code, &op0, &op1, true);
5012 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5013 alt = force_reg (mode, alt);
5014 emit_insn (gen_rtx_SET (dest,
5015 gen_rtx_IF_THEN_ELSE (mode, cond,
5016 cons, alt)));
5017 return true;
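/* With Zicond the "0, reg" shape above maps onto a single
   conditional-zero instruction; e.g. for (eq op0 0) it becomes
   "czero.eqz dest,alt,op0", yielding 0 when OP0 is zero and ALT
   otherwise (a sketch; operand names follow the code above).  */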
5019 /* imm, imm */
5020 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
5021 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5023 riscv_emit_int_compare (&code, &op0, &op1, true);
5024 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5025 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
5026 alt = force_reg (mode, gen_int_mode (t, mode));
5027 emit_insn (gen_rtx_SET (dest,
5028 gen_rtx_IF_THEN_ELSE (mode, cond,
5029 CONST0_RTX (mode),
5030 alt)));
5031 /* CONS might not fit into a signed 12 bit immediate suitable
5032 for an addi instruction. If that's the case, force it
5033 into a register. */
5034 if (!SMALL_OPERAND (INTVAL (cons)))
5035 cons = force_reg (mode, cons);
5036 riscv_emit_binary (PLUS, dest, dest, cons);
5037 return true;
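/* A worked example of the "imm, imm" case: for dest = cond ? 5 : 12
   the code above materializes ALT - CONS = 7, emits
   dest = cond ? 0 : 7, then adds back CONS, giving 5 on the true arm
   and 12 on the false arm with a single conditional-zero operation.  */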
5039 /* imm, reg */
5040 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
5042 /* Optimize for register value of 0. */
5043 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
5045 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5046 cons = force_reg (mode, cons);
5047 emit_insn (gen_rtx_SET (dest,
5048 gen_rtx_IF_THEN_ELSE (mode, cond,
5049 cons, alt)));
5050 return true;
5053 riscv_emit_int_compare (&code, &op0, &op1, true);
5054 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5056 rtx temp1 = gen_reg_rtx (mode);
5057 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
5059 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
5060 suitable for an addi instruction. If that's the case, force it
5061 into a register. */
5062 if (!SMALL_OPERAND (INTVAL (temp2)))
5063 temp2 = force_reg (mode, temp2);
5064 if (!SMALL_OPERAND (INTVAL (cons)))
5065 cons = force_reg (mode, cons);
5067 riscv_emit_binary (PLUS, temp1, alt, temp2);
5068 emit_insn (gen_rtx_SET (dest,
5069 gen_rtx_IF_THEN_ELSE (mode, cond,
5070 CONST0_RTX (mode),
5071 temp1)));
5072 riscv_emit_binary (PLUS, dest, dest, cons);
5073 return true;
5075 /* reg, 0 or imm, 0 */
5076 else if ((REG_P (cons)
5077 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
5078 && alt == CONST0_RTX (mode))
5080 riscv_emit_int_compare (&code, &op0, &op1, true);
5081 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5082 cons = force_reg (mode, cons);
5083 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
5084 cons, alt)));
5085 return true;
5087 /* reg, imm */
5088 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5090 /* Optimize for register value of 0. */
5091 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
5093 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5094 alt = force_reg (mode, alt);
5095 emit_insn (gen_rtx_SET (dest,
5096 gen_rtx_IF_THEN_ELSE (mode, cond,
5097 cons, alt)));
5098 return true;
5101 riscv_emit_int_compare (&code, &op0, &op1, true);
5102 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5104 rtx temp1 = gen_reg_rtx (mode);
5105 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
5107 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
5108 suitable for an addi instruction. If that's the case, force it
5109 into a register. */
5110 if (!SMALL_OPERAND (INTVAL (temp2)))
5111 temp2 = force_reg (mode, temp2);
5112 if (!SMALL_OPERAND (INTVAL (alt)))
5113 alt = force_reg (mode, alt);
5115 riscv_emit_binary (PLUS, temp1, cons, temp2);
5116 emit_insn (gen_rtx_SET (dest,
5117 gen_rtx_IF_THEN_ELSE (mode, cond,
5118 temp1,
5119 CONST0_RTX (mode))));
5120 riscv_emit_binary (PLUS, dest, dest, alt);
5121 return true;
5123 /* reg, reg */
5124 else if (REG_P (cons) && REG_P (alt))
5126 if (((code == EQ && rtx_equal_p (cons, op0))
5127 || (code == NE && rtx_equal_p (alt, op0)))
5128 && op1 == CONST0_RTX (mode))
5130 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5131 alt = force_reg (mode, alt);
5132 emit_insn (gen_rtx_SET (dest,
5133 gen_rtx_IF_THEN_ELSE (mode, cond,
5134 cons, alt)));
5135 return true;
5138 rtx reg1 = gen_reg_rtx (mode);
5139 rtx reg2 = gen_reg_rtx (mode);
5140 riscv_emit_int_compare (&code, &op0, &op1, true);
5141 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5142 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
5143 GET_MODE (op0), op0, op1);
5144 emit_insn (gen_rtx_SET (reg2,
5145 gen_rtx_IF_THEN_ELSE (mode, cond2,
5146 CONST0_RTX (mode),
5147 cons)));
5148 emit_insn (gen_rtx_SET (reg1,
5149 gen_rtx_IF_THEN_ELSE (mode, cond1,
5150 CONST0_RTX (mode),
5151 alt)));
5152 riscv_emit_binary (PLUS, dest, reg1, reg2);
5153 return true;
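/* The "reg, reg" fallback above uses two complementary conditional
   zeros, reg2 = cond ? cons : 0 and reg1 = cond ? 0 : alt, whose sum
   is exactly cond ? cons : alt; with Zicond this amounts to
   czero.eqz plus czero.nez plus an add.  */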
5157 return false;
5160 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
5161 least PARM_BOUNDARY bits of alignment, but will be given anything up
5162 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
5164 static unsigned int
5165 riscv_function_arg_boundary (machine_mode mode, const_tree type)
5167 unsigned int alignment;
5169 /* Use natural alignment if the type is not aggregate data. */
5170 if (type && !AGGREGATE_TYPE_P (type))
5171 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
5172 else
5173 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
5175 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
5178 /* If MODE represents an argument that can be passed or returned in
5179 floating-point registers, return the number of registers, else 0. */
5181 static unsigned
5182 riscv_pass_mode_in_fpr_p (machine_mode mode)
5184 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
5186 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5187 return 1;
5189 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5190 return 2;
5193 return 0;
5196 typedef struct {
5197 const_tree type;
5198 HOST_WIDE_INT offset;
5199 } riscv_aggregate_field;
5201 /* Identify subfields of aggregates that are candidates for passing in
5202 floating-point registers. */
5204 static int
5205 riscv_flatten_aggregate_field (const_tree type,
5206 riscv_aggregate_field fields[2],
5207 int n, HOST_WIDE_INT offset,
5208 bool ignore_zero_width_bit_field_p)
5210 switch (TREE_CODE (type))
5212 case RECORD_TYPE:
5213 /* Can't handle incomplete types nor sizes that are not fixed. */
5214 if (!COMPLETE_TYPE_P (type)
5215 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5216 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
5217 return -1;
5219 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
5220 if (TREE_CODE (f) == FIELD_DECL)
5222 if (!TYPE_P (TREE_TYPE (f)))
5223 return -1;
5225 /* The C++ front end strips zero-length bit-fields from structs.
5226 So we need to ignore them in the C front end to make C code
5227 compatible with C++ code. */
5228 if (ignore_zero_width_bit_field_p
5229 && DECL_BIT_FIELD (f)
5230 && (DECL_SIZE (f) == NULL_TREE
5231 || integer_zerop (DECL_SIZE (f))))
5233 else
5235 HOST_WIDE_INT pos = offset + int_byte_position (f);
5236 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
5237 fields, n, pos,
5238 ignore_zero_width_bit_field_p);
5240 if (n < 0)
5241 return -1;
5243 return n;
5245 case ARRAY_TYPE:
5247 HOST_WIDE_INT n_elts;
5248 riscv_aggregate_field subfields[2];
5249 tree index = TYPE_DOMAIN (type);
5250 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
5251 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
5252 subfields, 0, offset,
5253 ignore_zero_width_bit_field_p);
5255 /* Can't handle incomplete types nor sizes that are not fixed. */
5256 if (n_subfields <= 0
5257 || !COMPLETE_TYPE_P (type)
5258 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5259 || !index
5260 || !TYPE_MAX_VALUE (index)
5261 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5262 || !TYPE_MIN_VALUE (index)
5263 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5264 || !tree_fits_uhwi_p (elt_size))
5265 return -1;
5267 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5268 - tree_to_uhwi (TYPE_MIN_VALUE (index));
5269 gcc_assert (n_elts >= 0);
5271 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
5272 for (int j = 0; j < n_subfields; j++)
5274 if (n >= 2)
5275 return -1;
5277 fields[n] = subfields[j];
5278 fields[n++].offset += i * tree_to_uhwi (elt_size);
5281 return n;
5284 case COMPLEX_TYPE:
5287 /* A complex type consumes two fields, so n must be 0. */
5287 if (n != 0)
5288 return -1;
5290 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
5292 if (elt_size <= UNITS_PER_FP_ARG)
5294 fields[0].type = TREE_TYPE (type);
5295 fields[0].offset = offset;
5296 fields[1].type = TREE_TYPE (type);
5297 fields[1].offset = offset + elt_size;
5299 return 2;
5302 return -1;
5305 default:
5306 if (n < 2
5307 && ((SCALAR_FLOAT_TYPE_P (type)
5308 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
5309 || (INTEGRAL_TYPE_P (type)
5310 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
5312 fields[n].type = type;
5313 fields[n].offset = offset;
5314 return n + 1;
5316 else
5317 return -1;
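/* For example, "struct s { float x; float y; }" flattens to two SFmode
   fields at offsets 0 and 4, and "struct t { float v[2]; }" flattens
   its array elements to the same two fields, so both become candidates
   for the FPR-pair conventions below.  */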
5321 /* Identify candidate aggregates for passing in floating-point registers.
5322 Candidates have at most two fields after flattening. */
5324 static int
5325 riscv_flatten_aggregate_argument (const_tree type,
5326 riscv_aggregate_field fields[2],
5327 bool ignore_zero_width_bit_field_p)
5329 if (!type || TREE_CODE (type) != RECORD_TYPE)
5330 return -1;
5332 return riscv_flatten_aggregate_field (type, fields, 0, 0,
5333 ignore_zero_width_bit_field_p);
5336 /* See whether TYPE is a record whose fields should be returned in one or
5337 two floating-point registers. If so, populate FIELDS accordingly. */
5339 static unsigned
5340 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
5341 riscv_aggregate_field fields[2])
5343 static int warned = 0;
5345 /* This is the old ABI, which differs for C++ and C. */
5346 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5347 for (int i = 0; i < n_old; i++)
5348 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5350 n_old = -1;
5351 break;
5354 /* This is the new ABI, which is the same for C++ and C. */
5355 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5356 for (int i = 0; i < n_new; i++)
5357 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5359 n_new = -1;
5360 break;
5363 if ((n_old != n_new) && (warned == 0))
5365 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5366 "bit-fields changed in GCC 10");
5367 warned = 1;
5370 return n_new > 0 ? n_new : 0;
5373 /* See whether TYPE is a record whose fields should be returned in one
5374 floating-point register and one integer register. If so, populate
5375 FIELDS accordingly. */
5377 static bool
5378 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
5379 riscv_aggregate_field fields[2])
5381 static int warned = 0;
5383 /* This is the old ABI, which differs for C++ and C. */
5384 unsigned num_int_old = 0, num_float_old = 0;
5385 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5386 for (int i = 0; i < n_old; i++)
5388 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
5389 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
5392 /* This is the new ABI, which is the same for C++ and C. */
5393 unsigned num_int_new = 0, num_float_new = 0;
5394 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5395 for (int i = 0; i < n_new; i++)
5397 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
5398 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
5401 if (((num_int_old == 1 && num_float_old == 1
5402 && (num_int_old != num_int_new || num_float_old != num_float_new))
5403 || (num_int_new == 1 && num_float_new == 1
5404 && (num_int_old != num_int_new || num_float_old != num_float_new)))
5405 && (warned == 0))
5407 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5408 "bit-fields changed in GCC 10");
5409 warned = 1;
5412 return num_int_new == 1 && num_float_new == 1;
5415 /* Return the representation of an argument passed or returned in an FPR
5416 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
5417 two modes may be different for structures like:
5419 struct __attribute__((packed)) foo { float f; }
5421 where the SFmode value "f" is passed in REGNO but the struct itself
5422 has mode BLKmode. */
5424 static rtx
5425 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
5426 machine_mode value_mode,
5427 HOST_WIDE_INT offset)
5429 rtx x = gen_rtx_REG (value_mode, regno);
5431 if (type_mode != value_mode)
5433 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
5434 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
5436 return x;
5439 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
5440 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
5441 byte offset for the first value, likewise MODE2 and OFFSET2 for the
5442 second value. */
5444 static rtx
5445 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
5446 machine_mode mode1, HOST_WIDE_INT offset1,
5447 unsigned regno2, machine_mode mode2,
5448 HOST_WIDE_INT offset2)
5450 return gen_rtx_PARALLEL
5451 (mode,
5452 gen_rtvec (2,
5453 gen_rtx_EXPR_LIST (VOIDmode,
5454 gen_rtx_REG (mode1, regno1),
5455 GEN_INT (offset1)),
5456 gen_rtx_EXPR_LIST (VOIDmode,
5457 gen_rtx_REG (mode2, regno2),
5458 GEN_INT (offset2))));
5461 static rtx
5462 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
5463 unsigned gpr_base)
5465 gcc_assert (riscv_v_ext_vls_mode_p (mode));
5467 unsigned count = 0;
5468 unsigned regnum = 0;
5469 machine_mode gpr_mode = VOIDmode;
5470 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
5471 unsigned gpr_size = GET_MODE_SIZE (Xmode);
5473 if (IN_RANGE (vls_size, 0, gpr_size * 2))
5475 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
5477 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5479 regnum = gpr_base + info->gpr_offset;
5480 info->num_gprs = count;
5481 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5485 if (!regnum)
5486 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5488 gcc_assert (gpr_mode != VOIDmode);
5490 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5491 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5493 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
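/* For instance, on RV64 an 8-byte VLS vector argument occupies one GPR
   and a 16-byte one occupies two (COUNT comes from
   riscv_v_vls_mode_aggregate_gpr_count); anything larger than two GPRs
   yields NULL_RTX and is passed by other means.  */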
5496 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5497 for a call to a function whose data type is FNTYPE.
5498 For a library call, FNTYPE is 0. */
5500 void
5501 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5503 memset (cum, 0, sizeof (*cum));
5505 if (fntype)
5506 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5507 else
5508 cum->variant_cc = RISCV_CC_BASE;
5511 /* Return true if TYPE is a vector type that can be passed in vector registers. */
5514 static bool
5515 riscv_vector_type_p (const_tree type)
5517 /* Currently, only built-in scalable vector types are allowed; in the future,
5518 more vector types may be allowed, such as GNU vector types. */
5519 return riscv_vector::builtin_type_p (type);
5522 static unsigned int
5523 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5525 /* Subroutine of riscv_get_arg_info. */
5527 static rtx
5528 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5529 machine_mode mode, bool return_p)
5531 gcc_assert (riscv_v_ext_mode_p (mode));
5533 info->mr_offset = cum->num_mrs;
5534 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5536 /* For scalable mask return value. */
5537 if (return_p)
5538 return gen_rtx_REG (mode, V_REG_FIRST);
5540 /* For the first scalable mask argument. */
5541 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5543 info->num_mrs = 1;
5544 return gen_rtx_REG (mode, V_REG_FIRST);
5546 else
5548 /* The remaining scalable mask arguments are treated as scalable
5549 data arguments. */
5553 /* The number and alignment of vector registers needed for this scalable
5554 vector argument. When the mode size is less than a full vector, we use
5555 one vector register to pass it. Just call TARGET_HARD_REGNO_NREGS for
5556 the number information. */
5557 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5558 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5559 ? nregs / riscv_vector::get_nf (mode)
5560 : nregs;
5561 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5562 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5563 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5565 /* For scalable data and scalable tuple return value. */
5566 if (return_p)
5567 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5569 /* Iterate through the USED_VRS array to find vector register groups that
5570 have not been allocated and whose first register is aligned to LMUL. */
5571 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5573 /* The index in USED_VRS array. */
5574 int idx = i - arg_reg_start;
5575 /* Find the first register unused. */
5576 if (!cum->used_vrs[idx])
5578 bool find_set = true;
5579 /* Ensure there are NREGS continuous unused registers. */
5580 for (int j = 1; j < nregs; j++)
5581 if (cum->used_vrs[idx + j])
5583 find_set = false;
5584 /* Update I to the last aligned register which
5585 cannot be used and the next iteration will add
5586 LMUL step to I. */
5587 i += (j / LMUL) * LMUL;
5588 break;
5591 if (find_set)
5593 info->num_vrs = nregs;
5594 info->vr_offset = idx;
5595 return gen_rtx_REG (mode, i + V_REG_FIRST);
5600 return NULL_RTX;
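/* As an illustration of the allocation loop above: an LMUL-2 argument
   (nregs == 2) probes v8/v9, then v10/v11, and so on up to v22/v23,
   taking the first group whose registers are all unused; a later
   LMUL-1 argument can still slot into any remaining single register.  */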
5603 /* Fill INFO with information about a single argument, and return an RTL
5604 pattern to pass or return the argument. Return NULL_RTX if the argument
5605 cannot be passed or returned in registers; in that case it may be passed
5606 by reference or through the stack. CUM is the cumulative state for
5607 earlier arguments. MODE is the mode of this argument and TYPE is its
5608 type (if known). NAMED is true if this is a named (fixed) argument rather
5609 than a variable one. RETURN_P is true if returning, false if passing. */
5611 static rtx
5612 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5613 machine_mode mode, const_tree type, bool named,
5614 bool return_p)
5616 unsigned num_bytes, num_words;
5617 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5618 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5619 unsigned alignment = riscv_function_arg_boundary (mode, type);
5621 memset (info, 0, sizeof (*info));
5622 info->gpr_offset = cum->num_gprs;
5623 info->fpr_offset = cum->num_fprs;
5625 /* Passed by reference when the scalable vector argument is anonymous. */
5626 if (riscv_v_ext_mode_p (mode) && !named)
5627 return NULL_RTX;
5629 if (named)
5631 riscv_aggregate_field fields[2];
5632 unsigned fregno = fpr_base + info->fpr_offset;
5633 unsigned gregno = gpr_base + info->gpr_offset;
5635 /* Pass one- or two-element floating-point aggregates in FPRs. */
5636 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5637 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5638 switch (info->num_fprs)
5640 case 1:
5641 return riscv_pass_fpr_single (mode, fregno,
5642 TYPE_MODE (fields[0].type),
5643 fields[0].offset);
5645 case 2:
5646 return riscv_pass_fpr_pair (mode, fregno,
5647 TYPE_MODE (fields[0].type),
5648 fields[0].offset,
5649 fregno + 1,
5650 TYPE_MODE (fields[1].type),
5651 fields[1].offset);
5653 default:
5654 gcc_unreachable ();
5657 /* Pass real and complex floating-point numbers in FPRs. */
5658 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5659 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5660 switch (GET_MODE_CLASS (mode))
5662 case MODE_FLOAT:
5663 return gen_rtx_REG (mode, fregno);
5665 case MODE_COMPLEX_FLOAT:
5666 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5667 fregno + 1, GET_MODE_INNER (mode),
5668 GET_MODE_UNIT_SIZE (mode));
5670 default:
5671 gcc_unreachable ();
5674 /* Pass structs with one float and one integer in an FPR and a GPR. */
5675 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5676 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5677 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5679 info->num_gprs = 1;
5680 info->num_fprs = 1;
5682 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5683 std::swap (fregno, gregno);
5685 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5686 fields[0].offset,
5687 gregno, TYPE_MODE (fields[1].type),
5688 fields[1].offset);
5691 /* For scalable vector argument. */
5692 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5693 return riscv_get_vector_arg (info, cum, mode, return_p);
5695 /* For vls mode aggregated in gpr. */
5696 if (riscv_v_ext_vls_mode_p (mode))
5697 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5700 /* Work out the size of the argument. */
5701 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5702 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5704 /* Doubleword-aligned varargs start on an even register boundary. */
5705 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5706 info->gpr_offset += info->gpr_offset & 1;
5708 /* Partition the argument between registers and stack. */
5709 info->num_fprs = 0;
5710 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5711 info->stack_p = (num_words - info->num_gprs) != 0;
5713 if (info->num_gprs || return_p)
5714 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5716 return NULL_RTX;
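/* Illustrative example (not part of the compiler): assuming enough argument
   registers remain free, the classification above maps user-level arguments
   as follows:

     struct dd { double a; double b; };  /* two-element FP aggregate */
     struct di { double a; int b; };     /* one float plus one integer field */

   A `struct dd` argument takes the riscv_pass_fpr_pair path and lands in two
   FPRs (e.g. fa0/fa1); `struct di` takes the FPR-and-GPR path (e.g. fa0 and
   a0).  The register names are illustrative.  */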
5719 /* Implement TARGET_FUNCTION_ARG. */
5721 static rtx
5722 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5724 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5725 struct riscv_arg_info info;
5727 if (arg.end_marker_p ())
5728 /* Return the calling convention that is used by the current function. */
5729 return gen_int_mode (cum->variant_cc, SImode);
5731 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5734 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5736 static void
5737 riscv_function_arg_advance (cumulative_args_t cum_v,
5738 const function_arg_info &arg)
5740 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5741 struct riscv_arg_info info;
5743 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5745 /* Mark the corresponding registers in USED_VRS as used. */
5746 for (unsigned int i = 0; i < info.num_vrs; i++)
5748 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5749 cum->used_vrs[info.vr_offset + i] = true;
5752 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5754 error ("RVV type %qT cannot be passed to an unprototyped function",
5755 arg.type);
5756 /* Avoid repeating the message. */
5757 cum->variant_cc = RISCV_CC_V;
5760 /* Advance the register count. This has the effect of setting
5761 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5762 argument required us to skip the final GPR and pass the whole
5763 argument on the stack. */
5764 cum->num_fprs = info.fpr_offset + info.num_fprs;
5765 cum->num_gprs = info.gpr_offset + info.num_gprs;
5766 cum->num_mrs = info.mr_offset + info.num_mrs;
5769 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5771 static int
5772 riscv_arg_partial_bytes (cumulative_args_t cum,
5773 const function_arg_info &generic_arg)
5775 struct riscv_arg_info arg;
5777 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5778 generic_arg.type, generic_arg.named, false);
5779 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
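/* Illustrative example (not part of the compiler): on RV64, if seven of the
   eight argument GPRs are already taken and a 16-byte (two-word) aggregate
   arrives, riscv_get_arg_info sets num_gprs = 1 and stack_p = true, so this
   hook reports 1 * UNITS_PER_WORD = 8 bytes passed in the last GPR, with the
   remaining 8 bytes on the stack.  */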
5782 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5783 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5784 VALTYPE is null and MODE is the mode of the return value. */
5787 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5789 struct riscv_arg_info info;
5790 CUMULATIVE_ARGS args;
5792 if (type)
5794 int unsigned_p = TYPE_UNSIGNED (type);
5796 mode = TYPE_MODE (type);
5798 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5799 return values, promote the mode here too. */
5800 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5803 memset (&args, 0, sizeof args);
5805 return riscv_get_arg_info (&info, &args, mode, type, true, true);
5808 /* Implement TARGET_PASS_BY_REFERENCE. */
5810 static bool
5811 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5813 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5814 struct riscv_arg_info info;
5815 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5817 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5818 never pass variadic arguments in floating-point and vector registers,
5819 so we can avoid the call to riscv_get_arg_info in this case. */
5820 if (cum != NULL)
5822 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5824 /* Don't pass by reference if we can use a floating-point register. */
5825 if (info.num_fprs)
5826 return false;
5828 /* Don't pass by reference if we can use general registers for VLS modes. */
5829 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
5830 return false;
5832 /* Don't pass by reference if we can use vector register groups. */
5833 if (info.num_vrs > 0 || info.num_mrs > 0)
5834 return false;
5837 /* Passed by reference when:
5838 1. The scalable vector argument is anonymous.
5839 2. Args cannot be passed through vector registers. */
5840 if (riscv_v_ext_mode_p (arg.mode))
5841 return true;
5843 /* Pass by reference if the data do not fit in two integer registers. */
5844 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
5847 /* Implement TARGET_RETURN_IN_MEMORY. */
5849 static bool
5850 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5852 CUMULATIVE_ARGS args;
5853 cumulative_args_t cum = pack_cumulative_args (&args);
5855 /* The rules for returning in memory are the same as for passing the
5856 first named argument by reference. */
5857 memset (&args, 0, sizeof args);
5858 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5859 return riscv_pass_by_reference (cum, arg);
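/* Illustrative example (not part of the compiler): on RV64 a 24-byte struct
   such as `struct s { long a, b, c; };` exceeds 2 * UNITS_PER_WORD bytes and
   is therefore returned in memory through a hidden pointer, whereas a
   16-byte struct still comes back in a register pair.  */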
5862 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5864 static void
5865 riscv_setup_incoming_varargs (cumulative_args_t cum,
5866 const function_arg_info &arg,
5867 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5869 CUMULATIVE_ARGS local_cum;
5870 int gp_saved;
5872 /* The caller has advanced CUM up to, but not beyond, the last named
5873 argument. Advance a local copy of CUM past the last "real" named
5874 argument, to find out how many registers are left over. */
5875 local_cum = *get_cumulative_args (cum);
5876 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
5877 || arg.type != NULL_TREE)
5878 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5880 /* Find out how many registers we need to save. */
5881 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5883 if (!no_rtl && gp_saved > 0)
5885 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5886 REG_PARM_STACK_SPACE (cfun->decl)
5887 - gp_saved * UNITS_PER_WORD);
5888 rtx mem = gen_frame_mem (BLKmode, ptr);
5889 set_mem_alias_set (mem, get_varargs_alias_set ());
5891 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5892 mem, gp_saved);
5894 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5895 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
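/* Illustrative example (not part of the compiler): for a variadic function
   such as `int f (int n, ...)` on RV64, the single named argument consumes
   one GPR, so gp_saved = 8 - 1 = 7 and the remaining argument registers
   (7 * 8 = 56 bytes) are spilled to the varargs save area computed above.  */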
5898 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5900 static const predefined_function_abi &
5901 riscv_v_abi ()
5903 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5904 if (!v_abi.initialized_p ())
5906 HARD_REG_SET full_reg_clobbers
5907 = default_function_abi.full_reg_clobbers ();
5908 /* Callee-saved vector registers: v1-v7, v24-v31. */
5909 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5910 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5911 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5912 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5913 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5915 return v_abi;
5918 static bool
5919 riscv_vector_int_type_p (const_tree type)
5921 machine_mode mode = TYPE_MODE (type);
5923 if (VECTOR_MODE_P (mode))
5924 return INTEGRAL_MODE_P (GET_MODE_INNER (mode));
5926 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5927 return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
5930 static bool
5931 riscv_vector_float_type_p (const_tree type)
5933 machine_mode mode = TYPE_MODE (type);
5935 if (VECTOR_MODE_P (mode))
5936 return FLOAT_MODE_P (GET_MODE_INNER (mode));
5938 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5939 return strstr (name, "vfloat") != NULL;
5942 static int
5943 riscv_vector_element_bitsize (const_tree type)
5945 machine_mode mode = TYPE_MODE (type);
5947 if (VECTOR_MODE_P (mode))
5948 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
5950 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5952 if (strstr (name, "bool") != NULL)
5953 return 1;
5954 else if (strstr (name, "int8") != NULL)
5955 return 8;
5956 else if (strstr (name, "int16") != NULL || strstr (name, "float16") != NULL)
5957 return 16;
5958 else if (strstr (name, "int32") != NULL || strstr (name, "float32") != NULL)
5959 return 32;
5960 else if (strstr (name, "int64") != NULL || strstr (name, "float64") != NULL)
5961 return 64;
5963 gcc_unreachable ();
5966 static int
5967 riscv_vector_required_min_vlen (const_tree type)
5969 machine_mode mode = TYPE_MODE (type);
5971 if (riscv_v_ext_mode_p (mode))
5972 return TARGET_MIN_VLEN;
5974 int element_bitsize = riscv_vector_element_bitsize (type);
5975 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
5977 if (strstr (name, "bool64") != NULL)
5978 return element_bitsize * 64;
5979 else if (strstr (name, "bool32") != NULL)
5980 return element_bitsize * 32;
5981 else if (strstr (name, "bool16") != NULL)
5982 return element_bitsize * 16;
5983 else if (strstr (name, "bool8") != NULL)
5984 return element_bitsize * 8;
5985 else if (strstr (name, "bool4") != NULL)
5986 return element_bitsize * 4;
5987 else if (strstr (name, "bool2") != NULL)
5988 return element_bitsize * 2;
5990 if (strstr (name, "mf8") != NULL)
5991 return element_bitsize * 8;
5992 else if (strstr (name, "mf4") != NULL)
5993 return element_bitsize * 4;
5994 else if (strstr (name, "mf2") != NULL)
5995 return element_bitsize * 2;
5997 return element_bitsize;
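/* Illustrative examples (not part of the compiler) of the rules above:
   vbool64_t has 1-bit elements and the "bool64" suffix, so it requires a
   minimum VLEN of 1 * 64 = 64; vint32mf2_t has 32-bit elements at a
   fractional LMUL of 1/2 ("mf2"), requiring 32 * 2 = 64; vint64m1_t simply
   requires its element size, 64.  */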
6000 static void
6001 riscv_validate_vector_type (const_tree type, const char *hint)
6003 gcc_assert (riscv_vector_type_p (type));
6005 if (!TARGET_VECTOR)
6007 error_at (input_location, "%s %qT requires the V ISA extension",
6008 hint, type);
6009 return;
6012 int element_bitsize = riscv_vector_element_bitsize (type);
6013 bool int_type_p = riscv_vector_int_type_p (type);
6015 if (int_type_p && element_bitsize == 64
6016 && !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
6018 error_at (input_location,
6019 "%s %qT requires the zve64x, zve64f, zve64d or v ISA extension",
6020 hint, type);
6021 return;
6024 bool float_type_p = riscv_vector_float_type_p (type);
6026 if (float_type_p && element_bitsize == 16
6027 && (!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags)
6028 && !TARGET_VECTOR_ELEN_BF_16_P (riscv_vector_elen_flags)))
6030 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6031 if (strstr (name, "vfloat"))
6032 error_at (input_location,
6033 "%s %qT requires the zvfhmin or zvfh ISA extension",
6034 hint, type);
6035 return;
6038 if (float_type_p && element_bitsize == 32
6039 && !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
6041 error_at (input_location,
6042 "%s %qT requires the zve32f, zve64f, zve64d or v ISA extension",
6043 hint, type);
6044 return;
6047 if (float_type_p && element_bitsize == 64
6048 && !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
6050 error_at (input_location,
6051 "%s %qT requires the zve64d or v ISA extension", hint, type);
6052 return;
6055 int required_min_vlen = riscv_vector_required_min_vlen (type);
6057 if (TARGET_MIN_VLEN < required_min_vlen)
6059 error_at (
6060 input_location,
6061 "%s %qT requires the minimal vector length %qd but %qd is given",
6062 hint, type, required_min_vlen, TARGET_MIN_VLEN);
6063 return;
6067 /* Return true if a function with type FNTYPE returns its value in
6068 RISC-V V registers. */
6070 static bool
6071 riscv_return_value_is_vector_type_p (const_tree fntype)
6073 tree return_type = TREE_TYPE (fntype);
6075 if (riscv_vector_type_p (return_type))
6077 riscv_validate_vector_type (return_type, "return type");
6078 return true;
6080 else
6081 return false;
6084 /* Return true if a function with type FNTYPE takes arguments in
6085 RISC-V V registers. */
6087 static bool
6088 riscv_arguments_is_vector_type_p (const_tree fntype)
6090 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
6091 chain = TREE_CHAIN (chain))
6093 tree arg_type = TREE_VALUE (chain);
6094 if (riscv_vector_type_p (arg_type))
6096 riscv_validate_vector_type (arg_type, "argument type");
6097 return true;
6101 return false;
6104 /* Return true if FNTYPE is a riscv_vector_cc function type.
6105 For more details see the link below.
6106 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
6107 static bool
6108 riscv_vector_cc_function_p (const_tree fntype)
6110 tree attr = TYPE_ATTRIBUTES (fntype);
6111 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
6112 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
6114 if (vector_cc_p && !TARGET_VECTOR)
6115 error_at (input_location,
6116 "function attribute %qs requires the V ISA extension",
6117 "riscv_vector_cc");
6119 return vector_cc_p;
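/* Illustrative usage (not part of the compiler): a user opts into this
   calling convention with, e.g.,

     void bar (void) __attribute__ ((riscv_vector_cc));

   which makes the attribute lookup above succeed and routes the function
   type through riscv_v_abi.  */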
6122 /* Implement TARGET_FNTYPE_ABI. */
6124 static const predefined_function_abi &
6125 riscv_fntype_abi (const_tree fntype)
6127 /* Implement the vector calling convention. For more details see
6128 the link below.
6129 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
6130 if (riscv_return_value_is_vector_type_p (fntype)
6131 || riscv_arguments_is_vector_type_p (fntype)
6132 || riscv_vector_cc_function_p (fntype))
6133 return riscv_v_abi ();
6135 return default_function_abi;
6138 /* Return the RISC-V calling convention recorded in the USE rtx of a call insn. */
6139 riscv_cc
6140 get_riscv_cc (const rtx use)
6142 gcc_assert (GET_CODE (use) == USE);
6143 rtx unspec = XEXP (use, 0);
6144 gcc_assert (GET_CODE (unspec) == UNSPEC
6145 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
6146 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
6147 gcc_assert (cc < RISCV_CC_UNKNOWN);
6148 return cc;
6151 /* Implement TARGET_INSN_CALLEE_ABI. */
6153 const predefined_function_abi &
6154 riscv_insn_callee_abi (const rtx_insn *insn)
6156 rtx pat = PATTERN (insn);
6157 gcc_assert (GET_CODE (pat) == PARALLEL);
6158 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
6159 return function_abis[cc];
6162 /* Handle an attribute requiring a FUNCTION_DECL;
6163 arguments as in struct attribute_spec.handler. */
6164 static tree
6165 riscv_handle_fndecl_attribute (tree *node, tree name,
6166 tree args ATTRIBUTE_UNUSED,
6167 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6169 if (TREE_CODE (*node) != FUNCTION_DECL)
6171 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6172 name);
6173 *no_add_attrs = true;
6176 return NULL_TREE;
6179 /* Verify type-based attributes. NODE is what the attribute is being
6180 applied to. NAME is the attribute name. ARGS are the attribute args.
6181 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
6182 the attribute should be ignored. */
6184 static tree
6185 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6186 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6188 /* Check for an argument. */
6189 if (is_attribute_p ("interrupt", name))
6191 if (args)
6193 tree cst = TREE_VALUE (args);
6194 const char *string;
6196 if (TREE_CODE (cst) != STRING_CST)
6198 warning (OPT_Wattributes,
6199 "%qE attribute requires a string argument",
6200 name);
6201 *no_add_attrs = true;
6202 return NULL_TREE;
6205 string = TREE_STRING_POINTER (cst);
6206 if (strcmp (string, "user") && strcmp (string, "supervisor")
6207 && strcmp (string, "machine"))
6209 warning (OPT_Wattributes,
6210 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
6211 "or %<\"machine\"%>", name);
6212 *no_add_attrs = true;
6217 return NULL_TREE;
6220 static tree
6221 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
6222 ATTRIBUTE_UNUSED int flags,
6223 bool *no_add_attrs)
6225 if (!is_attribute_p ("riscv_rvv_vector_bits", name))
6226 return NULL_TREE;
6228 *no_add_attrs = true;
6230 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL)
6232 error (
6233 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified",
6234 "riscv_rvv_vector_bits");
6235 return NULL_TREE;
6238 tree type = *node;
6240 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type))
6242 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type);
6243 return NULL_TREE;
6246 tree size = TREE_VALUE (args);
6248 if (TREE_CODE (size) != INTEGER_CST)
6250 error ("%qs requires an integer constant", "riscv_rvv_vector_bits");
6251 return NULL_TREE;
6254 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size);
6255 unsigned HOST_WIDE_INT type_mode_bits
6256 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant ();
6258 if (args_in_bits != type_mode_bits)
6260 error ("invalid RVV vector size %qd, "
6261 "expected size is %qd based on LMUL of type and %qs",
6262 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl");
6263 return NULL_TREE;
6266 type = build_distinct_type_copy (type);
6267 TYPE_ATTRIBUTES (type)
6268 = remove_attribute ("RVV sizeless type",
6269 copy_list (TYPE_ATTRIBUTES (type)));
6271 /* Operations such as ALU/compare on vbool*_t are not well defined;
6272 continue to treat vbool*_t as indivisible. */
6273 if (!VECTOR_BOOLEAN_TYPE_P (type))
6274 TYPE_INDIVISIBLE_P (type) = 0;
6276 *node = type;
6278 return NULL_TREE;
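/* Illustrative usage (not part of the compiler): assuming a zvl128b target
   compiled with -mrvv-vector-bits=zvl, a user may write

     typedef vint32m1_t fixed_vint32m1_t
       __attribute__ ((riscv_rvv_vector_bits (128)));

   The handler above checks that 128 matches the mode precision of
   vint32m1_t and produces a sized (non-sizeless) copy of the type.  */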
6281 /* Return true if function TYPE is an interrupt function. */
6282 static bool
6283 riscv_interrupt_type_p (tree type)
6285 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
6288 /* Return true if FUNC is a naked function. */
6289 static bool
6290 riscv_naked_function_p (tree func)
6292 tree func_decl = func;
6293 if (func == NULL_TREE)
6294 func_decl = current_function_decl;
6295 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
6298 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
6299 static bool
6300 riscv_allocate_stack_slots_for_args ()
6302 /* Naked functions should not allocate stack slots for arguments. */
6303 return !riscv_naked_function_p (current_function_decl);
6306 /* Implement TARGET_WARN_FUNC_RETURN. */
6307 static bool
6308 riscv_warn_func_return (tree decl)
6310 /* Naked functions are implemented entirely in assembly, including the
6311 return sequence, so suppress warnings about this. */
6312 return !riscv_naked_function_p (decl);
6315 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6317 static void
6318 riscv_va_start (tree valist, rtx nextarg)
6320 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
6321 std_expand_builtin_va_start (valist, nextarg);
6324 /* Make ADDR suitable for use as a call or sibcall target. */
6327 riscv_legitimize_call_address (rtx addr)
6329 if (!call_insn_operand (addr, VOIDmode))
6331 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
6332 riscv_emit_move (reg, addr);
6333 return reg;
6335 return addr;
6338 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
6339 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
6341 static void
6342 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
6344 const char *reloc;
6346 switch (riscv_classify_symbolic_expression (op))
6348 case SYMBOL_ABSOLUTE:
6349 reloc = hi_reloc ? "%hi" : "%lo";
6350 break;
6352 case SYMBOL_PCREL:
6353 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
6354 break;
6356 case SYMBOL_TLS_LE:
6357 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
6358 break;
6360 default:
6361 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
6362 return;
6365 fprintf (file, "%s(", reloc);
6366 output_addr_const (file, riscv_strip_unspec_address (op));
6367 fputc (')', file);
6370 /* Return the memory model that encapsulates both given models. */
6372 enum memmodel
6373 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
6375 model1 = memmodel_base (model1);
6376 model2 = memmodel_base (model2);
6378 enum memmodel weaker = model1 <= model2 ? model1: model2;
6379 enum memmodel stronger = model1 > model2 ? model1: model2;
6381 switch (stronger)
6383 case MEMMODEL_SEQ_CST:
6384 case MEMMODEL_ACQ_REL:
6385 return stronger;
6386 case MEMMODEL_RELEASE:
6387 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
6388 return MEMMODEL_ACQ_REL;
6389 else
6390 return stronger;
6391 case MEMMODEL_ACQUIRE:
6392 case MEMMODEL_CONSUME:
6393 case MEMMODEL_RELAXED:
6394 return stronger;
6395 default:
6396 gcc_unreachable ();
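/* Illustrative example (not part of the compiler): uniting MEMMODEL_RELEASE
   with MEMMODEL_ACQUIRE selects RELEASE as the stronger model; since the
   weaker side is ACQUIRE, the result is widened to MEMMODEL_ACQ_REL.
   Uniting MEMMODEL_SEQ_CST with anything yields MEMMODEL_SEQ_CST.  */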
6400 /* Return true if the .AQ suffix should be added to an AMO to implement the
6401 acquire portion of memory model MODEL. */
6403 static bool
6404 riscv_memmodel_needs_amo_acquire (enum memmodel model)
6406 /* ZTSO amo mappings require no annotations. */
6407 if (TARGET_ZTSO)
6408 return false;
6410 switch (model)
6412 case MEMMODEL_ACQ_REL:
6413 case MEMMODEL_SEQ_CST:
6414 case MEMMODEL_ACQUIRE:
6415 case MEMMODEL_CONSUME:
6416 return true;
6418 case MEMMODEL_RELEASE:
6419 case MEMMODEL_RELAXED:
6420 return false;
6422 default:
6423 gcc_unreachable ();
6427 /* Return true if the .RL suffix should be added to an AMO to implement the
6428 release portion of memory model MODEL. */
6430 static bool
6431 riscv_memmodel_needs_amo_release (enum memmodel model)
6433 /* ZTSO amo mappings require no annotations. */
6434 if (TARGET_ZTSO)
6435 return false;
6437 switch (model)
6439 case MEMMODEL_ACQ_REL:
6440 case MEMMODEL_SEQ_CST:
6441 case MEMMODEL_RELEASE:
6442 return true;
6444 case MEMMODEL_ACQUIRE:
6445 case MEMMODEL_CONSUME:
6446 case MEMMODEL_RELAXED:
6447 return false;
6449 default:
6450 gcc_unreachable ();
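/* Illustrative mapping (not part of the compiler): for an AMO such as
   amoadd.w, the two predicates above produce amoadd.w.aq for acquire,
   amoadd.w.rl for release, amoadd.w.aqrl for acq_rel or seq_cst, and a
   bare amoadd.w for relaxed or whenever Ztso is in effect.  */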
6454 /* Get the REGNO alignment of a vector mode.
6455 The alignment equals LMUL when LMUL >= 1;
6456 otherwise, the alignment is 1. */
6458 riscv_get_v_regno_alignment (machine_mode mode)
6460 /* Per the RVV spec (3.3.2): for LMUL = 2, 4 or 8, register numbers must be
6461 multiples of 2, 4 or 8, but mask vector registers may use any register number. */
6462 int lmul = 1;
6463 machine_mode rvv_mode = mode;
6464 if (riscv_v_ext_vls_mode_p (rvv_mode))
6466 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
6467 if (size < TARGET_MIN_VLEN)
6468 return 1;
6469 else
6470 return size / TARGET_MIN_VLEN;
6472 if (riscv_v_ext_tuple_mode_p (rvv_mode))
6473 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
6474 poly_int64 size = GET_MODE_SIZE (rvv_mode);
6475 if (known_gt (size, UNITS_PER_V_REG))
6476 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
6477 return lmul;
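/* Illustrative example (not part of the compiler): a mode occupying four
   vector registers (LMUL = 4) gets alignment 4, so only v0, v4, v8, ... are
   valid starting registers; a VLS mode of twice TARGET_MIN_VLEN bits gets
   alignment 2; mask and fractional-LMUL modes get alignment 1.  */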
6480 /* Define ASM_OUTPUT_OPCODE to do anything special before
6481 emitting an opcode. */
6482 const char *
6483 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
6485 if (TARGET_XTHEADVECTOR)
6486 return th_asm_output_opcode (asm_out_file, p);
6488 return p;
6491 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
6493 'h' Print the high-part relocation associated with OP, after stripping
6494 any outermost HIGH.
6495 'R' Print the low-part relocation associated with OP.
6496 'C' Print the integer branch condition for comparison OP.
6497 'N' Print the inverse of the integer branch condition for comparison OP.
6498 'A' Print the atomic operation suffix for memory model OP.
6499 'I' Print the LR suffix for memory model OP.
6500 'J' Print the SC suffix for memory model OP.
6501 'L' Print a non-temporal locality hint instruction.
6502 'z' Print x0 if OP is zero, otherwise print OP normally.
6503 'i' Print i if the operand is not a register.
6504 'S' Print shift-index of single-bit mask OP.
6505 'T' Print shift-index of inverted single-bit mask OP.
6506 '~' Print w if TARGET_64BIT is true; otherwise print nothing.
6508 Note please keep this list and the list in riscv.md in sync. */
6510 static void
6511 riscv_print_operand (FILE *file, rtx op, int letter)
6513 /* `~` does not take an operand, so OP will be null.
6514 Check for it before accessing OP. */
6516 if (letter == '~')
6518 if (TARGET_64BIT)
6519 fputc ('w', file);
6520 return;
6522 machine_mode mode = GET_MODE (op);
6523 enum rtx_code code = GET_CODE (op);
6525 switch (letter)
6527 case 'o': {
6528 /* Print 'OP' variant for RVV instructions.
6529 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
6530 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
6531 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
6532 if (riscv_v_ext_mode_p (mode))
6534 if (REG_P (op))
6535 asm_fprintf (file, "v");
6536 else if (CONST_VECTOR_P (op))
6537 asm_fprintf (file, "i");
6538 else
6539 output_operand_lossage ("invalid vector operand");
6541 else
6543 if (CONST_INT_P (op))
6544 asm_fprintf (file, "i");
6545 else
6546 asm_fprintf (file, "x");
6548 break;
6550 case 'v': {
6551 rtx elt;
6553 if (REG_P (op))
6554 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
6555 else
6557 if (!const_vec_duplicate_p (op, &elt))
6558 output_operand_lossage ("invalid vector constant");
6559 else if (satisfies_constraint_Wc0 (op))
6560 asm_fprintf (file, "0");
6561 else if (satisfies_constraint_vi (op)
6562 || satisfies_constraint_vj (op)
6563 || satisfies_constraint_vk (op))
6564 asm_fprintf (file, "%wd", INTVAL (elt));
6565 else
6566 output_operand_lossage ("invalid vector constant");
6568 break;
6570 case 'V': {
6571 rtx elt;
6572 if (!const_vec_duplicate_p (op, &elt))
6573 output_operand_lossage ("invalid vector constant");
6574 else if (satisfies_constraint_vj (op))
6575 asm_fprintf (file, "%wd", -INTVAL (elt));
6576 else
6577 output_operand_lossage ("invalid vector constant");
6578 break;
6580 case 'm': {
6581 if (riscv_v_ext_mode_p (mode))
6583 /* Calculate lmul according to mode and print the value. */
6584 int lmul = riscv_get_v_regno_alignment (mode);
6585 asm_fprintf (file, "%d", lmul);
6587 else if (code == CONST_INT)
6589 /* If it is a const_int value, it denotes the VLMUL field enum. */
6590 unsigned int vlmul = UINTVAL (op);
6591 switch (vlmul)
6593 case riscv_vector::LMUL_1:
6594 asm_fprintf (file, "%s", "m1");
6595 break;
6596 case riscv_vector::LMUL_2:
6597 asm_fprintf (file, "%s", "m2");
6598 break;
6599 case riscv_vector::LMUL_4:
6600 asm_fprintf (file, "%s", "m4");
6601 break;
6602 case riscv_vector::LMUL_8:
6603 asm_fprintf (file, "%s", "m8");
6604 break;
6605 case riscv_vector::LMUL_F8:
6606 asm_fprintf (file, "%s", "mf8");
6607 break;
6608 case riscv_vector::LMUL_F4:
6609 asm_fprintf (file, "%s", "mf4");
6610 break;
6611 case riscv_vector::LMUL_F2:
6612 asm_fprintf (file, "%s", "mf2");
6613 break;
6614 default:
6615 gcc_unreachable ();
6618 else
6619 output_operand_lossage ("invalid vector constant");
6620 break;
6622 case 'p': {
6623 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
6625 /* Print an RVV mask operand.
6626 If OP is a register, print ",v0.t";
6627 otherwise, don't print anything. */
6628 if (code == REG)
6629 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
6631 else if (code == CONST_INT)
6633 /* Tail && Mask policy. */
6634 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
6636 else
6637 output_operand_lossage ("invalid vector constant");
6638 break;
6640 case 'h':
6641 if (code == HIGH)
6642 op = XEXP (op, 0);
6643 riscv_print_operand_reloc (file, op, true);
6644 break;
6646 case 'R':
6647 riscv_print_operand_reloc (file, op, false);
6648 break;
6650 case 'C':
6651 /* The RTL names match the instruction names. */
6652 fputs (GET_RTX_NAME (code), file);
6653 break;
6655 case 'N':
6656 /* The RTL names match the instruction names. */
6657 fputs (GET_RTX_NAME (reverse_condition (code)), file);
6658 break;
6660 case 'A': {
6661 const enum memmodel model = memmodel_base (INTVAL (op));
6662 if (riscv_memmodel_needs_amo_acquire (model)
6663 && riscv_memmodel_needs_amo_release (model))
6664 fputs (".aqrl", file);
6665 else if (riscv_memmodel_needs_amo_acquire (model))
6666 fputs (".aq", file);
6667 else if (riscv_memmodel_needs_amo_release (model))
6668 fputs (".rl", file);
6669 break;
6672 case 'I': {
6673 const enum memmodel model = memmodel_base (INTVAL (op));
6674 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
6675 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
6676 break;
6677 else if (model == MEMMODEL_SEQ_CST)
6678 fputs (".aqrl", file);
6679 else if (riscv_memmodel_needs_amo_acquire (model))
6680 fputs (".aq", file);
6681 break;
6684 case 'J': {
6685 const enum memmodel model = memmodel_base (INTVAL (op));
6686 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
6687 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
6688 fputs (".rl", file);
6689 else if (TARGET_ZTSO)
6690 break;
6691 else if (riscv_memmodel_needs_amo_release (model))
6692 fputs (".rl", file);
6693 break;
6696 case 'L':
6698 const char *ntl_hint = NULL;
6699 switch (INTVAL (op))
6701 case 0:
6702 ntl_hint = "ntl.all";
6703 break;
6704 case 1:
6705 ntl_hint = "ntl.pall";
6706 break;
6707 case 2:
6708 ntl_hint = "ntl.p1";
6709 break;
6712 if (ntl_hint)
6713 asm_fprintf (file, "%s\n\t", ntl_hint);
6714 break;
6717 case 'i':
6718 if (code != REG)
6719 fputs ("i", file);
6720 break;
6722 case 'B':
6723 fputs (GET_RTX_NAME (code), file);
6724 break;
6726 case 'S':
6728 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6729 output_addr_const (file, newop);
6730 break;
6732 case 'T':
6734 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6735 output_addr_const (file, newop);
6736 break;
6738 case 'X':
6740 int ival = INTVAL (op) + 1;
6741 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6742 output_addr_const (file, newop);
6743 break;
6745 case 'Y':
6747 unsigned int imm = (UINTVAL (op) & 63);
6748 gcc_assert (imm <= 63);
6749 rtx newop = GEN_INT (imm);
6750 output_addr_const (file, newop);
6751 break;
6753 default:
6754 switch (code)
6756 case REG:
6757 if (letter && letter != 'z')
6758 output_operand_lossage ("invalid use of '%%%c'", letter);
6759 fprintf (file, "%s", reg_names[REGNO (op)]);
6760 break;
6762 case MEM:
6763 if (letter && letter != 'z')
6764 output_operand_lossage ("invalid use of '%%%c'", letter);
6765 else
6766 output_address (mode, XEXP (op, 0));
6767 break;
6769 case CONST_DOUBLE:
6771 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6773 fputs (reg_names[GP_REG_FIRST], file);
6774 break;
6777 int fli_index = riscv_float_const_rtx_index_for_fli (op);
6778 if (fli_index == -1 || fli_index > 31)
6780 output_operand_lossage ("invalid use of '%%%c'", letter);
6781 break;
6783 asm_fprintf (file, "%s", fli_value_print[fli_index]);
6784 break;
6787 default:
6788 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6789 fputs (reg_names[GP_REG_FIRST], file);
6790 else if (letter && letter != 'z')
6791 output_operand_lossage ("invalid use of '%%%c'", letter);
6792 else
6793 output_addr_const (file, riscv_strip_unspec_address (op));
6794 break;
6799 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
6800 static bool
6801 riscv_print_operand_punct_valid_p (unsigned char code)
6803 return (code == '~');
6806 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6808 static void
6809 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6811 struct riscv_address_info addr;
6813 if (th_print_operand_address (file, mode, x))
6814 return;
6816 if (riscv_classify_address (&addr, x, word_mode, true))
6817 switch (addr.type)
6819 case ADDRESS_REG:
6820 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
6821 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6822 return;
6824 case ADDRESS_LO_SUM:
6825 riscv_print_operand_reloc (file, addr.offset, false);
6826 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
6827 return;
6829 case ADDRESS_CONST_INT:
6830 output_addr_const (file, x);
6831 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
6832 return;
6834 case ADDRESS_SYMBOLIC:
6835 output_addr_const (file, riscv_strip_unspec_address (x));
6836 return;
6838 default:
6839 gcc_unreachable ();
6842 gcc_unreachable ();
6845 static bool
6846 riscv_size_ok_for_small_data_p (int size)
6848 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
6851 /* Return true if EXP should be placed in the small data section. */
6853 static bool
6854 riscv_in_small_data_p (const_tree x)
6856 /* default_use_anchors_for_symbol_p doesn't gather small data, so the
6857 anchor symbol cannot be used to address nearby objects. In the large
6858 code model, the anchor optimization gives better results, so skip small data. */
6859 if (riscv_cmodel == CM_LARGE)
6860 return false;
6862 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
6863 return false;
6865 if (VAR_P (x) && DECL_SECTION_NAME (x))
6867 const char *sec = DECL_SECTION_NAME (x);
6868 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
6871 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
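/* Illustrative example (not part of the compiler): assuming the default
   small-data limit of 8 bytes, an 8-byte global such as `long counter;`
   passes riscv_size_ok_for_small_data_p and is placed in .sdata/.sbss for
   gp-relative addressing, while a 16-byte object is not.  */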
6874 /* Switch to the appropriate section for output of DECL. */
6876 static section *
6877 riscv_select_section (tree decl, int reloc,
6878 unsigned HOST_WIDE_INT align)
6880 switch (categorize_decl_for_section (decl, reloc))
6882 case SECCAT_SRODATA:
6883 return get_named_section (decl, ".srodata", reloc);
6885 default:
6886 return default_elf_select_section (decl, reloc, align);
6890 /* Switch to the appropriate section for output of DECL. */
6892 static void
6893 riscv_unique_section (tree decl, int reloc)
6895 const char *prefix = NULL;
6896 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6898 switch (categorize_decl_for_section (decl, reloc))
6900 case SECCAT_SRODATA:
6901 prefix = one_only ? ".sr" : ".srodata";
6902 break;
6904 default:
6905 break;
6907 if (prefix)
6909 const char *name, *linkonce;
6910 char *string;
6912 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6913 name = targetm.strip_name_encoding (name);
6915 /* If we're using one_only, then there needs to be a .gnu.linkonce
6916 prefix to the section name. */
6917 linkonce = one_only ? ".gnu.linkonce" : "";
6919 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6921 set_decl_section_name (decl, string);
6922 return;
6924 default_unique_section (decl, reloc);
6927 /* Constant pools are per-function when in the large code model. */
6929 static inline bool
6930 riscv_can_use_per_function_literal_pools_p (void)
6932 return riscv_cmodel == CM_LARGE;
6935 static bool
6936 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6938 /* We can't use blocks for constants when we're using a per-function
6939 constant pool. */
6940 return !riscv_can_use_per_function_literal_pools_p ();
6943 /* Return a section for X, handling small data. */
6945 static section *
6946 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6947 unsigned HOST_WIDE_INT align)
6949 /* The literal pool stays with the function. */
6950 if (riscv_can_use_per_function_literal_pools_p ())
6951 return function_section (current_function_decl);
6953 section *s = default_elf_select_rtx_section (mode, x, align);
6955 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6957 if (startswith (s->named.name, ".rodata.cst"))
6959 /* Rename .rodata.cst* to .srodata.cst*. */
6960 char *name = (char *) alloca (strlen (s->named.name) + 2);
6961 sprintf (name, ".s%s", s->named.name + 1);
6962 return get_section (name, s->named.common.flags, NULL);
6965 if (s == data_section)
6966 return sdata_section;
6969 return s;
6972 /* Make the last instruction frame-related and note that it performs
6973 the operation described by FRAME_PATTERN. */
6975 static void
6976 riscv_set_frame_expr (rtx frame_pattern)
6978 rtx insn;
6980 insn = get_last_insn ();
6981 RTX_FRAME_RELATED_P (insn) = 1;
6982 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6983 frame_pattern,
6984 REG_NOTES (insn));
6987 /* Return a frame-related rtx that stores REG at MEM.
6988 REG must be a single register. */
6990 static rtx
6991 riscv_frame_set (rtx mem, rtx reg)
6993 rtx set = gen_rtx_SET (mem, reg);
6994 RTX_FRAME_RELATED_P (set) = 1;
6995 return set;
6998 /* Returns true if the current function might contain a far jump. */
7000 static bool
7001 riscv_far_jump_used_p ()
7003 size_t func_size = 0;
7005 if (cfun->machine->far_jump_used)
7006 return true;
7008 /* We can't change far_jump_used during or after reload, as there is
7009 no chance to change stack frame layout. So we must rely on the
7010 conservative heuristic below having done the right thing. */
7011 if (reload_in_progress || reload_completed)
7012 return false;
7014 /* Estimate the function length. */
7015 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
7016 func_size += get_attr_length (insn);
7018 /* Conservatively determine whether some jump might exceed 1 MiB
7019 displacement. */
7020 if (func_size * 2 >= 0x100000)
7021 cfun->machine->far_jump_used = true;
7023 return cfun->machine->far_jump_used;
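/* Illustrative arithmetic (not part of the compiler): the heuristic above
   trips once the estimated function length reaches 0x80000 bytes (512 KiB),
   since 0x80000 * 2 == 0x100000, the 1 MiB reach of a 21-bit pc-relative
   jump displacement.  */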
7026 /* Return true, if the current function must save the incoming return
7027 address. */
7029 static bool
7030 riscv_save_return_addr_reg_p (void)
7032 /* The $ra register is call-clobbered: if this is not a leaf function,
7033 save it. */
7034 if (!crtl->is_leaf)
7035 return true;
7037 /* We need to save the incoming return address if __builtin_eh_return
7038 is being used to set a different return address. */
7039 if (crtl->calls_eh_return)
7040 return true;
7042 /* Far jumps/branches use $ra as a temporary to set up the target jump
7043 location (clobbering the incoming return address). */
7044 if (riscv_far_jump_used_p ())
7045 return true;
7047 /* We need to save it if anyone has used it. */
7048 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
7049 return true;
7051 /* A leaf function needs ra saved when a frame pointer is required,
7052 unless -momit-leaf-frame-pointer is in effect. */
7053 if (frame_pointer_needed && crtl->is_leaf
7054 && !TARGET_OMIT_LEAF_FRAME_POINTER)
7055 return true;
7057 return false;
7060 /* Return true if the current function must save register REGNO. */
7062 static bool
7063 riscv_save_reg_p (unsigned int regno)
7065 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
7066 bool might_clobber = crtl->saves_all_registers
7067 || df_regs_ever_live_p (regno);
7069 if (call_saved && might_clobber)
7070 return true;
7072 /* Save callee-saved V registers. */
7073 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
7074 && might_clobber)
7075 return true;
7077 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
7078 return true;
7080 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
7081 return true;
7083 /* If this is an interrupt handler, then we must save extra registers. */
7084 if (cfun->machine->interrupt_handler_p)
7086 /* zero register is always zero. */
7087 if (regno == GP_REG_FIRST)
7088 return false;
7090 /* The function will return the stack pointer to its original value. */
7091 if (regno == STACK_POINTER_REGNUM)
7092 return false;
7094 /* By convention, we assume that gp and tp are safe. */
7095 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
7096 return false;
7098 /* We must save every register used in this function. If this is not a
7099 leaf function, then we must save all temporary registers. */
7100 if (df_regs_ever_live_p (regno)
7101 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7102 return true;
7105 return false;
7108 /* Return TRUE if Zcmp push and pop insns should be
7109 avoided, FALSE otherwise.
7110 Only use multi push & pop if all masked GPRs can be covered,
7111 stack access is SP-based,
7112 GPRs are at the top of the stack frame,
7113 and stack allocation does not conflict with other features. */
7114 static bool
7115 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
7117 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
7118 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
7119 || crtl->args.pretend_args_size != 0
7120 || (use_shrink_wrapping_separate ()
7121 && !riscv_avoid_shrink_wrapping_separate ())
7122 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
7123 return true;
7125 return false;
7128 /* Determine whether to use multi push insn. */
7129 static bool
7130 riscv_use_multi_push (const struct riscv_frame_info *frame)
7132 if (riscv_avoid_multi_push (frame))
7133 return false;
7135 return (frame->multi_push_adj_base != 0);
7138 /* Return TRUE if a libcall to save/restore GPRs should be
7139 avoided. FALSE otherwise. */
7140 static bool
7141 riscv_avoid_save_libcall (void)
7143 if (!TARGET_SAVE_RESTORE
7144 || crtl->calls_eh_return
7145 || frame_pointer_needed
7146 || cfun->machine->interrupt_handler_p
7147 || cfun->machine->varargs_size != 0
7148 || crtl->args.pretend_args_size != 0)
7149 return true;
7151 return false;
7154 /* Determine whether to call GPR save/restore routines. */
7155 static bool
7156 riscv_use_save_libcall (const struct riscv_frame_info *frame)
7158 if (riscv_avoid_save_libcall ())
7159 return false;
7161 return frame->save_libcall_adjustment != 0;
7164 /* Determine which GPR save/restore routine to call. */
7166 static unsigned
7167 riscv_save_libcall_count (unsigned mask)
7169 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
7170 if (BITSET_P (mask, n))
7171 return CALLEE_SAVED_REG_NUMBER (n) + 1;
7172 abort ();
7175 /* Calculate the number of s-registers in multi push and pop.
7176 Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
7177 static unsigned
7178 riscv_multi_push_sregs_count (unsigned mask)
7180 unsigned num = riscv_save_libcall_count (mask);
7181 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
7182 : num;
7185 /* Calculate the number of registers (ra, s0-sx) in multi push and pop. */
7186 static unsigned
7187 riscv_multi_push_regs_count (unsigned mask)
7189 /* 1 is for ra. */
7190 return riscv_multi_push_sregs_count (mask) + 1;
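/* Illustrative example (not part of the compiler): if the highest saved
   s-register is s2, riscv_save_libcall_count returns 3, so a Zcmp push
   covers {ra, s0-s2} and this function returns 4.  A mask whose count would
   be the invalid {s0-s10} value is widened to the {s0-s11} count.  */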
7193 /* Handle 16-byte alignment for poly_int. */
7194 static poly_int64
7195 riscv_16bytes_align (poly_int64 value)
7197 return aligned_upper_bound (value, 16);
7200 static HOST_WIDE_INT
7201 riscv_16bytes_align (HOST_WIDE_INT value)
7203 return ROUND_UP (value, 16);
7206 /* Handle stack alignment for poly_int. */
7207 static poly_int64
7208 riscv_stack_align (poly_int64 value)
7210 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
7213 static HOST_WIDE_INT
7214 riscv_stack_align (HOST_WIDE_INT value)
7216 return RISCV_STACK_ALIGN (value);
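/* Illustrative arithmetic (not part of the compiler): riscv_16bytes_align (24)
   rounds up to 32; assuming the usual 128-bit PREFERRED_STACK_BOUNDARY,
   riscv_stack_align (40) rounds up to 48.  */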
7219 /* Populate the current function's riscv_frame_info structure.
7221 RISC-V stack frames grow downward. High addresses are at the top.
7223 +-------------------------------+
7225 | incoming stack arguments |
7227 +-------------------------------+ <-- incoming stack pointer
7229 | callee-allocated save area |
7230 | for arguments that are |
7231 | split between registers and |
7232 | the stack |
7234 +-------------------------------+ <-- arg_pointer_rtx
7236 | callee-allocated save area |
7237 | for register varargs |
7239 +-------------------------------+ <-- hard_frame_pointer_rtx;
7240 | | stack_pointer_rtx + gp_sp_offset
7241 | GPR save area | + UNITS_PER_WORD
7243 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
7244 | | + UNITS_PER_FP_REG
7245 | FPR save area |
7247 +-------------------------------+ <-- stack_pointer_rtx
7248 | | + v_sp_offset_top
7249 | Vector Registers save area |
7251 | ----------------------------- | <-- stack_pointer_rtx
7252 | padding | + v_sp_offset_bottom
7253 +-------------------------------+ <-- frame_pointer_rtx (virtual)
7255 | local variables |
7257 P +-------------------------------+
7259 | outgoing stack arguments |
7261 +-------------------------------+ <-- stack_pointer_rtx
7263 Dynamic stack allocations such as alloca insert data at point P.
7264 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
7265 hard_frame_pointer_rtx unchanged. */
7267 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
7269 static void
7270 riscv_compute_frame_info (void)
7272 struct riscv_frame_info *frame;
7273 poly_int64 offset;
7274 bool interrupt_save_prologue_temp = false;
7275 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
7276 unsigned int num_v_saved = 0;
7278 frame = &cfun->machine->frame;
7280 /* In an interrupt function, there are two cases in which t0 needs to be used:
7281 1. If we have a large frame, then we need to save/restore t0. We check for
7282 this before clearing the frame struct.
7283 2. We need to save and restore some CSRs in the frame. */
7284 if (cfun->machine->interrupt_handler_p)
7286 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
7287 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
7288 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
7289 interrupt_save_prologue_temp = true;
7292 frame->reset ();
7294 if (!cfun->machine->naked_p)
7296 /* Find out which GPRs we need to save. */
7297 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7298 if (riscv_save_reg_p (regno)
7299 || (interrupt_save_prologue_temp
7300 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
7301 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7303 /* If this function calls eh_return, we must also save and restore the
7304 EH data registers. */
7305 if (crtl->calls_eh_return)
7306 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7307 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7309 /* Find out which FPRs we need to save. This loop must iterate over
7310 the same space as its companion in riscv_for_each_saved_reg. */
7311 if (TARGET_HARD_FLOAT)
7312 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7313 if (riscv_save_reg_p (regno))
7314 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
7316 /* Find out which V registers we need to save. */
7317 if (TARGET_VECTOR)
7318 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7319 if (riscv_save_reg_p (regno))
7321 frame->vmask |= 1 << (regno - V_REG_FIRST);
7322 num_v_saved++;
7326 if (frame->mask)
7328 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
7330 /* 1 is for ra. */
7331 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
7332 /* Only use save/restore routines if they don't alter the stack size. */
7333 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
7334 && !riscv_avoid_save_libcall ())
7336 /* The libcall saves/restores 3 registers at once, so we need to
7337 allocate 12 bytes for the callee-saved registers. */
7338 if (TARGET_RVE)
7339 x_save_size = 3 * UNITS_PER_WORD;
7341 frame->save_libcall_adjustment = x_save_size;
7344 if (!riscv_avoid_multi_push (frame))
7346 /* num(ra, s0-sx) */
7347 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
7348 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
7349 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
7353 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
7354 if (cfun->machine->interrupt_handler_p
7355 && ((TARGET_HARD_FLOAT && frame->fmask)
7356 || (TARGET_ZFINX
7357 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
7358 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7359 /* Save and restore FCSR. */
7360 /* TODO: When P or V extensions support interrupts, some of their CSRs
7361 may also need to be saved and restored. */
7362 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
7364 /* At the bottom of the frame are any outgoing stack arguments. */
7365 offset = riscv_stack_align (crtl->outgoing_args_size);
7366 /* Next are local stack variables. */
7367 offset += riscv_stack_align (get_frame_size ());
7368 /* The virtual frame pointer points above the local variables. */
7369 frame->frame_pointer_offset = offset;
7370 /* Next are the callee-saved VRs. */
7371 if (frame->vmask)
7372 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
7373 frame->v_sp_offset_top = offset;
7374 frame->v_sp_offset_bottom
7375 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
7376 /* Next are the callee-saved FPRs. */
7377 if (frame->fmask)
7378 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
7379 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
7380 /* Next are the callee-saved GPRs. */
7381 if (frame->mask)
7383 offset += x_save_size;
7384 /* Align to 16 bytes and add padding to the GPR part to honor
7385 both stack alignment and Zcmp push/pop size alignment. */
7386 if (riscv_use_multi_push (frame)
7387 && known_lt (offset, frame->multi_push_adj_base
7388 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
7389 offset = riscv_16bytes_align (offset);
7391 frame->gp_sp_offset = offset - UNITS_PER_WORD;
7392 /* The hard frame pointer points above the callee-saved GPRs. */
7393 frame->hard_frame_pointer_offset = offset;
7394 /* Above the hard frame pointer is the callee-allocated varargs save area. */
7395 offset += riscv_stack_align (cfun->machine->varargs_size);
7396 /* Next is the callee-allocated area for pretend stack arguments. */
7397 offset += riscv_stack_align (crtl->args.pretend_args_size);
7398 /* Arg pointer must be below pretend args, but must be above alignment
7399 padding. */
7400 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
7401 frame->total_size = offset;
7403 /* Next come the incoming stack pointer and any incoming arguments. */
7406 /* Make sure that we're not trying to eliminate to the wrong hard frame
7407 pointer. */
7409 static bool
7410 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7412 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
7415 /* Helper to determine if reg X pertains to the stack. */
7416 bool
7417 riscv_reg_frame_related (rtx x)
7419 return REG_P (x)
7420 && (REGNO (x) == FRAME_POINTER_REGNUM
7421 || REGNO (x) == HARD_FRAME_POINTER_REGNUM
7422 || REGNO (x) == ARG_POINTER_REGNUM
7423 || REGNO (x) == VIRTUAL_STACK_VARS_REGNUM);
7426 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
7427 or argument pointer. TO is either the stack pointer or hard frame
7428 pointer. */
7430 poly_int64
7431 riscv_initial_elimination_offset (int from, int to)
7433 poly_int64 src, dest;
7435 riscv_compute_frame_info ();
7437 if (to == HARD_FRAME_POINTER_REGNUM)
7438 dest = cfun->machine->frame.hard_frame_pointer_offset;
7439 else if (to == STACK_POINTER_REGNUM)
7440 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
7441 else
7442 gcc_unreachable ();
7444 if (from == FRAME_POINTER_REGNUM)
7445 src = cfun->machine->frame.frame_pointer_offset;
7446 else if (from == ARG_POINTER_REGNUM)
7447 src = cfun->machine->frame.arg_pointer_offset;
7448 else
7449 gcc_unreachable ();
7451 return src - dest;
7454 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
7455 previous frame. */
7458 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7460 if (count != 0)
7461 return const0_rtx;
7463 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
7466 /* Emit code to change the current function's return address to
7467 ADDRESS. SCRATCH is available as a scratch register, if needed.
7468 ADDRESS and SCRATCH are both word-mode GPRs. */
7470 void
7471 riscv_set_return_address (rtx address, rtx scratch)
7473 rtx slot_address;
7475 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
7476 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
7477 cfun->machine->frame.gp_sp_offset.to_constant());
7478 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
7481 /* Save register REG to MEM. Make the instruction frame-related. */
7483 static void
7484 riscv_save_reg (rtx reg, rtx mem)
7486 riscv_emit_move (mem, reg);
7487 riscv_set_frame_expr (riscv_frame_set (mem, reg));
7490 /* Restore register REG from MEM. */
7492 static void
7493 riscv_restore_reg (rtx reg, rtx mem)
7495 rtx insn = riscv_emit_move (reg, mem);
7496 rtx dwarf = NULL_RTX;
7497 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7499 if (known_gt (epilogue_cfa_sp_offset, 0)
7500 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
7502 rtx cfa_adjust_rtx
7503 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7504 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
7505 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7508 REG_NOTES (insn) = dwarf;
7509 RTX_FRAME_RELATED_P (insn) = 1;
7512 /* A function to save or store a register. The first argument is the
7513 register and the second is the stack slot. */
7514 typedef void (*riscv_save_restore_fn) (rtx, rtx);
7516 /* Use FN to save or restore register REGNO. MODE is the register's
7517 mode and OFFSET is the offset of its save slot from the current
7518 stack pointer. */
7520 static void
7521 riscv_save_restore_reg (machine_mode mode, int regno,
7522 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
7524 rtx mem;
7526 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
7527 fn (gen_rtx_REG (mode, regno), mem);
7530 /* Return the next register, from REGNO up to LIMIT, for the callee
7531 to save or restore. OFFSET will be adjusted accordingly.
7532 If INC is set, then REGNO will be incremented first.
7533 Returns INVALID_REGNUM if there is no such next register. */
7535 static unsigned int
7536 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
7537 HOST_WIDE_INT *offset, bool inc = true)
7539 if (inc)
7540 regno++;
7542 while (regno <= limit)
7544 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7546 *offset = *offset - UNITS_PER_WORD;
7547 return regno;
7550 regno++;
7552 return INVALID_REGNUM;
7555 /* Return TRUE if the provided REGNO is an eh return data register. */
7557 static bool
7558 riscv_is_eh_return_data_register (unsigned int regno)
7560 unsigned int i, regnum;
7562 if (!crtl->calls_eh_return)
7563 return false;
7565 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7566 if (regno == regnum)
7568 return true;
7571 return false;
7574 /* Call FN for each register that is saved by the current function.
7575 SP_OFFSET is the offset of the current stack pointer from the start
7576 of the frame. */
7578 static void
7579 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
7580 bool epilogue, bool maybe_eh_return)
7582 HOST_WIDE_INT offset, first_fp_offset;
7583 unsigned int regno, num_masked_fp = 0;
7584 unsigned int start = GP_REG_FIRST;
7585 unsigned int limit = GP_REG_LAST;
7587 /* Save the link register and s-registers. */
7588 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
7589 + UNITS_PER_WORD;
7590 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
7591 regno != INVALID_REGNUM;
7592 regno = riscv_next_saved_reg (regno, limit, &offset))
7594 if (cfun->machine->reg_is_wrapped_separately[regno])
7595 continue;
7597 /* If this is a normal return in a function that calls the eh_return
7598 builtin, then do not restore the eh return data registers as that
7599 would clobber the return value. But we do still need to save them
7600 in the prologue, and restore them for an exception return, so we
7601 need special handling here. */
7602 if (epilogue && !maybe_eh_return
7603 && riscv_is_eh_return_data_register (regno))
7604 continue;
7606 /* In an interrupt function, save and restore some necessary CSRs on the stack
7607 so that their values are not changed by the handler. */
7608 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
7609 && cfun->machine->interrupt_handler_p
7610 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
7611 || (TARGET_ZFINX
7612 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7614 /* Always assume FCSR occupies UNITS_PER_WORD to prevent the stack
7615 offset from becoming misaligned later. */
7616 unsigned int fcsr_size = UNITS_PER_WORD;
7617 if (!epilogue)
7619 riscv_save_restore_reg (word_mode, regno, offset, fn);
7620 offset -= fcsr_size;
7621 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
7622 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7623 offset, riscv_save_reg);
7625 else
7627 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7628 offset - fcsr_size, riscv_restore_reg);
7629 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
7630 riscv_save_restore_reg (word_mode, regno, offset, fn);
7631 offset -= fcsr_size;
7633 continue;
7636 if (TARGET_XTHEADMEMPAIR)
7638 /* Get the next reg/offset pair. */
7639 HOST_WIDE_INT offset2 = offset;
7640 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
7642 /* Validate everything before emitting a mempair instruction. */
7643 if (regno2 != INVALID_REGNUM
7644 && !cfun->machine->reg_is_wrapped_separately[regno2]
7645 && !(epilogue && !maybe_eh_return
7646 && riscv_is_eh_return_data_register (regno2)))
7648 bool load_p = (fn == riscv_restore_reg);
7649 rtx operands[4];
7650 th_mempair_prepare_save_restore_operands (operands,
7651 load_p, word_mode,
7652 regno, offset,
7653 regno2, offset2);
7655 /* If the operands fit into a mempair insn, then emit one. */
7656 if (th_mempair_operands_p (operands, load_p, word_mode))
7658 th_mempair_save_restore_regs (operands, load_p, word_mode);
7659 offset = offset2;
7660 regno = regno2;
7661 continue;
7666 riscv_save_restore_reg (word_mode, regno, offset, fn);
7669 /* This loop must iterate over the same space as its companion in
7670 riscv_compute_frame_info. */
7671 first_fp_offset
7672 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
7673 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7674 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7676 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7677 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7678 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
7679 ? CALLEE_SAVED_FREG_NUMBER (regno)
7680 : num_masked_fp;
7681 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
7682 if (handle_reg)
7683 riscv_save_restore_reg (mode, regno, offset, fn);
7684 num_masked_fp++;
7688 /* Call FN for each V register that is saved by the current function. */
7690 static void
7691 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
7692 riscv_save_restore_fn fn, bool prologue)
7694 rtx vlen = NULL_RTX;
7695 if (cfun->machine->frame.vmask != 0)
7697 if (UNITS_PER_V_REG.is_constant ()
7698 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
7699 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
7700 else
7702 vlen = RISCV_PROLOGUE_TEMP (Pmode);
7703 rtx insn
7704 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
7705 RTX_FRAME_RELATED_P (insn) = 1;
7709 /* Select the mode where LMUL is 1 and SEW is largest. */
7710 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
7712 if (prologue)
7714 /* This loop must iterate over the same space as its companion in
7715 riscv_compute_frame_info. */
7716 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7717 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7719 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7720 if (handle_reg)
7722 rtx insn = NULL_RTX;
7723 if (CONST_INT_P (vlen))
7725 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
7726 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7727 stack_pointer_rtx,
7728 GEN_INT (-INTVAL (vlen))));
7730 else
7731 insn = emit_insn (
7732 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7733 gcc_assert (insn != NULL_RTX);
7734 RTX_FRAME_RELATED_P (insn) = 1;
7735 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7736 remaining_size -= UNITS_PER_V_REG;
7740 else
7742 /* This loop must iterate over the same space as its companion in
7743 riscv_compute_frame_info. */
7744 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
7745 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
7747 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7748 if (handle_reg)
7750 riscv_save_restore_reg (m1_mode, regno, 0, fn);
7751 rtx insn = emit_insn (
7752 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
7753 gcc_assert (insn != NULL_RTX);
7754 RTX_FRAME_RELATED_P (insn) = 1;
7755 remaining_size -= UNITS_PER_V_REG;
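/* A worked sketch of the loops above (illustration only): each saved V
   register gets its own VLEN-sized slot, carved out by decrementing SP
   and storing at offset 0 from the new SP; the epilogue loads at
   offset 0 and then increments SP by VLEN, walking the registers in
   reverse order so that each load sees the SP its store used.  */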
7761 /* For stack frames that can't be allocated with a single ADDI instruction,
7762 compute the best value to initially allocate. It must at a minimum
7763 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
7764 try to pick a value that will allow compression of the register saves
7765 without adding extra instructions. */
7767 static HOST_WIDE_INT
7768 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
7770 HOST_WIDE_INT remaining_const_size;
7771 if (!remaining_size.is_constant ())
7772 remaining_const_size
7773 = riscv_stack_align (remaining_size.coeffs[0])
7774 - riscv_stack_align (remaining_size.coeffs[1]);
7775 else
7776 remaining_const_size = remaining_size.to_constant ();
7778 /* The first step must reach the top of the vector register save area if
7779 any vector registers need to be preserved. */
7780 if (frame->vmask != 0)
7781 return (remaining_size - frame->v_sp_offset_top).to_constant ();
7783 if (SMALL_OPERAND (remaining_const_size))
7784 return remaining_const_size;
7786 poly_int64 callee_saved_first_step =
7787 remaining_size - frame->frame_pointer_offset;
7788 gcc_assert (callee_saved_first_step.is_constant ());
7789 HOST_WIDE_INT min_first_step =
7790 riscv_stack_align (callee_saved_first_step.to_constant ());
7791 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
7792 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
7793 gcc_assert (min_first_step <= max_first_step);
7795 /* As an optimization, use the least-significant bits of the total frame
7796 size, so that the second adjustment step is just LUI + ADD. */
7797 if (!SMALL_OPERAND (min_second_step)
7798 && remaining_const_size % IMM_REACH <= max_first_step
7799 && remaining_const_size % IMM_REACH >= min_first_step)
7800 return remaining_const_size % IMM_REACH;
7802 if (TARGET_RVC || TARGET_ZCA)
7804 /* If we need two subtracts, and one is small enough to allow compressed
7805 loads and stores, then put that one first. */
7806 if (IN_RANGE (min_second_step, 0,
7807 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
7808 return MAX (min_second_step, min_first_step);
7810 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
7811 with the minimum first step, so that we can get compressed loads and
7812 stores. */
7813 else if (!SMALL_OPERAND (min_second_step))
7814 return min_first_step;
7817 return max_first_step;
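/* A worked example of the IMM_REACH optimization above (illustrative;
   assumes IMM_REACH == 4096, i.e. 12-bit signed immediates): for a
   4240-byte constant frame, 4240 % 4096 == 144, so the first step is
   144 bytes, which keeps the register-save offsets small, and the
   second step is exactly 4096, which LUI + ADD can perform without an
   extra ADDI.  */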
7820 static rtx
7821 riscv_adjust_libcall_cfi_prologue ()
7823 rtx dwarf = NULL_RTX;
7824 rtx adjust_sp_rtx, reg, mem, insn;
7825 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7826 int offset;
7828 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7829 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7831 /* The save order is ra, s0, s1, s2 to s11. */
7832 if (regno == RETURN_ADDR_REGNUM)
7833 offset = saved_size - UNITS_PER_WORD;
7834 else if (regno == S0_REGNUM)
7835 offset = saved_size - UNITS_PER_WORD * 2;
7836 else if (regno == S1_REGNUM)
7837 offset = saved_size - UNITS_PER_WORD * 3;
7838 else
7839 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
7841 reg = gen_rtx_REG (Pmode, regno);
7842 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
7843 stack_pointer_rtx,
7844 offset));
7846 insn = gen_rtx_SET (mem, reg);
7847 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7850 /* Debug info for adjust sp. */
7851 adjust_sp_rtx =
7852 gen_rtx_SET (stack_pointer_rtx,
7853 gen_rtx_PLUS (GET_MODE (stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
7854 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7855 dwarf);
7856 return dwarf;
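/* Offsets implied by the save order above, for a hypothetical rv64
   frame with saved_size == 32 and {ra, s0, s1, s2} saved: ra at
   sp + 24, s0 at sp + 16, s1 at sp + 8 and s2 at sp + 0, i.e. each
   register one UNITS_PER_WORD below the previous one.  */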
7859 static rtx
7860 riscv_adjust_multi_push_cfi_prologue (int saved_size)
7862 rtx dwarf = NULL_RTX;
7863 rtx adjust_sp_rtx, reg, mem, insn;
7864 unsigned int mask = cfun->machine->frame.mask;
7865 int offset;
7866 int saved_cnt = 0;
7868 if (mask & S10_MASK)
7869 mask |= S11_MASK;
7871 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
7872 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
7874 /* The save order is s11 down to s0, then ra,
7875 from high to low address. */
7876 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
7878 reg = gen_rtx_REG (Pmode, regno);
7879 mem = gen_frame_mem (Pmode,
7880 plus_constant (Pmode, stack_pointer_rtx, offset));
7882 insn = gen_rtx_SET (mem, reg);
7883 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
7886 /* Debug info for adjust sp. */
7887 adjust_sp_rtx
7888 = gen_rtx_SET (stack_pointer_rtx,
7889 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
7890 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7891 return dwarf;
7894 static void
7895 riscv_emit_stack_tie (void)
7897 if (Pmode == SImode)
7898 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7899 else
7900 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
7903 /* Table of code_for_push_pop function pointers for zcmp multi push and pop. */
7904 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7905 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7906 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7907 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7908 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7909 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7910 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7911 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7912 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7913 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7914 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7915 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7916 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7917 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7918 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7919 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7920 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7921 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7922 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7923 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7924 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7925 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7926 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7927 {nullptr, nullptr, nullptr, nullptr},
7928 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7929 code_for_gpr_multi_popret_up_to_s11,
7930 code_for_gpr_multi_popretz_up_to_s11}};
7932 static rtx
7933 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7934 unsigned int regs_num)
7936 gcc_assert (op < ZCMP_OP_NUM);
7937 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
7938 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra. */
7939 rtx stack_adj = GEN_INT (adj_size);
7940 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
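/* Indexing sketch for the table above (illustration): REGS_NUM counts
   ra plus the saved s-registers, and row REGS_NUM - 1 selects the
   matching "up_to" pattern, e.g. pushing {ra, s0, ..., s4} gives
   REGS_NUM == 6 and row 5, code_for_gpr_multi_push_up_to_s4.  The
   nullptr row corresponds to {ra, s0, ..., s10}, a register list Zcmp
   cannot encode (s10 is only available together with s11), which is
   what the assert above rejects.  */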
7943 static unsigned
7944 get_multi_push_fpr_mask (unsigned max_fprs_push)
7946 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7947 for (unsigned regno = FP_REG_FIRST;
7948 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7949 if (riscv_save_reg_p (regno))
7950 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7951 return mask_fprs_push;
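/* Example of the helper above (illustrative): if fs0 (f8) and fs1 (f9)
   are the first FPRs that need saving and MAX_FPRS_PUSH == 2, the
   returned mask has bits 8 and 9 set relative to FP_REG_FIRST; any
   remaining FPRs are left for the normal save path.  */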
7954 /* Expand the "prologue" pattern. */
7956 void
7957 riscv_expand_prologue (void)
7959 struct riscv_frame_info *frame = &cfun->machine->frame;
7960 poly_int64 remaining_size = frame->total_size;
7961 unsigned mask = frame->mask;
7962 unsigned fmask = frame->fmask;
7963 int spimm, multi_push_additional, stack_adj;
7964 rtx insn, dwarf = NULL_RTX;
7965 unsigned th_int_mask = 0;
7967 if (flag_stack_usage_info)
7968 current_function_static_stack_size = constant_lower_bound (remaining_size);
7970 if (cfun->machine->naked_p)
7971 return;
7973 /* Prefer multi-push to the save-restore libcall. */
7974 if (riscv_use_multi_push (frame))
7976 remaining_size -= frame->multi_push_adj_base;
7977 /* If any vector registers need to be saved, the stack-pointer
7978 adjustment can reach at most the frame->v_sp_offset_top position,
7979 since the vector registers will need to be saved one by one by
7980 decrementing the SP later. */
7981 poly_int64 remaining_size_above_varea
7982 = frame->vmask != 0
7983 ? remaining_size - frame->v_sp_offset_top
7984 : remaining_size;
7986 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7987 spimm = 3;
7988 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7989 spimm = 2;
7990 else if (known_gt (remaining_size_above_varea, 0))
7991 spimm = 1;
7992 else
7993 spimm = 0;
7994 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7995 frame->multi_push_adj_addi = multi_push_additional;
7996 remaining_size -= multi_push_additional;
7998 /* Emit the multi-push insn and the dwarf info along with it. */
7999 stack_adj = frame->multi_push_adj_base + multi_push_additional;
8000 insn = emit_insn (riscv_gen_multi_push_pop_insn (
8001 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
8002 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
8003 RTX_FRAME_RELATED_P (insn) = 1;
8004 REG_NOTES (insn) = dwarf;
8006 /* Temporarily fib that we need not save GPRs. */
8007 frame->mask = 0;
8009 /* Push FPRs into the additional space reserved by cm.push. */
8010 if (fmask)
8012 unsigned mask_fprs_push
8013 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
8014 frame->fmask &= mask_fprs_push;
8015 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
8016 false);
8017 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs. */
8020 /* When optimizing for size, call a subroutine to save the registers. */
8021 else if (riscv_use_save_libcall (frame))
8023 rtx dwarf = NULL_RTX;
8024 dwarf = riscv_adjust_libcall_cfi_prologue ();
8026 remaining_size -= frame->save_libcall_adjustment;
8027 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
8028 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
8030 RTX_FRAME_RELATED_P (insn) = 1;
8031 REG_NOTES (insn) = dwarf;
8034 th_int_mask = th_int_get_mask (frame->mask);
8035 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8037 frame->mask &= ~th_int_mask;
8039 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for
8040 interrupts, such as fcsr. */
8041 if ((TARGET_HARD_FLOAT && frame->fmask)
8042 || (TARGET_ZFINX && frame->mask))
8043 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
8045 unsigned save_adjustment = th_int_get_save_adjustment ();
8046 frame->gp_sp_offset -= save_adjustment;
8047 remaining_size -= save_adjustment;
8049 insn = emit_insn (gen_th_int_push ());
8051 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
8052 RTX_FRAME_RELATED_P (insn) = 1;
8053 REG_NOTES (insn) = dwarf;
8056 /* Save the GP, FP registers. */
8057 if ((frame->mask | frame->fmask) != 0)
8059 if (known_gt (remaining_size, frame->frame_pointer_offset))
8061 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
8062 remaining_size -= step1;
8063 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8064 GEN_INT (-step1));
8065 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8067 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
8070 /* Undo the above fib. */
8071 frame->mask = mask;
8072 frame->fmask = fmask;
8074 /* Set up the frame pointer, if we're using one. */
8075 if (frame_pointer_needed)
8077 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8078 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
8079 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8081 riscv_emit_stack_tie ();
8084 /* Save the V registers. */
8085 if (frame->vmask != 0)
8086 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
8088 /* Allocate the rest of the frame. */
8089 if (known_gt (remaining_size, 0))
8091 /* Two-step adjustment:
8092 1. scalable frame; 2. constant frame. */
8093 poly_int64 scalable_frame (0, 0);
8094 if (!remaining_size.is_constant ())
8096 /* First for scalable frame. */
8097 poly_int64 scalable_frame = remaining_size;
8098 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
8099 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
8100 remaining_size -= scalable_frame;
8103 /* Second step for constant frame. */
8104 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
8105 if (constant_frame == 0)
8107 /* We must have allocated stack space for the scalable frame.
8108 Emit a stack tie if we have a frame pointer so that the
8109 allocation is ordered WRT fp setup and subsequent writes
8110 into the frame. */
8111 if (frame_pointer_needed)
8112 riscv_emit_stack_tie ();
8113 return;
8116 if (SMALL_OPERAND (-constant_frame))
8118 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8119 GEN_INT (-constant_frame));
8120 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8122 else if (SUM_OF_TWO_S12_ALGN (-constant_frame))
8124 HOST_WIDE_INT one, two;
8125 riscv_split_sum_of_two_s12 (-constant_frame, &one, &two);
8126 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8127 GEN_INT (one));
8128 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8129 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8130 GEN_INT (two));
8131 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8133 else
8135 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
8136 emit_insn (gen_add3_insn (stack_pointer_rtx,
8137 stack_pointer_rtx,
8138 RISCV_PROLOGUE_TEMP (Pmode)));
8140 /* Describe the effect of the previous instructions. */
8141 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
8142 insn = gen_rtx_SET (stack_pointer_rtx, insn);
8143 riscv_set_frame_expr (insn);
8146 /* We must have allocated the remainder of the stack frame.
8147 Emit a stack tie if we have a frame pointer so that the
8148 allocation is ordered WRT fp setup and subsequent writes
8149 into the frame. */
8150 if (frame_pointer_needed)
8151 riscv_emit_stack_tie ();
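/* A worked example of the multi-push sizing above (illustrative;
   assumes ZCMP_SP_INC_STEP == 16, the Zcmp spimm granule): if 48 bytes
   remain above the vector save area after multi_push_adj_base has been
   subtracted, then 48 > 2 * 16 gives spimm == 3, and cm.push absorbs
   all 48 bytes of extra adjustment that would otherwise need a
   separate ADDI.  */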
8155 static rtx
8156 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
8158 rtx dwarf = NULL_RTX;
8159 rtx adjust_sp_rtx, reg;
8160 unsigned int mask = cfun->machine->frame.mask;
8162 if (mask & S10_MASK)
8163 mask |= S11_MASK;
8165 /* Debug info for adjust sp. */
8166 adjust_sp_rtx
8167 = gen_rtx_SET (stack_pointer_rtx,
8168 plus_constant (Pmode, stack_pointer_rtx, saved_size));
8169 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
8171 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8172 if (BITSET_P (mask, regno - GP_REG_FIRST))
8174 reg = gen_rtx_REG (Pmode, regno);
8175 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8178 return dwarf;
8181 static rtx
8182 riscv_adjust_libcall_cfi_epilogue ()
8184 rtx dwarf = NULL_RTX;
8185 rtx adjust_sp_rtx, reg;
8186 int saved_size = cfun->machine->frame.save_libcall_adjustment;
8188 /* Debug info for adjust sp. */
8189 adjust_sp_rtx =
8190 gen_rtx_SET (stack_pointer_rtx,
8191 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
8192 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
8193 dwarf);
8195 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8196 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8198 reg = gen_rtx_REG (Pmode, regno);
8199 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8202 return dwarf;
8205 static void
8206 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
8207 unsigned multipop_size)
8209 rtx insn;
8210 unsigned regs_count = riscv_multi_push_regs_count (mask);
8212 if (!use_multi_pop_normal)
8213 insn = emit_insn (
8214 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
8215 else
8216 insn = emit_jump_insn (
8217 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
8219 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
8220 RTX_FRAME_RELATED_P (insn) = 1;
8221 REG_NOTES (insn) = dwarf;
8224 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
8225 style says which. */
8227 void
8228 riscv_expand_epilogue (int style)
8230 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
8231 deallocate before restoring the registers. STEP2 is the amount we
8232 should deallocate afterwards including the callee saved regs. STEP3
8233 is the amount deallocated by the save-restore libcall.
8235 Start off by assuming that no registers need to be restored. */
8236 struct riscv_frame_info *frame = &cfun->machine->frame;
8237 unsigned mask = frame->mask;
8238 unsigned fmask = frame->fmask;
8239 unsigned mask_fprs_push = 0;
8240 poly_int64 step2 = 0;
8241 bool use_multi_pop_normal
8242 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
8243 bool use_multi_pop_sibcall
8244 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
8245 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
8247 bool use_restore_libcall
8248 = !use_multi_pop
8249 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
8250 unsigned libcall_size = use_restore_libcall && !use_multi_pop
8251 ? frame->save_libcall_adjustment
8252 : 0;
8253 unsigned multipop_size
8254 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
8255 : 0;
8256 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
8257 unsigned th_int_mask = 0;
8258 rtx insn;
8260 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
8261 bool need_barrier_p = known_ne (get_frame_size ()
8262 + cfun->machine->frame.arg_pointer_offset, 0);
8264 if (cfun->machine->naked_p)
8266 gcc_assert (style == NORMAL_RETURN);
8268 emit_jump_insn (gen_return ());
8270 return;
8273 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
8275 emit_jump_insn (gen_return ());
8276 return;
8279 /* Reset the epilogue cfa info before starting to emit the epilogue. */
8280 epilogue_cfa_sp_offset = 0;
8282 /* Move past any dynamic stack allocations. */
8283 if (cfun->calls_alloca)
8285 /* Emit a barrier to prevent loads from a deallocated stack. */
8286 riscv_emit_stack_tie ();
8287 need_barrier_p = false;
8289 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
8290 rtx dwarf_adj = gen_int_mode (adjust_offset, Pmode);
8291 rtx adjust = NULL_RTX;
8292 bool sum_of_two_s12 = false;
8293 HOST_WIDE_INT one, two;
8295 if (!adjust_offset.is_constant ())
8297 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
8298 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
8299 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
8300 gen_int_mode (adjust_offset, Pmode));
8301 adjust = tmp1;
8303 else
8305 HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
8306 if (SMALL_OPERAND (adj_off_value))
8308 adjust = GEN_INT (adj_off_value);
8310 else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
8312 riscv_split_sum_of_two_s12 (adj_off_value, &one, &two);
8313 dwarf_adj = adjust = GEN_INT (one);
8314 sum_of_two_s12 = true;
8316 else
8318 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
8319 GEN_INT (adj_off_value));
8320 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8324 insn = emit_insn (
8325 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
8326 adjust));
8328 rtx dwarf = NULL_RTX;
8329 rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
8330 dwarf_adj);
8331 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
8332 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
8334 RTX_FRAME_RELATED_P (insn) = 1;
8336 REG_NOTES (insn) = dwarf;
8338 if (sum_of_two_s12)
8340 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8341 GEN_INT (two)));
8342 RTX_FRAME_RELATED_P (insn) = 1;
8346 if (use_restore_libcall || use_multi_pop)
8347 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8349 /* If we need to restore registers, deallocate as much stack as
8350 possible in the second step without going out of range. */
8351 if (use_multi_pop)
8353 if (frame->fmask
8354 && known_gt (frame->total_size - multipop_size,
8355 frame->frame_pointer_offset))
8356 step2
8357 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
8359 else if ((frame->mask | frame->fmask) != 0)
8360 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
8362 if (use_restore_libcall || use_multi_pop)
8363 frame->mask = mask; /* Undo the above fib. */
8365 poly_int64 step1;
8366 /* STEP1 must be set to the bottom of the vector register save area if
8367 any vector registers need to be preserved. */
8368 if (frame->vmask != 0)
8370 step1 = frame->v_sp_offset_bottom;
8371 step2 = frame->total_size - step1 - libcall_size - multipop_size;
8373 else
8374 step1 = frame->total_size - step2 - libcall_size - multipop_size;
8376 /* Set TARGET to BASE + STEP1. */
8377 if (known_gt (step1, 0))
8379 /* Emit a barrier to prevent loads from a deallocated stack. */
8380 riscv_emit_stack_tie ();
8381 need_barrier_p = false;
8383 /* Restore the scalable frame that was allocated in the prologue. */
8384 if (!step1.is_constant ())
8386 poly_int64 scalable_frame = step1;
8387 scalable_frame.coeffs[0] = step1.coeffs[1];
8388 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
8389 true);
8390 step1 -= scalable_frame;
8393 /* Get an rtx for STEP1 that we can add to BASE.
8394 Skip if the adjustment is zero. */
8395 HOST_WIDE_INT step1_value = step1.to_constant ();
8396 if (step1_value != 0)
8398 rtx adjust = GEN_INT (step1_value);
8399 if (SUM_OF_TWO_S12_ALGN (step1_value))
8401 HOST_WIDE_INT one, two;
8402 riscv_split_sum_of_two_s12 (step1_value, &one, &two);
8403 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8404 stack_pointer_rtx,
8405 GEN_INT (one)));
8406 RTX_FRAME_RELATED_P (insn) = 1;
8407 adjust = GEN_INT (two);
8409 else if (!SMALL_OPERAND (step1_value))
8411 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
8412 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8415 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8416 stack_pointer_rtx,
8417 adjust));
8418 rtx dwarf = NULL_RTX;
8419 rtx cfa_adjust_rtx
8420 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8421 gen_int_mode (step2 + libcall_size + multipop_size,
8422 Pmode));
8424 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8425 RTX_FRAME_RELATED_P (insn) = 1;
8427 REG_NOTES (insn) = dwarf;
8430 else if (frame_pointer_needed)
8432 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
8433 old value of FP. */
8434 epilogue_cfa_sp_offset = step2;
8437 if (use_multi_pop)
8439 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8440 if (fmask)
8442 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
8443 / UNITS_PER_WORD);
8444 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push. */
8447 else if (use_restore_libcall)
8448 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8450 th_int_mask = th_int_get_mask (frame->mask);
8451 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8453 frame->mask &= ~th_int_mask;
8455 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for
8456 interrupts, such as fcsr. */
8457 if ((TARGET_HARD_FLOAT && frame->fmask)
8458 || (TARGET_ZFINX && frame->mask))
8459 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
8462 /* Restore the registers. */
8463 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
8464 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
8465 - multipop_size,
8466 riscv_restore_reg, true, style == EXCEPTION_RETURN);
8468 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8470 frame->mask = mask; /* Undo the above fib. */
8471 unsigned save_adjustment = th_int_get_save_adjustment ();
8472 gcc_assert (step2.to_constant () >= save_adjustment);
8473 step2 -= save_adjustment;
8476 if (use_restore_libcall)
8477 frame->mask = mask; /* Undo the above fib. */
8479 if (need_barrier_p)
8480 riscv_emit_stack_tie ();
8482 /* Deallocate the final bit of the frame. */
8483 if (step2.to_constant () > 0)
8485 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8486 GEN_INT (step2.to_constant ())));
8488 rtx dwarf = NULL_RTX;
8489 rtx cfa_adjust_rtx
8490 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8491 GEN_INT (libcall_size + multipop_size));
8492 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8493 RTX_FRAME_RELATED_P (insn) = 1;
8495 REG_NOTES (insn) = dwarf;
8498 if (use_multi_pop)
8500 /* Restore FPRs pushed by cm.push. */
8501 frame->fmask = fmask & mask_fprs_push;
8502 if (frame->fmask)
8503 riscv_for_each_saved_reg (frame->total_size - libcall_size
8504 - multipop_size,
8505 riscv_restore_reg, true,
8506 style == EXCEPTION_RETURN);
8507 /* Undo the above fib. */
8508 frame->mask = mask;
8509 frame->fmask = fmask;
8510 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
8511 multipop_size);
8512 if (use_multi_pop_normal)
8513 return;
8515 else if (use_restore_libcall)
8517 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
8518 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
8519 RTX_FRAME_RELATED_P (insn) = 1;
8520 REG_NOTES (insn) = dwarf;
8522 emit_jump_insn (gen_gpr_restore_return (ra));
8523 return;
8526 /* Add in the __builtin_eh_return stack adjustment. */
8527 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
8528 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8529 EH_RETURN_STACKADJ_RTX));
8531 /* Return from interrupt. */
8532 if (cfun->machine->interrupt_handler_p)
8534 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
8536 gcc_assert (mode != UNKNOWN_MODE);
8538 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8539 emit_jump_insn (gen_th_int_pop ());
8540 else if (mode == MACHINE_MODE)
8541 emit_jump_insn (gen_riscv_mret ());
8542 else if (mode == SUPERVISOR_MODE)
8543 emit_jump_insn (gen_riscv_sret ());
8544 else
8545 emit_jump_insn (gen_riscv_uret ());
8547 else if (style != SIBCALL_RETURN)
8548 emit_jump_insn (gen_simple_return_internal (ra));
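/* A worked example of the step split above (illustrative): for a
   constant 4240-byte frame without a libcall or multi-pop, STEP2 is
   what riscv_first_stack_step returns (144 bytes, see the example
   there), so the epilogue first adds 4096 to SP, restores the saved
   registers at small offsets, and finally deallocates the remaining
   144 bytes.  */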
8551 /* Implement EPILOGUE_USES. */
8553 bool
8554 riscv_epilogue_uses (unsigned int regno)
8556 if (regno == RETURN_ADDR_REGNUM)
8557 return true;
8559 if (epilogue_completed && cfun->machine->interrupt_handler_p)
8561 /* An interrupt function restores temp regs, so we must indicate that
8562 they are live at function end. */
8563 if (df_regs_ever_live_p (regno)
8564 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
8565 return true;
8568 return false;
8571 static bool
8572 riscv_avoid_shrink_wrapping_separate ()
8574 if (riscv_use_save_libcall (&cfun->machine->frame)
8575 || cfun->machine->interrupt_handler_p
8576 || !cfun->machine->frame.gp_sp_offset.is_constant ())
8577 return true;
8579 return false;
8582 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
8584 static sbitmap
8585 riscv_get_separate_components (void)
8587 HOST_WIDE_INT offset;
8588 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8589 bitmap_clear (components);
8591 if (riscv_avoid_shrink_wrapping_separate ())
8592 return components;
8594 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8595 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8596 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8598 /* We can only wrap registers that have small operand offsets.
8599 For large offsets a pseudo register might be needed which
8600 cannot be created during the shrink wrapping pass. */
8601 if (SMALL_OPERAND (offset))
8602 bitmap_set_bit (components, regno);
8604 offset -= UNITS_PER_WORD;
8607 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8608 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8609 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8611 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8613 /* We can only wrap registers that have small operand offsets.
8614 For large offsets a pseudo register might be needed which
8615 cannot be created during the shrink wrapping pass. */
8616 if (SMALL_OPERAND (offset))
8617 bitmap_set_bit (components, regno);
8619 offset -= GET_MODE_SIZE (mode).to_constant ();
8622 /* Don't mess with the hard frame pointer. */
8623 if (frame_pointer_needed)
8624 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
8626 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
8628 return components;
8631 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
8633 static sbitmap
8634 riscv_components_for_bb (basic_block bb)
8636 bitmap in = DF_LIVE_IN (bb);
8637 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
8638 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
8640 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8641 bitmap_clear (components);
8643 function_abi_aggregator callee_abis;
8644 rtx_insn *insn;
8645 FOR_BB_INSNS (bb, insn)
8646 if (CALL_P (insn))
8647 callee_abis.note_callee_abi (insn_callee_abi (insn));
8648 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
8650 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
8651 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8652 if (!fixed_regs[regno]
8653 && !crtl->abi->clobbers_full_reg_p (regno)
8654 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8655 || bitmap_bit_p (in, regno)
8656 || bitmap_bit_p (gen, regno)
8657 || bitmap_bit_p (kill, regno)))
8658 bitmap_set_bit (components, regno);
8660 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8661 if (!fixed_regs[regno]
8662 && !crtl->abi->clobbers_full_reg_p (regno)
8663 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
8664 || bitmap_bit_p (in, regno)
8665 || bitmap_bit_p (gen, regno)
8666 || bitmap_bit_p (kill, regno)))
8667 bitmap_set_bit (components, regno);
8669 return components;
8672 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
8674 static void
8675 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
8677 /* Nothing to do for riscv. */
8680 static void
8681 riscv_process_components (sbitmap components, bool prologue_p)
8683 HOST_WIDE_INT offset;
8684 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
8686 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
8687 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8688 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8690 if (bitmap_bit_p (components, regno))
8691 riscv_save_restore_reg (word_mode, regno, offset, fn);
8693 offset -= UNITS_PER_WORD;
8696 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
8697 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8698 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8700 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8702 if (bitmap_bit_p (components, regno))
8703 riscv_save_restore_reg (mode, regno, offset, fn);
8705 offset -= GET_MODE_SIZE (mode).to_constant ();
8709 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
8711 static void
8712 riscv_emit_prologue_components (sbitmap components)
8714 riscv_process_components (components, true);
8717 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
8719 static void
8720 riscv_emit_epilogue_components (sbitmap components)
8722 riscv_process_components (components, false);
8725 static void
8726 riscv_set_handled_components (sbitmap components)
8728 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8729 if (bitmap_bit_p (components, regno))
8730 cfun->machine->reg_is_wrapped_separately[regno] = true;
8732 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8733 if (bitmap_bit_p (components, regno))
8734 cfun->machine->reg_is_wrapped_separately[regno] = true;
8737 /* Return nonzero if this function is known to have a null epilogue.
8738 This allows the optimizer to omit jumps to jumps if no stack
8739 was created. */
8741 bool
8742 riscv_can_use_return_insn (void)
8744 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
8745 && ! cfun->machine->interrupt_handler_p);
8748 /* Given that there exists at least one variable that is set (produced)
8749 by OUT_INSN and read (consumed) by IN_INSN, return true iff
8750 IN_INSN represents one or more memory store operations and none of
8751 the variables set by OUT_INSN is used by IN_INSN as the address of a
8752 store operation. If either IN_INSN or OUT_INSN does not represent
8753 a "single" RTL SET expression (as loosely defined by the
8754 implementation of the single_set function) or a PARALLEL with only
8755 SETs, CLOBBERs, and USEs inside, this function returns false.
8757 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
8758 conditions that result in assertion failures in the generic
8759 store_data_bypass_p function and returns FALSE in such cases.
8761 This is required to make -msave-restore work with the sifive-7
8762 pipeline description. */
8764 bool
8765 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
8767 rtx out_set, in_set;
8768 rtx out_pat, in_pat;
8769 rtx out_exp, in_exp;
8770 int i, j;
8772 in_set = single_set (in_insn);
8773 if (in_set)
8775 if (MEM_P (SET_DEST (in_set)))
8777 out_set = single_set (out_insn);
8778 if (!out_set)
8780 out_pat = PATTERN (out_insn);
8781 if (GET_CODE (out_pat) == PARALLEL)
8783 for (i = 0; i < XVECLEN (out_pat, 0); i++)
8785 out_exp = XVECEXP (out_pat, 0, i);
8786 if ((GET_CODE (out_exp) == CLOBBER)
8787 || (GET_CODE (out_exp) == USE))
8788 continue;
8789 else if (GET_CODE (out_exp) != SET)
8790 return false;
8796 else
8798 in_pat = PATTERN (in_insn);
8799 if (GET_CODE (in_pat) != PARALLEL)
8800 return false;
8802 for (i = 0; i < XVECLEN (in_pat, 0); i++)
8804 in_exp = XVECEXP (in_pat, 0, i);
8805 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
8806 continue;
8807 else if (GET_CODE (in_exp) != SET)
8808 return false;
8810 if (MEM_P (SET_DEST (in_exp)))
8812 out_set = single_set (out_insn);
8813 if (!out_set)
8815 out_pat = PATTERN (out_insn);
8816 if (GET_CODE (out_pat) != PARALLEL)
8817 return false;
8818 for (j = 0; j < XVECLEN (out_pat, 0); j++)
8820 out_exp = XVECEXP (out_pat, 0, j);
8821 if ((GET_CODE (out_exp) == CLOBBER)
8822 || (GET_CODE (out_exp) == USE))
8823 continue;
8824 else if (GET_CODE (out_exp) != SET)
8825 return false;
8832 return store_data_bypass_p (out_insn, in_insn);
8835 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
8837 When floating-point registers are wider than integer ones, moves between
8838 them must go through memory. */
8840 static bool
8841 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
8842 reg_class_t class2)
8844 return (!riscv_v_ext_mode_p (mode)
8845 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
8846 && (class1 == FP_REGS) != (class2 == FP_REGS)
8847 && !TARGET_XTHEADFMV
8848 && !TARGET_ZFA);
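/* Concrete case for the hook above (illustrative): on rv32 with the D
   extension, DFmode is wider than a GPR, so moving a DFmode value
   between the FP and integer register files must go through memory
   unless XTheadFmv or Zfa provide the paired move instructions.  */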
8851 /* Implement TARGET_REGISTER_MOVE_COST. */
8853 static int
8854 riscv_register_move_cost (machine_mode mode,
8855 reg_class_t from, reg_class_t to)
8857 if ((from == FP_REGS && to == GR_REGS) ||
8858 (from == GR_REGS && to == FP_REGS))
8859 return tune_param->fmv_cost;
8861 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
8864 /* Implement TARGET_HARD_REGNO_NREGS. */
8866 static unsigned int
8867 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
8869 if (riscv_v_ext_vector_mode_p (mode))
8871 /* Handle fractional LMUL: the value occupies only part of a vector
8872 register but still needs one whole vector register to hold it. */
8873 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
8874 return 1;
8876 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
8879 /* For tuple modes, the number of registers = NF * LMUL. */
8880 if (riscv_v_ext_tuple_mode_p (mode))
8882 unsigned int nf = riscv_vector::get_nf (mode);
8883 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
8884 poly_int64 size = GET_MODE_SIZE (subpart_mode);
8885 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
8886 if (maybe_lt (size, UNITS_PER_V_REG))
8887 return nf;
8888 else
8890 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
8891 return nf * lmul;
8895 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
8896 if (riscv_v_ext_vls_mode_p (mode))
8898 int size = GET_MODE_BITSIZE (mode).to_constant ();
8899 if (size < TARGET_MIN_VLEN)
8900 return 1;
8901 else
8902 return size / TARGET_MIN_VLEN;
8905 /* Modes for VL or VTYPE are just markers and do not hold a value,
8906 so they always consume one register. */
8907 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8908 || FRM_REG_P (regno))
8909 return 1;
8911 /* Assume every valid non-vector mode fits in one vector register. */
8912 if (V_REG_P (regno))
8913 return 1;
8915 if (FP_REG_P (regno))
8916 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
8918 /* All other registers are word-sized. */
8919 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
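/* Worked examples of the counts above (illustrative): DFmode in an
   FPR with UNITS_PER_FP_REG == 8 takes (8 + 8 - 1) / 8 == 1 register;
   the same value in rv32 GPRs takes (8 + 4 - 1) / 4 == 2 registers;
   and an LMUL-4 vector mode returns
   GET_MODE_SIZE / UNITS_PER_V_REG == 4.  */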
8922 /* Implement TARGET_HARD_REGNO_MODE_OK. */
8924 static bool
8925 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8927 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
8929 if (GP_REG_P (regno))
8931 if (riscv_v_ext_mode_p (mode))
8932 return false;
8934 if (!GP_REG_P (regno + nregs - 1))
8935 return false;
8937 else if (FP_REG_P (regno))
8939 if (riscv_v_ext_mode_p (mode))
8940 return false;
8942 if (!FP_REG_P (regno + nregs - 1))
8943 return false;
8945 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8946 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8947 return false;
8949 /* Only use callee-saved registers if a potential callee is guaranteed
8950 to spill the requisite width. */
8951 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8952 || (!call_used_or_fixed_reg_p (regno)
8953 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8954 return false;
8956 else if (V_REG_P (regno))
8958 if (!riscv_v_ext_mode_p (mode))
8959 return false;
8961 if (!V_REG_P (regno + nregs - 1))
8962 return false;
8964 int regno_alignment = riscv_get_v_regno_alignment (mode);
8965 if (regno_alignment != 1)
8966 return ((regno % regno_alignment) == 0);
8968 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8969 || FRM_REG_P (regno))
8970 return true;
8971 else
8972 return false;
8974 /* Require same callee-savedness for all registers. */
8975 for (unsigned i = 1; i < nregs; i++)
8976 if (call_used_or_fixed_reg_p (regno)
8977 != call_used_or_fixed_reg_p (regno + i))
8978 return false;
8980 /* Only use even registers in RV32 ZDINX. */
8981 if (!TARGET_64BIT && TARGET_ZDINX)
8982 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8983 && GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
8984 return !(regno & 1);
8987 return true;
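/* Example of the RV32 ZDINX restriction above (illustrative): a DFmode
   value may start in x10 (regno 10, even, so regno & 1 == 0) and
   occupy the x10/x11 pair, but may not start in x11, since Zdinx on
   rv32 requires even-odd register pairs for 64-bit FP values.  */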
8990 /* Implement TARGET_MODES_TIEABLE_P.
8992 Don't allow floating-point modes to be tied, since type punning of
8993 single-precision and double-precision is implementation defined. */
8995 static bool
8996 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
8998 /* We don't allow modes from different register classes to be tied,
8999 since that causes an ICE in register allocation (RA).
9000 E.g. V2SI and DI are not tieable. */
9001 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
9002 return false;
9003 return (mode1 == mode2
9004 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
9005 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
9008 /* Implement TARGET_CLASS_MAX_NREGS. */
9010 static unsigned char
9011 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
9013 if (reg_class_subset_p (rclass, FP_REGS))
9014 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
9016 if (reg_class_subset_p (rclass, GR_REGS))
9017 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
9019 if (reg_class_subset_p (rclass, V_REGS))
9020 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
9022 return 0;
9025 /* Implement TARGET_MEMORY_MOVE_COST. */
9027 static int
9028 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
9030 return (tune_param->memory_cost
9031 + memory_move_secondary_cost (mode, rclass, in));
9034 /* Return the number of instructions that can be issued per cycle. */
9036 static int
9037 riscv_issue_rate (void)
9039 return tune_param->issue_rate;
9042 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
9043 static int
9044 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
9046 if (DEBUG_INSN_P (insn))
9047 return more;
9049 rtx_code code = GET_CODE (PATTERN (insn));
9050 if (code == USE || code == CLOBBER)
9051 return more;
9053 /* GHOST insns are used for blockage and similar cases which
9054 effectively end a cycle. */
9055 if (get_attr_type (insn) == TYPE_GHOST)
9056 return 0;
9058 /* If we ever encounter an insn with an unknown type, trip
9059 an assert so we can find and fix this problem. */
9060 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
9062 /* If we ever encounter an insn without an insn reservation, trip
9063 an assert so we can find and fix this problem. */
9064 gcc_assert (insn_has_dfa_reservation_p (insn));
9066 return more - 1;
9069 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
9070 instruction fusion of some sort. */
9072 static bool
9073 riscv_macro_fusion_p (void)
9075 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
9078 /* Return true iff the instruction fusion described by OP is enabled. */
9080 static bool
9081 riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
9083 return tune_param->fusible_ops & op;
9086 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
9087 should be kept together during scheduling. */
9089 static bool
9090 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
9092 rtx prev_set = single_set (prev);
9093 rtx curr_set = single_set (curr);
9094 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
9095 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
9097 if (!riscv_macro_fusion_p ())
9098 return false;
9100 if (simple_sets_p
9101 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
9102 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
9104 /* We are trying to match the following:
9105 prev (slli) == (set (reg:DI rD)
9106 (ashift:DI (reg:DI rS) (const_int 32)))
9107 curr (srli) == (set (reg:DI rD)
9108 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
9109 with <shift> being either 32 for FUSE_ZEXTW, or
9110 less than 32 for FUSE_ZEXTWS. */
9112 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9113 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9114 && REG_P (SET_DEST (prev_set))
9115 && REG_P (SET_DEST (curr_set))
9116 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9117 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9118 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9119 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9120 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
9121 && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
9122 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
9123 || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
9124 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
9125 return true;
9128 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
9130 /* We are trying to match the following:
9131 prev (slli) == (set (reg:DI rD)
9132 (ashift:DI (reg:DI rS) (const_int 48)))
9133 curr (srli) == (set (reg:DI rD)
9134 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
9136 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9137 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9138 && REG_P (SET_DEST (prev_set))
9139 && REG_P (SET_DEST (curr_set))
9140 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9141 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9142 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9143 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9144 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
9145 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
9146 return true;
9149 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
9151 /* We are trying to match the following:
9152 prev (add) == (set (reg:DI rD)
9153 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9154 curr (ld) == (set (reg:DI rD)
9155 (mem:DI (reg:DI rD))) */
9157 if (MEM_P (SET_SRC (curr_set))
9158 && REG_P (XEXP (SET_SRC (curr_set), 0))
9159 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9160 && GET_CODE (SET_SRC (prev_set)) == PLUS
9161 && REG_P (XEXP (SET_SRC (prev_set), 0))
9162 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9163 return true;
9165 /* We are trying to match the following:
9166 prev (add) == (set (reg:DI rD)
9167 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9168 curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
9170 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9171 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
9172 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9173 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
9174 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
9175 && GET_CODE (SET_SRC (prev_set)) == PLUS
9176 && REG_P (XEXP (SET_SRC (prev_set), 0))
9177 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9178 return true;
9181 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
9183 /* We are trying to match the following:
9184 prev (add) == (set (reg:DI rS)
9185 (plus:DI (reg:DI rS) (const_int)))
9186 curr (ld) == (set (reg:DI rD)
9187 (mem:DI (reg:DI rS))) */
9189 if (MEM_P (SET_SRC (curr_set))
9190 && REG_P (XEXP (SET_SRC (curr_set), 0))
9191 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9192 && GET_CODE (SET_SRC (prev_set)) == PLUS
9193 && REG_P (XEXP (SET_SRC (prev_set), 0))
9194 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
9195 return true;
9198 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
9200 /* We are trying to match the following:
9201 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9202 curr (addi) == (set (reg:DI rD)
9203 (plus:DI (reg:DI rD) (const_int IMM12))) */
9205 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
9206 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9207 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9208 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
9209 && (GET_CODE (SET_SRC (prev_set)) == HIGH
9210 || (CONST_INT_P (SET_SRC (prev_set))
9211 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
9212 return true;
9215 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
9217 /* We are trying to match the following:
9218 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9219 curr (addi) == (set (reg:DI rD)
9220 (plus:DI (reg:DI rD) (const_int IMM12)))
9222 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9223 curr (addi) == (set (reg:DI rD)
9224 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
9226 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9227 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
9228 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
9229 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9230 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9231 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
9233 return true;
9236 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
9238 /* We are trying to match the following:
9239 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9240 curr (ld) == (set (reg:DI rD)
9241 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9243 if (CONST_INT_P (SET_SRC (prev_set))
9244 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
9245 && MEM_P (SET_SRC (curr_set))
9246 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9247 return true;
9249 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9250 && MEM_P (SET_SRC (curr_set))
9251 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
9252 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
9253 return true;
9255 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9256 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9257 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
9258 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9259 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
9260 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
9261 return true;
9264 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
9266 /* We are trying to match the following:
9267 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9268 curr (ld) == (set (reg:DI rD)
9269 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9271 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9272 && XINT (prev_set, 1) == UNSPEC_AUIPC
9273 && MEM_P (SET_SRC (curr_set))
9274 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9275 return true;
9278 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
9280 /* We are trying to match the following:
9281 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9282 (reg rS1))
9283 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9284 (reg rS2)) */
9286 if (MEM_P (SET_DEST (prev_set))
9287 && MEM_P (SET_DEST (curr_set))
9288 /* We can probably relax this condition. The documentation is a bit
9289 unclear about sub-word cases. So we just model DImode for now. */
9290 && GET_MODE (SET_DEST (curr_set)) == DImode
9291 && GET_MODE (SET_DEST (prev_set)) == DImode)
9293 rtx base_prev, base_curr, offset_prev, offset_curr;
9295 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
9296 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
9298 /* Fail if we did not find both bases. */
9299 if (base_prev == NULL_RTX || base_curr == NULL_RTX)
9300 return false;
9302 /* Fail if either base is not a register. */
9303 if (!REG_P (base_prev) || !REG_P (base_curr))
9304 return false;
9306 /* Fail if the bases are not the same register. */
9307 if (REGNO (base_prev) != REGNO (base_curr))
9308 return false;
9310 /* Originally the thought was to check MEM_ALIGN, but that was
9311 reporting incorrect alignments, even for SP/FP accesses, so we
9312 gave up on that approach. Instead just check for stack/hfp
9313 which we know are aligned. */
9314 if (REGNO (base_prev) != STACK_POINTER_REGNUM
9315 && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM)
9316 return false;
9318 /* The two stores must be contained within opposite halves of the
9319 same 16 byte aligned block of memory. We know that the stack
9320 pointer and the frame pointer have suitable alignment. So we
9321 just need to check the offsets of the two stores for suitable
9322 alignment. */
9323 /* Get the smaller offset into OFFSET_PREV. */
9324 if (INTVAL (offset_prev) > INTVAL (offset_curr))
9325 std::swap (offset_prev, offset_curr);
9327 /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned,
9328 then fail. */
9329 if ((INTVAL (offset_prev) % 16) != 0)
9330 return false;
9332 /* The higher offset must be 8 bytes more than the lower
9333 offset. */
9334 return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr));
9338 return false;
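/* Offset-check example for RISCV_FUSE_ALIGNED_STD above (illustrative):
   two sd instructions at sp + 32 and sp + 40 may fuse, since
   32 % 16 == 0 and 40 == 32 + 8, i.e. they fill the two halves of one
   aligned 16-byte block; stores at sp + 40 and sp + 48 do not, because
   the smaller offset is not 16-byte aligned.  */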
9341 /* Adjust the cost/latency of instructions for scheduling.
9342 For now this is just used to change the latency of vector instructions
9343 according to their LMUL. We assume that an insn with LMUL == 8 requires
9344 eight times more execution cycles than the same insn with LMUL == 1.
9345 As this may cause very high latencies which lead to scheduling artifacts
9346 we currently only perform the adjustment when -madjust-lmul-cost is given. */
9348 static int
9349 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
9350 unsigned int)
9352 /* Only do adjustments for the generic out-of-order scheduling model. */
9353 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
9354 return cost;
9356 if (recog_memoized (insn) < 0)
9357 return cost;
9359 enum attr_type type = get_attr_type (insn);
9361 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
9363 /* TODO: For ordered reductions scale the base cost relative to the
9364 number of units. */
9368 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
9369 if (!TARGET_ADJUST_LMUL_COST)
9370 return cost;
9372 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
9373 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
9374 return cost;
9376 enum riscv_vector::vlmul_type lmul =
9377 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
9379 double factor = 1;
9380 switch (lmul)
9382 case riscv_vector::LMUL_2:
9383 factor = 2;
9384 break;
9385 case riscv_vector::LMUL_4:
9386 factor = 4;
9387 break;
9388 case riscv_vector::LMUL_8:
9389 factor = 8;
9390 break;
9391 case riscv_vector::LMUL_F2:
9392 factor = 0.5;
9393 break;
9394 case riscv_vector::LMUL_F4:
9395 factor = 0.25;
9396 break;
9397 case riscv_vector::LMUL_F8:
9398 factor = 0.125;
9399 break;
9400 default:
9401 factor = 1;
9404 /* If the latency was nonzero, keep it that way. */
9405 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
9407 return new_cost;
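/* Scaling example for the adjustment above (illustrative, with
   -madjust-lmul-cost): an insn with base latency 4 and LMUL == 8 is
   scaled to 32, while the same insn at LMUL == 1/2 becomes
   MAX (1, 4 * 0.5) == 2; a nonzero latency is never scaled down to
   zero.  */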
9410 /* Auxiliary function to emit RISC-V ELF attribute. */
9411 static void
9412 riscv_emit_attribute ()
9414 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
9415 riscv_arch_str ().c_str ());
9417 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
9418 TARGET_STRICT_ALIGN ? 0 : 1);
9420 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
9421 riscv_stack_boundary / 8);
9424 /* Output .variant_cc for a function symbol which follows the vector calling
9425 convention. */
9427 static void
9428 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
9430 if (TREE_CODE (decl) == FUNCTION_DECL)
9432 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
9433 if (cc == RISCV_CC_V)
9435 fprintf (stream, "\t.variant_cc\t");
9436 assemble_name (stream, name);
9437 fprintf (stream, "\n");
9442 /* Implement ASM_DECLARE_FUNCTION_NAME. */
9444 void
9445 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
9447 riscv_asm_output_variant_cc (stream, fndecl, name);
9448 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
9449 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
9450 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
9452 fprintf (stream, "\t.option push\n");
9454 struct cl_target_option *local_cl_target =
9455 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
9456 struct cl_target_option *global_cl_target =
9457 TREE_TARGET_OPTION (target_option_default_node);
9459 const char *local_arch_str = get_arch_str (local_cl_target);
9460 const char *arch_str = local_arch_str != NULL
9461 ? local_arch_str
9462 : riscv_arch_str (true).c_str ();
9463 fprintf (stream, "\t.option arch, %s\n", arch_str);
9464 const char *local_tune_str = get_tune_str (local_cl_target);
9465 const char *global_tune_str = get_tune_str (global_cl_target);
9466 if (strcmp (local_tune_str, global_tune_str) != 0)
9467 fprintf (stream, "\t# tune = %s\n", local_tune_str);
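/* Sketch of the output for a function foo carrying a target attribute
   (exact strings depend on the configuration):

     .type foo, @function
   foo:
     .option push
     .option arch, rv64gcv
     ...                       # function body

   The matching ".option pop" is emitted by riscv_declare_function_size
   below.  */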
9471 void
9472 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
9474 if (!flag_inhibit_size_directive)
9475 ASM_OUTPUT_MEASURED_SIZE (stream, name);
9477 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
9479 fprintf (stream, "\t.option pop\n");
9483 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
9485 void
9486 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
9488 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
9489 const char *value = IDENTIFIER_POINTER (target);
9490 riscv_asm_output_variant_cc (stream, decl, name);
9491 ASM_OUTPUT_DEF (stream, name, value);
9494 /* Implement ASM_OUTPUT_EXTERNAL. */
9496 void
9497 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
9499 default_elf_asm_output_external (stream, decl, name);
9500 riscv_asm_output_variant_cc (stream, decl, name);
9503 /* Implement TARGET_ASM_FILE_START. */
9505 static void
9506 riscv_file_start (void)
9508 default_file_start ();
9510 /* Instruct GAS to generate position-[in]dependent code. */
9511 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
9513 /* If the user specifies "-mno-relax" on the command line then disable linker
9514 relaxation in the assembler. */
9515 if (! riscv_mrelax)
9516 fprintf (asm_out_file, "\t.option norelax\n");
9518 /* If the user specifies "-mcsr-check" on the command line then enable csr
9519 check in the assembler. */
9520 if (riscv_mcsr_check)
9521 fprintf (asm_out_file, "\t.option csr-check\n");
9523 if (riscv_emit_attribute_p)
9524 riscv_emit_attribute ();
9527 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
9528 in order to avoid duplicating too much logic from elsewhere. */
9530 static void
9531 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9532 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9533 tree function)
9535 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
9536 rtx this_rtx, temp1, temp2, fnaddr;
9537 rtx_insn *insn;
9539 riscv_in_thunk_func = true;
9541 /* Pretend to be a post-reload pass while generating rtl. */
9542 reload_completed = 1;
9544 /* Mark the end of the (empty) prologue. */
9545 emit_note (NOTE_INSN_PROLOGUE_END);
9547 /* Determine if we can use a sibcall to call FUNCTION directly. */
9548 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
9550 /* We need two temporary registers in some cases. */
9551 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
9552 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
9554 /* Find out which register contains the "this" pointer. */
9555 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9556 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
9557 else
9558 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
9560 /* Add DELTA to THIS_RTX. */
9561 if (delta != 0)
9563 rtx offset = GEN_INT (delta);
9564 if (!SMALL_OPERAND (delta))
9566 riscv_emit_move (temp1, offset);
9567 offset = temp1;
9569 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
9572 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
9573 if (vcall_offset != 0)
9575 rtx addr;
9577 /* Set TEMP1 to *THIS_RTX. */
9578 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
9580 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
9581 addr = riscv_add_offset (temp2, temp1, vcall_offset);
9583 /* Load the offset and add it to THIS_RTX. */
9584 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
9585 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
9588 /* Jump to the target function. */
9589 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
9590 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
9591 SIBLING_CALL_P (insn) = 1;
9593 /* Run just enough of rest_of_compilation. This sequence was
9594 "borrowed" from alpha.cc. */
9595 insn = get_insns ();
9596 split_all_insns_noflow ();
9597 shorten_branches (insn);
9598 assemble_start_function (thunk_fndecl, fnname);
9599 final_start_function (insn, file, 1);
9600 final (insn, file, 1);
9601 final_end_function ();
9602 assemble_end_function (thunk_fndecl, fnname);
9604 /* Clean up the vars set above. Note that final_end_function resets
9605 the global pointer for us. */
9606 reload_completed = 0;
9607 riscv_in_thunk_func = false;
9610 /* Allocate a chunk of memory for per-function machine-dependent data. */
9612 static struct machine_function *
9613 riscv_init_machine_status (void)
9615 return ggc_cleared_alloc<machine_function> ();
9618 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
9619 TODO: So far we only support the length-agnostic value. */
9620 static poly_uint16
9621 riscv_convert_vector_chunks (struct gcc_options *opts)
9623 int chunk_num;
9624 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
9625 if (min_vlen > 32)
9627 /* When targeting minimum VLEN > 32, we should use a 64-bit chunk size.
9628 Otherwise we cannot include SEW = 64 bits.
9629 Runtime invariant: The single indeterminate represents the
9630 number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
9631 Thus the number of bytes in a vector is 8 + 8 * x1, which is
9632 riscv_vector_chunks * 8 = poly_int (8, 8). */
9633 riscv_bytes_per_vector_chunk = 8;
9634 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
9635 - TARGET_MIN_VLEN = 64bit: [8,8]
9636 - TARGET_MIN_VLEN = 128bit: [16,16]
9637 - TARGET_MIN_VLEN = 256bit: [32,32]
9638 - TARGET_MIN_VLEN = 512bit: [64,64]
9639 - TARGET_MIN_VLEN = 1024bit: [128,128]
9640 - TARGET_MIN_VLEN = 2048bit: [256,256]
9641 - TARGET_MIN_VLEN = 4096bit: [512,512]
9642 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
9643 chunk_num = min_vlen / 64;
9645 else
9647 /* When targeting minimum VLEN = 32, we should use a 32-bit
9648 chunk size. Runtime invariant: The single indeterminate represents the
9649 number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
9650 Thus the number of bytes in a vector is 4 + 4 * x1, which is
9651 riscv_vector_chunks * 4 = poly_int (4, 4). */
9652 riscv_bytes_per_vector_chunk = 4;
9653 chunk_num = 1;
9656 /* Set riscv_vector_chunks to the poly (1, 1) run-time constant if
9657 TARGET_VECTOR is enabled, and to the compile-time constant 1 if
9658 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
9659 to set the RVV mode sizes. The RVV machine mode sizes are run-time
9660 constants if TARGET_VECTOR is enabled and remain default
9661 compile-time constants if TARGET_VECTOR is disabled. */
9662 if (TARGET_VECTOR_OPTS_P (opts))
9664 switch (opts->x_rvv_vector_bits)
9666 case RVV_VECTOR_BITS_SCALABLE:
9667 return poly_uint16 (chunk_num, chunk_num);
9668 case RVV_VECTOR_BITS_ZVL:
9669 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
9670 default:
9671 gcc_unreachable ();
9674 else
9675 return 1;
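/* Worked example: for min_vlen == 128 we get chunk_num == 128 / 64 == 2
   with 8-byte chunks, so -mrvv-vector-bits=scalable yields
   riscv_vector_chunks == poly (2, 2), i.e. BYTES_PER_RISCV_VECTOR ==
   [16, 16], while -mrvv-vector-bits=zvl yields the compile-time
   constant 128 / (8 * 8) == 2 chunks, i.e. exactly 16 bytes.  */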
9678 /* 'Unpack' the internal tuning structs and update the options
9679 in OPTS. The caller must have set up selected_tune and selected_arch
9680 as all the other target-specific codegen decisions are
9681 derived from them. */
9682 void
9683 riscv_override_options_internal (struct gcc_options *opts)
9685 const struct riscv_tune_info *cpu;
9687 /* The presence of the M extension implies that division instructions
9688 are present, so include them unless explicitly disabled. */
9689 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
9690 opts->x_target_flags |= MASK_DIV;
9691 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
9692 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
9694 /* We might use a multiplication to calculate the scalable vector length at
9695 runtime. Therefore, require the M extension. */
9696 if (TARGET_VECTOR && !TARGET_MUL)
9697 sorry ("Currently the %<V%> implementation requires the %<M%> extension");
9699 /* Likewise floating-point division and square root. */
9700 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
9701 && ((target_flags_explicit & MASK_FDIV) == 0))
9702 opts->x_target_flags |= MASK_FDIV;
9704 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
9705 if both -mtune and -mcpu are not given. */
9706 const char *tune_string = get_tune_str (opts);
9707 cpu = riscv_parse_tune (tune_string, false);
9708 riscv_microarchitecture = cpu->microarchitecture;
9709 tune_param = opts->x_optimize_size
9710 ? &optimize_size_tune_info
9711 : cpu->tune_param;
9713 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
9714 for size. For architectures that trap and emulate unaligned accesses,
9715 the performance cost is too great, even for -Os. Similarly, if
9716 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
9717 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
9718 || TARGET_STRICT_ALIGN);
9720 /* By default, when -mno-vector-strict-align is not specified, do not allow
9721 unaligned vector memory accesses except if -mtune's setting explicitly
9722 allows it. */
9723 riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
9724 || cpu->tune_param->vector_unaligned_access;
9726 /* Make a note if user explicitly passed -mstrict-align for later
9727 builtin macro generation. Can't use target_flags_explicit since
9728 it is set even for -mno-strict-align. */
9729 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
9731 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
9732 && cpu->tune_param->slow_unaligned_access)
9733 opts->x_target_flags |= MASK_STRICT_ALIGN;
9735 /* If the user hasn't specified a branch cost, use the processor's
9736 default. */
9737 if (opts->x_riscv_branch_cost == 0)
9738 opts->x_riscv_branch_cost = tune_param->branch_cost;
9740 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
9741 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
9743 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
9744 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
9745 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
9746 "'V' Extension");
9748 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
9749 big-endian after finishing full coverage testing. */
9750 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
9751 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
9753 /* Convert -march and -mrvv-vector-bits to a chunks count. */
9754 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
9757 /* Implement TARGET_OPTION_OVERRIDE. */
9759 void
9760 riscv_option_override (void)
9762 #ifdef SUBTARGET_OVERRIDE_OPTIONS
9763 SUBTARGET_OVERRIDE_OPTIONS;
9764 #endif
9766 flag_pcc_struct_return = 0;
9768 if (flag_pic)
9769 g_switch_value = 0;
9771 /* Always prefer medlow to medany for RV32, since medlow can access
9772 the full address space. */
9773 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
9774 riscv_cmodel = CM_MEDLOW;
9776 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
9777 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
9779 if (riscv_cmodel == CM_LARGE && flag_pic)
9780 sorry ("code model %qs with %qs", "large",
9781 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
9783 if (flag_pic)
9784 riscv_cmodel = CM_PIC;
9786 /* With -fno-omit-frame-pointer plus -momit-leaf-frame-pointer we need to
9787 save the fp along with ra for non-leaf functions, and neither fp nor ra
9788 for leaf functions. x_flag_omit_frame_pointer takes priority in
9789 determining whether the frame pointer is needed. If we did not
9790 override it, the fp and ra would also be stored for leaf functions,
9791 which is not what we want. */
9792 riscv_save_frame_pointer = false;
9793 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
9795 if (!global_options.x_flag_omit_frame_pointer)
9796 riscv_save_frame_pointer = true;
9798 global_options.x_flag_omit_frame_pointer = 1;
9801 /* We get better code with explicit relocs for CM_MEDLOW, but
9802 worse code for the others (for now). Pick the best default. */
9803 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
9804 if (riscv_cmodel == CM_MEDLOW)
9805 target_flags |= MASK_EXPLICIT_RELOCS;
9807 /* Require that the ISA supports the requested floating-point ABI. */
9808 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
9809 error ("requested ABI requires %<-march%> to subsume the %qc extension",
9810 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
9813 /* RVE requires a specific ABI. */
9813 if (TARGET_RVE)
9815 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
9816 error ("rv32e requires ilp32e ABI");
9817 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
9818 error ("rv64e requires lp64e ABI");
9821 /* Zfinx requires ABI ilp32, ilp32e, lp64 or lp64e. */
9822 if (TARGET_ZFINX
9823 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
9824 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
9825 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
9827 /* We do not yet support ILP32 on RV64. */
9828 if (BITS_PER_WORD != POINTER_SIZE)
9829 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
9831 /* Validate -mpreferred-stack-boundary= value. */
9832 riscv_stack_boundary = ABI_STACK_BOUNDARY;
9833 if (riscv_preferred_stack_boundary_arg)
9835 int min = ctz_hwi (STACK_BOUNDARY / 8);
9836 int max = 8;
9838 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
9839 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
9840 riscv_preferred_stack_boundary_arg, min, max);
9842 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
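/* E.g. -mpreferred-stack-boundary=4 sets riscv_stack_boundary to
   8 << 4 == 128 bits, i.e. a 16-byte-aligned stack; arguments outside
   [ctz_hwi (STACK_BOUNDARY / 8), 8] are rejected above.  */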
9845 if (riscv_emit_attribute_p < 0)
9846 #ifdef HAVE_AS_RISCV_ATTRIBUTE
9847 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
9848 #else
9849 riscv_emit_attribute_p = 0;
9851 if (riscv_emit_attribute_p)
9852 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
9853 " [%<-mriscv-attribute%>]");
9854 #endif
9856 if (riscv_stack_protector_guard == SSP_GLOBAL
9857 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9859 error ("incompatible options %<-mstack-protector-guard=global%> and "
9860 "%<-mstack-protector-guard-offset=%s%>",
9861 riscv_stack_protector_guard_offset_str);
9864 if (riscv_stack_protector_guard == SSP_TLS
9865 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
9866 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
9868 error ("both %<-mstack-protector-guard-offset%> and "
9869 "%<-mstack-protector-guard-reg%> must be used "
9870 "with %<-mstack-protector-guard=sysreg%>");
9873 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
9875 const char *str = riscv_stack_protector_guard_reg_str;
9876 int reg = decode_reg_name (str);
9878 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
9879 error ("%qs is not a valid base register in %qs", str,
9880 "-mstack-protector-guard-reg=");
9882 riscv_stack_protector_guard_reg = reg;
9885 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
9887 char *end;
9888 const char *str = riscv_stack_protector_guard_offset_str;
9889 errno = 0;
9890 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
9892 if (!*str || *end || errno)
9893 error ("%qs is not a valid number in %qs", str,
9894 "-mstack-protector-guard-offset=");
9896 if (!SMALL_OPERAND (offs))
9897 error ("%qs is not a valid offset in %qs", str,
9898 "-mstack-protector-guard-offset=");
9900 riscv_stack_protector_guard_offset = offs;
9903 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
9904 param_sched_pressure_algorithm,
9905 SCHED_PRESSURE_MODEL);
9907 /* Function to allocate machine-dependent function status. */
9908 init_machine_status = &riscv_init_machine_status;
9910 riscv_override_options_internal (&global_options);
9912 /* Save these options as the default ones in case we push and pop them later
9913 while processing functions with potential target attributes. */
9914 target_option_default_node = target_option_current_node
9915 = build_target_option_node (&global_options, &global_options_set);
9918 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9919 Used by riscv_set_current_function to
9920 make sure optab availability predicates are recomputed when necessary. */
9922 void
9923 riscv_save_restore_target_globals (tree new_tree)
9925 if (TREE_TARGET_GLOBALS (new_tree))
9926 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9927 else if (new_tree == target_option_default_node)
9928 restore_target_globals (&default_target_globals);
9929 else
9930 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9933 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9934 using the information saved in PTR. */
9936 static void
9937 riscv_option_restore (struct gcc_options *opts,
9938 struct gcc_options * /* opts_set */,
9939 struct cl_target_option * /* ptr */)
9941 riscv_override_options_internal (opts);
9944 static GTY (()) tree riscv_previous_fndecl;
9946 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9948 static void
9949 riscv_conditional_register_usage (void)
9951 /* We have only x0~x15 on RV32E/RV64E. */
9952 if (TARGET_RVE)
9954 for (int r = 16; r <= 31; r++)
9955 fixed_regs[r] = 1;
9958 if (riscv_abi == ABI_ILP32E)
9960 for (int r = 16; r <= 31; r++)
9961 call_used_regs[r] = 1;
9964 if (!TARGET_HARD_FLOAT)
9966 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9967 fixed_regs[regno] = call_used_regs[regno] = 1;
9970 /* In the soft-float ABI, there are no callee-saved FP registers. */
9971 if (UNITS_PER_FP_ARG == 0)
9973 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9974 call_used_regs[regno] = 1;
9977 if (!TARGET_VECTOR)
9979 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
9980 fixed_regs[regno] = call_used_regs[regno] = 1;
9982 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
9983 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
9984 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9985 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
9989 /* Return a register priority for hard reg REGNO. */
9991 static int
9992 riscv_register_priority (int regno)
9994 /* Favor compressed registers to improve the odds of RVC instruction
9995 selection. */
9996 if (riscv_compressed_reg_p (regno))
9997 return 1;
9999 return 0;
10002 /* Implement TARGET_TRAMPOLINE_INIT. */
10004 static void
10005 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10007 rtx addr, end_addr, mem;
10008 uint32_t trampoline[4];
10009 unsigned int i;
10010 HOST_WIDE_INT static_chain_offset, target_function_offset;
10012 /* Work out the offsets of the pointers from the start of the
10013 trampoline code. */
10014 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
10016 /* Get pointers to the beginning and end of the code block. */
10017 addr = force_reg (Pmode, XEXP (m_tramp, 0));
10018 end_addr = riscv_force_binary (Pmode, PLUS, addr,
10019 GEN_INT (TRAMPOLINE_CODE_SIZE));
10022 if (Pmode == SImode)
10024 chain_value = force_reg (Pmode, chain_value);
10026 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10027 /* lui t2, hi(chain)
10028 lui t0, hi(func)
10029 addi t2, t2, lo(chain)
10030 jr t0, lo(func) */
10032 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
10033 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
10035 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
10037 /* 0xfff. */
10038 rtx imm12_mask = gen_reg_rtx (SImode);
10039 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
10041 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
10043 /* Gen lui t2, hi(chain). */
10044 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
10045 fixup_value);
10046 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
10047 uimm_mask);
10048 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10049 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
10050 gen_int_mode (lui_hi_chain_code, SImode));
10052 mem = adjust_address (m_tramp, SImode, 0);
10053 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
10055 /* Gen lui t0, hi(func). */
10056 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
10057 fixup_value);
10058 hi_func = riscv_force_binary (SImode, AND, hi_func,
10059 uimm_mask);
10060 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
10061 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
10062 gen_int_mode (lui_hi_func_code, SImode));
10064 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
10065 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
10067 /* Gen addi t2, t2, lo(chain). */
10068 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
10069 imm12_mask);
10070 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
10072 lo_chain_code = OPCODE_ADDI
10073 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10074 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
10076 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
10077 force_reg (SImode, GEN_INT (lo_chain_code)));
10079 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
10080 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
10082 /* Gen jr t0, lo(func). */
10083 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
10084 imm12_mask);
10085 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
10087 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
10089 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
10090 force_reg (SImode, GEN_INT (lo_func_code)));
10092 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
10093 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
10095 else
10097 static_chain_offset = TRAMPOLINE_CODE_SIZE;
10098 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
10100 /* auipc t2, 0
10101 l[wd] t0, target_function_offset(t2)
10102 l[wd] t2, static_chain_offset(t2)
10103 jr t0 */
10105 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10106 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10107 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
10108 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10109 | (target_function_offset << SHIFT_IMM);
10110 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10111 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10112 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10113 | (static_chain_offset << SHIFT_IMM);
10114 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
10116 /* Copy the trampoline code. */
10117 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
10119 if (BYTES_BIG_ENDIAN)
10120 trampoline[i] = __builtin_bswap32(trampoline[i]);
10121 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
10122 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
10125 /* Set up the static chain pointer field. */
10126 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
10127 riscv_emit_move (mem, chain_value);
10129 /* Set up the target function field. */
10130 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
10131 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
10134 /* Flush the code part of the trampoline. */
10135 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
10136 emit_insn (gen_clear_cache (addr, end_addr));
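/* Resulting RV64 trampoline layout (a sketch; RV32 instead uses the
   synthesized lui/addi sequence above):

     0x00  auipc t2, 0
     0x04  ld    t0, 24(t2)   # target_function_offset == 16 + 8
     0x08  ld    t2, 16(t2)   # static_chain_offset == TRAMPOLINE_CODE_SIZE
     0x0c  jr    t0
     0x10  <static chain pointer>
     0x18  <target function address>
*/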
10139 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
10141 static bool
10142 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
10143 tree exp ATTRIBUTE_UNUSED)
10145 /* Don't use sibcalls when using the save-restore routines. */
10146 if (TARGET_SAVE_RESTORE)
10147 return false;
10149 /* Don't use sibcalls for naked functions. */
10150 if (cfun->machine->naked_p)
10151 return false;
10153 /* Don't use sibcalls for interrupt functions. */
10154 if (cfun->machine->interrupt_handler_p)
10155 return false;
10157 /* Don't use sibcalls in the large code model, because sibcall expansion
10158 and epilogue expansion both use the RISCV_PROLOGUE_TEMP
10159 register. */
10160 if (riscv_cmodel == CM_LARGE)
10161 return false;
10163 return true;
10166 /* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
10167 interrupt function. */
10168 static enum riscv_privilege_levels
10169 riscv_get_interrupt_type (tree decl)
10171 gcc_assert (decl != NULL_TREE);
10173 if ((TREE_CODE (decl) != FUNCTION_DECL)
10174 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
10175 return UNKNOWN_MODE;
10177 tree attr_args
10178 = TREE_VALUE (lookup_attribute ("interrupt",
10179 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
10181 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
10183 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
10185 if (!strcmp (string, "user"))
10186 return USER_MODE;
10187 else if (!strcmp (string, "supervisor"))
10188 return SUPERVISOR_MODE;
10189 else /* Must be "machine". */
10190 return MACHINE_MODE;
10192 else
10193 /* Interrupt attributes are machine mode by default. */
10194 return MACHINE_MODE;
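/* Usage example (illustrative):

     void __attribute__ ((interrupt ("supervisor"))) isr (void);

   yields SUPERVISOR_MODE here, while a bare __attribute__ ((interrupt))
   defaults to MACHINE_MODE.  */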
10197 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
10198 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
10199 of the function, if such exists. This function may be called multiple
10200 times on a single function so use riscv_previous_fndecl to avoid
10201 setting up identical state. */
10203 /* Sanity checking for above function attributes. */
10204 static void
10205 riscv_set_current_function (tree decl)
10207 if (decl == NULL_TREE
10208 || current_function_decl == NULL_TREE
10209 || current_function_decl == error_mark_node
10210 || ! cfun->machine)
10211 return;
10213 if (!cfun->machine->attributes_checked_p)
10215 cfun->machine->naked_p = riscv_naked_function_p (decl);
10216 cfun->machine->interrupt_handler_p
10217 = riscv_interrupt_type_p (TREE_TYPE (decl));
10219 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
10220 error ("function attributes %qs and %qs are mutually exclusive",
10221 "interrupt", "naked");
10223 if (cfun->machine->interrupt_handler_p)
10225 tree ret = TREE_TYPE (TREE_TYPE (decl));
10226 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
10228 if (TREE_CODE (ret) != VOID_TYPE)
10229 error ("%qs function cannot return a value", "interrupt");
10231 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
10232 error ("%qs function cannot have arguments", "interrupt");
10234 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
10236 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
10239 /* Don't print the above diagnostics more than once. */
10240 cfun->machine->attributes_checked_p = 1;
10243 if (!decl || decl == riscv_previous_fndecl)
10244 return;
10246 tree old_tree = (riscv_previous_fndecl
10247 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
10248 : NULL_TREE);
10250 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
10252 /* If current function has no attributes but the previous one did,
10253 use the default node. */
10254 if (!new_tree && old_tree)
10255 new_tree = target_option_default_node;
10257 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
10258 the default have been handled by riscv_save_restore_target_globals
10259 from the target pragma parsing code. */
10260 if (old_tree == new_tree)
10261 return;
10263 riscv_previous_fndecl = decl;
10265 /* First set the target options. */
10266 cl_target_option_restore (&global_options, &global_options_set,
10267 TREE_TARGET_OPTION (new_tree));
10269 /* The ISA extensions can vary based on the function's target attribute.
10270 Thus, make sure that the machine modes are reflected correctly here. */
10271 init_adjust_machine_modes ();
10273 riscv_save_restore_target_globals (new_tree);
10276 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
10277 static tree
10278 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
10280 tree combined_attrs;
10282 enum riscv_privilege_levels old_interrupt_type
10283 = riscv_get_interrupt_type (olddecl);
10284 enum riscv_privilege_levels new_interrupt_type
10285 = riscv_get_interrupt_type (newdecl);
10287 /* Check that the old and new declarations have the same interrupt type. */
10288 if ((old_interrupt_type != UNKNOWN_MODE)
10289 && (new_interrupt_type != UNKNOWN_MODE)
10290 && (old_interrupt_type != new_interrupt_type))
10291 error ("%qs function cannot have different interrupt type", "interrupt");
10293 /* Create combined attributes. */
10294 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
10295 DECL_ATTRIBUTES (newdecl));
10297 return combined_attrs;
10300 /* Implement TARGET_CANNOT_COPY_INSN_P. */
10302 static bool
10303 riscv_cannot_copy_insn_p (rtx_insn *insn)
10305 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
10308 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
10310 static bool
10311 riscv_slow_unaligned_access (machine_mode mode, unsigned int)
10313 return VECTOR_MODE_P (mode) ? TARGET_VECTOR_MISALIGN_SUPPORTED
10314 : riscv_slow_unaligned_access_p;
10317 static bool
10318 riscv_overlap_op_by_pieces (void)
10320 return tune_param->overlap_op_by_pieces;
10323 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10325 static bool
10326 riscv_can_change_mode_class (machine_mode from, machine_mode to,
10327 reg_class_t rclass)
10329 /* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
10330 In the 'cprop_hardreg' pass, we will try to do hard reg copy propagation
10331 between a wider mode (FROM) and a narrower mode (TO).
10333 E.g. we should not allow copy propagation
10334 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
10335 since we cannot order their sizes, which would cause an ICE in regcprop.
10337 TODO: Even though they have different sizes, they always change
10338 the whole register. We may enhance such cases in regcprop to optimize
10339 them in the future. */
10340 if (reg_classes_intersect_p (V_REGS, rclass)
10341 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
10342 return false;
10343 return !reg_classes_intersect_p (FP_REGS, rclass);
10346 /* Implement TARGET_CONSTANT_ALIGNMENT. */
10348 static HOST_WIDE_INT
10349 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
10351 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
10352 && (riscv_align_data_type == riscv_align_data_type_xlen))
10353 return MAX (align, BITS_PER_WORD);
10354 return align;
10357 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
10359 /* This function is equivalent to default_promote_function_mode_always_promote
10360 except that it returns a promoted mode even if type is NULL_TREE. This is
10361 needed by libcalls which have no type (only a mode) such as fixed conversion
10362 routines that take a signed or unsigned char/short/int argument and convert
10363 it to a fixed type. */
10365 static machine_mode
10366 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10367 machine_mode mode,
10368 int *punsignedp ATTRIBUTE_UNUSED,
10369 const_tree fntype ATTRIBUTE_UNUSED,
10370 int for_return ATTRIBUTE_UNUSED)
10372 int unsignedp;
10374 if (type != NULL_TREE)
10375 return promote_mode (type, mode, punsignedp);
10377 unsignedp = *punsignedp;
10378 scalar_mode smode = as_a <scalar_mode> (mode);
10379 PROMOTE_MODE (smode, unsignedp, type);
10380 *punsignedp = unsignedp;
10381 return smode;
10384 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
10386 static void
10387 riscv_reorg (void)
10389 /* Do nothing unless we have -msave-restore. */
10390 if (TARGET_SAVE_RESTORE)
10391 riscv_remove_unneeded_save_restore_calls ();
10394 /* Return nonzero if register FROM_REGNO can be renamed to register
10395 TO_REGNO. */
10397 bool
10398 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
10399 unsigned to_regno)
10401 /* Interrupt functions can only use registers that have already been
10402 saved by the prologue, even if they would normally be
10403 call-clobbered. */
10404 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
10407 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
10409 bool
10410 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
10412 /* Prefer old address if it is less expensive. */
10413 addr_space_t as = MEM_ADDR_SPACE (memref);
10414 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
10415 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
10416 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
10417 return new_cost <= old_cost;
10420 /* Helper function for generating gpr_save pattern. */
10422 rtx
10423 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
10425 unsigned count = riscv_save_libcall_count (frame->mask);
10426 /* 1 for the unspec, 2 for the t0/t1 clobbers and 1 for ra. */
10427 unsigned veclen = 1 + 2 + 1 + count;
10428 rtvec vec = rtvec_alloc (veclen);
10430 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
10432 RTVEC_ELT (vec, 0) =
10433 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
10434 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
10436 for (unsigned i = 1; i < veclen; ++i)
10438 unsigned regno = gpr_save_reg_order[i];
10439 rtx reg = gen_rtx_REG (Pmode, regno);
10440 rtx elt;
10442 /* t0 and t1 are CLOBBERs, others are USEs. */
10443 if (i < 3)
10444 elt = gen_rtx_CLOBBER (Pmode, reg);
10445 else
10446 elt = gen_rtx_USE (Pmode, reg);
10448 RTVEC_ELT (vec, i) = elt;
10451 /* The largest numbered caller-save register must be set in the mask if
10452 we are not using __riscv_save_0. */
10453 gcc_assert ((count == 0) ||
10454 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
10456 return gen_rtx_PARALLEL (VOIDmode, vec);
10459 static HOST_WIDE_INT
10460 zcmp_base_adj (int regs_num)
10462 return riscv_16bytes_align ((regs_num) * GET_MODE_SIZE (word_mode));
10465 static HOST_WIDE_INT
10466 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
10468 return total - zcmp_base_adj (regs_num);
10471 bool
10472 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
10474 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
10475 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
10476 || additional_bytes == 2 * ZCMP_SP_INC_STEP
10477 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
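/* Worked example (assuming ZCMP_SP_INC_STEP == 16 and ZCMP_MAX_SPIMM == 3):
   saving 2 registers on rv64 gives a base adjustment of
   riscv_16bytes_align (16) == 16, so total adjustments of 16, 32, 48 and
   64 bytes are representable; anything else is rejected.  */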
10480 /* Return true if OP is a valid gpr_save pattern. */
10482 bool
10483 riscv_gpr_save_operation_p (rtx op)
10485 unsigned len = XVECLEN (op, 0);
10487 if (len > ARRAY_SIZE (gpr_save_reg_order))
10488 return false;
10490 for (unsigned i = 0; i < len; i++)
10492 rtx elt = XVECEXP (op, 0, i);
10493 if (i == 0)
10495 /* First element in parallel is unspec. */
10496 if (GET_CODE (elt) != UNSPEC_VOLATILE
10497 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
10498 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
10499 return false;
10501 else
10503 /* Two CLOBBERs followed by USEs; we must check the order. */
10504 unsigned expect_code = i < 3 ? CLOBBER : USE;
10505 if (GET_CODE (elt) != expect_code
10506 || !REG_P (XEXP (elt, 0))
10507 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
10508 return false;
10512 return true;
10515 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
10517 static unsigned HOST_WIDE_INT
10518 riscv_asan_shadow_offset (void)
10520 /* We only have libsanitizer support for RV64 at present.
10522 This number must match ASAN_SHADOW_OFFSET_CONST in the file
10523 libsanitizer/asan/asan_mapping.h. */
10524 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
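/* I.e. the RV64 ASan mapping is (sketch, assuming the usual shadow
   shift of 3): shadow = (addr >> 3) + 0xd55550000.  */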
10527 /* Implement TARGET_MANGLE_TYPE. */
10529 static const char *
10530 riscv_mangle_type (const_tree type)
10532 /* Half-precision float, _Float16 is "DF16_" and __bf16 is "DF16b". */
10533 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
10535 if (TYPE_MODE (type) == HFmode)
10536 return "DF16_";
10538 if (TYPE_MODE (type) == BFmode)
10539 return "DF16b";
10541 gcc_unreachable ();
10544 /* Mangle all vector types for the vector extension. */
10545 /* The mangled name follows the RVV LLVM rule,
10546 that is "u" + length of (abi_name) + abi_name. */
10547 if (TYPE_NAME (type) != NULL)
10549 const char *res = riscv_vector::mangle_builtin_type (type);
10550 if (res)
10551 return res;
10554 /* Use the default mangling. */
10555 return NULL;
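/* For instance, under the Itanium C++ ABI rules for vendor extended
   types, void f (_Float16) mangles to _Z1fDF16_ and void g (__bf16)
   to _Z1gDF16b.  */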
10558 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
10560 static bool
10561 riscv_scalar_mode_supported_p (scalar_mode mode)
10563 if (mode == HFmode || mode == BFmode)
10564 return true;
10565 else
10566 return default_scalar_mode_supported_p (mode);
10569 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
10570 if MODE is HFmode or BFmode, and punt to the generic implementation
10571 otherwise. */
10573 static bool
10574 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
10576 if (mode == HFmode || mode == BFmode)
10577 return true;
10578 else
10579 return default_libgcc_floating_mode_supported_p (mode);
10582 /* Set the value of FLT_EVAL_METHOD.
10583 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
10585 0: evaluate all operations and constants, whose semantic type has at
10586 most the range and precision of type float, to the range and
10587 precision of float; evaluate all other operations and constants to
10588 the range and precision of the semantic type;
10590 N, where _FloatN is a supported interchange floating type
10591 evaluate all operations and constants, whose semantic type has at
10592 most the range and precision of _FloatN type, to the range and
10593 precision of the _FloatN type; evaluate all other operations and
10594 constants to the range and precision of the semantic type;
10596 If we have the zfh/zhinx/zvfh extensions then we support _Float16
10597 in native precision, so we should set this to 16. */
10598 static enum flt_eval_method
10599 riscv_excess_precision (enum excess_precision_type type)
10601 switch (type)
10603 case EXCESS_PRECISION_TYPE_FAST:
10604 case EXCESS_PRECISION_TYPE_STANDARD:
10605 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
10606 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
10607 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
10608 case EXCESS_PRECISION_TYPE_IMPLICIT:
10609 case EXCESS_PRECISION_TYPE_FLOAT16:
10610 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
10611 default:
10612 gcc_unreachable ();
10614 return FLT_EVAL_METHOD_UNPREDICTABLE;
10617 /* Implement TARGET_FLOATN_MODE. */
10618 static opt_scalar_float_mode
10619 riscv_floatn_mode (int n, bool extended)
10621 if (!extended && n == 16)
10622 return HFmode;
10624 return default_floatn_mode (n, extended);
10627 /* Record that we have no arithmetic or comparison libfuncs for
10628 machine_mode MODE. */
10629 static void
10630 riscv_block_arith_comp_libfuncs_for_mode (machine_mode mode)
10632 /* Half-precision float or Brain float operations. The compiler handles all
10633 operations with NULL libfuncs by converting to SFmode. */
10635 /* Arithmetic. */
10636 set_optab_libfunc (add_optab, mode, NULL);
10637 set_optab_libfunc (sdiv_optab, mode, NULL);
10638 set_optab_libfunc (smul_optab, mode, NULL);
10639 set_optab_libfunc (neg_optab, mode, NULL);
10640 set_optab_libfunc (sub_optab, mode, NULL);
10642 /* Comparisons. */
10643 set_optab_libfunc (eq_optab, mode, NULL);
10644 set_optab_libfunc (ne_optab, mode, NULL);
10645 set_optab_libfunc (lt_optab, mode, NULL);
10646 set_optab_libfunc (le_optab, mode, NULL);
10647 set_optab_libfunc (ge_optab, mode, NULL);
10648 set_optab_libfunc (gt_optab, mode, NULL);
10649 set_optab_libfunc (unord_optab, mode, NULL);
10652 static void
10653 riscv_init_libfuncs (void)
10655 riscv_block_arith_comp_libfuncs_for_mode (HFmode);
10656 riscv_block_arith_comp_libfuncs_for_mode (BFmode);
10658 /* Convert between BFmode and HFmode using only trunc libfunc if needed. */
10659 set_conv_libfunc (sext_optab, BFmode, HFmode, "__trunchfbf2");
10660 set_conv_libfunc (sext_optab, HFmode, BFmode, "__truncbfhf2");
10661 set_conv_libfunc (trunc_optab, BFmode, HFmode, "__trunchfbf2");
10662 set_conv_libfunc (trunc_optab, HFmode, BFmode, "__truncbfhf2");
10665 #if CHECKING_P
10666 void
10667 riscv_reinit (void)
10669 riscv_option_override ();
10670 init_adjust_machine_modes ();
10671 init_derived_machine_modes ();
10672 reinit_regs ();
10673 init_optabs ();
10675 #endif
10677 #if CHECKING_P
10678 #undef TARGET_RUN_TARGET_SELFTESTS
10679 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
10680 #endif /* #if CHECKING_P */
10682 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
10684 static bool
10685 riscv_vector_mode_supported_p (machine_mode mode)
10687 if (TARGET_VECTOR)
10688 return riscv_v_ext_mode_p (mode);
10690 return false;
10693 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
10695 static bool
10696 riscv_verify_type_context (location_t loc, type_context_kind context,
10697 const_tree type, bool silent_p)
10699 return riscv_vector::verify_type_context (loc, context, type, silent_p);
10702 /* Implement TARGET_VECTOR_ALIGNMENT. */
10704 static HOST_WIDE_INT
10705 riscv_vector_alignment (const_tree type)
10707 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
10708 be set for non-predicate vectors of booleans. Modes are the most
10709 direct way we have of identifying real RVV predicate types. */
10710 /* FIXME: The RVV spec doesn't specify the alignment of bool vectors,
10711 so we use one-byte alignment. */
10712 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
10713 return 8;
10715 widest_int min_size
10716 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
10717 return wi::umin (min_size, 128).to_uhwi ();
10720 /* Implement REGMODE_NATURAL_SIZE. */
10722 poly_uint64
10723 riscv_regmode_natural_size (machine_mode mode)
10725 /* The natural size for RVV data modes is one RVV data vector,
10726 and similarly for predicates. We can't independently modify
10727 anything smaller than that. */
10728 /* ??? For now, only do this for variable-width RVV registers.
10729 Doing it for constant-sized registers breaks lower-subreg.c. */
10731 if (riscv_v_ext_mode_p (mode))
10733 poly_uint64 size = GET_MODE_SIZE (mode);
10734 if (riscv_v_ext_tuple_mode_p (mode))
10736 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
10737 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
10738 return size;
10740 else if (riscv_v_ext_vector_mode_p (mode))
10742 /* RVV mask modes always consume a single register. */
10743 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
10744 return BYTES_PER_RISCV_VECTOR;
10746 if (!size.is_constant ())
10747 return BYTES_PER_RISCV_VECTOR;
10748 else if (!riscv_v_ext_vls_mode_p (mode))
10749 /* For -march=rv64gc_zve32f, the natural vector register size
10750 is 32 bits, which is smaller than the scalar register size, so we
10751 return the minimum of the vector register size and the scalar
10752 register size. */
10753 return MIN (size.to_constant (), UNITS_PER_WORD);
10755 return UNITS_PER_WORD;
10758 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
10760 static unsigned int
10761 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
10762 int *offset)
10764 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
10765 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
10766 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1. */
10768 gcc_assert (i == 1);
10769 *factor = riscv_bytes_per_vector_chunk;
10770 *offset = 1;
10771 return RISCV_DWARF_VLENB;
10774 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
10776 static HOST_WIDE_INT
10777 riscv_estimated_poly_value (poly_int64 val,
10778 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
10780 if (TARGET_VECTOR)
10781 return riscv_vector::estimated_poly_value (val, kind);
10782 return default_estimated_poly_value (val, kind);
10785 /* Return true if the vector misalignment factor is supported by the
10786 target. */
10787 bool
10788 riscv_support_vector_misalignment (machine_mode mode,
10789 const_tree type ATTRIBUTE_UNUSED,
10790 int misalignment,
10791 bool is_packed ATTRIBUTE_UNUSED)
10793 /* Depend on movmisalign pattern. */
10794 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10795 is_packed);
10798 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
10800 static opt_machine_mode
10801 riscv_get_mask_mode (machine_mode mode)
10803 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
10804 return riscv_vector::get_mask_mode (mode);
10806 return default_get_mask_mode (mode);
10809 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
10810 it isn't worth branching around empty masked ops (including masked
10811 stores). */
10813 static bool
10814 riscv_empty_mask_is_expensive (unsigned)
10816 return false;
10819 /* Return true if a shift amount matches the trailing cleared bits of
10820 a bitmask. */
10822 bool
10823 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
10825 return shamt == ctz_hwi (mask);
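/* E.g. shamt == 3 matches mask 0xfffffff8, whose three low bits are
   clear: ctz_hwi (0xfffffff8) == 3.  */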
10828 static HARD_REG_SET
10829 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10831 HARD_REG_SET zeroed_hardregs;
10832 CLEAR_HARD_REG_SET (zeroed_hardregs);
10834 /* Find a register to hold vl. */
10835 unsigned vl_regno = INVALID_REGNUM;
10836 /* Skip the first GPR (x0); otherwise the existing vl would be kept,
10837 since vl and avl would be the same. */
10838 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
10840 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10842 vl_regno = regno;
10843 break;
10847 if (vl_regno > GP_REG_LAST)
10848 sorry ("cannot allocate vl register for %qs on this target",
10849 "-fzero-call-used-regs");
10851 /* Vector configurations need not be saved and restored here. The
10852 -fzero-call-used-regs=* option will zero all vector registers and
10853 return, so there are no vector operations between them. */
10855 bool emitted_vlmax_vsetvl = false;
10856 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
10857 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
10859 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
10861 rtx target = regno_reg_rtx[regno];
10862 machine_mode mode = GET_MODE (target);
10864 if (!emitted_vlmax_vsetvl)
10866 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
10867 emitted_vlmax_vsetvl = true;
10870 rtx ops[] = {target, CONST0_RTX (mode)};
10871 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
10872 riscv_vector::UNARY_OP, ops, vl);
10874 SET_HARD_REG_BIT (zeroed_hardregs, regno);
10878 return zeroed_hardregs;
10881 /* Generate a sequence of instructions that zero registers specified by
10882 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
10883 zeroed. */
10884 HARD_REG_SET
10885 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
10887 HARD_REG_SET zeroed_hardregs;
10888 CLEAR_HARD_REG_SET (zeroed_hardregs);
10890 if (TARGET_VECTOR)
10891 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
10893 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
10894 & ~zeroed_hardregs);
10897 /* Implement target hook TARGET_ARRAY_MODE. */
10899 static opt_machine_mode
10900 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
10902 machine_mode vmode;
10903 if (TARGET_VECTOR
10904 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
10905 return vmode;
10907 return opt_machine_mode ();
10910 /* Given memory reference MEM, expand code to compute the aligned
10911 memory address, shift and mask values and store them into
10912 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
10914 void
10915 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
10916 rtx *not_mask)
10918 /* Align the memory address to a word. */
10919 rtx addr = force_reg (Pmode, XEXP (mem, 0));
10921 rtx addr_mask = gen_int_mode (-4, Pmode);
10923 rtx aligned_addr = gen_reg_rtx (Pmode);
10924 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
10926 *aligned_mem = change_address (mem, SImode, aligned_addr);
10928 /* Calculate the shift amount. */
10929 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
10930 gen_int_mode (3, SImode)));
10931 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
10932 gen_int_mode (3, SImode)));
10934 /* Calculate the mask. */
10935 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
10937 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
10939 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
10940 gen_lowpart (QImode, *shift)));
10942 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
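/* Worked example: for a HImode MEM at BASE + 2 this computes
   *aligned_mem = an SImode MEM at BASE, *shift = (2 & 3) << 3 == 16,
   *mask = 0xffff << 16 and *not_mask = ~(0xffff << 16), ready for a
   word-sized read-modify-write of the subword (as used when expanding
   subword atomics).  */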
10945 /* Leftshift a subword within an SImode register. */
10947 void
10948 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
10949 rtx *shifted_value)
10951 rtx value_reg = gen_reg_rtx (SImode);
10952 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
10953 mode, 0));
10955 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
10956 gen_lowpart (QImode, shift)));
10959 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
10960 allows the behavior to be tuned for specific implementations as well as
10961 when optimizing for size. */
10963 bool
10964 riscv_use_divmod_expander (void)
10966 return tune_param->use_divmod_expansion;
10969 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
10971 static machine_mode
10972 riscv_preferred_simd_mode (scalar_mode mode)
10974 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
10975 return riscv_vector::preferred_simd_mode (mode);
10977 return word_mode;
10980 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
10982 static poly_uint64
10983 riscv_vectorize_preferred_vector_alignment (const_tree type)
10985 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
10986 return TYPE_ALIGN (TREE_TYPE (type));
10987 return TYPE_ALIGN (type);
10990 /* Return true if MODE is a static FRM rounding mode. */
10992 static bool
10993 riscv_static_frm_mode_p (int mode)
10995 switch (mode)
10997 case riscv_vector::FRM_RDN:
10998 case riscv_vector::FRM_RUP:
10999 case riscv_vector::FRM_RTZ:
11000 case riscv_vector::FRM_RMM:
11001 case riscv_vector::FRM_RNE:
11002 return true;
11003 default:
11004 return false;
11007 gcc_unreachable ();
11010 /* Implement the floating-point Mode Switching. */
11012 static void
11013 riscv_emit_frm_mode_set (int mode, int prev_mode)
11015 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
11017 if (prev_mode == riscv_vector::FRM_DYN_CALL)
11018 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
11020 if (mode != prev_mode)
11022 rtx frm = gen_int_mode (mode, SImode);
11024 if (mode == riscv_vector::FRM_DYN_CALL
11025 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
11026 /* No need to emit when prev mode is DYN already. */
11027 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11028 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
11029 && prev_mode != riscv_vector::FRM_DYN
11030 && prev_mode != riscv_vector::FRM_DYN_CALL)
11031 /* No need to emit when prev mode is DYN or DYN_CALL already. */
11032 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11033 else if (mode == riscv_vector::FRM_DYN
11034 && prev_mode != riscv_vector::FRM_DYN_CALL)
11035 /* Restore frm value from backup when switch to DYN mode. */
11036 emit_insn (gen_fsrmsi_restore (backup_reg));
11037 else if (riscv_static_frm_mode_p (mode))
11038 /* Set frm value when switch to static mode. */
11039 emit_insn (gen_fsrmsi_restore (frm));
11043 /* Implement Mode switching. */
11045 static void
11046 riscv_emit_mode_set (int entity, int mode, int prev_mode,
11047 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
11049 switch (entity)
11051 case RISCV_VXRM:
11052 if (mode != VXRM_MODE_NONE && mode != prev_mode)
11053 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
11054 break;
11055 case RISCV_FRM:
11056 riscv_emit_frm_mode_set (mode, prev_mode);
11057 break;
11058 default:
11059 gcc_unreachable ();
11063 /* Adjust the FRM_NONE insn after a call to FRM_DYN for the
11064 underlying emit. */
11066 static int
11067 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
11069 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
11071 if (insn && CALL_P (insn))
11072 return riscv_vector::FRM_DYN;
11074 return mode;
11077 /* Insert the frm backup insn at the end of the bb if and only if the call
11078 is the last insn of this bb. */
11080 static void
11081 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
11083 edge eg;
11084 bool abnormal_edge_p = false;
11085 edge_iterator eg_iterator;
11086 basic_block bb = BLOCK_FOR_INSN (cur_insn);
11088 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
11090 if (eg->flags & EDGE_ABNORMAL)
11091 abnormal_edge_p = true;
11092 else
11094 start_sequence ();
11095 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11096 rtx_insn *backup_insn = get_insns ();
11097 end_sequence ();
11099 insert_insn_on_edge (backup_insn, eg);
11103 if (abnormal_edge_p)
11105 start_sequence ();
11106 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11107 rtx_insn *backup_insn = get_insns ();
11108 end_sequence ();
11110 insert_insn_end_basic_block (backup_insn, bb);
11113 commit_edge_insertions ();
11116 /* Return mode that frm must be switched into
11117 prior to the execution of insn. */
11119 static int
11120 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
11122 if (!DYNAMIC_FRM_RTL (cfun))
11124 /* The dynamic frm will be initialized only once during cfun. */
11125 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
11126 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11129 if (CALL_P (cur_insn))
11131 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
11133 if (!insn)
11134 riscv_frm_emit_after_bb_end (cur_insn);
11136 return riscv_vector::FRM_DYN_CALL;
11139 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
11141 if (mode == riscv_vector::FRM_NONE)
11142 /* After meeting a call, we need to back up the frm because it may be
11143 updated during the call. Here, for each insn, we check whether
11144 the previous insn is a call or not. When the previous insn is a call,
11145 there are 2 cases for the emitted mode set.
11147 1. The current insn is not MODE_NONE; the mode switching framework
11148 will do the mode switch from MODE_CALL to MODE_NONE natively.
11149 2. The current insn is MODE_NONE; we need to adjust the MODE_NONE to
11150 MODE_DYN, and leave it to the mode switching itself to perform
11151 the emit mode set. */
11153 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
11155 return mode;
11158 /* Return mode that entity must be switched into
11159 prior to the execution of insn. */
11161 static int
11162 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
11164 int code = recog_memoized (insn);
11166 switch (entity)
11168 case RISCV_VXRM:
11169 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
11170 case RISCV_FRM:
11171 return riscv_frm_mode_needed (insn, code);
11172 default:
11173 gcc_unreachable ();
11177 /* Return TRUE if an insn is an asm insn. */
11179 static bool
11180 asm_insn_p (rtx_insn *insn)
11182 extract_insn (insn);
11184 return recog_data.is_asm;
11189 /* Return TRUE if an insn's effect on VXRM is unknown. */
11189 static bool
11190 vxrm_unknown_p (rtx_insn *insn)
11192 /* Return true if there is a definition of VXRM. */
11193 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
11194 return true;
11196 /* A CALL function may contain an instruction that modifies the VXRM,
11197 return true in this situation. */
11198 if (CALL_P (insn))
11199 return true;
11201 /* Return true for all assembly since users may hardcode a assembly
11202 like this: asm volatile ("csrwi vxrm, 0"). */
11203 if (asm_insn_p (insn))
11204 return true;
11206 return false;
/* Return TRUE if an insn sets FRM to an unknown, dynamic value.  */

static bool
frm_unknown_dynamic_p (rtx_insn *insn)
{
  /* Return true if there is a definition of FRM.  */
  if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
    return true;

  return false;
}
/* Return the mode that an insn results in for VXRM.  */

static int
riscv_vxrm_mode_after (rtx_insn *insn, int mode)
{
  if (vxrm_unknown_p (insn))
    return VXRM_MODE_NONE;

  if (recog_memoized (insn) < 0)
    return mode;

  if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
    return get_attr_vxrm_mode (insn);
  else
    return mode;
}
/* Return the mode that an insn results in for FRM.  */

static int
riscv_frm_mode_after (rtx_insn *insn, int mode)
{
  STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);

  if (CALL_P (insn))
    return mode;

  if (frm_unknown_dynamic_p (insn))
    return riscv_vector::FRM_DYN;

  if (recog_memoized (insn) < 0)
    return mode;

  if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
    return get_attr_frm_mode (insn);
  else
    return mode;
}
/* Return the mode that an insn results in.  */

static int
riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return riscv_vxrm_mode_after (insn, mode);
    case RISCV_FRM:
      return riscv_frm_mode_after (insn, mode);
    default:
      gcc_unreachable ();
    }
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
riscv_mode_entry (int entity)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return VXRM_MODE_NONE;
    case RISCV_FRM:
      {
        /* According to the RVV 1.0 spec, all vector floating-point
           operations use the dynamic rounding mode in the frm register.
           Likewise in other similar places.  */
        return riscv_vector::FRM_DYN;
      }
    default:
      gcc_unreachable ();
    }
}
/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
riscv_mode_exit (int entity)
{
  switch (entity)
    {
    case RISCV_VXRM:
      return VXRM_MODE_NONE;
    case RISCV_FRM:
      return riscv_vector::FRM_DYN_EXIT;
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_MODE_PRIORITY.  */

static int
riscv_mode_priority (int, int n)
{
  return n;
}
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */

unsigned int
riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
    return riscv_vector::autovectorize_vector_modes (modes, all);

  return default_autovectorize_vector_modes (modes, all);
}
/* Implement TARGET_VECTORIZE_RELATED_MODE.  */

opt_machine_mode
riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
                              poly_uint64 nunits)
{
  if (TARGET_VECTOR)
    return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
                                                 nunits);
  return default_vectorize_related_mode (vector_mode, element_mode, nunits);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
                                rtx target, rtx op0, rtx op1,
                                const vec_perm_indices &sel)
{
  if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
    return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
                                                op1, sel);

  return false;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
riscv_frame_pointer_required (void)
{
  return riscv_save_frame_pointer && !crtl->is_leaf;
}
/* Return the appropriate common costs according to VECTYPE from COSTS.  */

static const common_vector_cost *
get_common_costs (const cpu_vector_cost *costs, tree vectype)
{
  gcc_assert (costs);

  if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
    return costs->vls;
  return costs->vla;
}
/* Return the CPU vector costs according to -mtune if tune info has non-NULL
   vector cost.  Otherwise, return the default generic vector costs.  */

const cpu_vector_cost *
get_vector_costs ()
{
  const cpu_vector_cost *costs = tune_param->vec_costs;
  if (!costs)
    return &generic_vector_cost;
  return costs;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                  tree vectype, int misalign ATTRIBUTE_UNUSED)
{
  const cpu_vector_cost *costs = get_vector_costs ();
  bool fp = false;

  if (vectype != NULL)
    fp = FLOAT_TYPE_P (vectype);

  const common_vector_cost *common_costs = get_common_costs (costs, vectype);
  gcc_assert (common_costs != NULL);
  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;

    case scalar_load:
      return costs->scalar_load_cost;

    case scalar_store:
      return costs->scalar_store_cost;

    case vector_stmt:
      return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;

    case vector_load:
      return common_costs->align_load_cost;

    case vector_store:
      return common_costs->align_store_cost;

    case vec_to_scalar:
      return common_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return common_costs->scalar_to_vec_cost;

    case unaligned_load:
      return common_costs->unalign_load_cost;
    case vector_gather_load:
      return common_costs->gather_load_cost;

    case unaligned_store:
      return common_costs->unalign_store_cost;
    case vector_scatter_store:
      return common_costs->scatter_store_cost;

    case cond_branch_taken:
      return costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return costs->cond_not_taken_branch_cost;

    case vec_perm:
      return common_costs->permute_cost;

    case vec_promote_demote:
      return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;

    case vec_construct:
      return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));

    default:
      gcc_unreachable ();
    }

  return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
}
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  if (TARGET_VECTOR)
    return new riscv_vector::costs (vinfo, costing_for_scalar);
  /* Default vector costs.  */
  return new vector_costs (vinfo, costing_for_scalar);
}
/* Implement TARGET_PREFERRED_ELSE_VALUE.  */

static tree
riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
                            tree *ops)
{
  if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
    {
      tree tmp_var = create_tmp_var (vectype);
      TREE_NO_WARNING (tmp_var) = 1;
      return get_or_create_ssa_default_def (cfun, tmp_var);
    }

  return default_preferred_else_value (ifn, vectype, nops, ops);
}
/* If MEM is in the form of "base+offset", extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */

bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
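/* A minimal usage sketch (hypothetical caller, not part of this file):
   since BASE is always a REG and OFFSET a CONST_INT on success (const0_rtx
   for a plain register address), peephole-style code can split two MEMs
   and compare their offsets directly:

     rtx base1, offset1, base2, offset2;
     if (extract_base_offset_in_addr (mem1, &base1, &offset1)
         && extract_base_offset_in_addr (mem2, &base2, &offset2)
         && REGNO (base1) == REGNO (base2))
       {
         HOST_WIDE_INT delta = INTVAL (offset2) - INTVAL (offset1);
         ...
       }
*/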
/* Implements target hook vector_mode_supported_any_target_p.  */

static bool
riscv_vector_mode_supported_any_target_p (machine_mode)
{
  if (TARGET_XTHEADVECTOR)
    return false;
  return true;
}
/* Implements hook TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
riscv_function_value_regno_p (const unsigned regno)
{
  if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST)
    return true;

  if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
    return true;

  if (TARGET_VECTOR && regno == V_RETURN)
    return true;

  return false;
}
/* Implements hook TARGET_GET_RAW_RESULT_MODE.  */

static fixed_size_mode
riscv_get_raw_result_mode (int regno)
{
  if (!is_a <fixed_size_mode> (reg_raw_mode[regno]))
    return as_a <fixed_size_mode> (VOIDmode);

  return default_get_reg_raw_mode (regno);
}
/* Implements the unsigned saturation add standard name usadd for int mode.

   z = SAT_ADD(x, y).

   1. sum = x + y.
   2. sum = truncate (sum) for QI and HI only.
   3. lt = sum < x.
   4. lt = -lt.
   5. z = sum | lt.  */

void
riscv_expand_usadd (rtx dest, rtx x, rtx y)
{
  machine_mode mode = GET_MODE (dest);
  rtx xmode_sum = gen_reg_rtx (Xmode);
  rtx xmode_lt = gen_reg_rtx (Xmode);
  rtx xmode_x = gen_lowpart (Xmode, x);
  rtx xmode_y = gen_lowpart (Xmode, y);
  rtx xmode_dest = gen_reg_rtx (Xmode);

  /* Step-1: sum = x + y  */
  if (mode == SImode && mode != Xmode)
    { /* Take addw to avoid the sum truncation.  */
      rtx simode_sum = gen_reg_rtx (SImode);
      riscv_emit_binary (PLUS, simode_sum, x, y);
      emit_move_insn (xmode_sum, gen_lowpart (Xmode, simode_sum));
    }
  else
    riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);

  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  */
  if (mode == HImode || mode == QImode)
    {
      int shift_bits = GET_MODE_BITSIZE (Xmode)
        - GET_MODE_BITSIZE (mode).to_constant ();

      gcc_assert (shift_bits > 0);

      riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
      riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
    }

  /* Step-2: lt = sum < x  */
  riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);

  /* Step-3: lt = -lt  */
  riscv_emit_unary (NEG, xmode_lt, xmode_lt);

  /* Step-4: xmode_dest = sum | lt  */
  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);

  /* Step-5: dest = xmode_dest  */
  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
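/* A minimal C model of the branchless sequence above, for illustration
   only (`sat_add_u32_model' is a hypothetical name and this block is not
   part of the build).  When the addition wraps, the mask is all-ones, so
   the IOR saturates the result to the type maximum.  */
#if 0
#include <stdint.h>

static inline uint32_t
sat_add_u32_model (uint32_t x, uint32_t y)
{
  uint32_t sum = x + y;                  /* Step-1: add, may wrap.  */
  uint32_t lt = - (uint32_t) (sum < x);  /* Steps 3-4: all-ones iff wrapped.  */
  return sum | lt;                       /* Step-5: saturate to UINT32_MAX.  */
}
#endif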
/* Implements the unsigned saturation sub standard name ussub for int mode.

   z = SAT_SUB(x, y).

   1. minus = x - y.
   2. lt = x < y.
   3. lt = lt - 1.
   4. z = minus & lt.  */

void
riscv_expand_ussub (rtx dest, rtx x, rtx y)
{
  machine_mode mode = GET_MODE (dest);
  rtx xmode_x = gen_lowpart (Xmode, x);
  rtx xmode_y = gen_lowpart (Xmode, y);
  rtx xmode_lt = gen_reg_rtx (Xmode);
  rtx xmode_minus = gen_reg_rtx (Xmode);
  rtx xmode_dest = gen_reg_rtx (Xmode);

  /* Step-1: minus = x - y  */
  riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);

  /* Step-2: lt = x < y  */
  riscv_emit_binary (LTU, xmode_lt, xmode_x, xmode_y);

  /* Step-3: lt = lt - 1 (lt + (-1))  */
  riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));

  /* Step-4: xmode_dest = minus & lt  */
  riscv_emit_binary (AND, xmode_dest, xmode_lt, xmode_minus);

  /* Step-5: dest = xmode_dest  */
  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
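/* A matching C model of the sequence above, for illustration only
   (`sat_sub_u32_model' is a hypothetical name; not part of the build).
   When X < Y the mask becomes zero, clamping the wrapped difference
   to zero.  */
#if 0
#include <stdint.h>

static inline uint32_t
sat_sub_u32_model (uint32_t x, uint32_t y)
{
  uint32_t minus = x - y;                  /* Step-1: subtract, may wrap.  */
  uint32_t mask = (uint32_t) (x < y) - 1;  /* Steps 2-3: 0 iff borrowed.  */
  return minus & mask;                     /* Step-4: clamp to zero.  */
}
#endif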
/* Implement the unsigned saturation truncation for int mode.

   b = SAT_TRUNC (a);

   1. max = the max value of the truncated (narrower) mode
   2. lt = a < max
   3. lt = lt - 1 (lt: 0, ge: -1)
   4. d = a | lt
   5. b = (trunc) d  */

void
riscv_expand_ustrunc (rtx dest, rtx src)
{
  machine_mode mode = GET_MODE (dest);
  rtx xmode_max = gen_reg_rtx (Xmode);
  unsigned precision = GET_MODE_PRECISION (mode).to_constant ();

  gcc_assert (precision < 64);

  uint64_t max = ((uint64_t) 1u << precision) - 1u;
  rtx xmode_src = gen_lowpart (Xmode, src);
  rtx xmode_dest = gen_reg_rtx (Xmode);
  rtx xmode_lt = gen_reg_rtx (Xmode);

  /* Step-1: max = the max value of the truncated mode  */
  emit_move_insn (xmode_max, gen_int_mode (max, Xmode));

  /* Step-2: lt = src < max  */
  riscv_emit_binary (LTU, xmode_lt, xmode_src, xmode_max);

  /* Step-3: lt = lt - 1  */
  riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));

  /* Step-4: xmode_dest = lt | src  */
  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_src);

  /* Step-5: dest = xmode_dest  */
  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
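/* A C model of the sequence above for the SImode -> HImode case, for
   illustration only (`sat_trunc_u32_to_u16_model' is a hypothetical name;
   not part of the build).  Values at or above the narrow maximum force
   the mask to all-ones, so the truncation yields 0xffff.  */
#if 0
#include <stdint.h>

static inline uint16_t
sat_trunc_u32_to_u16_model (uint32_t a)
{
  uint32_t max = 0xffffu;                  /* Step-1: narrow-mode max.  */
  uint32_t lt = (uint32_t) (a < max) - 1;  /* Steps 2-3: 0 iff in range.  */
  return (uint16_t) (a | lt);              /* Steps 4-5: saturating trunc.  */
}
#endif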
/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
   TI_LONG_DOUBLE_TYPE, which is for the long double type; use the
   default for the others.  */

static machine_mode
riscv_c_mode_for_floating_type (enum tree_index ti)
{
  if (ti == TI_LONG_DOUBLE_TYPE)
    return TFmode;
  return default_mode_for_floating_type (ti);
}
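/* Illustrative consequence (plain C, assuming the RISC-V psABI
   quad-precision long double; not part of the build):

     long double ld;   // TFmode: 128-bit IEEE-754 binary128
     double d;         // DFmode, via the default hook

   so sizeof (long double) == 16 on this target.  */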
/* Initialize the GCC target structure.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE riscv_option_override

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE riscv_option_restore

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS riscv_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST riscv_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST riscv_insn_cost

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START riscv_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END file_end_indicate_exec_stack

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY riscv_return_in_memory

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND riscv_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG riscv_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
#undef TARGET_FNTYPE_ABI
#define TARGET_FNTYPE_ABI riscv_fntype_abi
#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  riscv_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  riscv_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  riscv_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  riscv_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  riscv_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  riscv_set_handled_components
/* The generic ELF target does not always have TLS support.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE riscv_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT riscv_trampoline_init

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p

#undef TARGET_HAVE_SRODATA_SECTION
#define TARGET_HAVE_SRODATA_SECTION true

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION riscv_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION riscv_unique_section

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY riscv_register_priority

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS riscv_init_builtins

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL riscv_builtin_decl

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN riscv_expand_builtin

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment

#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE riscv_attribute_table

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN riscv_warn_func_return

/* The low bit is ignored by jump instructions so is safe to use.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg

#undef TARGET_NEW_ADDRESS_PROFITABLE_P
#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE riscv_mangle_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  riscv_libgcc_floating_mode_supported_p

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS riscv_init_libfuncs

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION riscv_excess_precision

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE riscv_floatn_mode

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
#endif
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p

#undef TARGET_VERIFY_TYPE_CONTEXT
#define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode

#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE riscv_array_mode

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  riscv_vectorize_preferred_vector_alignment

/* Mode switching hooks.  */

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT riscv_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED riscv_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER riscv_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY riscv_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT riscv_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY riscv_mode_priority

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  riscv_autovectorize_vector_modes

#undef TARGET_VECTORIZE_RELATED_MODE
#define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  riscv_builtin_vectorization_cost

#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs

#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value

#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p

#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-riscv.h"