1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2024 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
27 #include "coretypes.h"
30 #include "target-globals.h"
39 #include "stringpool.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
55 #include "conditions.h"
57 #include "insn-attr.h"
69 #include "cfgcleanup.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-iterator.h"
74 #include "gimple-fold.h"
78 #include "tree-pass.h"
83 #include "tm-constrs.h"
85 #include "symbol-summary.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
90 /* This file should be included last. */
91 #include "target-def.h"
/* Forward declaration; the definition is not visible in this extraction.  */
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode
);
95 /* Remember the last target of s390_set_current_function.  */
/* GTY(()) registers the tree with GCC's garbage collector so the cached
   fndecl survives collections.  */
96 static GTY(()) tree s390_previous_fndecl
;
98 /* Define the specific costs for a given cpu.  */
/* Per-CPU instruction cost table.  Each field holds the cost (in
   COSTS_N_INSNS units, see the initializers below) of the named machine
   instruction.  NOTE(review): this extraction is missing lines; the
   opening brace and several fields (e.g. the divide costs referenced by
   the initializers: DXBR/DDBR/DEBR/DLGR/DLR/DR/DSGFR/DSGR) are not
   visible here.  */
100 struct processor_costs
103 const int m
; /* cost of an M instruction. */
104 const int mghi
; /* cost of an MGHI instruction. */
105 const int mh
; /* cost of an MH instruction. */
106 const int mhi
; /* cost of an MHI instruction. */
107 const int ml
; /* cost of an ML instruction. */
108 const int mr
; /* cost of an MR instruction. */
109 const int ms
; /* cost of an MS instruction. */
110 const int msg
; /* cost of an MSG instruction. */
111 const int msgf
; /* cost of an MSGF instruction. */
112 const int msgfr
; /* cost of an MSGFR instruction. */
113 const int msgr
; /* cost of an MSGR instruction. */
114 const int msr
; /* cost of an MSR instruction. */
115 const int mult_df
; /* cost of multiplication in DFmode. */
118 const int sqxbr
; /* cost of square root in TFmode. */
119 const int sqdbr
; /* cost of square root in DFmode. */
120 const int sqebr
; /* cost of square root in SFmode. */
121 /* multiply and add */
122 const int madbr
; /* cost of multiply and add in DFmode. */
123 const int maebr
; /* cost of multiply and add in SFmode. */
/* Convenience accessor: the cost table currently in effect.
   s390_cost_pointer is defined elsewhere in the original file.  */
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
/* Instruction costs for the z900 (2064) processor.  */
138 struct processor_costs z900_cost
=
140 COSTS_N_INSNS (5), /* M */
141 COSTS_N_INSNS (10), /* MGHI */
142 COSTS_N_INSNS (5), /* MH */
143 COSTS_N_INSNS (4), /* MHI */
144 COSTS_N_INSNS (5), /* ML */
145 COSTS_N_INSNS (5), /* MR */
146 COSTS_N_INSNS (4), /* MS */
147 COSTS_N_INSNS (15), /* MSG */
148 COSTS_N_INSNS (7), /* MSGF */
149 COSTS_N_INSNS (7), /* MSGFR */
150 COSTS_N_INSNS (10), /* MSGR */
151 COSTS_N_INSNS (4), /* MSR */
152 COSTS_N_INSNS (7), /* multiplication in DFmode */
153 COSTS_N_INSNS (13), /* MXBR */
154 COSTS_N_INSNS (136), /* SQXBR */
155 COSTS_N_INSNS (44), /* SQDBR */
156 COSTS_N_INSNS (35), /* SQEBR */
157 COSTS_N_INSNS (18), /* MADBR */
158 COSTS_N_INSNS (13), /* MAEBR */
159 COSTS_N_INSNS (134), /* DXBR */
160 COSTS_N_INSNS (30), /* DDBR */
161 COSTS_N_INSNS (27), /* DEBR */
162 COSTS_N_INSNS (220), /* DLGR */
163 COSTS_N_INSNS (34), /* DLR */
164 COSTS_N_INSNS (34), /* DR */
165 COSTS_N_INSNS (32), /* DSGFR */
166 COSTS_N_INSNS (32), /* DSGR */
/* Instruction costs for the z990 (2084) processor.  */
170 struct processor_costs z990_cost
=
172 COSTS_N_INSNS (4), /* M */
173 COSTS_N_INSNS (2), /* MGHI */
174 COSTS_N_INSNS (2), /* MH */
175 COSTS_N_INSNS (2), /* MHI */
176 COSTS_N_INSNS (4), /* ML */
177 COSTS_N_INSNS (4), /* MR */
178 COSTS_N_INSNS (5), /* MS */
179 COSTS_N_INSNS (6), /* MSG */
180 COSTS_N_INSNS (4), /* MSGF */
181 COSTS_N_INSNS (4), /* MSGFR */
182 COSTS_N_INSNS (4), /* MSGR */
183 COSTS_N_INSNS (4), /* MSR */
184 COSTS_N_INSNS (1), /* multiplication in DFmode */
185 COSTS_N_INSNS (28), /* MXBR */
186 COSTS_N_INSNS (130), /* SQXBR */
187 COSTS_N_INSNS (66), /* SQDBR */
188 COSTS_N_INSNS (38), /* SQEBR */
189 COSTS_N_INSNS (1), /* MADBR */
190 COSTS_N_INSNS (1), /* MAEBR */
191 COSTS_N_INSNS (60), /* DXBR */
192 COSTS_N_INSNS (40), /* DDBR */
193 COSTS_N_INSNS (26), /* DEBR */
194 COSTS_N_INSNS (176), /* DLGR */
195 COSTS_N_INSNS (31), /* DLR */
196 COSTS_N_INSNS (31), /* DR */
197 COSTS_N_INSNS (31), /* DSGFR */
198 COSTS_N_INSNS (31), /* DSGR */
/* Instruction costs for the z9-109 (2094) processor; per processor_table
   below this table is also used for z9-ec.  */
202 struct processor_costs z9_109_cost
=
204 COSTS_N_INSNS (4), /* M */
205 COSTS_N_INSNS (2), /* MGHI */
206 COSTS_N_INSNS (2), /* MH */
207 COSTS_N_INSNS (2), /* MHI */
208 COSTS_N_INSNS (4), /* ML */
209 COSTS_N_INSNS (4), /* MR */
210 COSTS_N_INSNS (5), /* MS */
211 COSTS_N_INSNS (6), /* MSG */
212 COSTS_N_INSNS (4), /* MSGF */
213 COSTS_N_INSNS (4), /* MSGFR */
214 COSTS_N_INSNS (4), /* MSGR */
215 COSTS_N_INSNS (4), /* MSR */
216 COSTS_N_INSNS (1), /* multiplication in DFmode */
217 COSTS_N_INSNS (28), /* MXBR */
218 COSTS_N_INSNS (130), /* SQXBR */
219 COSTS_N_INSNS (66), /* SQDBR */
220 COSTS_N_INSNS (38), /* SQEBR */
221 COSTS_N_INSNS (1), /* MADBR */
222 COSTS_N_INSNS (1), /* MAEBR */
223 COSTS_N_INSNS (60), /* DXBR */
224 COSTS_N_INSNS (40), /* DDBR */
225 COSTS_N_INSNS (26), /* DEBR */
226 COSTS_N_INSNS (30), /* DLGR */
227 COSTS_N_INSNS (23), /* DLR */
228 COSTS_N_INSNS (23), /* DR */
229 COSTS_N_INSNS (24), /* DSGFR */
230 COSTS_N_INSNS (24), /* DSGR */
/* Instruction costs for the z10 (2097) processor.  */
234 struct processor_costs z10_cost
=
236 COSTS_N_INSNS (10), /* M */
237 COSTS_N_INSNS (10), /* MGHI */
238 COSTS_N_INSNS (10), /* MH */
239 COSTS_N_INSNS (10), /* MHI */
240 COSTS_N_INSNS (10), /* ML */
241 COSTS_N_INSNS (10), /* MR */
242 COSTS_N_INSNS (10), /* MS */
243 COSTS_N_INSNS (10), /* MSG */
244 COSTS_N_INSNS (10), /* MSGF */
245 COSTS_N_INSNS (10), /* MSGFR */
246 COSTS_N_INSNS (10), /* MSGR */
247 COSTS_N_INSNS (10), /* MSR */
248 COSTS_N_INSNS (1) , /* multiplication in DFmode */
249 COSTS_N_INSNS (50), /* MXBR */
250 COSTS_N_INSNS (120), /* SQXBR */
251 COSTS_N_INSNS (52), /* SQDBR */
252 COSTS_N_INSNS (38), /* SQEBR */
253 COSTS_N_INSNS (1), /* MADBR */
254 COSTS_N_INSNS (1), /* MAEBR */
255 COSTS_N_INSNS (111), /* DXBR */
256 COSTS_N_INSNS (39), /* DDBR */
257 COSTS_N_INSNS (32), /* DEBR */
258 COSTS_N_INSNS (160), /* DLGR */
259 COSTS_N_INSNS (71), /* DLR */
260 COSTS_N_INSNS (71), /* DR */
261 COSTS_N_INSNS (71), /* DSGFR */
262 COSTS_N_INSNS (71), /* DSGR */
/* Instruction costs for the z196 (2817) processor.  "B+n" in the FP
   comments denotes n cycles on top of a base latency B; "cracked" /
   "expanded" mark insns split into multiple micro-ops.  */
266 struct processor_costs z196_cost
=
268 COSTS_N_INSNS (7), /* M */
269 COSTS_N_INSNS (5), /* MGHI */
270 COSTS_N_INSNS (5), /* MH */
271 COSTS_N_INSNS (5), /* MHI */
272 COSTS_N_INSNS (7), /* ML */
273 COSTS_N_INSNS (7), /* MR */
274 COSTS_N_INSNS (6), /* MS */
275 COSTS_N_INSNS (8), /* MSG */
276 COSTS_N_INSNS (6), /* MSGF */
277 COSTS_N_INSNS (6), /* MSGFR */
278 COSTS_N_INSNS (8), /* MSGR */
279 COSTS_N_INSNS (6), /* MSR */
280 COSTS_N_INSNS (1) , /* multiplication in DFmode */
281 COSTS_N_INSNS (40), /* MXBR B+40 */
282 COSTS_N_INSNS (100), /* SQXBR B+100 */
283 COSTS_N_INSNS (42), /* SQDBR B+42 */
284 COSTS_N_INSNS (28), /* SQEBR B+28 */
285 COSTS_N_INSNS (1), /* MADBR B */
286 COSTS_N_INSNS (1), /* MAEBR B */
287 COSTS_N_INSNS (101), /* DXBR B+101 */
288 COSTS_N_INSNS (29), /* DDBR */
289 COSTS_N_INSNS (22), /* DEBR */
290 COSTS_N_INSNS (160), /* DLGR cracked */
291 COSTS_N_INSNS (160), /* DLR cracked */
292 COSTS_N_INSNS (160), /* DR expanded */
293 COSTS_N_INSNS (160), /* DSGFR cracked */
294 COSTS_N_INSNS (160), /* DSGR cracked */
/* Instruction costs for the zEC12 (2827) processor; per processor_table
   below this table is also reused for z13 and later CPUs.  */
298 struct processor_costs zEC12_cost
=
300 COSTS_N_INSNS (7), /* M */
301 COSTS_N_INSNS (5), /* MGHI */
302 COSTS_N_INSNS (5), /* MH */
303 COSTS_N_INSNS (5), /* MHI */
304 COSTS_N_INSNS (7), /* ML */
305 COSTS_N_INSNS (7), /* MR */
306 COSTS_N_INSNS (6), /* MS */
307 COSTS_N_INSNS (8), /* MSG */
308 COSTS_N_INSNS (6), /* MSGF */
309 COSTS_N_INSNS (6), /* MSGFR */
310 COSTS_N_INSNS (8), /* MSGR */
311 COSTS_N_INSNS (6), /* MSR */
312 COSTS_N_INSNS (1) , /* multiplication in DFmode */
313 COSTS_N_INSNS (40), /* MXBR B+40 */
314 COSTS_N_INSNS (100), /* SQXBR B+100 */
315 COSTS_N_INSNS (42), /* SQDBR B+42 */
316 COSTS_N_INSNS (28), /* SQEBR B+28 */
317 COSTS_N_INSNS (1), /* MADBR B */
318 COSTS_N_INSNS (1), /* MAEBR B */
319 COSTS_N_INSNS (131), /* DXBR B+131 */
320 COSTS_N_INSNS (29), /* DDBR */
321 COSTS_N_INSNS (22), /* DEBR */
322 COSTS_N_INSNS (160), /* DLGR cracked */
323 COSTS_N_INSNS (160), /* DLR cracked */
324 COSTS_N_INSNS (160), /* DR expanded */
325 COSTS_N_INSNS (160), /* DSGFR cracked */
326 COSTS_N_INSNS (160), /* DSGR cracked */
/* Table of supported CPUs: each entry gives the -march/-mtune name, the
   architecture mnemonic, the processor enum value, the cost table to use,
   and a trailing number (presumably the architecture level -- TODO
   confirm against the s390_processor declaration, which is not visible
   here).  The "native" sentinel entry carries no cost table.  */
329 const struct s390_processor processor_table
[] =
331 { "z900", "z900", PROCESSOR_2064_Z900
, &z900_cost
, 5 },
332 { "z990", "z990", PROCESSOR_2084_Z990
, &z990_cost
, 6 },
333 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109
, &z9_109_cost
, 7 },
334 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC
, &z9_109_cost
, 7 },
335 { "z10", "z10", PROCESSOR_2097_Z10
, &z10_cost
, 8 },
336 { "z196", "z196", PROCESSOR_2817_Z196
, &z196_cost
, 9 },
337 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12
, &zEC12_cost
, 10 },
338 { "z13", "z13", PROCESSOR_2964_Z13
, &zEC12_cost
, 11 },
339 { "z14", "arch12", PROCESSOR_3906_Z14
, &zEC12_cost
, 12 },
340 { "z15", "arch13", PROCESSOR_8561_Z15
, &zEC12_cost
, 13 },
341 { "z16", "arch14", PROCESSOR_3931_Z16
, &zEC12_cost
, 14 },
342 { "native", "", PROCESSOR_NATIVE
, NULL
, 0 }
345 extern int reload_completed
;
347 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
348 static rtx_insn
*last_scheduled_insn
;
/* Per-unit, per-side distance (in scheduling steps) since each execution
   unit was last used; feeds the instruction-mix scoring below.  */
351 #define MAX_SCHED_UNITS 4
352 static int last_scheduled_unit_distance
[MAX_SCHED_UNITS
][NUM_SIDES
];
354 /* Estimate of number of cycles a long-running insn occupies an
/* NOTE(review): the comment above is truncated in this extraction.  */
356 static int fxd_longrunning
[NUM_SIDES
];
357 static int fpd_longrunning
[NUM_SIDES
];
359 /* The maximum score added for an instruction whose unit hasn't been
360 in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
361 give instruction mix scheduling more priority over instruction
363 #define MAX_SCHED_MIX_SCORE 2
365 /* The maximum distance up to which individual scores will be
366 calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
367 Increase this with the OOO window size of the machine. */
368 #define MAX_SCHED_MIX_DISTANCE 70
370 /* Structure used to hold the components of a S/390 memory
371 address.  A legitimate address on S/390 is of the general
373 base + index + displacement
374 where any of the components is optional.
376 base and index are registers of the class ADDR_REGS,
377 displacement is an unsigned 12-bit immediate constant. */
379 /* The max number of insns of backend generated memset/memcpy/memcmp
380 loops.  This value is used in the unroll adjust hook to detect such
381 loops.  Current max is 9 coming from the memcmp loop. */
382 #define BLOCK_MEM_OPS_LOOP_INSNS 9
393 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
395 #define cfun_frame_layout (cfun->machine->frame_layout)
396 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
/* Argument FPRs needing a save: f0-f3 on 64-bit, f0/f2 on 31-bit.  */
397 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
398 ? cfun_frame_layout.fpr_bitmap & 0x0f \
399 : cfun_frame_layout.fpr_bitmap & 0x03))
400 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
401 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
/* Set/test bit (REGNO - FPR0_REGNUM) in the FPR save bitmap.  */
402 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
403 (1 << (REGNO - FPR0_REGNUM)))
404 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
405 (1 << (REGNO - FPR0_REGNUM))))
406 #define cfun_gpr_save_slot(REGNO) \
407 cfun->machine->frame_layout.gpr_save_slots[REGNO]
409 /* Number of GPRs and FPRs used for argument passing.  */
410 #define GP_ARG_NUM_REG 5
411 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
412 #define VEC_ARG_NUM_REG 8
414 /* Return TRUE if GPR REGNO is supposed to be restored in the function
417 s390_restore_gpr_p (int regno
)
/* TRUE iff the frame layout records a restore range at all
   (first_restore_gpr != -1) and REGNO lies within
   [first_restore_gpr, last_restore_gpr].  */
419 return (cfun_frame_layout
.first_restore_gpr
!= -1
420 && regno
>= cfun_frame_layout
.first_restore_gpr
421 && regno
<= cfun_frame_layout
.last_restore_gpr
);
424 /* Return TRUE if any of the registers in range [FIRST, LAST] is saved
425 because of -mpreserve-args. */
427 s390_preserve_gpr_arg_in_range_p (int first
, int last
)
429 int num_arg_regs
= MIN (crtl
->args
.info
.gprs
+ cfun
->va_list_gpr_size
,
432 && s390_preserve_args_p
433 && first
<= GPR2_REGNUM
+ num_arg_regs
- 1
434 && last
>= GPR2_REGNUM
);
/* Return TRUE if GPR REGNO is saved because of -mpreserve-args;
   single-register wrapper around the range test above.  */
438 s390_preserve_gpr_arg_p (int regno
)
440 return s390_preserve_gpr_arg_in_range_p (regno
, regno
);
/* Return TRUE if FPR REGNO is saved because of -mpreserve-args.
   Mirrors s390_preserve_gpr_arg_in_range_p for the FPR argument
   registers starting at FPR0.  */
444 s390_preserve_fpr_arg_p (int regno
)
/* NOTE(review): as above, the MIN cap's second operand (presumably
   FP_ARG_NUM_REG) is missing from this extraction.  */
446 int num_arg_regs
= MIN (crtl
->args
.info
.fprs
+ cfun
->va_list_fpr_size
,
448 return (s390_preserve_args_p
449 && regno
<= FPR0_REGNUM
+ num_arg_regs
- 1
450 && regno
>= FPR0_REGNUM
);
453 #undef TARGET_ATOMIC_ALIGN_FOR_MODE
454 #define TARGET_ATOMIC_ALIGN_FOR_MODE s390_atomic_align_for_mode
/* Implement TARGET_ATOMIC_ALIGN_FOR_MODE: atomic objects must be
   naturally aligned, i.e. aligned to their full bit size.  */
456 s390_atomic_align_for_mode (machine_mode mode
)
458 return GET_MODE_BITSIZE (mode
);
461 /* A couple of shortcuts. */
/* Shorthands for testing constant-constraint letters J, K, Os, Op, On.  */
462 #define CONST_OK_FOR_J(x) \
463 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
464 #define CONST_OK_FOR_K(x) \
465 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
466 #define CONST_OK_FOR_Os(x) \
467 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
468 #define CONST_OK_FOR_Op(x) \
469 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
470 #define CONST_OK_FOR_On(x) \
471 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
/* A register/mode pair is OK when it occupies a single hard register,
   or starts on an even register number.  */
473 #define REGNO_PAIR_OK(REGNO, MODE) \
474 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
476 /* That's the read ahead of the dynamic branch prediction unit in
477 bytes on a z10 (or higher) CPU.  */
478 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Forward declarations; the definitions are not visible in this
   extraction.  */
481 s390_address_cost (rtx addr
, machine_mode mode ATTRIBUTE_UNUSED
,
482 addr_space_t as ATTRIBUTE_UNUSED
,
483 bool speed ATTRIBUTE_UNUSED
);
486 s390_hard_regno_nregs (unsigned int regno
, machine_mode mode
);
488 /* Masks per jump target register indicating which thunks need to be
490 static GTY(()) int indirect_branch_prez10thunk_mask
= 0;
491 static GTY(()) int indirect_branch_z10thunk_mask
= 0;
493 #define INDIRECT_BRANCH_NUM_OPTIONS 4
/* The four kinds of indirect-branch mitigation sites; used to index the
   label/section-name tables below.  */
495 enum s390_indirect_branch_option
497 s390_opt_indirect_branch_jump
= 0,
498 s390_opt_indirect_branch_call
,
499 s390_opt_function_return_reg
,
500 s390_opt_function_return_mem
/* Running label counter per option kind.  */
503 static GTY(()) int indirect_branch_table_label_no
[INDIRECT_BRANCH_NUM_OPTIONS
] = { 0 };
/* Label prefixes and section names, indexed by the enum above.  */
504 const char *indirect_branch_table_label
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
505 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
506 const char *indirect_branch_table_name
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
507 { ".s390_indirect_jump", ".s390_indirect_call",
508 ".s390_return_reg", ".s390_return_mem" };
/* Return TRUE when the return address register's save slot is on the
   stack (SAVE_SLOT_STACK) according to the current frame layout.  */
511 s390_return_addr_from_memory ()
513 return cfun_gpr_save_slot(RETURN_REGNUM
) == SAVE_SLOT_STACK
;
516 /* Generate a SUBREG for the MODE lowpart of EXPR.
518 In contrast to gen_lowpart it will always return a SUBREG
519 expression.  This is useful to generate STRICT_LOW_PART
522 s390_gen_lowpart_subreg (machine_mode mode
, rtx expr
)
524 rtx lowpart
= gen_lowpart (mode
, expr
);
526 /* There might be no SUBREG in case it could be applied to the hard
527 REG rtx or it could be folded with a paradoxical subreg.  Bring
529 if (!SUBREG_P (lowpart
))
/* Word mode of the target: 64-bit regs under z/Architecture.  */
531 machine_mode reg_mode
= TARGET_ZARCH
? DImode
: SImode
;
532 gcc_assert (REG_P (lowpart
));
/* Re-wrap the hard REG in an explicit SUBREG of the full-width
   register.  NOTE(review): the REGNO argument and closing of this call
   are lost in this extraction.  */
533 lowpart
= gen_lowpart_SUBREG (mode
,
534 gen_rtx_REG (reg_mode
,
541 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
543 s390_fma_allowed_p (machine_mode mode
)
/* TFmode FMA with the vector enhancements facility (VXE) is gated by
   -mvx-long-double-fma; the return for all other modes is not visible
   in this extraction.  */
545 if (TARGET_VXE
&& mode
== TFmode
)
546 return flag_vx_long_double_fma
;
551 /* Indicate which ABI has been used for passing vector args.
552 0 - no vector type arguments have been passed where the ABI is relevant
553 1 - the old ABI has been used
554 2 - a vector type argument has been passed either in a vector register
555 or on the stack by value  */
/* Written by s390_check_type_for_vector_abi below.  */
556 static int s390_vector_abi
= 0;
558 /* Set the vector ABI marker if TYPE is subject to the vector ABI
559 switch.  The vector ABI affects only vector data types.  There are
560 two aspects of the vector ABI relevant here:
562 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
563 ABI and natural alignment with the old.
565 2. vector <= 16 bytes are passed in VRs or by value on the stack
566 with the new ABI but by reference on the stack with the old.
568 If ARG_P is true TYPE is used for a function argument or return
569 value.  The ABI marker then is set for all vector data types.  If
570 ARG_P is false only type 1 vectors are being checked.  */
573 s390_check_type_for_vector_abi (const_tree type
, bool arg_p
, bool in_struct_p
)
/* Guards against infinite recursion on cyclic type graphs and avoids
   reprocessing already-seen types.  */
575 static hash_set
<const_tree
> visited_types_hash
;
580 if (type
== NULL_TREE
|| TREE_CODE (type
) == ERROR_MARK
)
583 if (visited_types_hash
.contains (type
))
586 visited_types_hash
.add (type
);
588 if (VECTOR_TYPE_P (type
))
590 int type_size
= int_size_in_bytes (type
);
592 /* Outside arguments only the alignment is changing and this
593 only happens for vector types >= 16 bytes.  */
594 if (!arg_p
&& type_size
< 16)
597 /* In arguments vector types > 16 are passed as before (GCC
598 never enforced the bigger alignment for arguments which was
599 required by the old vector ABI).  However, it might still be
600 ABI relevant due to the changed alignment if it is a struct
602 if (arg_p
&& type_size
> 16 && !in_struct_p
)
/* Record which ABI applies: 2 = new vector ABI, 1 = old ABI.  */
605 s390_vector_abi
= TARGET_VX_ABI
? 2 : 1;
607 else if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == ARRAY_TYPE
)
609 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
610 natural alignment there will never be ABI dependent padding
611 in an array type.  That's why we do not set in_struct_p to
613 s390_check_type_for_vector_abi (TREE_TYPE (type
), arg_p
, in_struct_p
);
615 else if (FUNC_OR_METHOD_TYPE_P (type
))
619 /* Check the return type.  */
620 s390_check_type_for_vector_abi (TREE_TYPE (type
), true, false);
/* Walk each parameter type; all are argument positions (arg_p).  */
622 for (arg_chain
= TYPE_ARG_TYPES (type
);
624 arg_chain
= TREE_CHAIN (arg_chain
))
625 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain
), true, false);
627 else if (RECORD_OR_UNION_TYPE_P (type
))
/* Recurse into each field; in_struct_p is true from here on.  */
631 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
633 if (TREE_CODE (field
) != FIELD_DECL
)
636 s390_check_type_for_vector_abi (TREE_TYPE (field
), arg_p
, true);
642 /* System z builtins. */
644 #include "s390-builtins.h"
646 const unsigned int bflags_builtin
[S390_BUILTIN_MAX
+ 1] =
651 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
653 #define OB_DEF_VAR(...)
654 #include "s390-builtins.def"
658 const unsigned int opflags_builtin
[S390_BUILTIN_MAX
+ 1] =
663 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
665 #define OB_DEF_VAR(...)
666 #include "s390-builtins.def"
670 const unsigned int bflags_overloaded_builtin
[S390_OVERLOADED_BUILTIN_MAX
+ 1] =
676 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
677 #define OB_DEF_VAR(...)
678 #include "s390-builtins.def"
683 bflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
690 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
691 #include "s390-builtins.def"
696 opflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
703 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
704 #include "s390-builtins.def"
708 tree s390_builtin_types
[BT_MAX
];
709 tree s390_builtin_fn_types
[BT_FN_MAX
];
710 tree s390_builtin_decls
[S390_BUILTIN_MAX
+
711 S390_OVERLOADED_BUILTIN_MAX
+
712 S390_OVERLOADED_BUILTIN_VAR_MAX
];
714 static enum insn_code
const code_for_builtin
[S390_BUILTIN_MAX
+ 1] = {
718 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
720 #define OB_DEF_VAR(...)
722 #include "s390-builtins.def"
727 s390_init_builtins (void)
729 /* These definitions are being used in s390-builtins.def. */
730 tree returns_twice_attr
= tree_cons (get_identifier ("returns_twice"),
732 tree noreturn_attr
= tree_cons (get_identifier ("noreturn"), NULL
, NULL
);
733 tree c_uint64_type_node
;
735 /* The uint64_type_node from tree.cc is not compatible to the C99
736 uint64_t data type. What we want is c_uint64_type_node from
737 c-common.cc. But since backend code is not supposed to interface
738 with the frontend we recreate it here. */
740 c_uint64_type_node
= long_unsigned_type_node
;
742 c_uint64_type_node
= long_long_unsigned_type_node
;
745 #define DEF_TYPE(INDEX, NODE, CONST_P) \
746 if (s390_builtin_types[INDEX] == NULL) \
747 s390_builtin_types[INDEX] = (!CONST_P) ? \
748 (NODE) : build_type_variant ((NODE), 1, 0);
750 #undef DEF_POINTER_TYPE
751 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
752 if (s390_builtin_types[INDEX] == NULL) \
753 s390_builtin_types[INDEX] = \
754 build_pointer_type (s390_builtin_types[INDEX_BASE]);
756 #undef DEF_DISTINCT_TYPE
757 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
758 if (s390_builtin_types[INDEX] == NULL) \
759 s390_builtin_types[INDEX] = \
760 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
762 #undef DEF_VECTOR_TYPE
763 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
764 if (s390_builtin_types[INDEX] == NULL) \
765 s390_builtin_types[INDEX] = \
766 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
768 #undef DEF_OPAQUE_VECTOR_TYPE
769 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
770 if (s390_builtin_types[INDEX] == NULL) \
771 s390_builtin_types[INDEX] = \
772 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
775 #define DEF_FN_TYPE(INDEX, args...) \
776 if (s390_builtin_fn_types[INDEX] == NULL) \
777 s390_builtin_fn_types[INDEX] = \
778 build_function_type_list (args, NULL_TREE);
780 #define DEF_OV_TYPE(...)
781 #include "s390-builtin-types.def"
784 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
785 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
786 s390_builtin_decls[S390_BUILTIN_##NAME] = \
787 add_builtin_function ("__builtin_" #NAME, \
788 s390_builtin_fn_types[FNTYPE], \
789 S390_BUILTIN_##NAME, \
794 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
795 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
797 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
798 add_builtin_function ("__builtin_" #NAME, \
799 s390_builtin_fn_types[FNTYPE], \
800 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
805 #define OB_DEF_VAR(...)
806 #include "s390-builtins.def"
810 /* Return true if ARG is appropriate as argument number ARGNUM of
811 builtin DECL. The operand flags from s390-builtins.def have to
812 passed as OP_FLAGS. */
814 s390_const_operand_ok (tree arg
, int argnum
, int op_flags
, tree decl
)
816 if (O_UIMM_P (op_flags
))
818 unsigned HOST_WIDE_INT bitwidths
[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
819 unsigned HOST_WIDE_INT bitmasks
[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
820 unsigned HOST_WIDE_INT bitwidth
= bitwidths
[op_flags
- O_U1
];
821 unsigned HOST_WIDE_INT bitmask
= bitmasks
[op_flags
- O_U1
];
823 gcc_assert(ARRAY_SIZE(bitwidths
) == (O_M12
- O_U1
+ 1));
824 gcc_assert(ARRAY_SIZE(bitmasks
) == (O_M12
- O_U1
+ 1));
826 if (!tree_fits_uhwi_p (arg
)
827 || tree_to_uhwi (arg
) > (HOST_WIDE_INT_1U
<< bitwidth
) - 1
828 || (bitmask
&& tree_to_uhwi (arg
) & ~bitmask
))
832 gcc_assert (bitmask
< 16);
833 char values
[120] = "";
835 for (unsigned HOST_WIDE_INT i
= 0; i
<= bitmask
; i
++)
840 int ret
= snprintf (buf
, 5, HOST_WIDE_INT_PRINT_UNSIGNED
, i
& bitmask
);
841 gcc_assert (ret
< 5);
842 strcat (values
, buf
);
844 strcat (values
, ", ");
846 error ("constant argument %d for builtin %qF is invalid (%s)",
847 argnum
, decl
, values
);
850 error ("constant argument %d for builtin %qF is out of range (0-%wu)",
851 argnum
, decl
, (HOST_WIDE_INT_1U
<< bitwidth
) - 1);
857 if (O_SIMM_P (op_flags
))
859 int bitwidths
[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
860 int bitwidth
= bitwidths
[op_flags
- O_S2
];
862 if (!tree_fits_shwi_p (arg
)
863 || tree_to_shwi (arg
) < -(HOST_WIDE_INT_1
<< (bitwidth
- 1))
864 || tree_to_shwi (arg
) > ((HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1))
866 error ("constant argument %d for builtin %qF is out of range "
867 "(%wd-%wd)", argnum
, decl
,
868 -(HOST_WIDE_INT_1
<< (bitwidth
- 1)),
869 (HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1);
876 /* Expand an expression EXP that calls a built-in function,
877 with result going to TARGET if that's convenient
878 (and in mode MODE if that's convenient).
879 SUBTARGET may be used as the target for computing one of EXP's operands.
880 IGNORE is nonzero if the value is to be ignored. */
883 s390_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
884 machine_mode mode ATTRIBUTE_UNUSED
,
885 int ignore ATTRIBUTE_UNUSED
)
889 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
890 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
891 enum insn_code icode
;
892 rtx op
[MAX_ARGS
], pat
;
896 call_expr_arg_iterator iter
;
897 unsigned int all_op_flags
= opflags_for_builtin (fcode
);
898 machine_mode last_vec_mode
= VOIDmode
;
900 if (TARGET_DEBUG_ARG
)
903 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
904 (int)fcode
, IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
905 bflags_for_builtin (fcode
));
908 if (S390_USE_TARGET_ATTRIBUTE
)
912 bflags
= bflags_for_builtin (fcode
);
913 if ((bflags
& B_HTM
) && !TARGET_HTM
)
915 error ("builtin %qF is not supported without %<-mhtm%> "
916 "(default with %<-march=zEC12%> and higher)", fndecl
);
919 if (((bflags
& B_VX
) || (bflags
& B_VXE
)) && !TARGET_VX
)
921 error ("builtin %qF requires %<-mvx%> "
922 "(default with %<-march=z13%> and higher)", fndecl
);
926 if ((bflags
& B_VXE
) && !TARGET_VXE
)
928 error ("Builtin %qF requires z14 or higher", fndecl
);
932 if ((bflags
& B_VXE2
) && !TARGET_VXE2
)
934 error ("Builtin %qF requires z15 or higher", fndecl
);
938 if (fcode
>= S390_OVERLOADED_BUILTIN_VAR_OFFSET
939 && fcode
< S390_ALL_BUILTIN_MAX
)
943 else if (fcode
< S390_OVERLOADED_BUILTIN_OFFSET
)
945 icode
= code_for_builtin
[fcode
];
946 /* Set a flag in the machine specific cfun part in order to support
947 saving/restoring of FPRs. */
948 if (fcode
== S390_BUILTIN_tbegin
|| fcode
== S390_BUILTIN_tbegin_retry
)
949 cfun
->machine
->tbegin_p
= true;
951 else if (fcode
< S390_OVERLOADED_BUILTIN_VAR_OFFSET
)
953 error ("unresolved overloaded builtin");
957 internal_error ("bad builtin fcode");
960 internal_error ("bad builtin icode");
962 nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
966 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
968 || GET_MODE (target
) != tmode
969 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
970 target
= gen_reg_rtx (tmode
);
972 /* There are builtins (e.g. vec_promote) with no vector
973 arguments but an element selector. So we have to also look
974 at the vector return type when emitting the modulo
976 if (VECTOR_MODE_P (insn_data
[icode
].operand
[0].mode
))
977 last_vec_mode
= insn_data
[icode
].operand
[0].mode
;
981 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
984 const struct insn_operand_data
*insn_op
;
985 unsigned int op_flags
= all_op_flags
& ((1 << O_SHIFT
) - 1);
987 all_op_flags
= all_op_flags
>> O_SHIFT
;
989 if (arg
== error_mark_node
)
991 if (arity
>= MAX_ARGS
)
994 if (O_IMM_P (op_flags
)
995 && TREE_CODE (arg
) != INTEGER_CST
)
997 error ("constant value required for builtin %qF argument %d",
1002 if (!s390_const_operand_ok (arg
, arity
+ 1, op_flags
, fndecl
))
1005 insn_op
= &insn_data
[icode
].operand
[arity
+ nonvoid
];
1006 op
[arity
] = expand_expr (arg
, NULL_RTX
, insn_op
->mode
, EXPAND_NORMAL
);
1008 /* expand_expr truncates constants to the target mode only if it
1009 is "convenient". However, our checks below rely on this
1011 if (CONST_INT_P (op
[arity
])
1012 && SCALAR_INT_MODE_P (insn_op
->mode
)
1013 && GET_MODE (op
[arity
]) != insn_op
->mode
)
1014 op
[arity
] = GEN_INT (trunc_int_for_mode (INTVAL (op
[arity
]),
1017 /* Wrap the expanded RTX for pointer types into a MEM expr with
1018 the proper mode. This allows us to use e.g. (match_operand
1019 "memory_operand"..) in the insn patterns instead of (mem
1020 (match_operand "address_operand)). This is helpful for
1021 patterns not just accepting MEMs. */
1022 if (POINTER_TYPE_P (TREE_TYPE (arg
))
1023 && insn_op
->predicate
!= address_operand
)
1024 op
[arity
] = gen_rtx_MEM (insn_op
->mode
, op
[arity
]);
1026 /* Expand the modulo operation required on element selectors. */
1027 if (op_flags
== O_ELEM
)
1029 gcc_assert (last_vec_mode
!= VOIDmode
);
1030 op
[arity
] = simplify_expand_binop (SImode
, code_to_optab (AND
),
1032 GEN_INT (GET_MODE_NUNITS (last_vec_mode
) - 1),
1033 NULL_RTX
, 1, OPTAB_DIRECT
);
1036 /* Record the vector mode used for an element selector. This assumes:
1037 1. There is no builtin with two different vector modes and an element selector
1038 2. The element selector comes after the vector type it is referring to.
1039 This is currently true for all the builtins but FIXME we
1040 should better check for that. */
1041 if (VECTOR_MODE_P (insn_op
->mode
))
1042 last_vec_mode
= insn_op
->mode
;
1044 if (insn_op
->predicate (op
[arity
], insn_op
->mode
))
1050 /* A memory operand is rejected by the memory_operand predicate.
1051 Try making the address legal by copying it into a register. */
1052 if (MEM_P (op
[arity
])
1053 && insn_op
->predicate
== memory_operand
1054 && (GET_MODE (XEXP (op
[arity
], 0)) == Pmode
1055 || GET_MODE (XEXP (op
[arity
], 0)) == VOIDmode
))
1057 op
[arity
] = replace_equiv_address (op
[arity
],
1058 copy_to_mode_reg (Pmode
,
1059 XEXP (op
[arity
], 0)));
1061 /* Some of the builtins require different modes/types than the
1062 pattern in order to implement a specific API. Instead of
1063 adding many expanders which do the mode change we do it here.
1064 E.g. s390_vec_add_u128 required to have vector unsigned char
1065 arguments is mapped to addti3. */
1066 else if (insn_op
->mode
!= VOIDmode
1067 && GET_MODE (op
[arity
]) != VOIDmode
1068 && GET_MODE (op
[arity
]) != insn_op
->mode
1069 && ((tmp_rtx
= simplify_gen_subreg (insn_op
->mode
, op
[arity
],
1070 GET_MODE (op
[arity
]), 0))
1073 op
[arity
] = tmp_rtx
;
1076 /* The predicate rejects the operand although the mode is fine.
1077 Copy the operand to register. */
1078 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
)
1079 && (GET_MODE (op
[arity
]) == insn_op
->mode
1080 || GET_MODE (op
[arity
]) == VOIDmode
1081 || (insn_op
->predicate
== address_operand
1082 && GET_MODE (op
[arity
]) == Pmode
)))
1084 /* An address_operand usually has VOIDmode in the expander
1085 so we cannot use this. */
1086 machine_mode target_mode
=
1087 (insn_op
->predicate
== address_operand
1088 ? (machine_mode
) Pmode
: insn_op
->mode
);
1089 op
[arity
] = copy_to_mode_reg (target_mode
, op
[arity
]);
1092 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
))
1094 error ("invalid argument %d for builtin %qF", arity
+ 1, fndecl
);
1103 pat
= GEN_FCN (icode
) (target
);
1107 pat
= GEN_FCN (icode
) (target
, op
[0]);
1109 pat
= GEN_FCN (icode
) (op
[0]);
1113 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
1115 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
1119 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
1121 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
1125 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
1127 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
1131 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
1133 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
1137 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
1139 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
/* Upper bound accepted for each argument of the "hotpatch" attribute
   (checked via wi::gtu_p in s390_handle_hotpatch_attribute below).  */
static const int s390_hotpatch_hw_max = 1000000;
/* Default counts applied before/after a hotpatched function label.
   NOTE(review): how these are consumed is outside this excerpt --
   presumably halfwords of padding; confirm against the option handling.  */
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declarations of expr/expr2 and several return/exit paths of the original
   are missing here; all code tokens below are kept verbatim.  */
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only function declarations may carry the attribute.  */
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
/* If both arguments are present, fetch them.  */
if (args != NULL && TREE_CHAIN (args) != NULL)
expr = TREE_VALUE (args);
expr2 = TREE_VALUE (TREE_CHAIN (args));
if (args == NULL || TREE_CHAIN (args) == NULL)
/* Each argument must be an integral INTEGER_CST not exceeding
   s390_hotpatch_hw_max; wi::gtu_p is an unsigned greater-than, so
   negative constants also fail this check.  */
else if (TREE_CODE (expr) != INTEGER_CST
	 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
else if (TREE_CODE (expr2) != INTEGER_CST
	 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
/* Any of the failures above is diagnosed and the attribute dropped.  */
error ("requested %qE attribute is not a comma separated pair of"
       " non-negative integer constants or too large (max. %d)", name,
       s390_hotpatch_hw_max);
*no_add_attrs = true;
/* Expand the s390_vector_bool type attribute.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   switch head over 'mode', its break statements and the default case are
   missing; code tokens below are kept verbatim.  */
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
tree type = *node, result = NULL_TREE;
/* Strip pointers, function/method types and arrays down to the
   underlying element type.  */
while (POINTER_TYPE_P (type)
       || TREE_CODE (type) == FUNCTION_TYPE
       || TREE_CODE (type) == METHOD_TYPE
       || TREE_CODE (type) == ARRAY_TYPE)
  type = TREE_TYPE (type);
mode = TYPE_MODE (type);
/* Map the element/vector mode to the matching boolean vector type.  */
case E_DImode: case E_V2DImode:
result = s390_builtin_types[BT_BV2DI];
case E_SImode: case E_V4SImode:
result = s390_builtin_types[BT_BV4SI];
case E_HImode: case E_V8HImode:
result = s390_builtin_types[BT_BV8HI];
case E_QImode: case E_V16QImode:
result = s390_builtin_types[BT_BV16QI];
*no_add_attrs = true;  /* No need to hang on to the attribute.  */
/* Rebuild the original wrapper (pointer/array/...) around the boolean
   vector type.  */
*node = lang_hooks.types.reconstruct_complex_type (*node, result);
/* Check syntax of function decl attributes having a string type value.  */
/* NOTE(review): lossy extraction -- the return type line, the final
   parameter of the signature, braces and return paths are missing;
   code tokens below are kept verbatim.  */
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
/* These attributes are valid on function declarations only.  */
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
cst = TREE_VALUE (args);
/* The single argument must be a string constant.  */
if (TREE_CODE (cst) != STRING_CST)
warning (OPT_Wattributes,
	 "%qE attribute requires a string constant argument",
*no_add_attrs = true;
/* These five attributes accept keep|thunk|thunk-extern only.  */
if (is_attribute_p ("indirect_branch", name)
    || is_attribute_p ("indirect_branch_call", name)
    || is_attribute_p ("function_return", name)
    || is_attribute_p ("function_return_reg", name)
    || is_attribute_p ("function_return_mem", name))
if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
    && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
    && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
warning (OPT_Wattributes,
	 "argument to %qE attribute is not "
	 "(keep|thunk|thunk-extern)", name);
*no_add_attrs = true;
/* indirect_branch_jump additionally allows thunk-inline.  */
if (is_attribute_p ("indirect_branch_jump", name)
    && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
    && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
    && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
    && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
warning (OPT_Wattributes,
	 "argument to %qE attribute is not "
	 "(keep|thunk|thunk-inline|thunk-extern)", name);
*no_add_attrs = true;
/* Table of machine attributes recognized by this back end.  Fields per
   entry: name, min/max argument count, decl-required flag, type-required
   flag, fn-type-required flag, affects-type-identity flag, handler,
   exclusions.  NOTE(review): the closing of the TARGET_GNU_ATTRIBUTES
   invocation is missing from this extraction.  */
TARGET_GNU_ATTRIBUTES (s390_attribute_table, {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL }
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
/* NOTE(review): lossy extraction -- the return type line, braces, local
   declarations of set/src and the early-exit statements after the NULL
   checks are missing; code tokens below are kept verbatim.  */
s390_label_align (rtx_insn *label)
rtx_insn *prev_insn = prev_active_insn (label);
/* No active insn before the label: fall through to the default.  */
if (prev_insn == NULL_RTX)
set = single_set (prev_insn);
if (set == NULL_RTX)
src = SET_SRC (set);
/* Don't align literal pool base labels.  */
if (GET_CODE (src) == UNSPEC
    && XINT (src, 1) == UNSPEC_MAIN_BASE)
/* Default: the -falign-labels setting.  */
return align_labels.levels[0].log;
/* Cached GOT symbol; GTY(()) so the garbage collector tracks it.  */
static GTY(()) rtx got_symbol;
/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   lazy-init guard and the final return are missing; code tokens below are
   kept verbatim.  */
s390_got_symbol (void)
got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1369 static scalar_int_mode
1370 s390_libgcc_cmp_return_mode (void)
1372 return TARGET_64BIT
? DImode
: SImode
;
1375 static scalar_int_mode
1376 s390_libgcc_shift_count_mode (void)
1378 return TARGET_64BIT
? DImode
: SImode
;
1381 static scalar_int_mode
1382 s390_unwind_word_mode (void)
1384 return TARGET_64BIT
? DImode
: SImode
;
/* Return true if the back end supports mode MODE.  */
/* NOTE(review): lossy extraction -- the return type line, braces and the
   early return after the TImode rejection are missing; code tokens below
   are kept verbatim.  */
s390_scalar_mode_supported_p (scalar_mode mode)
/* In contrast to the default implementation reject TImode constants on 31bit
   TARGET_ZARCH for ABI compliance.  */
if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
/* Decimal float support is delegated to the common code.  */
if (DECIMAL_FLOAT_MODE_P (mode))
return default_decimal_float_supported_p ();
return default_scalar_mode_supported_p (mode);
/* Return true if the back end supports vector mode MODE.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   early return, the declaration of 'inner' and the subsequent checks on
   the inner mode are missing; code tokens below are kept verbatim.  */
s390_vector_mode_supported_p (machine_mode mode)
/* Only vector modes of at most 16 bytes (one vector register) qualify.  */
if (!VECTOR_MODE_P (mode)
    || GET_MODE_SIZE (mode) > 16)
inner = GET_MODE_INNER (mode);
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
/* NOTE(review): lossy extraction -- the 'static void' line and braces are
   missing; code tokens below are kept verbatim.  */
s390_set_has_landing_pad_p (bool value)
cfun->machine->has_landing_pad_p = value;
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
/* NOTE(review): lossy extraction -- the tail of the comment above, the
   return type line, braces, the dispatch on M1 and all return statements
   are missing; code tokens below are kept verbatim.  */
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Visible fragment: one of the per-M1 checks against a set of candidate
   M2 modes.  */
if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
    || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   early 'return true' paths and the per-mode acceptance logic between the
   checks are missing; code tokens below are kept verbatim.  */
s390_match_ccmode_set (rtx set, machine_mode req_mode)
machine_mode set_mode;
gcc_assert (GET_CODE (set) == SET);
/* These modes are supposed to be used only in CC consumer
gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	    && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
/* SETs not targeting the CC register are trivially fine.  */
if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
set_mode = GET_MODE (SET_DEST (set));
/* Fragments of the set_mode dispatch follow.  */
if (req_mode != set_mode)
if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
    && req_mode != CCSRmode && req_mode != CCURmode
    && req_mode != CCZ1mode)
if (req_mode != CCAmode)
/* Finally the source must carry the same CC mode as the destination.  */
return (GET_MODE (SET_SRC (set)) == set_mode);
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declaration of 'i', the 'return false' after the failing check and the
   final 'return true' are missing; code tokens below are kept verbatim.  */
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
/* s390_tm_ccmode returns VOIDmode to indicate failure.  */
if (req_mode == VOIDmode)
/* Single SET: delegate directly.  */
if (GET_CODE (PATTERN (insn)) == SET)
return s390_match_ccmode_set (PATTERN (insn), req_mode);
/* PARALLEL: every contained SET must pass.  */
if (GET_CODE (PATTERN (insn)) == PARALLEL)
for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
rtx set = XVECEXP (PATTERN (insn), 0, i);
if (GET_CODE (set) == SET)
if (!s390_match_ccmode_set (set, req_mode))
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declarations of bit0/bit1, the returns of the mode values for the first
   two cases and the final fallback return are missing; code tokens below
   are kept verbatim.  */
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
/* ??? Fixme: should work on CONST_WIDE_INT as well.  */
if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
/* Selected bits all zero: CC0.
   e.g.: int a; if ((a & (16 + 128)) == 0) */
if (INTVAL (op2) == 0)
/* Selected bits all one: CC3.
   e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
if (INTVAL (op2) == INTVAL (op1))
/* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
   if ((a & (16 + 128)) == 16) -> CCT1
   if ((a & (16 + 128)) == 128) -> CCT2 */
bit1 = exact_log2 (INTVAL (op2));
bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
if (bit0 != -1 && bit1 != -1)
return bit0 > bit1 ? CCT1mode : CCT2mode;
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
/* NOTE(review): lossy extraction -- the tail of the comment above, the
   return type line, braces, the outer switch over CODE, its case labels
   and all CC-mode return statements are missing; code tokens below are
   kept verbatim.  These fragments are the per-case operand shape tests
   that select among the CC modes.  */
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
/* (Presumably the EQ/NE case.)  */
if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
     || GET_CODE (op1) == NEG)
    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
if (GET_CODE (op0) == AND)
/* Check whether we can potentially do it via TM.  */
machine_mode ccmode;
ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
if (ccmode != VOIDmode)
/* Relax CCTmode to CCZmode to allow fall-back to AND
   if that turns out to be beneficial.  */
return ccmode == CCTmode ? CCZmode : ccmode;
/* Comparing a HImode/QImode register against an all-ones pattern.  */
if (register_operand (op0, HImode)
    && GET_CODE (op1) == CONST_INT
    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
if (register_operand (op0, QImode)
    && GET_CODE (op1) == CONST_INT
    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
/* The only overflow condition of NEG and ABS happens when
   -INT_MAX is used as parameter, which stays negative.  So
   we have an overflow from a positive value to a negative.
   Using CCAP mode the resulting cc can be used for comparisons.  */
if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
/* If constants are involved in an add instruction it is possible to use
   the resulting cc for comparisons with zero.  Knowing the sign of the
   constant the overflow behavior gets predictable.  e.g.:
     int a, b; if ((b = a + c) > 0)
   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
	    /* Avoid INT32_MIN on 32 bit.  */
	    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
if (INTVAL (XEXP((op0), 1)) < 0)
/* Floating point with NaNs needs the unordered-aware modes.  */
if (HONOR_NANS (op0) || HONOR_NANS (op1))
/* Extended operands compared against a non-constant.  */
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
    && GET_CODE (op1) != CONST_INT)
if (GET_CODE (op0) == PLUS
    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
    && GET_CODE (op1) != CONST_INT)
if (GET_CODE (op0) == MINUS
    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
    && GET_CODE (op1) != CONST_INT)
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */
/* NOTE(review): lossy extraction -- the return type line, braces, several
   guard conditions (e.g. the TARGET_* gates in front of the NOT/AND-IOR
   rewrites), switch heads, 'return' statements and mode case labels of
   the original are missing; code tokens below are kept verbatim.  */
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
/* Value-preserving mode: bail out of the destructive rewrites.  */
if (op0_preserve_value)
/* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
if ((*code == EQ || *code == NE)
    && *op1 == const0_rtx
    && GET_CODE (*op0) == ZERO_EXTRACT
    && GET_CODE (XEXP (*op0, 1)) == CONST_INT
    && GET_CODE (XEXP (*op0, 2)) == CONST_INT
    && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
rtx inner = XEXP (*op0, 0);
HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
if (len > 0 && len < modesize
    && pos >= 0 && pos + len <= modesize
    && modesize <= HOST_BITS_PER_WIDE_INT)
unsigned HOST_WIDE_INT block;
/* Build a mask of LEN ones positioned at POS (counted from the MSB).  */
block = (HOST_WIDE_INT_1U << len) - 1;
block <<= modesize - pos - len;
*op0 = gen_rtx_AND (GET_MODE (inner), inner,
		    gen_int_mode (block, GET_MODE (inner)));
/* Narrow AND of memory against immediate to enable TM.  */
if ((*code == EQ || *code == NE)
    && *op1 == const0_rtx
    && GET_CODE (*op0) == AND
    && GET_CODE (XEXP (*op0, 1)) == CONST_INT
    && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
rtx inner = XEXP (*op0, 0);
rtx mask = XEXP (*op0, 1);
/* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
if (GET_CODE (inner) == SUBREG
    && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
    && (GET_MODE_SIZE (GET_MODE (inner))
	>= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
& GET_MODE_MASK (GET_MODE (inner))
& ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
inner = SUBREG_REG (inner);
/* Do not change volatile MEMs.  */
if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
int part = s390_single_part (XEXP (*op0, 1),
			     GET_MODE (inner), QImode, 0);
mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
inner = adjust_address_nv (inner, QImode, part);
*op0 = gen_rtx_AND (QImode, inner, mask);
/* Narrow comparisons against 0xffff to HImode if possible.  */
if ((*code == EQ || *code == NE)
    && GET_CODE (*op1) == CONST_INT
    && INTVAL (*op1) == 0xffff
    && SCALAR_INT_MODE_P (GET_MODE (*op0))
    && (nonzero_bits (*op0, GET_MODE (*op0))
	& ~HOST_WIDE_INT_UC (0xffff)) == 0)
*op0 = gen_lowpart (HImode, *op0);
/* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
if (GET_CODE (*op0) == UNSPEC
    && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
    && XVECLEN (*op0, 0) == 1
    && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
    && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
    && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
    && *op1 == const0_rtx)
enum rtx_code new_code = UNKNOWN;
/* Signed comparisons of the strcmp result map to unsigned CC tests.  */
case EQ: new_code = EQ; break;
case NE: new_code = NE; break;
case LT: new_code = GTU; break;
case GT: new_code = LTU; break;
case LE: new_code = GEU; break;
case GE: new_code = LEU; break;
if (new_code != UNKNOWN)
*op0 = XVECEXP (*op0, 0, 0);
/* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
if (GET_CODE (*op0) == UNSPEC
    && XINT (*op0, 1) == UNSPEC_CC_TO_INT
    && XVECLEN (*op0, 0) == 1
    && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
    && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
    && CONST_INT_P (*op1))
enum rtx_code new_code = UNKNOWN;
switch (GET_MODE (XVECEXP (*op0, 0, 0)))
case EQ: new_code = EQ; break;
case NE: new_code = NE; break;
if (new_code != UNKNOWN)
/* For CCRAWmode put the required cc mask into the second
if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
    && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
*op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
*op0 = XVECEXP (*op0, 0, 0);
/* Remove UNSPEC_CC_TO_INT from connectives.  This happens for
   checks against multiple condition codes.  */
if (GET_CODE (*op0) == AND
    && GET_CODE (XEXP (*op0, 0)) == UNSPEC
    && XINT (XEXP (*op0, 0), 1) == UNSPEC_CC_TO_INT
    && XVECLEN (XEXP (*op0, 0), 0) == 1
    && REGNO (XVECEXP (XEXP (*op0, 0), 0, 0)) == CC_REGNUM
    && CONST_INT_P (XEXP (*op0, 1))
    && CONST_INT_P (*op1)
    && INTVAL (XEXP (*op0, 1)) == -3
if (INTVAL (*op1) == 0)
/* case cc == 0 || cc = 2 => mask = 0xa */
*op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0xa);
else if (INTVAL (*op1) == 1)
/* case cc == 1 || cc == 3 => mask = 0x5 */
*op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0x5);
if (GET_CODE (*op0) == PLUS
    && GET_CODE (XEXP (*op0, 0)) == UNSPEC
    && XINT (XEXP (*op0, 0), 1) == UNSPEC_CC_TO_INT
    && XVECLEN (XEXP (*op0, 0), 0) == 1
    && REGNO (XVECEXP (XEXP (*op0, 0), 0, 0)) == CC_REGNUM
    && CONST_INT_P (XEXP (*op0, 1))
    && CONST_INT_P (*op1)
    && (*code == LEU || *code == GTU))
if (INTVAL (*op1) == 1)
if (INTVAL (XEXP (*op0, 1)) == -1)
/* case cc == 1 || cc == 2 => mask = 0x6 */
*op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0x6);
*code = *code == GTU ? NE : EQ;
else if (INTVAL (XEXP (*op0, 1)) == -2)
/* case cc == 2 || cc == 3 => mask = 0x3 */
*op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0x3);
*code = *code == GTU ? NE : EQ;
else if (INTVAL (*op1) == 2
	 && INTVAL (XEXP (*op0, 1)) == -1)
/* case cc == 1 || cc == 2 || cc == 3 => mask = 0x7 */
*op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0x7);
*code = *code == GTU ? NE : EQ;
else if (*code == LEU || *code == GTU)
if (GET_CODE (*op0) == UNSPEC
    && XINT (*op0, 1) == UNSPEC_CC_TO_INT
    && XVECLEN (*op0, 0) == 1
    && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
    && CONST_INT_P (*op1))
if (INTVAL (*op1) == 1)
/* case cc == 0 || cc == 1 => mask = 0xc */
*op0 = XVECEXP (*op0, 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0xc);
*code = *code == GTU ? NE : EQ;
else if (INTVAL (*op1) == 2)
/* case cc == 0 || cc == 1 || cc == 2 => mask = 0xd */
*op0 = XVECEXP (*op0, 0, 0);
*op1 = gen_rtx_CONST_INT (VOIDmode, 0xd);
*code = *code == GTU ? NE : EQ;
else if (INTVAL (*op1) == 3)
*code = *code == GTU ? NE : EQ;
/* Simplify cascaded EQ, NE with const0_rtx.  */
if ((*code == NE || *code == EQ)
    && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
    && GET_MODE (*op0) == SImode
    && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
    && REG_P (XEXP (*op0, 0))
    && XEXP (*op0, 1) == const0_rtx
    && *op1 == const0_rtx)
if ((*code == EQ && GET_CODE (*op0) == NE)
    || (*code == NE && GET_CODE (*op0) == EQ))
*op0 = XEXP (*op0, 0);
/* Prefer register over memory as first operand.  */
if (MEM_P (*op0) && REG_P (*op1))
rtx tem = *op0; *op0 = *op1; *op1 = tem;
*code = (int)swap_condition ((enum rtx_code)*code);
/* A comparison result is compared against zero.  Replace it with
   the (perhaps inverted) original comparison.
   This probably should be done by simplify_relational_operation.  */
if ((*code == EQ || *code == NE)
    && *op1 == const0_rtx
    && COMPARISON_P (*op0)
    && CC_REG_P (XEXP (*op0, 0)))
enum rtx_code new_code;
new_code = reversed_comparison_code_parts (GET_CODE (*op0),
					   XEXP (*op0, 1), NULL);
new_code = GET_CODE (*op0);
if (new_code != UNKNOWN)
*op1 = XEXP (*op0, 1);
*op0 = XEXP (*op0, 0);
/* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
&& (*code == EQ || *code == NE)
&& (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
&& GET_CODE (*op0) == NOT)
machine_mode mode = GET_MODE (*op0);
*op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
*op0 = gen_rtx_NOT (mode, *op0);
/* a&b == -1 -> ~a|~b == 0   a|b == -1 -> ~a&~b == 0 */
&& (*code == EQ || *code == NE)
&& (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
&& (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
&& CONST_INT_P (*op1)
&& *op1 == constm1_rtx)
machine_mode mode = GET_MODE (*op0);
/* De Morgan: negate both operands and flip the connective.  */
rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
if (GET_CODE (*op0) == AND)
*op0 = gen_rtx_IOR (mode, op00, op01);
*op0 = gen_rtx_AND (mode, op00, op01);
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declaration of 'cc', the second argument of the gcc_assert and the
   else-branch structure are missing; code tokens below are kept
   verbatim.  */
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
machine_mode mode = s390_select_ccmode (code, op0, op1);
/* Force OP1 into register in order to satisfy VXE TFmode patterns.  */
if (TARGET_VXE && GET_MODE (op1) == TFmode)
op1 = force_reg (TFmode, op1);
if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
/* Do not output a redundant compare instruction if a
   compare_and_swap pattern already computed the result and the
   machine modes are compatible.  */
gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
cc = gen_rtx_REG (mode, CC_REGNUM);
emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
/* The caller branches on (code cc 0).  */
return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
/* If MEM is not a legitimate compare-and-swap memory operand, return a new
   MEM, whose address is a pseudo containing the original MEM's address.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declaration of 'tmp' and the early 'return mem' after the symbol-ref
   check are missing; code tokens below are kept verbatim.  */
s390_legitimize_cs_operand (rtx mem)
/* Addresses without symbol refs are already acceptable.  */
if (!contains_symbol_ref_p (mem))
/* Load the address into a fresh pseudo and rebuild the MEM on it.  */
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
return change_address (mem, VOIDmode, tmp);
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declaration of 'cc', the case labels of the switch, the trailing
   arguments of the gen_* calls and the default case are missing; code
   tokens below are kept verbatim.  */
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx, machine_mode ccmode)
mem = s390_legitimize_cs_operand (mem);
cc = gen_rtx_REG (ccmode, CC_REGNUM);
/* Pick the CAS expander matching the width of MEM.  */
switch (GET_MODE (mem))
emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
/* Branch on the CC set by the compare-and-swap itself.  */
return s390_emit_compare (code, cc, const0_rtx);
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
/* NOTE(review): lossy extraction -- the tail of the comment above, the
   return type line, braces, the declaration of 'insn' and the 'if (cond)'
   guard in front of the IF_THEN_ELSE wrapping are missing; code tokens
   below are kept verbatim.  */
s390_emit_jump (rtx target, rtx cond)
target = gen_rtx_LABEL_REF (VOIDmode, target);
/* (Conditional case) wrap the target in an IF_THEN_ELSE on COND.  */
target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
insn = gen_rtx_SET (pc_rtx, target);
return emit_jump_insn (insn);
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   CC-mode case labels of the outer switch, the default/-1 returns and
   several inner case labels are missing, so which inner switch belongs to
   which CC mode cannot be fully reconstructed here; code tokens below are
   kept verbatim.  */
s390_branch_condition_mask (rtx code)
/* One bit per condition code value, CC0 in the most significant
   position of the 4-bit mask.  */
const int CC0 = 1 << 3;
const int CC1 = 1 << 2;
const int CC2 = 1 << 1;
const int CC3 = 1 << 0;
gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
/* Only CCRAWmode comparisons may use a non-zero second operand.  */
gcc_assert (XEXP (code, 1) == const0_rtx
	    || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		&& CONST_INT_P (XEXP (code, 1))));
/* Dispatch on the CC mode of the compared register.  */
switch (GET_MODE (XEXP (code, 0)))
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
switch (GET_CODE (code))
case EQ: return CC1;
case NE: return CC0 | CC2 | CC3;
switch (GET_CODE (code))
case EQ: return CC2;
case NE: return CC0 | CC1 | CC3;
switch (GET_CODE (code))
case EQ: return CC3;
case NE: return CC0 | CC1 | CC2;
switch (GET_CODE (code))
case EQ: return CC0 | CC2;
case NE: return CC1 | CC3;
switch (GET_CODE (code))
case LTU: return CC2 | CC3;  /* carry */
case GEU: return CC0 | CC1;  /* no carry */
switch (GET_CODE (code))
case GTU: return CC0 | CC1;  /* borrow */
case LEU: return CC2 | CC3;  /* no borrow */
switch (GET_CODE (code))
case EQ: return CC0 | CC2;
case NE: return CC1 | CC3;
case LTU: return CC1;
case GTU: return CC3;
case LEU: return CC1 | CC2;
case GEU: return CC2 | CC3;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LTU: return CC1;
case GTU: return CC2;
case LEU: return CC0 | CC1;
case GEU: return CC0 | CC2;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC2 | CC1 | CC3;
case LTU: return CC2;
case GTU: return CC1;
case LEU: return CC0 | CC2;
case GEU: return CC0 | CC1;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1 | CC3;
case GT: return CC2;
case LE: return CC0 | CC1 | CC3;
case GE: return CC0 | CC2;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1;
case GT: return CC2 | CC3;
case LE: return CC0 | CC1;
case GE: return CC0 | CC2 | CC3;
switch (GET_CODE (code))
case EQ: return CC0 | CC1 | CC2;
case NE: return CC3;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1;
case GT: return CC2;
case LE: return CC0 | CC1;
case GE: return CC0 | CC2;
case UNORDERED: return CC3;
case ORDERED: return CC0 | CC1 | CC2;
case UNEQ: return CC0 | CC3;
case UNLT: return CC1 | CC3;
case UNGT: return CC2 | CC3;
case UNLE: return CC0 | CC1 | CC3;
case UNGE: return CC0 | CC2 | CC3;
case LTGT: return CC1 | CC2;
switch (GET_CODE (code))
case EQ: return CC0;
case NE: return CC2 | CC1 | CC3;
case LT: return CC2;
case GT: return CC1;
case LE: return CC0 | CC2;
case GE: return CC0 | CC1;
case UNORDERED: return CC3;
case ORDERED: return CC0 | CC2 | CC1;
case UNEQ: return CC0 | CC3;
case UNLT: return CC2 | CC3;
case UNGT: return CC1 | CC3;
case UNLE: return CC0 | CC2 | CC3;
case UNGE: return CC0 | CC1 | CC3;
case LTGT: return CC2 | CC1;
/* Vector comparison modes.  */
/* CC2 will never be set.  It however is part of the negated
switch (GET_CODE (code))
case GE: return CC0;
/* The inverted modes are in fact *any* modes.  */
case LT: return CC3 | CC1 | CC2;
switch (GET_CODE (code))
case GE: return CC0 | CC1;
/* The inverted modes are in fact *all* modes.  */
case LT: return CC3 | CC2;
switch (GET_CODE (code))
case GE: return CC0;
/* The inverted modes are in fact *any* modes.  */
case UNLT: return CC3 | CC1 | CC2;
switch (GET_CODE (code))
case GE: return CC0 | CC1;
/* The inverted modes are in fact *all* modes.  */
case UNLT: return CC3 | CC2;
/* CCRAWmode: the mask is taken directly from the second operand
   (or its complement for the inverted test).  */
switch (GET_CODE (code))
return INTVAL (XEXP (code, 1));
return (INTVAL (XEXP (code, 1))) ^ 0xf;
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   CC3 constant (if any), the case labels and returns of the switch are
   missing; code tokens below are kept verbatim.  */
s390_compare_and_branch_condition_mask (rtx code)
const int CC0 = 1 << 3;
const int CC1 = 1 << 2;
const int CC2 = 1 << 1;
switch (GET_CODE (code))
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */
/* NOTE(review): lossy extraction -- the return type line, braces, the
   declaration of 'mask', the else keyword between the two mask
   assignments and the mask inversion applied when INV is set are
   missing; code tokens below are kept verbatim.  */
s390_branch_condition_mnemonic (rtx code, int inv)
/* Mnemonic suffix indexed by the 4-bit condition mask; 0 (never) and
   15 (always) have no conditional mnemonic, hence the NULLs.  */
static const char *const mnemonic[16] =
NULL, "o", "h", "nle",
"l", "nhe", "lh", "ne",
"e", "nlh", "he", "nl",
"le", "nh", "no", NULL
/* Plain CC-register comparisons take the ordinary mask...  */
if (GET_CODE (XEXP (code, 0)) == REG
    && REGNO (XEXP (code, 0)) == CC_REGNUM
    && (XEXP (code, 1) == const0_rtx
	|| (GET_MODE (XEXP (code, 0)) == CCRAWmode
	    && CONST_INT_P (XEXP (code, 1)))))
mask = s390_branch_condition_mask (code);
/* ...everything else is a compare-and-branch form.  */
mask = s390_compare_and_branch_condition_mask (code);
gcc_assert (mask >= 0);
gcc_assert (mask >= 1 && mask <= 14);
return mnemonic[mask];
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */
/* NOTE(review): lossy extraction -- braces, the declaration of 'i', the
   shift's placement inside the loop (presumably applied on iterations
   after the first) and the final gcc_unreachable/return are missing;
   code tokens below are kept verbatim.  */
unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
unsigned HOST_WIDE_INT value = 0;
/* Number of MODE-sized parts in a host wide int.  */
int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
int part_bits = GET_MODE_BITSIZE (mode);
unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
/* Scan the parts from least significant upwards.  */
for (i = 0; i < max_parts; i++)
value = UINTVAL (op);
value >>= part_bits;
/* First part that differs from DEF is the answer.  */
if ((value & part_mask) != (def & part_mask))
return value & part_mask;
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */
/* NOTE(review): lossy extraction -- the return type line, the MODE and
   DEF parameters of the signature, braces, the declarations of i/part,
   the early 'return -1', the shift placement and the duplicate-part
   bailout inside the loop are missing; code tokens below are kept
   verbatim.  */
s390_single_part (rtx op,
		  machine_mode part_mode,
unsigned HOST_WIDE_INT value = 0;
int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
unsigned HOST_WIDE_INT part_mask
  = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
/* Only CONST_INT operands can be decomposed here.  */
if (GET_CODE (op) != CONST_INT)
for (i = 0; i < n_parts; i++)
value = UINTVAL (op);
value >>= GET_MODE_BITSIZE (part_mode);
if ((value & part_mask) != (def & part_mask))
/* Convert from little-endian part index to the big-endian part
   numbering used by the instruction encodings.  */
return part == -1 ? -1 : n_parts - 1 - part;
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.
   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
/* NOTE(review): lossy extraction -- the tail of the comment above, the
   return type line, braces, the loop-body state machine (the
   found-a-one / past-the-range transitions), the *pstart / *pend stores
   and the final returns are missing; code tokens below are kept
   verbatim.  */
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
/* Bit positions counted from the MSB of a 64-bit word.  */
int lowbit = HOST_BITS_PER_WIDE_INT - 1;
int highbit = HOST_BITS_PER_WIDE_INT - size;
unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
/* Callers must pass both output pointers or neither.  */
gcc_assert (!!pstart == !!pend);
for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
/* Look for the rightmost bit of a contiguous range of ones.  */
/* Look for the first zero bit after the range of ones.  */
if (! (bitmask & in))
/* We're one past the last one-bit.  */
/* No one bits found.  */
if (start > highbit)
unsigned HOST_WIDE_INT mask;
/* Calculate a mask for all bits beyond the contiguous bits.  */
mask = ((~HOST_WIDE_INT_0U >> highbit)
	& (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
/* There are more bits set beyond the first range of one bits.  */
2621 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2622 if ~IN contains a contiguous bitfield. In that case, *END is <
2625 If WRAP_P is true, a bitmask that wraps around is also tested.
2626 When a wraparoud occurs *START is greater than *END (in
2627 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2628 part of the range. If WRAP_P is false, no wraparound is
2632 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in
, bool wrap_p
,
2633 int size
, int *start
, int *end
)
2635 int bs
= HOST_BITS_PER_WIDE_INT
;
2638 gcc_assert (!!start
== !!end
);
2639 if ((in
& ((~HOST_WIDE_INT_0U
) >> (bs
- size
))) == 0)
2640 /* This cannot be expressed as a contiguous bitmask. Exit early because
2641 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2644 b
= s390_contiguous_bitmask_nowrap_p (in
, size
, start
, end
);
2649 b
= s390_contiguous_bitmask_nowrap_p (~in
, size
, start
, end
);
2655 gcc_assert (s
>= 1);
2656 *start
= ((e
+ 1) & (bs
- 1));
2657 *end
= ((s
- 1 + bs
) & (bs
- 1));
2663 /* Return true if OP contains the same contiguous bitfield in *all*
2664 its elements. START and END can be used to obtain the start and
2665 end position of the bitfield.
2667 START/STOP give the position of the first/last bit of the bitfield
2668 counting from the lowest order bit starting with zero. In order to
2669 use these values for S/390 instructions this has to be converted to
2670 "bits big endian" style. */
2673 s390_contiguous_bitmask_vector_p (rtx op
, int *start
, int *end
)
2675 unsigned HOST_WIDE_INT mask
;
2680 /* Handle floats by bitcasting them to ints. */
2681 op
= gen_lowpart (related_int_vector_mode (GET_MODE (op
)).require (), op
);
2683 gcc_assert (!!start
== !!end
);
2684 if (!const_vec_duplicate_p (op
, &elt
)
2685 || !CONST_INT_P (elt
))
2688 size
= GET_MODE_UNIT_BITSIZE (GET_MODE (op
));
2690 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2694 mask
= UINTVAL (elt
);
2696 b
= s390_contiguous_bitmask_p (mask
, true, size
, start
, end
);
2701 *start
-= (HOST_BITS_PER_WIDE_INT
- size
);
2702 *end
-= (HOST_BITS_PER_WIDE_INT
- size
);
2710 /* Return true if C consists only of byte chunks being either 0 or
2711 0xff. If MASK is !=NULL a byte mask is generated which is
2712 appropriate for the vector generate byte mask instruction. */
2715 s390_bytemask_vector_p (rtx op
, unsigned *mask
)
2718 unsigned tmp_mask
= 0;
2719 int nunit
, unit_size
;
2721 if (!VECTOR_MODE_P (GET_MODE (op
))
2722 || GET_CODE (op
) != CONST_VECTOR
2723 || !CONST_INT_P (XVECEXP (op
, 0, 0)))
2726 nunit
= GET_MODE_NUNITS (GET_MODE (op
));
2727 unit_size
= GET_MODE_UNIT_SIZE (GET_MODE (op
));
2729 for (i
= 0; i
< nunit
; i
++)
2731 unsigned HOST_WIDE_INT c
;
2734 if (!CONST_INT_P (XVECEXP (op
, 0, i
)))
2737 c
= UINTVAL (XVECEXP (op
, 0, i
));
2738 for (j
= 0; j
< unit_size
; j
++)
2740 if ((c
& 0xff) != 0 && (c
& 0xff) != 0xff)
2742 tmp_mask
|= (c
& 1) << ((nunit
- 1 - i
) * unit_size
+ j
);
2743 c
= c
>> BITS_PER_UNIT
;
2753 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2754 equivalent to a shift followed by the AND. In particular, CONTIG
2755 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2756 for ROTL indicate a rotate to the right. */
2759 s390_extzv_shift_ok (int bitsize
, int rotl
, unsigned HOST_WIDE_INT contig
)
2764 ok
= s390_contiguous_bitmask_nowrap_p (contig
, bitsize
, &start
, &end
);
2768 return (64 - end
>= rotl
);
2771 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2773 rotl
= -rotl
+ (64 - bitsize
);
2774 return (start
>= rotl
);
2778 /* Check whether we can (and want to) split a double-word
2779 move in mode MODE from SRC to DST into two single-word
2780 moves, moving the subword FIRST_SUBWORD first. */
2783 s390_split_ok_p (rtx dst
, rtx src
, machine_mode mode
, int first_subword
)
2785 /* Floating point and vector registers cannot be split. */
2786 if (FP_REG_P (src
) || FP_REG_P (dst
) || VECTOR_REG_P (src
) || VECTOR_REG_P (dst
))
2789 /* Non-offsettable memory references cannot be split. */
2790 if ((GET_CODE (src
) == MEM
&& !offsettable_memref_p (src
))
2791 || (GET_CODE (dst
) == MEM
&& !offsettable_memref_p (dst
)))
2794 /* Moving the first subword must not clobber a register
2795 needed to move the second subword. */
2796 if (register_operand (dst
, mode
))
2798 rtx subreg
= operand_subword (dst
, first_subword
, 0, mode
);
2799 if (reg_overlap_mentioned_p (subreg
, src
))
2806 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2807 and [MEM2, MEM2 + SIZE] do overlap and false
2811 s390_overlap_p (rtx mem1
, rtx mem2
, HOST_WIDE_INT size
)
2813 rtx addr1
, addr2
, addr_delta
;
2814 HOST_WIDE_INT delta
;
2816 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2822 addr1
= XEXP (mem1
, 0);
2823 addr2
= XEXP (mem2
, 0);
2825 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2827 /* This overlapping check is used by peepholes merging memory block operations.
2828 Overlapping operations would otherwise be recognized by the S/390 hardware
2829 and would fall back to a slower implementation. Allowing overlapping
2830 operations would lead to slow code but not to wrong code. Therefore we are
2831 somewhat optimistic if we cannot prove that the memory blocks are
2833 That's why we return false here although this may accept operations on
2834 overlapping memory areas. */
2835 if (!addr_delta
|| GET_CODE (addr_delta
) != CONST_INT
)
2838 delta
= INTVAL (addr_delta
);
2841 || (delta
> 0 && delta
< size
)
2842 || (delta
< 0 && -delta
< size
))
2848 /* Check whether the address of memory reference MEM2 equals exactly
2849 the address of memory reference MEM1 plus DELTA. Return true if
2850 we can prove this to be the case, false otherwise. */
2853 s390_offset_p (rtx mem1
, rtx mem2
, rtx delta
)
2855 rtx addr1
, addr2
, addr_delta
;
2857 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2860 addr1
= XEXP (mem1
, 0);
2861 addr2
= XEXP (mem2
, 0);
2863 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2864 if (!addr_delta
|| !rtx_equal_p (addr_delta
, delta
))
2870 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2873 s390_expand_logical_operator (enum rtx_code code
, machine_mode mode
,
2876 machine_mode wmode
= mode
;
2877 rtx dst
= operands
[0];
2878 rtx src1
= operands
[1];
2879 rtx src2
= operands
[2];
2882 /* If we cannot handle the operation directly, use a temp register. */
2883 if (!s390_logical_operator_ok_p (operands
))
2884 dst
= gen_reg_rtx (mode
);
2886 /* QImode and HImode patterns make sense only if we have a destination
2887 in memory. Otherwise perform the operation in SImode. */
2888 if ((mode
== QImode
|| mode
== HImode
) && GET_CODE (dst
) != MEM
)
2891 /* Widen operands if required. */
2894 if (GET_CODE (dst
) == SUBREG
2895 && (tem
= simplify_subreg (wmode
, dst
, mode
, 0)) != 0)
2897 else if (REG_P (dst
))
2898 dst
= gen_rtx_SUBREG (wmode
, dst
, 0);
2900 dst
= gen_reg_rtx (wmode
);
2902 if (GET_CODE (src1
) == SUBREG
2903 && (tem
= simplify_subreg (wmode
, src1
, mode
, 0)) != 0)
2905 else if (GET_MODE (src1
) != VOIDmode
)
2906 src1
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src1
), 0);
2908 if (GET_CODE (src2
) == SUBREG
2909 && (tem
= simplify_subreg (wmode
, src2
, mode
, 0)) != 0)
2911 else if (GET_MODE (src2
) != VOIDmode
)
2912 src2
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src2
), 0);
2915 /* Emit the instruction. */
2916 op
= gen_rtx_SET (dst
, gen_rtx_fmt_ee (code
, wmode
, src1
, src2
));
2917 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
2918 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
2920 /* Fix up the destination if needed. */
2921 if (dst
!= operands
[0])
2922 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
2925 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2928 s390_logical_operator_ok_p (rtx
*operands
)
2930 /* If the destination operand is in memory, it needs to coincide
2931 with one of the source operands. After reload, it has to be
2932 the first source operand. */
2933 if (GET_CODE (operands
[0]) == MEM
)
2934 return rtx_equal_p (operands
[0], operands
[1])
2935 || (!reload_completed
&& rtx_equal_p (operands
[0], operands
[2]));
2940 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2941 operand IMMOP to switch from SS to SI type instructions. */
2944 s390_narrow_logical_operator (enum rtx_code code
, rtx
*memop
, rtx
*immop
)
2946 int def
= code
== AND
? -1 : 0;
2950 gcc_assert (GET_CODE (*memop
) == MEM
);
2951 gcc_assert (!MEM_VOLATILE_P (*memop
));
2953 mask
= s390_extract_part (*immop
, QImode
, def
);
2954 part
= s390_single_part (*immop
, GET_MODE (*memop
), QImode
, def
);
2955 gcc_assert (part
>= 0);
2957 *memop
= adjust_address (*memop
, QImode
, part
);
2958 *immop
= gen_int_mode (mask
, QImode
);
2962 /* How to allocate a 'struct machine_function'. */
2964 static struct machine_function
*
2965 s390_init_machine_status (void)
2967 return ggc_cleared_alloc
<machine_function
> ();
2970 /* Map for smallest class containing reg regno. */
2972 const enum reg_class regclass_map
[FIRST_PSEUDO_REGISTER
] =
2973 { GENERAL_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 0 */
2974 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 4 */
2975 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 8 */
2976 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 12 */
2977 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 16 */
2978 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 20 */
2979 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 24 */
2980 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 28 */
2981 ADDR_REGS
, CC_REGS
, ADDR_REGS
, ADDR_REGS
, /* 32 */
2982 ACCESS_REGS
, ACCESS_REGS
, VEC_REGS
, VEC_REGS
, /* 36 */
2983 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 40 */
2984 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 44 */
2985 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 48 */
2986 VEC_REGS
, VEC_REGS
/* 52 */
2989 /* Return attribute type of insn. */
2991 static enum attr_type
2992 s390_safe_attr_type (rtx_insn
*insn
)
2994 if (recog_memoized (insn
) >= 0)
2995 return get_attr_type (insn
);
3000 /* Return attribute relative_long of insn. */
3003 s390_safe_relative_long_p (rtx_insn
*insn
)
3005 if (recog_memoized (insn
) >= 0)
3006 return get_attr_relative_long (insn
) == RELATIVE_LONG_YES
;
3011 /* Return true if DISP is a valid short displacement. */
3014 s390_short_displacement (rtx disp
)
3016 /* No displacement is OK. */
3020 /* Without the long displacement facility we don't need to
3021 distingiush between long and short displacement. */
3022 if (!TARGET_LONG_DISPLACEMENT
)
3025 /* Integer displacement in range. */
3026 if (GET_CODE (disp
) == CONST_INT
)
3027 return INTVAL (disp
) >= 0 && INTVAL (disp
) < 4096;
3029 /* GOT offset is not OK, the GOT can be large. */
3030 if (GET_CODE (disp
) == CONST
3031 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
3032 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
3033 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTNTPOFF
))
3036 /* All other symbolic constants are literal pool references,
3037 which are OK as the literal pool must be small. */
3038 if (GET_CODE (disp
) == CONST
)
3044 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
3045 If successful, also determines the
3046 following characteristics of `ref': `is_ptr' - whether it can be an
3047 LA argument, `is_base_ptr' - whether the resulting base is a well-known
3048 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
3049 considered a literal pool pointer for purposes of avoiding two different
3050 literal pool pointers per insn during or after reload (`B' constraint). */
3052 s390_decompose_constant_pool_ref (rtx
*ref
, rtx
*disp
, bool *is_ptr
,
3053 bool *is_base_ptr
, bool *is_pool_ptr
)
3058 if (GET_CODE (*ref
) == UNSPEC
)
3059 switch (XINT (*ref
, 1))
3063 *disp
= gen_rtx_UNSPEC (Pmode
,
3064 gen_rtvec (1, XVECEXP (*ref
, 0, 0)),
3065 UNSPEC_LTREL_OFFSET
);
3069 *ref
= XVECEXP (*ref
, 0, 1);
3076 if (!REG_P (*ref
) || GET_MODE (*ref
) != Pmode
)
3079 if (REGNO (*ref
) == STACK_POINTER_REGNUM
3080 || REGNO (*ref
) == FRAME_POINTER_REGNUM
3081 || ((reload_completed
|| reload_in_progress
)
3082 && frame_pointer_needed
3083 && REGNO (*ref
) == HARD_FRAME_POINTER_REGNUM
)
3084 || REGNO (*ref
) == ARG_POINTER_REGNUM
3086 && REGNO (*ref
) == PIC_OFFSET_TABLE_REGNUM
))
3087 *is_ptr
= *is_base_ptr
= true;
3089 if ((reload_completed
|| reload_in_progress
)
3090 && *ref
== cfun
->machine
->base_reg
)
3091 *is_ptr
= *is_base_ptr
= *is_pool_ptr
= true;
3096 /* Decompose a RTL expression ADDR for a memory address into
3097 its components, returned in OUT.
3099 Returns false if ADDR is not a valid memory address, true
3100 otherwise. If OUT is NULL, don't return the components,
3101 but check for validity only.
3103 Note: Only addresses in canonical form are recognized.
3104 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
3105 canonical form so that they will be recognized. */
3108 s390_decompose_address (rtx addr
, struct s390_address
*out
)
3110 HOST_WIDE_INT offset
= 0;
3111 rtx base
= NULL_RTX
;
3112 rtx indx
= NULL_RTX
;
3113 rtx disp
= NULL_RTX
;
3115 bool pointer
= false;
3116 bool base_ptr
= false;
3117 bool indx_ptr
= false;
3118 bool literal_pool
= false;
3120 /* We may need to substitute the literal pool base register into the address
3121 below. However, at this point we do not know which register is going to
3122 be used as base, so we substitute the arg pointer register. This is going
3123 to be treated as holding a pointer below -- it shouldn't be used for any
3125 rtx fake_pool_base
= gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
);
3127 /* Decompose address into base + index + displacement. */
3129 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == UNSPEC
)
3132 else if (GET_CODE (addr
) == PLUS
)
3134 rtx op0
= XEXP (addr
, 0);
3135 rtx op1
= XEXP (addr
, 1);
3136 enum rtx_code code0
= GET_CODE (op0
);
3137 enum rtx_code code1
= GET_CODE (op1
);
3139 if (code0
== REG
|| code0
== UNSPEC
)
3141 if (code1
== REG
|| code1
== UNSPEC
)
3143 indx
= op0
; /* index + base */
3149 base
= op0
; /* base + displacement */
3154 else if (code0
== PLUS
)
3156 indx
= XEXP (op0
, 0); /* index + base + disp */
3157 base
= XEXP (op0
, 1);
3168 disp
= addr
; /* displacement */
3170 /* Extract integer part of displacement. */
3174 if (GET_CODE (disp
) == CONST_INT
)
3176 offset
= INTVAL (disp
);
3179 else if (GET_CODE (disp
) == CONST
3180 && GET_CODE (XEXP (disp
, 0)) == PLUS
3181 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
3183 offset
= INTVAL (XEXP (XEXP (disp
, 0), 1));
3184 disp
= XEXP (XEXP (disp
, 0), 0);
3188 /* Strip off CONST here to avoid special case tests later. */
3189 if (disp
&& GET_CODE (disp
) == CONST
)
3190 disp
= XEXP (disp
, 0);
3192 /* We can convert literal pool addresses to
3193 displacements by basing them off the base register. */
3194 if (disp
&& GET_CODE (disp
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (disp
))
3199 base
= fake_pool_base
, literal_pool
= true;
3201 /* Mark up the displacement. */
3202 disp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, disp
),
3203 UNSPEC_LTREL_OFFSET
);
3206 /* Validate base register. */
3207 if (!s390_decompose_constant_pool_ref (&base
, &disp
, &pointer
, &base_ptr
,
3211 /* Validate index register. */
3212 if (!s390_decompose_constant_pool_ref (&indx
, &disp
, &pointer
, &indx_ptr
,
3216 /* Prefer to use pointer as base, not index. */
3217 if (base
&& indx
&& !base_ptr
3218 && (indx_ptr
|| (!REG_POINTER (base
) && REG_POINTER (indx
))))
3225 /* Validate displacement. */
3228 /* If virtual registers are involved, the displacement will change later
3229 anyway as the virtual registers get eliminated. This could make a
3230 valid displacement invalid, but it is more likely to make an invalid
3231 displacement valid, because we sometimes access the register save area
3232 via negative offsets to one of those registers.
3233 Thus we don't check the displacement for validity here. If after
3234 elimination the displacement turns out to be invalid after all,
3235 this is fixed up by reload in any case. */
3236 /* LRA maintains always displacements up to date and we need to
3237 know the displacement is right during all LRA not only at the
3238 final elimination. */
3240 || (base
!= arg_pointer_rtx
3241 && indx
!= arg_pointer_rtx
3242 && base
!= return_address_pointer_rtx
3243 && indx
!= return_address_pointer_rtx
3244 && base
!= frame_pointer_rtx
3245 && indx
!= frame_pointer_rtx
3246 && base
!= virtual_stack_vars_rtx
3247 && indx
!= virtual_stack_vars_rtx
))
3248 if (!DISP_IN_RANGE (offset
))
3253 /* All the special cases are pointers. */
3256 /* In the small-PIC case, the linker converts @GOT
3257 and @GOTNTPOFF offsets to possible displacements. */
3258 if (GET_CODE (disp
) == UNSPEC
3259 && (XINT (disp
, 1) == UNSPEC_GOT
3260 || XINT (disp
, 1) == UNSPEC_GOTNTPOFF
)
3266 /* Accept pool label offsets. */
3267 else if (GET_CODE (disp
) == UNSPEC
3268 && XINT (disp
, 1) == UNSPEC_POOL_OFFSET
)
3271 /* Accept literal pool references. */
3272 else if (GET_CODE (disp
) == UNSPEC
3273 && XINT (disp
, 1) == UNSPEC_LTREL_OFFSET
)
3275 /* In case CSE pulled a non literal pool reference out of
3276 the pool we have to reject the address. This is
3277 especially important when loading the GOT pointer on non
3278 zarch CPUs. In this case the literal pool contains an lt
3279 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3280 will most likely exceed the displacement. */
3281 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
3282 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp
, 0, 0)))
3285 orig_disp
= gen_rtx_CONST (Pmode
, disp
);
3288 /* If we have an offset, make sure it does not
3289 exceed the size of the constant pool entry.
3290 Otherwise we might generate an out-of-range
3291 displacement for the base register form. */
3292 rtx sym
= XVECEXP (disp
, 0, 0);
3293 if (offset
>= GET_MODE_SIZE (get_pool_mode (sym
)))
3296 orig_disp
= plus_constant (Pmode
, orig_disp
, offset
);
3311 out
->disp
= orig_disp
;
3312 out
->pointer
= pointer
;
3313 out
->literal_pool
= literal_pool
;
3319 /* Decompose a RTL expression OP for an address style operand into its
3320 components, and return the base register in BASE and the offset in
3321 OFFSET. While OP looks like an address it is never supposed to be
3324 Return true if OP is a valid address operand, false if not. */
3327 s390_decompose_addrstyle_without_index (rtx op
, rtx
*base
,
3328 HOST_WIDE_INT
*offset
)
3332 /* We can have an integer constant, an address register,
3333 or a sum of the two. */
3334 if (CONST_SCALAR_INT_P (op
))
3339 if (op
&& GET_CODE (op
) == PLUS
&& CONST_SCALAR_INT_P (XEXP (op
, 1)))
3344 while (op
&& GET_CODE (op
) == SUBREG
)
3345 op
= SUBREG_REG (op
);
3347 if (op
&& GET_CODE (op
) != REG
)
3352 if (off
== NULL_RTX
)
3354 else if (CONST_INT_P (off
))
3355 *offset
= INTVAL (off
);
3356 else if (CONST_WIDE_INT_P (off
))
3357 /* The offset will anyway be cut down to 12 bits so take just
3358 the lowest order chunk of the wide int. */
3359 *offset
= CONST_WIDE_INT_ELT (off
, 0);
3369 /* Check that OP is a valid shift count operand.
3370 It should be of the following structure:
3371 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3372 where subreg, and and plus are optional.
3374 If IMPLICIT_MASK is > 0 and OP contains and
3376 it is checked whether IMPLICIT_MASK and the immediate match.
3377 Otherwise, no checking is performed.
3380 s390_valid_shift_count (rtx op
, HOST_WIDE_INT implicit_mask
)
3383 while (GET_CODE (op
) == SUBREG
&& subreg_lowpart_p (op
))
3386 /* Check for an and with proper constant. */
3387 if (GET_CODE (op
) == AND
)
3389 rtx op1
= XEXP (op
, 0);
3390 rtx imm
= XEXP (op
, 1);
3392 if (GET_CODE (op1
) == SUBREG
&& subreg_lowpart_p (op1
))
3393 op1
= XEXP (op1
, 0);
3395 if (!(register_operand (op1
, GET_MODE (op1
)) || GET_CODE (op1
) == PLUS
))
3398 if (!immediate_operand (imm
, GET_MODE (imm
)))
3401 HOST_WIDE_INT val
= INTVAL (imm
);
3402 if (implicit_mask
> 0
3403 && (val
& implicit_mask
) != implicit_mask
)
3409 /* Check the rest. */
3410 return s390_decompose_addrstyle_without_index (op
, NULL
, NULL
);
3413 /* Return true if CODE is a valid address without index. */
3416 s390_legitimate_address_without_index_p (rtx op
)
3418 struct s390_address addr
;
3420 if (!s390_decompose_address (XEXP (op
, 0), &addr
))
3429 /* Return TRUE if ADDR is an operand valid for a load/store relative
3430 instruction. Be aware that the alignment of the operand needs to
3431 be checked separately.
3432 Valid addresses are single references or a sum of a reference and a
3433 constant integer. Return these parts in SYMREF and ADDEND. You can
3434 pass NULL in REF and/or ADDEND if you are not interested in these
3438 s390_loadrelative_operand_p (rtx addr
, rtx
*symref
, HOST_WIDE_INT
*addend
)
3440 HOST_WIDE_INT tmpaddend
= 0;
3442 if (GET_CODE (addr
) == CONST
)
3443 addr
= XEXP (addr
, 0);
3445 if (GET_CODE (addr
) == PLUS
)
3447 if (!CONST_INT_P (XEXP (addr
, 1)))
3450 tmpaddend
= INTVAL (XEXP (addr
, 1));
3451 addr
= XEXP (addr
, 0);
3454 if (GET_CODE (addr
) == SYMBOL_REF
3455 || (GET_CODE (addr
) == UNSPEC
3456 && (XINT (addr
, 1) == UNSPEC_GOTENT
3457 || XINT (addr
, 1) == UNSPEC_PLT31
)))
3462 *addend
= tmpaddend
;
3469 /* Return true if the address in OP is valid for constraint letter C
3470 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3471 pool MEMs should be accepted. Only the Q, R, S, T constraint
3472 letters are allowed for C. */
3475 s390_check_qrst_address (char c
, rtx op
, bool lit_pool_ok
)
3478 struct s390_address addr
;
3479 bool decomposed
= false;
3481 if (!address_operand (op
, GET_MODE (op
)))
3484 /* This check makes sure that no symbolic address (except literal
3485 pool references) are accepted by the R or T constraints. */
3486 if (s390_loadrelative_operand_p (op
, &symref
, NULL
)
3488 || !SYMBOL_REF_P (symref
)
3489 || !CONSTANT_POOL_ADDRESS_P (symref
)))
3492 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3495 if (!s390_decompose_address (op
, &addr
))
3497 if (addr
.literal_pool
)
3502 /* With reload, we sometimes get intermediate address forms that are
3503 actually invalid as-is, but we need to accept them in the most
3504 generic cases below ('R' or 'T'), since reload will in fact fix
3505 them up. LRA behaves differently here; we never see such forms,
3506 but on the other hand, we need to strictly reject every invalid
3507 address form. After both reload and LRA invalid address forms
3508 must be rejected, because nothing will fix them up later. Perform
3509 this check right up front. */
3510 if (lra_in_progress
|| reload_completed
)
3512 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3519 case 'Q': /* no index short displacement */
3520 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3524 if (!s390_short_displacement (addr
.disp
))
3528 case 'R': /* with index short displacement */
3529 if (TARGET_LONG_DISPLACEMENT
)
3531 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3533 if (!s390_short_displacement (addr
.disp
))
3536 /* Any invalid address here will be fixed up by reload,
3537 so accept it for the most generic constraint. */
3540 case 'S': /* no index long displacement */
3541 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3547 case 'T': /* with index long displacement */
3548 /* Any invalid address here will be fixed up by reload,
3549 so accept it for the most generic constraint. */
3559 /* Evaluates constraint strings described by the regular expression
3560 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3561 the constraint given in STR, or 0 else. */
3564 s390_mem_constraint (const char *str
, rtx op
)
3571 /* Check for offsettable variants of memory constraints. */
3572 if (!MEM_P (op
) || MEM_VOLATILE_P (op
))
3574 if ((reload_completed
|| reload_in_progress
)
3575 ? !offsettable_memref_p (op
) : !offsettable_nonstrict_memref_p (op
))
3577 return s390_check_qrst_address (str
[1], XEXP (op
, 0), true);
3579 /* Check for non-literal-pool variants of memory constraints. */
3582 return s390_check_qrst_address (str
[1], XEXP (op
, 0), false);
3587 if (GET_CODE (op
) != MEM
)
3589 return s390_check_qrst_address (c
, XEXP (op
, 0), true);
3591 /* Simply check for the basic form of a shift count. Reload will
3592 take care of making sure we have a proper base register. */
3593 if (!s390_decompose_addrstyle_without_index (op
, NULL
, NULL
))
3597 return s390_check_qrst_address (str
[1], op
, true);
3605 /* Evaluates constraint strings starting with letter O. Input
3606 parameter C is the second letter following the "O" in the constraint
3607 string. Returns 1 if VALUE meets the respective constraint and 0
3611 s390_O_constraint_str (const char c
, HOST_WIDE_INT value
)
3619 return trunc_int_for_mode (value
, SImode
) == value
;
3623 || s390_single_part (GEN_INT (value
), DImode
, SImode
, 0) == 1;
3626 return s390_single_part (GEN_INT (value
- 1), DImode
, SImode
, -1) == 1;
3634 /* Evaluates constraint strings starting with letter N. Parameter STR
3635 contains the letters following letter "N" in the constraint string.
3636 Returns true if VALUE matches the constraint. */
3639 s390_N_constraint_str (const char *str
, HOST_WIDE_INT value
)
3641 machine_mode mode
, part_mode
;
3643 int part
, part_goal
;
3649 part_goal
= str
[0] - '0';
3693 if (GET_MODE_SIZE (mode
) <= GET_MODE_SIZE (part_mode
))
3696 part
= s390_single_part (GEN_INT (value
), mode
, part_mode
, def
);
3699 if (part_goal
!= -1 && part_goal
!= part
)
3706 /* Returns true if the input parameter VALUE is a float zero. */
3709 s390_float_const_zero_p (rtx value
)
3711 return (GET_MODE_CLASS (GET_MODE (value
)) == MODE_FLOAT
3712 && value
== CONST0_RTX (GET_MODE (value
)));
3715 /* Implement TARGET_REGISTER_MOVE_COST. */
3718 s390_register_move_cost (machine_mode mode
,
3719 reg_class_t from
, reg_class_t to
)
3721 /* On s390, copy between fprs and gprs is expensive. */
3723 /* It becomes somewhat faster having ldgr/lgdr. */
3724 if (TARGET_Z10
&& GET_MODE_SIZE (mode
) == 8)
3726 /* ldgr is single cycle. */
3727 if (reg_classes_intersect_p (from
, GENERAL_REGS
)
3728 && reg_classes_intersect_p (to
, FP_REGS
))
3730 /* lgdr needs 3 cycles. */
3731 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
3732 && reg_classes_intersect_p (from
, FP_REGS
))
3736 /* Otherwise copying is done via memory. */
3737 if ((reg_classes_intersect_p (from
, GENERAL_REGS
)
3738 && reg_classes_intersect_p (to
, FP_REGS
))
3739 || (reg_classes_intersect_p (from
, FP_REGS
)
3740 && reg_classes_intersect_p (to
, GENERAL_REGS
)))
3743 /* We usually do not want to copy via CC. */
3744 if (reg_classes_intersect_p (from
, CC_REGS
)
3745 || reg_classes_intersect_p (to
, CC_REGS
))
3751 /* Implement TARGET_MEMORY_MOVE_COST. */
3754 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
3755 reg_class_t rclass ATTRIBUTE_UNUSED
,
3756 bool in ATTRIBUTE_UNUSED
)
3761 /* Compute a (partial) cost for rtx X. Return true if the complete
3762 cost has been computed, and false if subexpressions should be
3763 scanned. In either case, *TOTAL contains the cost result. The
3764 initial value of *TOTAL is the default value computed by
3765 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3766 code of the superexpression of x. */
3769 s390_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
3770 int opno ATTRIBUTE_UNUSED
,
3771 int *total
, bool speed ATTRIBUTE_UNUSED
)
3773 int code
= GET_CODE (x
);
3781 case CONST_WIDE_INT
:
3786 rtx dst
= SET_DEST (x
);
3787 rtx src
= SET_SRC (x
);
3789 switch (GET_CODE (src
))
3791 case IF_THEN_ELSE
: {
3792 /* Without this a conditional move instruction would be
3793 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3794 comparison operator). That's a bit pessimistic. */
3799 rtx cond
= XEXP (src
, 0);
3800 if (!CC_REG_P (XEXP (cond
, 0)) || !CONST_INT_P (XEXP (cond
, 1)))
3803 /* It is going to be a load/store on condition. Make it
3804 slightly more expensive than a normal load. */
3805 *total
= COSTS_N_INSNS (1) + 2;
3807 rtx then
= XEXP (src
, 1);
3808 rtx els
= XEXP (src
, 2);
3810 /* It is a real IF-THEN-ELSE. An additional move will be
3811 needed to implement that. */
3812 if (!TARGET_Z15
&& reload_completed
&& !rtx_equal_p (dst
, then
)
3813 && !rtx_equal_p (dst
, els
))
3814 *total
+= COSTS_N_INSNS (1) / 2;
3816 /* A minor penalty for constants we cannot directly handle. */
3817 if ((CONST_INT_P (then
) || CONST_INT_P (els
))
3818 && (!TARGET_Z13
|| MEM_P (dst
)
3819 || (CONST_INT_P (then
) && !satisfies_constraint_K (then
))
3820 || (CONST_INT_P (els
) && !satisfies_constraint_K (els
))))
3821 *total
+= COSTS_N_INSNS (1) / 2;
3823 /* A store on condition can only handle register src operands. */
3824 if (MEM_P (dst
) && (!REG_P (then
) || !REG_P (els
)))
3825 *total
+= COSTS_N_INSNS (1) / 2;
3833 switch (GET_CODE (dst
))
3836 if (!REG_P (SUBREG_REG (dst
)))
3837 *total
+= rtx_cost (SUBREG_REG (src
), VOIDmode
, SET
, 0, speed
);
3840 /* If this is a VR -> VR copy, count the number of
3842 if (VECTOR_MODE_P (GET_MODE (dst
)) && REG_P (src
))
3844 int nregs
= s390_hard_regno_nregs (VR0_REGNUM
, GET_MODE (dst
));
3845 *total
= COSTS_N_INSNS (nregs
);
3847 /* Same for GPRs. */
3848 else if (REG_P (src
))
3851 = s390_hard_regno_nregs (GPR0_REGNUM
, GET_MODE (dst
));
3852 *total
= COSTS_N_INSNS (nregs
);
3855 /* Otherwise just cost the src. */
3856 *total
+= rtx_cost (src
, mode
, SET
, 1, speed
);
3859 rtx address
= XEXP (dst
, 0);
3862 if (s390_loadrelative_operand_p (address
, &tmp
, &tmp2
))
3863 *total
= COSTS_N_INSNS (1);
3865 *total
= s390_address_cost (address
, mode
, 0, speed
);
3869 /* Not handled for now, assume default costs. */
3870 *total
= COSTS_N_INSNS (1);
3880 && (mode
== SImode
|| mode
== DImode
)
3881 && GET_CODE (XEXP (x
, 0)) == NOT
3882 && GET_CODE (XEXP (x
, 1)) == NOT
)
3884 *total
= COSTS_N_INSNS (1);
3885 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3887 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3893 if (GET_CODE (XEXP (x
, 0)) == AND
3894 && GET_CODE (XEXP (x
, 1)) == ASHIFT
3895 && REG_P (XEXP (XEXP (x
, 0), 0))
3896 && REG_P (XEXP (XEXP (x
, 1), 0))
3897 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3898 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3899 && (UINTVAL (XEXP (XEXP (x
, 0), 1)) ==
3900 (HOST_WIDE_INT_1U
<< UINTVAL (XEXP (XEXP (x
, 1), 1))) - 1))
3902 *total
= COSTS_N_INSNS (2);
3906 /* ~AND on a 128 bit mode. This can be done using a vector
3909 && GET_CODE (XEXP (x
, 0)) == NOT
3910 && GET_CODE (XEXP (x
, 1)) == NOT
3911 && REG_P (XEXP (XEXP (x
, 0), 0))
3912 && REG_P (XEXP (XEXP (x
, 1), 0))
3913 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x
, 0), 0))) == 16
3914 && s390_hard_regno_mode_ok (VR0_REGNUM
,
3915 GET_MODE (XEXP (XEXP (x
, 0), 0))))
3917 *total
= COSTS_N_INSNS (1);
3921 *total
= COSTS_N_INSNS (1);
3927 && (mode
== SImode
|| mode
== DImode
)
3928 && GET_CODE (XEXP (x
, 0)) == NOT
3929 && GET_CODE (XEXP (x
, 1)) == NOT
)
3931 *total
= COSTS_N_INSNS (1);
3932 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3934 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3949 *total
= COSTS_N_INSNS (1);
3957 rtx left
= XEXP (x
, 0);
3958 rtx right
= XEXP (x
, 1);
3959 if (GET_CODE (right
) == CONST_INT
3960 && CONST_OK_FOR_K (INTVAL (right
)))
3961 *total
= s390_cost
->mhi
;
3962 else if (GET_CODE (left
) == SIGN_EXTEND
)
3963 *total
= s390_cost
->mh
;
3965 *total
= s390_cost
->ms
; /* msr, ms, msy */
3970 rtx left
= XEXP (x
, 0);
3971 rtx right
= XEXP (x
, 1);
3974 if (GET_CODE (right
) == CONST_INT
3975 && CONST_OK_FOR_K (INTVAL (right
)))
3976 *total
= s390_cost
->mghi
;
3977 else if (GET_CODE (left
) == SIGN_EXTEND
)
3978 *total
= s390_cost
->msgf
;
3980 *total
= s390_cost
->msg
; /* msgr, msg */
3982 else /* TARGET_31BIT */
3984 if (GET_CODE (left
) == SIGN_EXTEND
3985 && GET_CODE (right
) == SIGN_EXTEND
)
3986 /* mulsidi case: mr, m */
3987 *total
= s390_cost
->m
;
3988 else if (GET_CODE (left
) == ZERO_EXTEND
3989 && GET_CODE (right
) == ZERO_EXTEND
)
3990 /* umulsidi case: ml, mlr */
3991 *total
= s390_cost
->ml
;
3993 /* Complex calculation is required. */
3994 *total
= COSTS_N_INSNS (40);
4000 *total
= s390_cost
->mult_df
;
4003 *total
= s390_cost
->mxbr
;
4014 *total
= s390_cost
->madbr
;
4017 *total
= s390_cost
->maebr
;
4022 /* Negate in the third argument is free: FMSUB. */
4023 if (GET_CODE (XEXP (x
, 2)) == NEG
)
4025 *total
+= (rtx_cost (XEXP (x
, 0), mode
, FMA
, 0, speed
)
4026 + rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
)
4027 + rtx_cost (XEXP (XEXP (x
, 2), 0), mode
, FMA
, 2, speed
));
4034 if (mode
== TImode
) /* 128 bit division */
4035 *total
= s390_cost
->dlgr
;
4036 else if (mode
== DImode
)
4038 rtx right
= XEXP (x
, 1);
4039 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
4040 *total
= s390_cost
->dlr
;
4041 else /* 64 by 64 bit division */
4042 *total
= s390_cost
->dlgr
;
4044 else if (mode
== SImode
) /* 32 bit division */
4045 *total
= s390_cost
->dlr
;
4052 rtx right
= XEXP (x
, 1);
4053 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
4055 *total
= s390_cost
->dsgfr
;
4057 *total
= s390_cost
->dr
;
4058 else /* 64 by 64 bit division */
4059 *total
= s390_cost
->dsgr
;
4061 else if (mode
== SImode
) /* 32 bit division */
4062 *total
= s390_cost
->dlr
;
4063 else if (mode
== SFmode
)
4065 *total
= s390_cost
->debr
;
4067 else if (mode
== DFmode
)
4069 *total
= s390_cost
->ddbr
;
4071 else if (mode
== TFmode
)
4073 *total
= s390_cost
->dxbr
;
4079 *total
= s390_cost
->sqebr
;
4080 else if (mode
== DFmode
)
4081 *total
= s390_cost
->sqdbr
;
4083 *total
= s390_cost
->sqxbr
;
4088 if (outer_code
== MULT
|| outer_code
== DIV
|| outer_code
== MOD
4089 || outer_code
== PLUS
|| outer_code
== MINUS
4090 || outer_code
== COMPARE
)
4095 *total
= COSTS_N_INSNS (1);
4097 /* nxrk, nxgrk ~(a^b)==0 */
4099 && GET_CODE (XEXP (x
, 0)) == NOT
4100 && XEXP (x
, 1) == const0_rtx
4101 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == XOR
4102 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
4105 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
4107 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
4112 /* nnrk, nngrk, nork, nogrk */
4114 && (GET_CODE (XEXP (x
, 0)) == AND
|| GET_CODE (XEXP (x
, 0)) == IOR
)
4115 && XEXP (x
, 1) == const0_rtx
4116 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
4117 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == NOT
4118 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == NOT
4121 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
4123 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 1), 0)))
4128 if (GET_CODE (XEXP (x
, 0)) == AND
4129 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4130 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
4132 rtx op0
= XEXP (XEXP (x
, 0), 0);
4133 rtx op1
= XEXP (XEXP (x
, 0), 1);
4134 rtx op2
= XEXP (x
, 1);
4136 if (memory_operand (op0
, GET_MODE (op0
))
4137 && s390_tm_ccmode (op1
, op2
, 0) != VOIDmode
)
4139 if (register_operand (op0
, GET_MODE (op0
))
4140 && s390_tm_ccmode (op1
, op2
, 1) != VOIDmode
)
4150 /* Return the cost of an address rtx ADDR. */
4153 s390_address_cost (rtx addr
, machine_mode mode ATTRIBUTE_UNUSED
,
4154 addr_space_t as ATTRIBUTE_UNUSED
,
4155 bool speed ATTRIBUTE_UNUSED
)
4157 struct s390_address ad
;
4158 if (!s390_decompose_address (addr
, &ad
))
4161 return ad
.indx
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
4164 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4166 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
4168 int misalign ATTRIBUTE_UNUSED
)
4170 switch (type_of_cost
)
4178 case vector_gather_load
:
4179 case vector_scatter_store
:
4182 case cond_branch_not_taken
:
4184 case vec_promote_demote
:
4185 case unaligned_load
:
4186 case unaligned_store
:
4189 case cond_branch_taken
:
4193 return TYPE_VECTOR_SUBPARTS (vectype
) - 1;
4200 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
4201 otherwise return 0. */
4204 tls_symbolic_operand (rtx op
)
4206 if (GET_CODE (op
) != SYMBOL_REF
)
4208 return SYMBOL_REF_TLS_MODEL (op
);
4211 /* Split DImode access register reference REG (on 64-bit) into its constituent
4212 low and high parts, and store them into LO and HI. Note that gen_lowpart/
4213 gen_highpart cannot be used as they assume all registers are word-sized,
4214 while our access registers have only half that size. */
4217 s390_split_access_reg (rtx reg
, rtx
*lo
, rtx
*hi
)
4219 gcc_assert (TARGET_64BIT
);
4220 gcc_assert (ACCESS_REG_P (reg
));
4221 gcc_assert (GET_MODE (reg
) == DImode
);
4222 gcc_assert (!(REGNO (reg
) & 1));
4224 *lo
= gen_rtx_REG (SImode
, REGNO (reg
) + 1);
4225 *hi
= gen_rtx_REG (SImode
, REGNO (reg
));
4228 /* Return true if OP contains a symbol reference */
4231 symbolic_reference_mentioned_p (rtx op
)
4236 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4239 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4240 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4246 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4247 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4251 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4258 /* Return true if OP contains a reference to a thread-local symbol. */
4261 tls_symbolic_reference_mentioned_p (rtx op
)
4266 if (GET_CODE (op
) == SYMBOL_REF
)
4267 return tls_symbolic_operand (op
);
4269 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4270 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4276 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4277 if (tls_symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4281 else if (fmt
[i
] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op
, i
)))
4289 /* Return true if OP is a legitimate general operand when
4290 generating PIC code. It is given that flag_pic is on
4291 and that OP satisfies CONSTANT_P. */
4294 legitimate_pic_operand_p (rtx op
)
4296 /* Accept all non-symbolic constants. */
4297 if (!SYMBOLIC_CONST (op
))
4300 /* Accept addresses that can be expressed relative to (pc). */
4301 if (larl_operand (op
, VOIDmode
))
4304 /* Reject everything else; must be handled
4305 via emit_symbolic_move. */
4309 /* Returns true if the constant value OP is a legitimate general operand.
4310 It is given that OP satisfies CONSTANT_P. */
4313 s390_legitimate_constant_p (machine_mode mode
, rtx op
)
4315 if (TARGET_VX
&& VECTOR_MODE_P (mode
) && GET_CODE (op
) == CONST_VECTOR
)
4317 if (GET_MODE_SIZE (mode
) != 16)
4320 if (!satisfies_constraint_j00 (op
)
4321 && !satisfies_constraint_jm1 (op
)
4322 && !satisfies_constraint_jKK (op
)
4323 && !satisfies_constraint_jxx (op
)
4324 && !satisfies_constraint_jyy (op
))
4328 /* Accept all non-symbolic constants. */
4329 if (!SYMBOLIC_CONST (op
))
4332 /* Accept immediate LARL operands. */
4333 if (larl_operand (op
, mode
))
4336 /* Thread-local symbols are never legal constants. This is
4337 so that emit_call knows that computing such addresses
4338 might require a function call. */
4339 if (TLS_SYMBOLIC_CONST (op
))
4342 /* In the PIC case, symbolic constants must *not* be
4343 forced into the literal pool. We accept them here,
4344 so that they will be handled by emit_symbolic_move. */
4348 /* All remaining non-PIC symbolic constants are
4349 forced into the literal pool. */
4353 /* Determine if it's legal to put X into the constant pool. This
4354 is not possible if X contains the address of a symbol that is
4355 not constant (TLS) or not known at final link time (PIC). */
4358 s390_cannot_force_const_mem (machine_mode mode
, rtx x
)
4360 switch (GET_CODE (x
))
4364 case CONST_WIDE_INT
:
4366 /* Accept all non-symbolic constants. */
4370 /* Accept an unary '-' only on scalar numeric constants. */
4371 switch (GET_CODE (XEXP (x
, 0)))
4375 case CONST_WIDE_INT
:
4382 /* Labels are OK iff we are non-PIC. */
4383 return flag_pic
!= 0;
4386 /* 'Naked' TLS symbol references are never OK,
4387 non-TLS symbols are OK iff we are non-PIC. */
4388 if (tls_symbolic_operand (x
))
4391 return flag_pic
!= 0;
4394 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0));
4397 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0))
4398 || s390_cannot_force_const_mem (mode
, XEXP (x
, 1));
4401 switch (XINT (x
, 1))
4403 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4404 case UNSPEC_LTREL_OFFSET
:
4412 case UNSPEC_GOTNTPOFF
:
4413 case UNSPEC_INDNTPOFF
:
4416 /* If the literal pool shares the code section, be put
4417 execute template placeholders into the pool as well. */
4429 /* Returns true if the constant value OP is a legitimate general
4430 operand during and after reload. The difference to
4431 legitimate_constant_p is that this function will not accept
4432 a constant that would need to be forced to the literal pool
4433 before it can be used as operand.
4434 This function accepts all constants which can be loaded directly
4438 legitimate_reload_constant_p (rtx op
)
4440 /* Accept la(y) operands. */
4441 if (GET_CODE (op
) == CONST_INT
4442 && DISP_IN_RANGE (INTVAL (op
)))
4445 /* Accept l(g)hi/l(g)fi operands. */
4446 if (GET_CODE (op
) == CONST_INT
4447 && (CONST_OK_FOR_K (INTVAL (op
)) || CONST_OK_FOR_Os (INTVAL (op
))))
4450 /* Accept lliXX operands. */
4452 && GET_CODE (op
) == CONST_INT
4453 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4454 && s390_single_part (op
, word_mode
, HImode
, 0) >= 0)
4458 && GET_CODE (op
) == CONST_INT
4459 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4460 && s390_single_part (op
, word_mode
, SImode
, 0) >= 0)
4463 /* Accept larl operands. */
4464 if (larl_operand (op
, VOIDmode
))
4467 /* Accept floating-point zero operands that fit into a single GPR. */
4468 if (GET_CODE (op
) == CONST_DOUBLE
4469 && s390_float_const_zero_p (op
)
4470 && GET_MODE_SIZE (GET_MODE (op
)) <= UNITS_PER_WORD
)
4473 /* Accept double-word operands that can be split. */
4474 if (GET_CODE (op
) == CONST_WIDE_INT
4475 || (GET_CODE (op
) == CONST_INT
4476 && trunc_int_for_mode (INTVAL (op
), word_mode
) != INTVAL (op
)))
4478 machine_mode dword_mode
= word_mode
== SImode
? DImode
: TImode
;
4479 rtx hi
= operand_subword (op
, 0, 0, dword_mode
);
4480 rtx lo
= operand_subword (op
, 1, 0, dword_mode
);
4481 return legitimate_reload_constant_p (hi
)
4482 && legitimate_reload_constant_p (lo
);
4485 /* Everything else cannot be handled without reload. */
4489 /* Returns true if the constant value OP is a legitimate fp operand
4490 during and after reload.
4491 This function accepts all constants which can be loaded directly
4495 legitimate_reload_fp_constant_p (rtx op
)
4497 /* Accept floating-point zero operands if the load zero instruction
4498 can be used. Prior to z196 the load fp zero instruction caused a
4499 performance penalty if the result is used as BFP number. */
4501 && GET_CODE (op
) == CONST_DOUBLE
4502 && s390_float_const_zero_p (op
))
4508 /* Returns true if the constant value OP is a legitimate vector operand
4509 during and after reload.
4510 This function accepts all constants which can be loaded directly
4514 legitimate_reload_vector_constant_p (rtx op
)
4516 if (TARGET_VX
&& GET_MODE_SIZE (GET_MODE (op
)) == 16
4517 && (satisfies_constraint_j00 (op
)
4518 || satisfies_constraint_jm1 (op
)
4519 || satisfies_constraint_jKK (op
)
4520 || satisfies_constraint_jxx (op
)
4521 || satisfies_constraint_jyy (op
)))
4527 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4528 return the class of reg to actually use. */
4531 s390_preferred_reload_class (rtx op
, reg_class_t rclass
)
4533 switch (GET_CODE (op
))
4535 /* Constants we cannot reload into general registers
4536 must be forced into the literal pool. */
4540 case CONST_WIDE_INT
:
4541 if (reg_class_subset_p (GENERAL_REGS
, rclass
)
4542 && legitimate_reload_constant_p (op
))
4543 return GENERAL_REGS
;
4544 else if (reg_class_subset_p (ADDR_REGS
, rclass
)
4545 && legitimate_reload_constant_p (op
))
4547 else if (reg_class_subset_p (FP_REGS
, rclass
)
4548 && legitimate_reload_fp_constant_p (op
))
4550 else if (reg_class_subset_p (VEC_REGS
, rclass
)
4551 && legitimate_reload_vector_constant_p (op
))
4556 /* If a symbolic constant or a PLUS is reloaded,
4557 it is most likely being used as an address, so
4558 prefer ADDR_REGS. If 'class' is not a superset
4559 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4561 /* Symrefs cannot be pushed into the literal pool with -fPIC
4562 so we *MUST NOT* return NO_REGS for these cases
4563 (s390_cannot_force_const_mem will return true).
4565 On the other hand we MUST return NO_REGS for symrefs with
4566 invalid addend which might have been pushed to the literal
4567 pool (no -fPIC). Usually we would expect them to be
4568 handled via secondary reload but this does not happen if
4569 they are used as literal pool slot replacement in reload
4570 inheritance (see emit_input_reload_insns). */
4571 if (GET_CODE (XEXP (op
, 0)) == PLUS
4572 && GET_CODE (XEXP (XEXP(op
, 0), 0)) == SYMBOL_REF
4573 && GET_CODE (XEXP (XEXP(op
, 0), 1)) == CONST_INT
)
4575 if (flag_pic
&& reg_class_subset_p (ADDR_REGS
, rclass
))
4583 if (!legitimate_reload_constant_p (op
))
4587 /* load address will be used. */
4588 if (reg_class_subset_p (ADDR_REGS
, rclass
))
4600 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4601 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4605 s390_check_symref_alignment (rtx addr
, HOST_WIDE_INT alignment
)
4607 HOST_WIDE_INT addend
;
4610 /* The "required alignment" might be 0 (e.g. for certain structs
4611 accessed via BLKmode). Early abort in this case, as well as when
4612 an alignment > 8 is required. */
4613 if (alignment
< 2 || alignment
> 8)
4616 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4619 if (addend
& (alignment
- 1))
4622 if (GET_CODE (symref
) == SYMBOL_REF
)
4624 /* s390_encode_section_info is not called for anchors, since they don't
4625 have corresponding VAR_DECLs. Therefore, we cannot rely on
4626 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4627 if (SYMBOL_REF_ANCHOR_P (symref
))
4629 HOST_WIDE_INT block_offset
= SYMBOL_REF_BLOCK_OFFSET (symref
);
4630 unsigned int block_alignment
= (SYMBOL_REF_BLOCK (symref
)->alignment
4633 gcc_assert (block_offset
>= 0);
4634 return ((block_offset
& (alignment
- 1)) == 0
4635 && block_alignment
>= alignment
);
4638 /* We have load-relative instructions for 2-byte, 4-byte, and
4639 8-byte alignment so allow only these. */
4642 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref
);
4643 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref
);
4644 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref
);
4645 default: return false;
4649 if (GET_CODE (symref
) == UNSPEC
4650 && alignment
<= UNITS_PER_LONG
)
4656 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4657 operand SCRATCH is used to reload the even part of the address and
4661 s390_reload_larl_operand (rtx reg
, rtx addr
, rtx scratch
)
4663 HOST_WIDE_INT addend
;
4666 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4670 /* Easy case. The addend is even so larl will do fine. */
4671 emit_move_insn (reg
, addr
);
4674 /* We can leave the scratch register untouched if the target
4675 register is a valid base register. */
4676 if (REGNO (reg
) < FIRST_PSEUDO_REGISTER
4677 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
)
4680 gcc_assert (REGNO (scratch
) < FIRST_PSEUDO_REGISTER
);
4681 gcc_assert (REGNO_REG_CLASS (REGNO (scratch
)) == ADDR_REGS
);
4684 emit_move_insn (scratch
,
4685 gen_rtx_CONST (Pmode
,
4686 gen_rtx_PLUS (Pmode
, symref
,
4687 GEN_INT (addend
- 1))));
4689 emit_move_insn (scratch
, symref
);
4691 /* Increment the address using la in order to avoid clobbering cc. */
4692 s390_load_address (reg
, gen_rtx_PLUS (Pmode
, scratch
, const1_rtx
));
4696 /* Generate what is necessary to move between REG and MEM using
4697 SCRATCH. The direction is given by TOMEM. */
4700 s390_reload_symref_address (rtx reg
, rtx mem
, rtx scratch
, bool tomem
)
4702 /* Reload might have pulled a constant out of the literal pool.
4703 Force it back in. */
4704 if (CONST_INT_P (mem
) || GET_CODE (mem
) == CONST_DOUBLE
4705 || GET_CODE (mem
) == CONST_WIDE_INT
4706 || GET_CODE (mem
) == CONST_VECTOR
4707 || GET_CODE (mem
) == CONST
)
4708 mem
= force_const_mem (GET_MODE (reg
), mem
);
4710 gcc_assert (MEM_P (mem
));
4712 /* For a load from memory we can leave the scratch register
4713 untouched if the target register is a valid base register. */
4715 && REGNO (reg
) < FIRST_PSEUDO_REGISTER
4716 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
4717 && GET_MODE (reg
) == GET_MODE (scratch
))
4720 /* Load address into scratch register. Since we can't have a
4721 secondary reload for a secondary reload we have to cover the case
4722 where larl would need a secondary reload here as well. */
4723 s390_reload_larl_operand (scratch
, XEXP (mem
, 0), scratch
);
4725 /* Now we can use a standard load/store to do the move. */
4727 emit_move_insn (replace_equiv_address (mem
, scratch
), reg
);
4729 emit_move_insn (reg
, replace_equiv_address (mem
, scratch
));
4732 /* Inform reload about cases where moving X with a mode MODE to a register in
4733 RCLASS requires an extra scratch or immediate register. Return the class
4734 needed for the immediate register. */
4737 s390_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
4738 machine_mode mode
, secondary_reload_info
*sri
)
4740 enum reg_class rclass
= (enum reg_class
) rclass_i
;
4742 /* Intermediate register needed. */
4743 if (reg_classes_intersect_p (CC_REGS
, rclass
))
4744 return GENERAL_REGS
;
4748 /* The vst/vl vector move instructions allow only for short
4751 && GET_CODE (XEXP (x
, 0)) == PLUS
4752 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4753 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x
, 0), 1)))
4754 && reg_class_subset_p (rclass
, VEC_REGS
)
4755 && (!reg_class_subset_p (rclass
, FP_REGS
)
4756 || (GET_MODE_SIZE (mode
) > 8
4757 && s390_class_max_nregs (FP_REGS
, mode
) == 1)))
4760 sri
->icode
= (TARGET_64BIT
?
4761 CODE_FOR_reloaddi_la_in
:
4762 CODE_FOR_reloadsi_la_in
);
4764 sri
->icode
= (TARGET_64BIT
?
4765 CODE_FOR_reloaddi_la_out
:
4766 CODE_FOR_reloadsi_la_out
);
4772 HOST_WIDE_INT offset
;
4775 /* On z10 several optimizer steps may generate larl operands with
4778 && s390_loadrelative_operand_p (x
, &symref
, &offset
)
4780 && !SYMBOL_FLAG_NOTALIGN2_P (symref
)
4781 && (offset
& 1) == 1)
4782 sri
->icode
= ((mode
== DImode
) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4783 : CODE_FOR_reloadsi_larl_odd_addend_z10
);
4785 /* Handle all the (mem (symref)) accesses we cannot use the z10
4786 instructions for. */
4788 && s390_loadrelative_operand_p (XEXP (x
, 0), NULL
, NULL
)
4790 || !reg_class_subset_p (rclass
, GENERAL_REGS
)
4791 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
4792 || !s390_check_symref_alignment (XEXP (x
, 0),
4793 GET_MODE_SIZE (mode
))))
4795 #define __SECONDARY_RELOAD_CASE(M,m) \
4798 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4799 CODE_FOR_reload##m##di_tomem_z10; \
4801 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4802 CODE_FOR_reload##m##si_tomem_z10; \
4805 switch (GET_MODE (x
))
4807 __SECONDARY_RELOAD_CASE (QI
, qi
);
4808 __SECONDARY_RELOAD_CASE (HI
, hi
);
4809 __SECONDARY_RELOAD_CASE (SI
, si
);
4810 __SECONDARY_RELOAD_CASE (DI
, di
);
4811 __SECONDARY_RELOAD_CASE (TI
, ti
);
4812 __SECONDARY_RELOAD_CASE (SF
, sf
);
4813 __SECONDARY_RELOAD_CASE (DF
, df
);
4814 __SECONDARY_RELOAD_CASE (TF
, tf
);
4815 __SECONDARY_RELOAD_CASE (SD
, sd
);
4816 __SECONDARY_RELOAD_CASE (DD
, dd
);
4817 __SECONDARY_RELOAD_CASE (TD
, td
);
4818 __SECONDARY_RELOAD_CASE (V1QI
, v1qi
);
4819 __SECONDARY_RELOAD_CASE (V2QI
, v2qi
);
4820 __SECONDARY_RELOAD_CASE (V4QI
, v4qi
);
4821 __SECONDARY_RELOAD_CASE (V8QI
, v8qi
);
4822 __SECONDARY_RELOAD_CASE (V16QI
, v16qi
);
4823 __SECONDARY_RELOAD_CASE (V1HI
, v1hi
);
4824 __SECONDARY_RELOAD_CASE (V2HI
, v2hi
);
4825 __SECONDARY_RELOAD_CASE (V4HI
, v4hi
);
4826 __SECONDARY_RELOAD_CASE (V8HI
, v8hi
);
4827 __SECONDARY_RELOAD_CASE (V1SI
, v1si
);
4828 __SECONDARY_RELOAD_CASE (V2SI
, v2si
);
4829 __SECONDARY_RELOAD_CASE (V4SI
, v4si
);
4830 __SECONDARY_RELOAD_CASE (V1DI
, v1di
);
4831 __SECONDARY_RELOAD_CASE (V2DI
, v2di
);
4832 __SECONDARY_RELOAD_CASE (V1TI
, v1ti
);
4833 __SECONDARY_RELOAD_CASE (V1SF
, v1sf
);
4834 __SECONDARY_RELOAD_CASE (V2SF
, v2sf
);
4835 __SECONDARY_RELOAD_CASE (V4SF
, v4sf
);
4836 __SECONDARY_RELOAD_CASE (V1DF
, v1df
);
4837 __SECONDARY_RELOAD_CASE (V2DF
, v2df
);
4838 __SECONDARY_RELOAD_CASE (V1TF
, v1tf
);
4842 #undef __SECONDARY_RELOAD_CASE
4846 /* We need a scratch register when loading a PLUS expression which
4847 is not a legitimate operand of the LOAD ADDRESS instruction. */
4848 /* LRA can deal with transformation of plus op very well -- so we
4849 don't need to prompt LRA in this case. */
4850 if (! lra_in_progress
&& in_p
&& s390_plus_operand (x
, mode
))
4851 sri
->icode
= (TARGET_64BIT
?
4852 CODE_FOR_reloaddi_plus
: CODE_FOR_reloadsi_plus
);
4854 /* Performing a multiword move from or to memory we have to make sure the
4855 second chunk in memory is addressable without causing a displacement
4856 overflow. If that would be the case we calculate the address in
4857 a scratch register. */
4859 && GET_CODE (XEXP (x
, 0)) == PLUS
4860 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4861 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x
, 0), 1))
4862 + GET_MODE_SIZE (mode
) - 1))
4864 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4865 in a s_operand address since we may fallback to lm/stm. So we only
4866 have to care about overflows in the b+i+d case. */
4867 if ((reg_classes_intersect_p (GENERAL_REGS
, rclass
)
4868 && s390_class_max_nregs (GENERAL_REGS
, mode
) > 1
4869 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == PLUS
)
4870 /* For FP_REGS no lm/stm is available so this check is triggered
4871 for displacement overflows in b+i+d and b+d like addresses. */
4872 || (reg_classes_intersect_p (FP_REGS
, rclass
)
4873 && s390_class_max_nregs (FP_REGS
, mode
) > 1))
4876 sri
->icode
= (TARGET_64BIT
?
4877 CODE_FOR_reloaddi_la_in
:
4878 CODE_FOR_reloadsi_la_in
);
4880 sri
->icode
= (TARGET_64BIT
?
4881 CODE_FOR_reloaddi_la_out
:
4882 CODE_FOR_reloadsi_la_out
);
4886 /* A scratch address register is needed when a symbolic constant is
4887 copied to r0 compiling with -fPIC. In other cases the target
4888 register might be used as temporary (see legitimize_pic_address). */
4889 if (in_p
&& SYMBOLIC_CONST (x
) && flag_pic
== 2 && rclass
!= ADDR_REGS
)
4890 sri
->icode
= (TARGET_64BIT
?
4891 CODE_FOR_reloaddi_PIC_addr
:
4892 CODE_FOR_reloadsi_PIC_addr
);
4894 /* Either scratch or no register needed. */
4898 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4900 We need secondary memory to move data between GPRs and FPRs.
4902 - With DFP the ldgr lgdr instructions are available. Due to the
4903 different alignment we cannot use them for SFmode. For 31 bit a
4904 64 bit value in GPR would be a register pair so here we still
4905 need to go via memory.
4907 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4908 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4909 in full VRs so as before also on z13 we do these moves via
4912 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4915 s390_secondary_memory_needed (machine_mode mode
,
4916 reg_class_t class1
, reg_class_t class2
)
4918 return (((reg_classes_intersect_p (class1
, VEC_REGS
)
4919 && reg_classes_intersect_p (class2
, GENERAL_REGS
))
4920 || (reg_classes_intersect_p (class1
, GENERAL_REGS
)
4921 && reg_classes_intersect_p (class2
, VEC_REGS
)))
4922 && (TARGET_TPF
|| !TARGET_DFP
|| !TARGET_64BIT
4923 || GET_MODE_SIZE (mode
) != 8)
4924 && (!TARGET_VX
|| (SCALAR_FLOAT_MODE_P (mode
)
4925 && GET_MODE_SIZE (mode
) > 8)));
4928 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4930 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4931 because the movsi and movsf patterns don't handle r/f moves. */
4934 s390_secondary_memory_needed_mode (machine_mode mode
)
4936 if (GET_MODE_BITSIZE (mode
) < 32)
4937 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
4941 /* Generate code to load SRC, which is PLUS that is not a
4942 legitimate operand for the LA instruction, into TARGET.
4943 SCRATCH may be used as scratch register. */
4946 s390_expand_plus_operand (rtx target
, rtx src
,
4950 struct s390_address ad
;
4952 /* src must be a PLUS; get its two operands. */
4953 gcc_assert (GET_CODE (src
) == PLUS
);
4954 gcc_assert (GET_MODE (src
) == Pmode
);
4956 /* Check if any of the two operands is already scheduled
4957 for replacement by reload. This can happen e.g. when
4958 float registers occur in an address. */
4959 sum1
= find_replacement (&XEXP (src
, 0));
4960 sum2
= find_replacement (&XEXP (src
, 1));
4961 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4963 /* If the address is already strictly valid, there's nothing to do. */
4964 if (!s390_decompose_address (src
, &ad
)
4965 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4966 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
4968 /* Otherwise, one of the operands cannot be an address register;
4969 we reload its value into the scratch register. */
4970 if (true_regnum (sum1
) < 1 || true_regnum (sum1
) > 15)
4972 emit_move_insn (scratch
, sum1
);
4975 if (true_regnum (sum2
) < 1 || true_regnum (sum2
) > 15)
4977 emit_move_insn (scratch
, sum2
);
4981 /* According to the way these invalid addresses are generated
4982 in reload.cc, it should never happen (at least on s390) that
4983 *neither* of the PLUS components, after find_replacements
4984 was applied, is an address register. */
4985 if (sum1
== scratch
&& sum2
== scratch
)
4991 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4994 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4995 is only ever performed on addresses, so we can mark the
4996 sum as legitimate for LA in any case. */
4997 s390_load_address (target
, src
);
5001 /* Return true if ADDR is a valid memory address.
5002 STRICT specifies whether strict register checking applies. */
5005 s390_legitimate_address_p (machine_mode mode
, rtx addr
, bool strict
,
5006 code_helper
= ERROR_MARK
)
5008 struct s390_address ad
;
5011 && larl_operand (addr
, VOIDmode
)
5012 && (mode
== VOIDmode
5013 || s390_check_symref_alignment (addr
, GET_MODE_SIZE (mode
))))
5016 if (!s390_decompose_address (addr
, &ad
))
5019 /* The vector memory instructions only support short displacements.
5020 Reject invalid displacements early to prevent plenty of lay
5021 instructions to be generated later which then cannot be merged
5024 && VECTOR_MODE_P (mode
)
5025 && ad
.disp
!= NULL_RTX
5026 && CONST_INT_P (ad
.disp
)
5027 && !SHORT_DISP_IN_RANGE (INTVAL (ad
.disp
)))
5032 if (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
5035 if (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
)))
5041 && !(REGNO (ad
.base
) >= FIRST_PSEUDO_REGISTER
5042 || REGNO_REG_CLASS (REGNO (ad
.base
)) == ADDR_REGS
))
5046 && !(REGNO (ad
.indx
) >= FIRST_PSEUDO_REGISTER
5047 || REGNO_REG_CLASS (REGNO (ad
.indx
)) == ADDR_REGS
))
5053 /* Return true if OP is a valid operand for the LA instruction.
5054 In 31-bit, we need to prove that the result is used as an
5055 address, as LA performs only a 31-bit addition. */
5058 legitimate_la_operand_p (rtx op
)
5060 struct s390_address addr
;
5061 if (!s390_decompose_address (op
, &addr
))
5064 return (TARGET_64BIT
|| addr
.pointer
);
5067 /* Return true if it is valid *and* preferable to use LA to
5068 compute the sum of OP1 and OP2. */
5071 preferred_la_operand_p (rtx op1
, rtx op2
)
5073 struct s390_address addr
;
5075 if (op2
!= const0_rtx
)
5076 op1
= gen_rtx_PLUS (Pmode
, op1
, op2
);
5078 if (!s390_decompose_address (op1
, &addr
))
5080 if (addr
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (addr
.base
)))
5082 if (addr
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (addr
.indx
)))
5085 /* Avoid LA instructions with index (and base) register on z196 or
5086 later; it is preferable to use regular add instructions when
5087 possible. Starting with zEC12 the la with index register is
5088 "uncracked" again but still slower than a regular add. */
5089 if (addr
.indx
&& s390_tune
>= PROCESSOR_2817_Z196
)
5092 if (!TARGET_64BIT
&& !addr
.pointer
)
5098 if ((addr
.base
&& REG_P (addr
.base
) && REG_POINTER (addr
.base
))
5099 || (addr
.indx
&& REG_P (addr
.indx
) && REG_POINTER (addr
.indx
)))
5105 /* Emit a forced load-address operation to load SRC into DST.
5106 This will use the LOAD ADDRESS instruction even in situations
5107 where legitimate_la_operand_p (SRC) returns false. */
5110 s390_load_address (rtx dst
, rtx src
)
5113 emit_move_insn (dst
, src
);
5115 emit_insn (gen_force_la_31 (dst
, src
));
5118 /* Return true if it ok to use SYMBOL_REF in a relative address. */
5121 s390_rel_address_ok_p (rtx symbol_ref
)
5125 if (symbol_ref
== s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref
))
5128 decl
= SYMBOL_REF_DECL (symbol_ref
);
5130 if (!flag_pic
|| SYMBOL_REF_LOCAL_P (symbol_ref
))
5131 return (s390_pic_data_is_text_relative
5133 && TREE_CODE (decl
) == FUNCTION_DECL
));
5138 /* Return a legitimate reference for ORIG (an address) using the
5139 register REG. If REG is 0, a new pseudo is generated.
5141 There are two types of references that must be handled:
5143 1. Global data references must load the address from the GOT, via
5144 the PIC reg. An insn is emitted to do this load, and the reg is
5147 2. Static data references, constant pool addresses, and code labels
5148 compute the address as an offset from the GOT, whose base is in
5149 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5150 differentiate them from global data objects. The returned
5151 address is the PIC reg + an unspec constant.
5153 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
5154 reg also appears in the address. */
5157 legitimize_pic_address (rtx orig
, rtx reg
)
5160 rtx addend
= const0_rtx
;
5163 gcc_assert (!TLS_SYMBOLIC_CONST (addr
));
5165 if (GET_CODE (addr
) == CONST
)
5166 addr
= XEXP (addr
, 0);
5168 if (GET_CODE (addr
) == PLUS
)
5170 addend
= XEXP (addr
, 1);
5171 addr
= XEXP (addr
, 0);
5174 if ((GET_CODE (addr
) == LABEL_REF
5175 || (SYMBOL_REF_P (addr
) && s390_rel_address_ok_p (addr
))
5176 || (GET_CODE (addr
) == UNSPEC
&&
5177 (XINT (addr
, 1) == UNSPEC_GOTENT
5178 || XINT (addr
, 1) == UNSPEC_PLT31
)))
5179 && GET_CODE (addend
) == CONST_INT
)
5181 /* This can be locally addressed. */
5183 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
5184 rtx const_addr
= (GET_CODE (addr
) == UNSPEC
?
5185 gen_rtx_CONST (Pmode
, addr
) : addr
);
5187 if (larl_operand (const_addr
, VOIDmode
)
5188 && INTVAL (addend
) < HOST_WIDE_INT_1
<< 31
5189 && INTVAL (addend
) >= -(HOST_WIDE_INT_1
<< 31))
5191 if (INTVAL (addend
) & 1)
5193 /* LARL can't handle odd offsets, so emit a pair of LARL
5195 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
5197 if (!DISP_IN_RANGE (INTVAL (addend
)))
5199 HOST_WIDE_INT even
= INTVAL (addend
) - 1;
5200 addr
= gen_rtx_PLUS (Pmode
, addr
, GEN_INT (even
));
5201 addr
= gen_rtx_CONST (Pmode
, addr
);
5202 addend
= const1_rtx
;
5205 emit_move_insn (temp
, addr
);
5206 new_rtx
= gen_rtx_PLUS (Pmode
, temp
, addend
);
5210 s390_load_address (reg
, new_rtx
);
5216 /* If the offset is even, we can just use LARL. This
5217 will happen automatically. */
5222 /* No larl - Access local symbols relative to the GOT. */
5224 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
5226 if (reload_in_progress
|| reload_completed
)
5227 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5229 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5230 if (addend
!= const0_rtx
)
5231 addr
= gen_rtx_PLUS (Pmode
, addr
, addend
);
5232 addr
= gen_rtx_CONST (Pmode
, addr
);
5233 addr
= force_const_mem (Pmode
, addr
);
5234 emit_move_insn (temp
, addr
);
5236 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
5239 s390_load_address (reg
, new_rtx
);
5244 else if (GET_CODE (addr
) == SYMBOL_REF
&& addend
== const0_rtx
)
5246 /* A non-local symbol reference without addend.
5248 The symbol ref is wrapped into an UNSPEC to make sure the
5249 proper operand modifier (@GOT or @GOTENT) will be emitted.
5250 This will tell the linker to put the symbol into the GOT.
5252 Additionally the code dereferencing the GOT slot is emitted here.
5254 An addend to the symref needs to be added afterwards.
5255 legitimize_pic_address calls itself recursively to handle
5256 that case. So no need to do it here. */
5259 reg
= gen_reg_rtx (Pmode
);
5263 /* Use load relative if possible.
5264 lgrl <target>, sym@GOTENT */
5265 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
5266 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5267 new_rtx
= gen_const_mem (GET_MODE (reg
), new_rtx
);
5269 emit_move_insn (reg
, new_rtx
);
5272 else if (flag_pic
== 1)
5274 /* Assume GOT offset is a valid displacement operand (< 4k
5275 or < 512k with z990). This is handled the same way in
5276 both 31- and 64-bit code (@GOT).
5277 lg <target>, sym@GOT(r12) */
5279 if (reload_in_progress
|| reload_completed
)
5280 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5282 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5283 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5284 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5285 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5286 emit_move_insn (reg
, new_rtx
);
5291 /* If the GOT offset might be >= 4k, we determine the position
5292 of the GOT entry via a PC-relative LARL (@GOTENT).
5293 larl temp, sym@GOTENT
5294 lg <target>, 0(temp) */
5296 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
5298 gcc_assert (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
5299 || REGNO_REG_CLASS (REGNO (temp
)) == ADDR_REGS
);
5301 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
5302 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5303 emit_move_insn (temp
, new_rtx
);
5304 new_rtx
= gen_const_mem (Pmode
, temp
);
5305 emit_move_insn (reg
, new_rtx
);
5310 else if (GET_CODE (addr
) == UNSPEC
&& GET_CODE (addend
) == CONST_INT
)
5312 gcc_assert (XVECLEN (addr
, 0) == 1);
5313 switch (XINT (addr
, 1))
5315 /* These address symbols (or PLT slots) relative to the GOT
5316 (not GOT slots!). In general this will exceed the
5317 displacement range so these value belong into the literal
5321 new_rtx
= force_const_mem (Pmode
, orig
);
5324 /* For -fPIC the GOT size might exceed the displacement
5325 range so make sure the value is in the literal pool. */
5328 new_rtx
= force_const_mem (Pmode
, orig
);
5331 /* For @GOTENT larl is used. This is handled like local
5337 /* For @PLT larl is used. This is handled like local
5343 /* Everything else cannot happen. */
5348 else if (addend
!= const0_rtx
)
5350 /* Otherwise, compute the sum. */
5352 rtx base
= legitimize_pic_address (addr
, reg
);
5353 new_rtx
= legitimize_pic_address (addend
,
5354 base
== reg
? NULL_RTX
: reg
);
5355 if (GET_CODE (new_rtx
) == CONST_INT
)
5356 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
5359 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
5361 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
5362 new_rtx
= XEXP (new_rtx
, 1);
5364 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
5367 if (GET_CODE (new_rtx
) == CONST
)
5368 new_rtx
= XEXP (new_rtx
, 0);
5369 new_rtx
= force_operand (new_rtx
, 0);
5375 /* Load the thread pointer into a register. */
5378 s390_get_thread_pointer (void)
5380 rtx tp
= gen_reg_rtx (Pmode
);
5382 emit_insn (gen_get_thread_pointer (Pmode
, tp
));
5384 mark_reg_pointer (tp
, BITS_PER_WORD
);
5389 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5390 in s390_tls_symbol which always refers to __tls_get_offset.
5391 The returned offset is written to RESULT_REG and an USE rtx is
5392 generated for TLS_CALL. */
5394 static GTY(()) rtx s390_tls_symbol
;
5397 s390_emit_tls_call_insn (rtx result_reg
, rtx tls_call
)
5402 emit_insn (s390_load_got ());
5404 if (!s390_tls_symbol
)
5406 s390_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tls_get_offset");
5407 SYMBOL_REF_FLAGS (s390_tls_symbol
) |= SYMBOL_FLAG_FUNCTION
;
5410 insn
= s390_emit_call (s390_tls_symbol
, tls_call
, result_reg
,
5411 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
5413 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), result_reg
);
5414 RTL_CONST_CALL_P (insn
) = 1;
5417 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5418 this (thread-local) address. REG may be used as temporary. */
5421 legitimize_tls_address (rtx addr
, rtx reg
)
5423 rtx new_rtx
, tls_call
, temp
, base
, r2
;
5426 if (GET_CODE (addr
) == SYMBOL_REF
)
5427 switch (tls_symbolic_operand (addr
))
5429 case TLS_MODEL_GLOBAL_DYNAMIC
:
5431 r2
= gen_rtx_REG (Pmode
, 2);
5432 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_TLSGD
);
5433 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5434 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5435 emit_move_insn (r2
, new_rtx
);
5436 s390_emit_tls_call_insn (r2
, tls_call
);
5437 insn
= get_insns ();
5440 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5441 temp
= gen_reg_rtx (Pmode
);
5442 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5444 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5447 s390_load_address (reg
, new_rtx
);
5452 case TLS_MODEL_LOCAL_DYNAMIC
:
5454 r2
= gen_rtx_REG (Pmode
, 2);
5455 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM
);
5456 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5457 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5458 emit_move_insn (r2
, new_rtx
);
5459 s390_emit_tls_call_insn (r2
, tls_call
);
5460 insn
= get_insns ();
5463 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM_NTPOFF
);
5464 temp
= gen_reg_rtx (Pmode
);
5465 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5467 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5468 base
= gen_reg_rtx (Pmode
);
5469 s390_load_address (base
, new_rtx
);
5471 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_DTPOFF
);
5472 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5473 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5474 temp
= gen_reg_rtx (Pmode
);
5475 emit_move_insn (temp
, new_rtx
);
5477 new_rtx
= gen_rtx_PLUS (Pmode
, base
, temp
);
5480 s390_load_address (reg
, new_rtx
);
5485 case TLS_MODEL_INITIAL_EXEC
:
5488 /* Assume GOT offset < 4k. This is handled the same way
5489 in both 31- and 64-bit code. */
5491 if (reload_in_progress
|| reload_completed
)
5492 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5494 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTNTPOFF
);
5495 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5496 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5497 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5498 temp
= gen_reg_rtx (Pmode
);
5499 emit_move_insn (temp
, new_rtx
);
5503 /* If the GOT offset might be >= 4k, we determine the position
5504 of the GOT entry via a PC-relative LARL. */
5506 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_INDNTPOFF
);
5507 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5508 temp
= gen_reg_rtx (Pmode
);
5509 emit_move_insn (temp
, new_rtx
);
5511 new_rtx
= gen_const_mem (Pmode
, temp
);
5512 temp
= gen_reg_rtx (Pmode
);
5513 emit_move_insn (temp
, new_rtx
);
5516 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5519 s390_load_address (reg
, new_rtx
);
5524 case TLS_MODEL_LOCAL_EXEC
:
5525 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5526 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5527 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5528 temp
= gen_reg_rtx (Pmode
);
5529 emit_move_insn (temp
, new_rtx
);
5531 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5534 s390_load_address (reg
, new_rtx
);
5543 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == UNSPEC
)
5545 switch (XINT (XEXP (addr
, 0), 1))
5548 case UNSPEC_INDNTPOFF
:
5557 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
5558 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
5560 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5561 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5562 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5564 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5565 new_rtx
= plus_constant (Pmode
, new_rtx
,
5566 INTVAL (XEXP (XEXP (addr
, 0), 1)));
5567 new_rtx
= force_operand (new_rtx
, 0);
5570 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5571 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == NEG
)
5573 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5574 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5575 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5577 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5578 new_rtx
= gen_rtx_NEG (Pmode
, new_rtx
);
5579 new_rtx
= force_operand (new_rtx
, 0);
5583 gcc_unreachable (); /* for now ... */
5588 /* Emit insns making the address in operands[1] valid for a standard
5589 move to operands[0]. operands[1] is replaced by an address which
5590 should be used instead of the former RTX to emit the move
5594 emit_symbolic_move (rtx
*operands
)
5596 rtx temp
= !can_create_pseudo_p () ? operands
[0] : gen_reg_rtx (Pmode
);
5598 if (GET_CODE (operands
[0]) == MEM
)
5599 operands
[1] = force_reg (Pmode
, operands
[1]);
5600 else if (TLS_SYMBOLIC_CONST (operands
[1]))
5601 operands
[1] = legitimize_tls_address (operands
[1], temp
);
5603 operands
[1] = legitimize_pic_address (operands
[1], temp
);
5606 /* Try machine-dependent ways of modifying an illegitimate address X
5607 to be legitimate. If we find one, return the new, valid address.
5609 OLDX is the address as it was before break_out_memory_refs was called.
5610 In some cases it is useful to look at this to decide what needs to be done.
5612 MODE is the mode of the operand pointed to by X.
5614 When -fpic is used, special handling is needed for symbolic references.
5615 See comments by legitimize_pic_address for details. */
5618 s390_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
5619 machine_mode mode ATTRIBUTE_UNUSED
)
5621 rtx constant_term
= const0_rtx
;
5623 if (TLS_SYMBOLIC_CONST (x
))
5625 x
= legitimize_tls_address (x
, 0);
5627 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5630 else if (GET_CODE (x
) == PLUS
5631 && (TLS_SYMBOLIC_CONST (XEXP (x
, 0))
5632 || TLS_SYMBOLIC_CONST (XEXP (x
, 1))))
5638 if (SYMBOLIC_CONST (x
)
5639 || (GET_CODE (x
) == PLUS
5640 && (SYMBOLIC_CONST (XEXP (x
, 0))
5641 || SYMBOLIC_CONST (XEXP (x
, 1)))))
5642 x
= legitimize_pic_address (x
, 0);
5644 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5648 x
= eliminate_constant_term (x
, &constant_term
);
5650 /* Optimize loading of large displacements by splitting them
5651 into the multiple of 4K and the rest; this allows the
5652 former to be CSE'd if possible.
5654 Don't do this if the displacement is added to a register
5655 pointing into the stack frame, as the offsets will
5656 change later anyway. */
5658 if (GET_CODE (constant_term
) == CONST_INT
5659 && !TARGET_LONG_DISPLACEMENT
5660 && !DISP_IN_RANGE (INTVAL (constant_term
))
5661 && !(REG_P (x
) && REGNO_PTR_FRAME_P (REGNO (x
))))
5663 HOST_WIDE_INT lower
= INTVAL (constant_term
) & 0xfff;
5664 HOST_WIDE_INT upper
= INTVAL (constant_term
) ^ lower
;
5666 rtx temp
= gen_reg_rtx (Pmode
);
5667 rtx val
= force_operand (GEN_INT (upper
), temp
);
5669 emit_move_insn (temp
, val
);
5671 x
= gen_rtx_PLUS (Pmode
, x
, temp
);
5672 constant_term
= GEN_INT (lower
);
5675 if (GET_CODE (x
) == PLUS
)
5677 if (GET_CODE (XEXP (x
, 0)) == REG
)
5679 rtx temp
= gen_reg_rtx (Pmode
);
5680 rtx val
= force_operand (XEXP (x
, 1), temp
);
5682 emit_move_insn (temp
, val
);
5684 x
= gen_rtx_PLUS (Pmode
, XEXP (x
, 0), temp
);
5687 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5689 rtx temp
= gen_reg_rtx (Pmode
);
5690 rtx val
= force_operand (XEXP (x
, 0), temp
);
5692 emit_move_insn (temp
, val
);
5694 x
= gen_rtx_PLUS (Pmode
, temp
, XEXP (x
, 1));
5698 if (constant_term
!= const0_rtx
)
5699 x
= gen_rtx_PLUS (Pmode
, x
, constant_term
);
5704 /* Try a machine-dependent way of reloading an illegitimate address AD
5705 operand. If we find one, push the reload and return the new address.
5707 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5708 and TYPE is the reload type of the current reload. */
5711 legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
5712 int opnum
, int type
)
5714 if (!optimize
|| TARGET_LONG_DISPLACEMENT
)
5717 if (GET_CODE (ad
) == PLUS
)
5719 rtx tem
= simplify_binary_operation (PLUS
, Pmode
,
5720 XEXP (ad
, 0), XEXP (ad
, 1));
5725 if (GET_CODE (ad
) == PLUS
5726 && GET_CODE (XEXP (ad
, 0)) == REG
5727 && GET_CODE (XEXP (ad
, 1)) == CONST_INT
5728 && !DISP_IN_RANGE (INTVAL (XEXP (ad
, 1))))
5730 HOST_WIDE_INT lower
= INTVAL (XEXP (ad
, 1)) & 0xfff;
5731 HOST_WIDE_INT upper
= INTVAL (XEXP (ad
, 1)) ^ lower
;
5732 rtx cst
, tem
, new_rtx
;
5734 cst
= GEN_INT (upper
);
5735 if (!legitimate_reload_constant_p (cst
))
5736 cst
= force_const_mem (Pmode
, cst
);
5738 tem
= gen_rtx_PLUS (Pmode
, XEXP (ad
, 0), cst
);
5739 new_rtx
= gen_rtx_PLUS (Pmode
, tem
, GEN_INT (lower
));
5741 push_reload (XEXP (tem
, 1), 0, &XEXP (tem
, 1), 0,
5742 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
5743 opnum
, (enum reload_type
) type
);
5750 /* Emit code to move LEN bytes from SRC to DST. */
5753 s390_expand_cpymem (rtx dst
, rtx src
, rtx len
, rtx min_len_rtx
, rtx max_len_rtx
)
5755 /* Exit early in case nothing has to be done. */
5756 if (CONST_INT_P (len
) && UINTVAL (len
) == 0)
5759 unsigned HOST_WIDE_INT min_len
= UINTVAL (min_len_rtx
);
5760 unsigned HOST_WIDE_INT max_len
5761 = max_len_rtx
? UINTVAL (max_len_rtx
) : HOST_WIDE_INT_M1U
;
5763 /* Expand memcpy for constant length operands without a loop if it
5764 is shorter that way.
5766 With a constant length argument a
5767 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5768 if (CONST_INT_P (len
)
5769 && UINTVAL (len
) <= 6 * 256
5770 && (!TARGET_MVCLE
|| UINTVAL (len
) <= 256))
5774 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5776 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5777 rtx newsrc
= adjust_address (src
, BLKmode
, o
);
5778 emit_insn (gen_cpymem_short (newdst
, newsrc
,
5779 GEN_INT (l
> 256 ? 255 : l
- 1)));
5785 else if (TARGET_MVCLE
5786 && (s390_tune
< PROCESSOR_2097_Z10
5787 || (CONST_INT_P (len
) && UINTVAL (len
) <= (1 << 16))))
5789 emit_insn (gen_cpymem_long (dst
, src
, convert_to_mode (Pmode
, len
, 1)));
5793 /* Non-constant length and no loop required. */
5794 else if (!CONST_INT_P (len
) && max_len
<= 256)
5796 rtx_code_label
*end_label
;
5800 end_label
= gen_label_rtx ();
5801 emit_cmp_and_jump_insns (len
, const0_rtx
, EQ
, NULL_RTX
,
5802 GET_MODE (len
), 1, end_label
,
5803 profile_probability::very_unlikely ());
5806 rtx lenm1
= expand_binop (GET_MODE (len
), add_optab
, len
, constm1_rtx
,
5807 NULL_RTX
, 1, OPTAB_DIRECT
);
5809 /* Prefer a vectorized implementation over one which makes use of an
5810 execute instruction since it is faster (although it increases register
5812 if (max_len
<= 16 && TARGET_VX
)
5814 rtx tmp
= gen_reg_rtx (V16QImode
);
5815 lenm1
= convert_to_mode (SImode
, lenm1
, 1);
5816 emit_insn (gen_vllv16qi (tmp
, lenm1
, src
));
5817 emit_insn (gen_vstlv16qi (tmp
, lenm1
, dst
));
5819 else if (TARGET_Z15
)
5820 emit_insn (gen_mvcrl (dst
, src
, convert_to_mode (SImode
, lenm1
, 1)));
5823 gen_cpymem_short (dst
, src
, convert_to_mode (Pmode
, lenm1
, 1)));
5826 emit_label (end_label
);
5831 else if (s390_tune
< PROCESSOR_2097_Z10
|| (CONST_INT_P (len
) && UINTVAL (len
) <= (1 << 16)))
5833 rtx dst_addr
, src_addr
, count
, blocks
, temp
;
5834 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5835 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5836 rtx_code_label
*end_label
= gen_label_rtx ();
5839 mode
= GET_MODE (len
);
5840 if (mode
== VOIDmode
)
5843 dst_addr
= gen_reg_rtx (Pmode
);
5844 src_addr
= gen_reg_rtx (Pmode
);
5845 count
= gen_reg_rtx (mode
);
5846 blocks
= gen_reg_rtx (mode
);
5848 convert_move (count
, len
, 1);
5850 emit_cmp_and_jump_insns (count
, const0_rtx
, EQ
, NULL_RTX
, mode
, 1,
5853 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5854 emit_move_insn (src_addr
, force_operand (XEXP (src
, 0), NULL_RTX
));
5855 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5856 src
= change_address (src
, VOIDmode
, src_addr
);
5858 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5861 emit_move_insn (count
, temp
);
5863 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5866 emit_move_insn (blocks
, temp
);
5868 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5869 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5871 emit_label (loop_start_label
);
5874 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 768))
5878 /* Issue a read prefetch for the +3 cache line. */
5879 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (768)),
5880 const0_rtx
, const0_rtx
);
5881 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5882 emit_insn (prefetch
);
5884 /* Issue a write prefetch for the +3 cache line. */
5885 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (768)),
5886 const1_rtx
, const0_rtx
);
5887 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5888 emit_insn (prefetch
);
5891 emit_insn (gen_cpymem_short (dst
, src
, GEN_INT (255)));
5892 s390_load_address (dst_addr
,
5893 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5894 s390_load_address (src_addr
,
5895 gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (256)));
5897 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5900 emit_move_insn (blocks
, temp
);
5902 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5903 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5905 emit_jump (loop_start_label
);
5906 emit_label (loop_end_label
);
5908 emit_insn (gen_cpymem_short (dst
, src
,
5909 convert_to_mode (Pmode
, count
, 1)));
5910 emit_label (end_label
);
5919 s390_expand_movmem (rtx dst
, rtx src
, rtx len
, rtx min_len_rtx
, rtx max_len_rtx
)
5921 /* Exit early in case nothing has to be done. */
5922 if (CONST_INT_P (len
) && UINTVAL (len
) == 0)
5924 /* Exit early in case length is not upper bounded. */
5925 else if (max_len_rtx
== NULL
)
5928 unsigned HOST_WIDE_INT min_len
= UINTVAL (min_len_rtx
);
5929 unsigned HOST_WIDE_INT max_len
= UINTVAL (max_len_rtx
);
5931 /* At most 16 bytes. */
5932 if (max_len
<= 16 && TARGET_VX
)
5934 rtx_code_label
*end_label
;
5938 end_label
= gen_label_rtx ();
5939 emit_cmp_and_jump_insns (len
, const0_rtx
, EQ
, NULL_RTX
,
5940 GET_MODE (len
), 1, end_label
,
5941 profile_probability::very_unlikely ());
5945 if (CONST_INT_P (len
))
5947 lenm1
= gen_reg_rtx (SImode
);
5948 emit_move_insn (lenm1
, GEN_INT (UINTVAL (len
) - 1));
5952 = expand_binop (SImode
, add_optab
, convert_to_mode (SImode
, len
, 1),
5953 constm1_rtx
, NULL_RTX
, 1, OPTAB_DIRECT
);
5955 rtx tmp
= gen_reg_rtx (V16QImode
);
5956 emit_insn (gen_vllv16qi (tmp
, lenm1
, src
));
5957 emit_insn (gen_vstlv16qi (tmp
, lenm1
, dst
));
5960 emit_label (end_label
);
5965 /* At most 256 bytes. */
5966 else if (max_len
<= 256 && TARGET_Z15
)
5968 rtx_code_label
*end_label
= gen_label_rtx ();
5971 emit_cmp_and_jump_insns (len
, const0_rtx
, EQ
, NULL_RTX
, GET_MODE (len
),
5973 profile_probability::very_unlikely ());
5975 rtx dst_addr
= gen_reg_rtx (Pmode
);
5976 rtx src_addr
= gen_reg_rtx (Pmode
);
5977 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5978 emit_move_insn (src_addr
, force_operand (XEXP (src
, 0), NULL_RTX
));
5980 rtx lenm1
= CONST_INT_P (len
)
5981 ? GEN_INT (UINTVAL (len
) - 1)
5982 : expand_binop (GET_MODE (len
), add_optab
, len
, constm1_rtx
,
5983 NULL_RTX
, 1, OPTAB_DIRECT
);
5985 rtx_code_label
*right_to_left_label
= gen_label_rtx ();
5986 emit_cmp_and_jump_insns (src_addr
, dst_addr
, LT
, NULL_RTX
, GET_MODE (len
),
5987 1, right_to_left_label
);
5991 gen_cpymem_short (dst
, src
, convert_to_mode (Pmode
, lenm1
, 1)));
5992 emit_jump (end_label
);
5995 emit_label (right_to_left_label
);
5996 emit_insn (gen_mvcrl (dst
, src
, convert_to_mode (SImode
, lenm1
, 1)));
5998 emit_label (end_label
);
6006 /* Emit code to set LEN bytes at DST to VAL.
6007 Make use of clrmem if VAL is zero. */
6010 s390_expand_setmem (rtx dst
, rtx len
, rtx val
, rtx min_len_rtx
, rtx max_len_rtx
)
6012 /* Exit early in case nothing has to be done. */
6013 if (CONST_INT_P (len
) && UINTVAL (len
) == 0)
6016 gcc_assert (GET_CODE (val
) == CONST_INT
|| GET_MODE (val
) == QImode
);
6018 unsigned HOST_WIDE_INT min_len
= UINTVAL (min_len_rtx
);
6019 unsigned HOST_WIDE_INT max_len
6020 = max_len_rtx
? UINTVAL (max_len_rtx
) : HOST_WIDE_INT_M1U
;
6022 /* Vectorize memset with a constant length
6023 - if 0 < LEN < 16, then emit a vstl based solution;
6024 - if 16 <= LEN <= 64, then emit a vst based solution
6025 where the last two vector stores may overlap in case LEN%16!=0. Paying
6026 the price for an overlap is negligible compared to an extra GPR which is
6027 required for vstl. */
6028 if (CONST_INT_P (len
) && UINTVAL (len
) <= 64 && val
!= const0_rtx
6031 rtx val_vec
= gen_reg_rtx (V16QImode
);
6032 emit_move_insn (val_vec
, gen_rtx_VEC_DUPLICATE (V16QImode
, val
));
6034 if (UINTVAL (len
) < 16)
6036 rtx len_reg
= gen_reg_rtx (SImode
);
6037 emit_move_insn (len_reg
, GEN_INT (UINTVAL (len
) - 1));
6038 emit_insn (gen_vstlv16qi (val_vec
, len_reg
, dst
));
6042 unsigned HOST_WIDE_INT l
= UINTVAL (len
) / 16;
6043 unsigned HOST_WIDE_INT r
= UINTVAL (len
) % 16;
6044 unsigned HOST_WIDE_INT o
= 0;
6045 for (unsigned HOST_WIDE_INT i
= 0; i
< l
; ++i
)
6047 rtx newdst
= adjust_address (dst
, V16QImode
, o
);
6048 emit_move_insn (newdst
, val_vec
);
6053 rtx newdst
= adjust_address (dst
, V16QImode
, (o
- 16) + r
);
6054 emit_move_insn (newdst
, val_vec
);
6059 /* Expand setmem/clrmem for a constant length operand without a
6060 loop if it will be shorter that way.
6061 clrmem loop (with PFD) is 30 bytes -> 5 * xc
6062 clrmem loop (without PFD) is 24 bytes -> 4 * xc
6063 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
6064 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
6065 else if (GET_CODE (len
) == CONST_INT
6066 && ((val
== const0_rtx
6067 && (INTVAL (len
) <= 256 * 4
6068 || (INTVAL (len
) <= 256 * 5 && TARGET_SETMEM_PFD(val
,len
))))
6069 || (val
!= const0_rtx
&& INTVAL (len
) <= 257 * 4))
6070 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
6074 if (val
== const0_rtx
)
6075 /* clrmem: emit 256 byte blockwise XCs. */
6076 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
6078 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
6079 emit_insn (gen_clrmem_short (newdst
,
6080 GEN_INT (l
> 256 ? 255 : l
- 1)));
6083 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
6084 setting first byte to val and using a 256 byte mvc with one
6085 byte overlap to propagate the byte. */
6086 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 257, o
+= 257)
6088 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
6089 emit_move_insn (adjust_address (dst
, QImode
, o
), val
);
6092 rtx newdstp1
= adjust_address (dst
, BLKmode
, o
+ 1);
6093 emit_insn (gen_cpymem_short (newdstp1
, newdst
,
6094 GEN_INT (l
> 257 ? 255 : l
- 2)));
6099 else if (TARGET_MVCLE
)
6101 val
= force_not_mem (convert_modes (Pmode
, QImode
, val
, 1));
6103 emit_insn (gen_setmem_long_di (dst
, convert_to_mode (Pmode
, len
, 1),
6106 emit_insn (gen_setmem_long_si (dst
, convert_to_mode (Pmode
, len
, 1),
6110 /* Non-constant length and no loop required. */
6111 else if (!CONST_INT_P (len
) && max_len
<= 256)
6113 rtx_code_label
*end_label
;
6117 end_label
= gen_label_rtx ();
6118 emit_cmp_and_jump_insns (len
, const0_rtx
, EQ
, NULL_RTX
,
6119 GET_MODE (len
), 1, end_label
,
6120 profile_probability::very_unlikely ());
6123 rtx lenm1
= expand_binop (GET_MODE (len
), add_optab
, len
, constm1_rtx
,
6124 NULL_RTX
, 1, OPTAB_DIRECT
);
6126 /* Prefer a vectorized implementation over one which makes use of an
6127 execute instruction since it is faster (although it increases register
6129 if (max_len
<= 16 && TARGET_VX
)
6131 rtx val_vec
= gen_reg_rtx (V16QImode
);
6132 if (val
== const0_rtx
)
6133 emit_move_insn (val_vec
, CONST0_RTX (V16QImode
));
6135 emit_move_insn (val_vec
, gen_rtx_VEC_DUPLICATE (V16QImode
, val
));
6137 lenm1
= convert_to_mode (SImode
, lenm1
, 1);
6138 emit_insn (gen_vstlv16qi (val_vec
, lenm1
, dst
));
6142 if (val
== const0_rtx
)
6144 gen_clrmem_short (dst
, convert_to_mode (Pmode
, lenm1
, 1)));
6147 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
6149 rtx_code_label
*onebyte_end_label
;
6152 onebyte_end_label
= gen_label_rtx ();
6153 emit_cmp_and_jump_insns (
6154 len
, const1_rtx
, EQ
, NULL_RTX
, GET_MODE (len
), 1,
6155 onebyte_end_label
, profile_probability::very_unlikely ());
6158 rtx dstp1
= adjust_address (dst
, VOIDmode
, 1);
6160 = expand_binop (GET_MODE (len
), add_optab
, len
, GEN_INT (-2),
6161 NULL_RTX
, 1, OPTAB_DIRECT
);
6162 lenm2
= convert_to_mode (Pmode
, lenm2
, 1);
6163 emit_insn (gen_cpymem_short (dstp1
, dst
, lenm2
));
6166 emit_label (onebyte_end_label
);
6171 emit_label (end_label
);
6176 rtx dst_addr
, count
, blocks
, temp
, dstp1
= NULL_RTX
;
6177 rtx_code_label
*loop_start_label
= gen_label_rtx ();
6178 rtx_code_label
*onebyte_end_label
= gen_label_rtx ();
6179 rtx_code_label
*zerobyte_end_label
= gen_label_rtx ();
6180 rtx_code_label
*restbyte_end_label
= gen_label_rtx ();
6183 mode
= GET_MODE (len
);
6184 if (mode
== VOIDmode
)
6187 dst_addr
= gen_reg_rtx (Pmode
);
6188 count
= gen_reg_rtx (mode
);
6189 blocks
= gen_reg_rtx (mode
);
6191 convert_move (count
, len
, 1);
6193 emit_cmp_and_jump_insns (count
, const0_rtx
, EQ
, NULL_RTX
, mode
, 1,
6195 profile_probability::very_unlikely ());
6197 /* We need to make a copy of the target address since memset is
6198 supposed to return it unmodified. We have to make it here
6199 already since the new reg is used at onebyte_end_label. */
6200 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
6201 dst
= change_address (dst
, VOIDmode
, dst_addr
);
6203 if (val
!= const0_rtx
)
6205 /* When using the overlapping mvc the original target
6206 address is only accessed as single byte entity (even by
6207 the mvc reading this value). */
6208 set_mem_size (dst
, 1);
6209 dstp1
= adjust_address (dst
, VOIDmode
, 1);
6211 emit_cmp_and_jump_insns (count
, const1_rtx
, EQ
, NULL_RTX
, mode
, 1,
6213 profile_probability::very_unlikely ());
6216 /* There is one unconditional (mvi+mvc)/xc after the loop
6217 dealing with the rest of the bytes, subtracting two (mvi+mvc)
6218 or one (xc) here leaves this number of bytes to be handled by
6220 temp
= expand_binop (mode
, add_optab
, count
,
6221 val
== const0_rtx
? constm1_rtx
: GEN_INT (-2),
6222 count
, 1, OPTAB_DIRECT
);
6224 emit_move_insn (count
, temp
);
6226 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
6229 emit_move_insn (blocks
, temp
);
6231 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
6232 EQ
, NULL_RTX
, mode
, 1, restbyte_end_label
);
6234 emit_jump (loop_start_label
);
6236 if (val
!= const0_rtx
&& min_len
<= 1)
6238 /* The 1 byte != 0 special case. Not handled efficiently
6239 since we require two jumps for that. However, this
6240 should be very rare. */
6241 emit_label (onebyte_end_label
);
6242 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
6243 emit_jump (zerobyte_end_label
);
6246 emit_label (loop_start_label
);
6248 if (TARGET_SETMEM_PFD (val
, len
))
6250 /* Issue a write prefetch. */
6251 rtx distance
= GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE
);
6252 rtx prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, distance
),
6253 const1_rtx
, const0_rtx
);
6254 emit_insn (prefetch
);
6255 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
6258 if (val
== const0_rtx
)
6259 emit_insn (gen_clrmem_short (dst
, GEN_INT (255)));
6262 /* Set the first byte in the block to the value and use an
6263 overlapping mvc for the block. */
6264 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
6265 emit_insn (gen_cpymem_short (dstp1
, dst
, GEN_INT (254)));
6267 s390_load_address (dst_addr
,
6268 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
6270 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
6273 emit_move_insn (blocks
, temp
);
6275 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
6276 NE
, NULL_RTX
, mode
, 1, loop_start_label
);
6278 emit_label (restbyte_end_label
);
6280 if (val
== const0_rtx
)
6281 emit_insn (gen_clrmem_short (dst
, convert_to_mode (Pmode
, count
, 1)));
6284 /* Set the first byte in the block to the value and use an
6285 overlapping mvc for the block. */
6286 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
6287 /* execute only uses the lowest 8 bits of count that's
6288 exactly what we need here. */
6289 emit_insn (gen_cpymem_short (dstp1
, dst
,
6290 convert_to_mode (Pmode
, count
, 1)));
6293 emit_label (zerobyte_end_label
);
6297 /* Emit code to compare LEN bytes at OP0 with those at OP1,
6298 and return the result in TARGET. */
6301 s390_expand_cmpmem (rtx target
, rtx op0
, rtx op1
, rtx len
)
6303 rtx ccreg
= gen_rtx_REG (CCUmode
, CC_REGNUM
);
6306 /* When tuning for z10 or higher we rely on the Glibc functions to
6307 do the right thing. Only for constant lengths below 64k we will
6308 generate inline code. */
6309 if (s390_tune
>= PROCESSOR_2097_Z10
6310 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
6313 /* As the result of CMPINT is inverted compared to what we need,
6314 we have to swap the operands. */
6315 tmp
= op0
; op0
= op1
; op1
= tmp
;
6317 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) >= 0 && INTVAL (len
) <= 256)
6319 if (INTVAL (len
) > 0)
6321 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (INTVAL (len
) - 1)));
6322 emit_insn (gen_cmpint (target
, ccreg
));
6325 emit_move_insn (target
, const0_rtx
);
6327 else if (TARGET_MVCLE
)
6329 emit_insn (gen_cmpmem_long (op0
, op1
, convert_to_mode (Pmode
, len
, 1)));
6330 emit_insn (gen_cmpint (target
, ccreg
));
6334 rtx addr0
, addr1
, count
, blocks
, temp
;
6335 rtx_code_label
*loop_start_label
= gen_label_rtx ();
6336 rtx_code_label
*loop_end_label
= gen_label_rtx ();
6337 rtx_code_label
*end_label
= gen_label_rtx ();
6340 mode
= GET_MODE (len
);
6341 if (mode
== VOIDmode
)
6344 addr0
= gen_reg_rtx (Pmode
);
6345 addr1
= gen_reg_rtx (Pmode
);
6346 count
= gen_reg_rtx (mode
);
6347 blocks
= gen_reg_rtx (mode
);
6349 convert_move (count
, len
, 1);
6350 emit_cmp_and_jump_insns (count
, const0_rtx
,
6351 EQ
, NULL_RTX
, mode
, 1, end_label
);
6353 emit_move_insn (addr0
, force_operand (XEXP (op0
, 0), NULL_RTX
));
6354 emit_move_insn (addr1
, force_operand (XEXP (op1
, 0), NULL_RTX
));
6355 op0
= change_address (op0
, VOIDmode
, addr0
);
6356 op1
= change_address (op1
, VOIDmode
, addr1
);
6358 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
6361 emit_move_insn (count
, temp
);
6363 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
6366 emit_move_insn (blocks
, temp
);
6368 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
6369 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
6371 emit_label (loop_start_label
);
6374 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 512))
6378 /* Issue a read prefetch for the +2 cache line of operand 1. */
6379 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (512)),
6380 const0_rtx
, const0_rtx
);
6381 emit_insn (prefetch
);
6382 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
6384 /* Issue a read prefetch for the +2 cache line of operand 2. */
6385 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (512)),
6386 const0_rtx
, const0_rtx
);
6387 emit_insn (prefetch
);
6388 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
6391 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (255)));
6392 temp
= gen_rtx_NE (VOIDmode
, ccreg
, const0_rtx
);
6393 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
6394 gen_rtx_LABEL_REF (VOIDmode
, end_label
), pc_rtx
);
6395 temp
= gen_rtx_SET (pc_rtx
, temp
);
6396 emit_jump_insn (temp
);
6398 s390_load_address (addr0
,
6399 gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (256)));
6400 s390_load_address (addr1
,
6401 gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (256)));
6403 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
6406 emit_move_insn (blocks
, temp
);
6408 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
6409 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
6411 emit_jump (loop_start_label
);
6412 emit_label (loop_end_label
);
6414 emit_insn (gen_cmpmem_short (op0
, op1
,
6415 convert_to_mode (Pmode
, count
, 1)));
6416 emit_label (end_label
);
6418 emit_insn (gen_cmpint (target
, ccreg
));
6423 /* Emit a conditional jump to LABEL for condition code mask MASK using
6424 comparsion operator COMPARISON. Return the emitted jump insn. */
6427 s390_emit_ccraw_jump (HOST_WIDE_INT mask
, enum rtx_code comparison
, rtx label
)
6431 gcc_assert (comparison
== EQ
|| comparison
== NE
);
6432 gcc_assert (mask
> 0 && mask
< 15);
6434 temp
= gen_rtx_fmt_ee (comparison
, VOIDmode
,
6435 gen_rtx_REG (CCRAWmode
, CC_REGNUM
), GEN_INT (mask
));
6436 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
6437 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
6438 temp
= gen_rtx_SET (pc_rtx
, temp
);
6439 return emit_jump_insn (temp
);
6442 /* Emit the instructions to implement strlen of STRING and store the
6443 result in TARGET. The string has the known ALIGNMENT. This
6444 version uses vector instructions and is therefore not appropriate
6445 for targets prior to z13. */
6448 s390_expand_vec_strlen (rtx target
, rtx string
, rtx alignment
)
6450 rtx highest_index_to_load_reg
= gen_reg_rtx (Pmode
);
6451 rtx str_reg
= gen_reg_rtx (V16QImode
);
6452 rtx str_addr_base_reg
= gen_reg_rtx (Pmode
);
6453 rtx str_idx_reg
= gen_reg_rtx (Pmode
);
6454 rtx result_reg
= gen_reg_rtx (V16QImode
);
6455 rtx is_aligned_label
= gen_label_rtx ();
6456 rtx into_loop_label
= NULL_RTX
;
6457 rtx loop_start_label
= gen_label_rtx ();
6459 rtx len
= gen_reg_rtx (QImode
);
6463 s390_load_address (str_addr_base_reg
, XEXP (string
, 0));
6464 emit_move_insn (str_idx_reg
, const0_rtx
);
6466 if (INTVAL (alignment
) < 16)
6468 /* Check whether the address happens to be aligned properly so
6469 jump directly to the aligned loop. */
6470 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode
,
6471 str_addr_base_reg
, GEN_INT (15)),
6472 const0_rtx
, EQ
, NULL_RTX
,
6473 Pmode
, 1, is_aligned_label
);
6475 temp
= gen_reg_rtx (Pmode
);
6476 temp
= expand_binop (Pmode
, and_optab
, str_addr_base_reg
,
6477 GEN_INT (15), temp
, 1, OPTAB_DIRECT
);
6478 gcc_assert (REG_P (temp
));
6479 highest_index_to_load_reg
=
6480 expand_binop (Pmode
, sub_optab
, GEN_INT (15), temp
,
6481 highest_index_to_load_reg
, 1, OPTAB_DIRECT
);
6482 gcc_assert (REG_P (highest_index_to_load_reg
));
6483 emit_insn (gen_vllv16qi (str_reg
,
6484 convert_to_mode (SImode
, highest_index_to_load_reg
, 1),
6485 gen_rtx_MEM (BLKmode
, str_addr_base_reg
)));
6487 into_loop_label
= gen_label_rtx ();
6488 s390_emit_jump (into_loop_label
, NULL_RTX
);
6492 emit_label (is_aligned_label
);
6493 LABEL_NUSES (is_aligned_label
) = INTVAL (alignment
) < 16 ? 2 : 1;
6495 /* Reaching this point we are only performing 16 bytes aligned
6497 emit_move_insn (highest_index_to_load_reg
, GEN_INT (15));
6499 emit_label (loop_start_label
);
6500 LABEL_NUSES (loop_start_label
) = 1;
6502 /* Load 16 bytes of the string into VR. */
6503 mem
= gen_rtx_MEM (V16QImode
,
6504 gen_rtx_PLUS (Pmode
, str_idx_reg
, str_addr_base_reg
));
6505 set_mem_align (mem
, 128);
6506 emit_move_insn (str_reg
, mem
);
6507 if (into_loop_label
!= NULL_RTX
)
6509 emit_label (into_loop_label
);
6510 LABEL_NUSES (into_loop_label
) = 1;
6513 /* Increment string index by 16 bytes. */
6514 expand_binop (Pmode
, add_optab
, str_idx_reg
, GEN_INT (16),
6515 str_idx_reg
, 1, OPTAB_DIRECT
);
6517 emit_insn (gen_vec_vfenesv16qi (result_reg
, str_reg
, str_reg
,
6518 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
6520 add_int_reg_note (s390_emit_ccraw_jump (8, NE
, loop_start_label
),
6522 profile_probability::very_likely ().to_reg_br_prob_note ());
6523 emit_insn (gen_vec_extractv16qiqi (len
, result_reg
, GEN_INT (7)));
6525 /* If the string pointer wasn't aligned we have loaded less then 16
6526 bytes and the remaining bytes got filled with zeros (by vll).
6527 Now we have to check whether the resulting index lies within the
6528 bytes actually part of the string. */
6530 cond
= s390_emit_compare (GT
, convert_to_mode (Pmode
, len
, 1),
6531 highest_index_to_load_reg
);
6532 s390_load_address (highest_index_to_load_reg
,
6533 gen_rtx_PLUS (Pmode
, highest_index_to_load_reg
,
6536 emit_insn (gen_movdicc (str_idx_reg
, cond
,
6537 highest_index_to_load_reg
, str_idx_reg
));
6539 emit_insn (gen_movsicc (str_idx_reg
, cond
,
6540 highest_index_to_load_reg
, str_idx_reg
));
6542 add_reg_br_prob_note (s390_emit_jump (is_aligned_label
, cond
),
6543 profile_probability::very_unlikely ());
6545 expand_binop (Pmode
, add_optab
, str_idx_reg
,
6546 GEN_INT (-16), str_idx_reg
, 1, OPTAB_DIRECT
);
6547 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6549 temp
= expand_binop (Pmode
, add_optab
, str_idx_reg
,
6550 convert_to_mode (Pmode
, len
, 1),
6551 target
, 1, OPTAB_DIRECT
);
6553 emit_move_insn (target
, temp
);
6557 s390_expand_vec_movstr (rtx result
, rtx dst
, rtx src
)
6559 rtx temp
= gen_reg_rtx (Pmode
);
6560 rtx src_addr
= XEXP (src
, 0);
6561 rtx dst_addr
= XEXP (dst
, 0);
6562 rtx src_addr_reg
= gen_reg_rtx (Pmode
);
6563 rtx dst_addr_reg
= gen_reg_rtx (Pmode
);
6564 rtx offset
= gen_reg_rtx (Pmode
);
6565 rtx vsrc
= gen_reg_rtx (V16QImode
);
6566 rtx vpos
= gen_reg_rtx (V16QImode
);
6567 rtx loadlen
= gen_reg_rtx (SImode
);
6568 rtx gpos_qi
= gen_reg_rtx(QImode
);
6569 rtx gpos
= gen_reg_rtx (SImode
);
6570 rtx done_label
= gen_label_rtx ();
6571 rtx loop_label
= gen_label_rtx ();
6572 rtx exit_label
= gen_label_rtx ();
6573 rtx full_label
= gen_label_rtx ();
6575 /* Perform a quick check for string ending on the first up to 16
6576 bytes and exit early if successful. */
6578 emit_insn (gen_vlbb (vsrc
, src
, GEN_INT (6)));
6579 emit_insn (gen_lcbb (loadlen
, src_addr
, GEN_INT (6)));
6580 emit_insn (gen_vfenezv16qi (vpos
, vsrc
, vsrc
));
6581 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6582 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6583 /* gpos is the byte index if a zero was found and 16 otherwise.
6584 So if it is lower than the loaded bytes we have a hit. */
6585 emit_cmp_and_jump_insns (gpos
, loadlen
, GE
, NULL_RTX
, SImode
, 1,
6587 emit_insn (gen_vstlv16qi (vsrc
, gpos
, dst
));
6589 force_expand_binop (Pmode
, add_optab
, dst_addr
, gpos
, result
,
6591 emit_jump (exit_label
);
6594 emit_label (full_label
);
6595 LABEL_NUSES (full_label
) = 1;
6597 /* Calculate `offset' so that src + offset points to the last byte
6598 before 16 byte alignment. */
6600 /* temp = src_addr & 0xf */
6601 force_expand_binop (Pmode
, and_optab
, src_addr
, GEN_INT (15), temp
,
6604 /* offset = 0xf - temp */
6605 emit_move_insn (offset
, GEN_INT (15));
6606 force_expand_binop (Pmode
, sub_optab
, offset
, temp
, offset
,
6609 /* Store `offset' bytes in the dstination string. The quick check
6610 has loaded at least `offset' bytes into vsrc. */
6612 emit_insn (gen_vstlv16qi (vsrc
, gen_lowpart (SImode
, offset
), dst
));
6614 /* Advance to the next byte to be loaded. */
6615 force_expand_binop (Pmode
, add_optab
, offset
, const1_rtx
, offset
,
6618 /* Make sure the addresses are single regs which can be used as a
6620 emit_move_insn (src_addr_reg
, src_addr
);
6621 emit_move_insn (dst_addr_reg
, dst_addr
);
6625 emit_label (loop_label
);
6626 LABEL_NUSES (loop_label
) = 1;
6628 emit_move_insn (vsrc
,
6629 gen_rtx_MEM (V16QImode
,
6630 gen_rtx_PLUS (Pmode
, src_addr_reg
, offset
)));
6632 emit_insn (gen_vec_vfenesv16qi (vpos
, vsrc
, vsrc
,
6633 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
6634 add_int_reg_note (s390_emit_ccraw_jump (8, EQ
, done_label
),
6635 REG_BR_PROB
, profile_probability::very_unlikely ()
6636 .to_reg_br_prob_note ());
6638 emit_move_insn (gen_rtx_MEM (V16QImode
,
6639 gen_rtx_PLUS (Pmode
, dst_addr_reg
, offset
)),
6642 force_expand_binop (Pmode
, add_optab
, offset
, GEN_INT (16),
6643 offset
, 1, OPTAB_DIRECT
);
6645 emit_jump (loop_label
);
6650 /* We are done. Add the offset of the zero character to the dst_addr
6651 pointer to get the result. */
6653 emit_label (done_label
);
6654 LABEL_NUSES (done_label
) = 1;
6656 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, offset
, dst_addr_reg
,
6659 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6660 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6662 emit_insn (gen_vstlv16qi (vsrc
, gpos
, gen_rtx_MEM (BLKmode
, dst_addr_reg
)));
6664 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, gpos
, result
,
6669 emit_label (exit_label
);
6670 LABEL_NUSES (exit_label
) = 1;
6674 /* Expand conditional increment or decrement using alc/slb instructions.
6675 Should generate code setting DST to either SRC or SRC + INCREMENT,
6676 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6677 Returns true if successful, false otherwise.
6679 That makes it possible to implement some if-constructs without jumps e.g.:
6680 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6681 unsigned int a, b, c;
6682 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6683 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6684 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6685 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6687 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6688 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6689 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6690 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6691 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6694 s390_expand_addcc (enum rtx_code cmp_code
, rtx cmp_op0
, rtx cmp_op1
,
6695 rtx dst
, rtx src
, rtx increment
)
6697 machine_mode cmp_mode
;
6698 machine_mode cc_mode
;
6704 if ((GET_MODE (cmp_op0
) == SImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6705 && (GET_MODE (cmp_op1
) == SImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6707 else if ((GET_MODE (cmp_op0
) == DImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6708 && (GET_MODE (cmp_op1
) == DImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6713 /* Try ADD LOGICAL WITH CARRY. */
6714 if (increment
== const1_rtx
)
6716 /* Determine CC mode to use. */
6717 if (cmp_code
== EQ
|| cmp_code
== NE
)
6719 if (cmp_op1
!= const0_rtx
)
6721 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6722 NULL_RTX
, 0, OPTAB_WIDEN
);
6723 cmp_op1
= const0_rtx
;
6726 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6729 if (cmp_code
== LTU
|| cmp_code
== LEU
)
6734 cmp_code
= swap_condition (cmp_code
);
6751 /* Emit comparison instruction pattern. */
6752 if (!register_operand (cmp_op0
, cmp_mode
))
6753 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6755 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6756 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6757 /* We use insn_invalid_p here to add clobbers if required. */
6758 ret
= insn_invalid_p (emit_insn (insn
), false);
6761 /* Emit ALC instruction pattern. */
6762 op_res
= gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6763 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6766 if (src
!= const0_rtx
)
6768 if (!register_operand (src
, GET_MODE (dst
)))
6769 src
= force_reg (GET_MODE (dst
), src
);
6771 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, src
);
6772 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, const0_rtx
);
6775 p
= rtvec_alloc (2);
6777 gen_rtx_SET (dst
, op_res
);
6779 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6780 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6785 /* Try SUBTRACT LOGICAL WITH BORROW. */
6786 if (increment
== constm1_rtx
)
6788 /* Determine CC mode to use. */
6789 if (cmp_code
== EQ
|| cmp_code
== NE
)
6791 if (cmp_op1
!= const0_rtx
)
6793 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6794 NULL_RTX
, 0, OPTAB_WIDEN
);
6795 cmp_op1
= const0_rtx
;
6798 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6801 if (cmp_code
== GTU
|| cmp_code
== GEU
)
6806 cmp_code
= swap_condition (cmp_code
);
6823 /* Emit comparison instruction pattern. */
6824 if (!register_operand (cmp_op0
, cmp_mode
))
6825 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6827 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6828 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6829 /* We use insn_invalid_p here to add clobbers if required. */
6830 ret
= insn_invalid_p (emit_insn (insn
), false);
6833 /* Emit SLB instruction pattern. */
6834 if (!register_operand (src
, GET_MODE (dst
)))
6835 src
= force_reg (GET_MODE (dst
), src
);
6837 op_res
= gen_rtx_MINUS (GET_MODE (dst
),
6838 gen_rtx_MINUS (GET_MODE (dst
), src
, const0_rtx
),
6839 gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6840 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6842 p
= rtvec_alloc (2);
6844 gen_rtx_SET (dst
, op_res
);
6846 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6847 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6855 /* Expand code for the insv template. Return true if successful. */
6858 s390_expand_insv (rtx dest
, rtx op1
, rtx op2
, rtx src
)
6860 int bitsize
= INTVAL (op1
);
6861 int bitpos
= INTVAL (op2
);
6862 machine_mode mode
= GET_MODE (dest
);
6864 int smode_bsize
, mode_bsize
;
6867 if (bitsize
+ bitpos
> GET_MODE_BITSIZE (mode
))
6872 && bitsize
== GET_MODE_BITSIZE (GET_MODE (src
))
6873 && mode
== GET_MODE (src
))
6875 emit_move_insn (dest
, src
);
6879 /* Generate INSERT IMMEDIATE (IILL et al). */
6880 /* (set (ze (reg)) (const_int)). */
6882 && register_operand (dest
, word_mode
)
6883 && (bitpos
% 16) == 0
6884 && (bitsize
% 16) == 0
6885 && const_int_operand (src
, VOIDmode
))
6887 HOST_WIDE_INT val
= INTVAL (src
);
6888 int regpos
= bitpos
+ bitsize
;
6890 while (regpos
> bitpos
)
6892 machine_mode putmode
;
6895 if (TARGET_EXTIMM
&& (regpos
% 32 == 0) && (regpos
>= bitpos
+ 32))
6900 putsize
= GET_MODE_BITSIZE (putmode
);
6902 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6905 gen_int_mode (val
, putmode
));
6908 gcc_assert (regpos
== bitpos
);
6912 smode
= smallest_int_mode_for_size (bitsize
);
6913 smode_bsize
= GET_MODE_BITSIZE (smode
);
6914 mode_bsize
= GET_MODE_BITSIZE (mode
);
6916 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6918 && (bitsize
% BITS_PER_UNIT
) == 0
6920 && (register_operand (src
, word_mode
)
6921 || const_int_operand (src
, VOIDmode
)))
6923 /* Emit standard pattern if possible. */
6924 if (smode_bsize
== bitsize
)
6926 emit_move_insn (adjust_address (dest
, smode
, 0),
6927 gen_lowpart (smode
, src
));
6931 /* (set (ze (mem)) (const_int)). */
6932 else if (const_int_operand (src
, VOIDmode
))
6934 int size
= bitsize
/ BITS_PER_UNIT
;
6935 rtx src_mem
= adjust_address (force_const_mem (word_mode
, src
),
6937 UNITS_PER_WORD
- size
);
6939 dest
= adjust_address (dest
, BLKmode
, 0);
6940 set_mem_size (dest
, size
);
6941 rtx size_rtx
= GEN_INT (size
);
6942 s390_expand_cpymem (dest
, src_mem
, size_rtx
, size_rtx
, size_rtx
);
6946 /* (set (ze (mem)) (reg)). */
6947 else if (register_operand (src
, word_mode
))
6950 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
, op1
,
6954 /* Emit st,stcmh sequence. */
6955 int stcmh_width
= bitsize
- 32;
6956 int size
= stcmh_width
/ BITS_PER_UNIT
;
6958 emit_move_insn (adjust_address (dest
, SImode
, size
),
6959 gen_lowpart (SImode
, src
));
6960 set_mem_size (dest
, size
);
6961 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6962 GEN_INT (stcmh_width
),
6964 gen_rtx_LSHIFTRT (word_mode
, src
, GEN_INT (32)));
6970 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6971 if ((bitpos
% BITS_PER_UNIT
) == 0
6972 && (bitsize
% BITS_PER_UNIT
) == 0
6973 && (bitpos
& 32) == ((bitpos
+ bitsize
- 1) & 32)
6975 && (mode
== DImode
|| mode
== SImode
)
6977 && register_operand (dest
, mode
))
6979 /* Emit a strict_low_part pattern if possible. */
6980 if (smode_bsize
== bitsize
&& bitpos
== mode_bsize
- smode_bsize
)
6982 rtx low_dest
= s390_gen_lowpart_subreg (smode
, dest
);
6983 rtx low_src
= gen_lowpart (smode
, src
);
6987 case E_QImode
: emit_insn (gen_movstrictqi (low_dest
, low_src
)); return true;
6988 case E_HImode
: emit_insn (gen_movstricthi (low_dest
, low_src
)); return true;
6989 case E_SImode
: emit_insn (gen_movstrictsi (low_dest
, low_src
)); return true;
6994 /* ??? There are more powerful versions of ICM that are not
6995 completely represented in the md file. */
6998 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6999 if (TARGET_Z10
&& (mode
== DImode
|| mode
== SImode
))
7001 machine_mode mode_s
= GET_MODE (src
);
7003 if (CONSTANT_P (src
))
7005 /* For constant zero values the representation with AND
7006 appears to be folded in more situations than the (set
7007 (zero_extract) ...).
7008 We only do this when the start and end of the bitfield
7009 remain in the same SImode chunk. That way nihf or nilf
7011 The AND patterns might still generate a risbg for this. */
7012 if (src
== const0_rtx
&& bitpos
/ 32 == (bitpos
+ bitsize
- 1) / 32)
7015 src
= force_reg (mode
, src
);
7017 else if (mode_s
!= mode
)
7019 gcc_assert (GET_MODE_BITSIZE (mode_s
) >= bitsize
);
7020 src
= force_reg (mode_s
, src
);
7021 src
= gen_lowpart (mode
, src
);
7024 op
= gen_rtx_ZERO_EXTRACT (mode
, dest
, op1
, op2
),
7025 op
= gen_rtx_SET (op
, src
);
7029 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
7030 op
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
));
7040 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
7041 register that holds VAL of mode MODE shifted by COUNT bits. */
7044 s390_expand_mask_and_shift (rtx val
, machine_mode mode
, rtx count
)
7046 val
= expand_simple_binop (SImode
, AND
, val
, GEN_INT (GET_MODE_MASK (mode
)),
7047 NULL_RTX
, 1, OPTAB_DIRECT
);
7048 return expand_simple_binop (SImode
, ASHIFT
, val
, count
,
7049 NULL_RTX
, 1, OPTAB_DIRECT
);
7052 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
7053 the result in TARGET. */
7056 s390_expand_vec_compare (rtx target
, enum rtx_code cond
,
7057 rtx cmp_op1
, rtx cmp_op2
)
7059 machine_mode mode
= GET_MODE (target
);
7060 bool neg_p
= false, swap_p
= false;
7063 if (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_VECTOR_FLOAT
)
7065 cmp_op2
= force_reg (GET_MODE (cmp_op1
), cmp_op2
);
7068 /* NE a != b -> !(a == b) */
7069 case NE
: cond
= EQ
; neg_p
= true; break;
7071 emit_insn (gen_vec_cmpungt (target
, cmp_op1
, cmp_op2
));
7074 emit_insn (gen_vec_cmpunge (target
, cmp_op1
, cmp_op2
));
7076 case LE
: cond
= GE
; swap_p
= true; break;
7077 /* UNLE: (a u<= b) -> (b u>= a). */
7079 emit_insn (gen_vec_cmpunge (target
, cmp_op2
, cmp_op1
));
7081 /* LT: a < b -> b > a */
7082 case LT
: cond
= GT
; swap_p
= true; break;
7083 /* UNLT: (a u< b) -> (b u> a). */
7085 emit_insn (gen_vec_cmpungt (target
, cmp_op2
, cmp_op1
));
7088 emit_insn (gen_vec_cmpuneq (target
, cmp_op1
, cmp_op2
));
7091 emit_insn (gen_vec_cmpltgt (target
, cmp_op1
, cmp_op2
));
7094 emit_insn (gen_vec_cmpordered (target
, cmp_op1
, cmp_op2
));
7097 emit_insn (gen_vec_cmpunordered (target
, cmp_op1
, cmp_op2
));
7104 /* Turn x < 0 into x >> (bits per element - 1) */
7105 if (cond
== LT
&& cmp_op2
== CONST0_RTX (mode
))
7107 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) - 1;
7108 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cmp_op1
,
7109 GEN_INT (shift
), target
,
7112 emit_move_insn (target
, res
);
7115 cmp_op2
= force_reg (GET_MODE (cmp_op1
), cmp_op2
);
7119 /* NE: a != b -> !(a == b) */
7120 case NE
: cond
= EQ
; neg_p
= true; break;
7121 /* GE: a >= b -> !(b > a) */
7122 case GE
: cond
= GT
; neg_p
= true; swap_p
= true; break;
7123 /* GEU: a >= b -> !(b > a) */
7124 case GEU
: cond
= GTU
; neg_p
= true; swap_p
= true; break;
7125 /* LE: a <= b -> !(a > b) */
7126 case LE
: cond
= GT
; neg_p
= true; break;
7127 /* LEU: a <= b -> !(a > b) */
7128 case LEU
: cond
= GTU
; neg_p
= true; break;
7129 /* LT: a < b -> b > a */
7130 case LT
: cond
= GT
; swap_p
= true; break;
7131 /* LTU: a < b -> b > a */
7132 case LTU
: cond
= GTU
; swap_p
= true; break;
7139 tmp
= cmp_op1
; cmp_op1
= cmp_op2
; cmp_op2
= tmp
;
7142 emit_insn (gen_rtx_SET (target
, gen_rtx_fmt_ee (cond
,
7144 cmp_op1
, cmp_op2
)));
7146 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (mode
, target
)));
7149 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
7150 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
7151 elements in CMP1 and CMP2 fulfill the comparison.
7152 This function is only used to emit patterns for the vx builtins and
7153 therefore only handles comparison codes required by the
7156 s390_expand_vec_compare_cc (rtx target
, enum rtx_code code
,
7157 rtx cmp1
, rtx cmp2
, bool all_p
)
7159 machine_mode cc_producer_mode
, cc_consumer_mode
, scratch_mode
;
7160 rtx tmp_reg
= gen_reg_rtx (SImode
);
7161 bool swap_p
= false;
7163 if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_INT
)
7169 cc_producer_mode
= CCVEQmode
;
7173 code
= swap_condition (code
);
7178 cc_producer_mode
= CCVIHmode
;
7182 code
= swap_condition (code
);
7187 cc_producer_mode
= CCVIHUmode
;
7193 scratch_mode
= GET_MODE (cmp1
);
7194 /* These codes represent inverted CC interpretations. Inverting
7195 an ALL CC mode results in an ANY CC mode and the other way
7196 around. Invert the all_p flag here to compensate for
7198 if (code
== NE
|| code
== LE
|| code
== LEU
)
7201 cc_consumer_mode
= all_p
? CCVIALLmode
: CCVIANYmode
;
7203 else if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_FLOAT
)
7209 case EQ
: cc_producer_mode
= CCVEQmode
; break;
7210 case NE
: cc_producer_mode
= CCVEQmode
; inv_p
= true; break;
7211 case GT
: cc_producer_mode
= CCVFHmode
; break;
7212 case GE
: cc_producer_mode
= CCVFHEmode
; break;
7213 case UNLE
: cc_producer_mode
= CCVFHmode
; inv_p
= true; break;
7214 case UNLT
: cc_producer_mode
= CCVFHEmode
; inv_p
= true; break;
7215 case LT
: cc_producer_mode
= CCVFHmode
; code
= GT
; swap_p
= true; break;
7216 case LE
: cc_producer_mode
= CCVFHEmode
; code
= GE
; swap_p
= true; break;
7217 default: gcc_unreachable ();
7219 scratch_mode
= related_int_vector_mode (GET_MODE (cmp1
)).require ();
7224 cc_consumer_mode
= all_p
? CCVFALLmode
: CCVFANYmode
;
7236 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7237 gen_rtvec (2, gen_rtx_SET (
7238 gen_rtx_REG (cc_producer_mode
, CC_REGNUM
),
7239 gen_rtx_COMPARE (cc_producer_mode
, cmp1
, cmp2
)),
7240 gen_rtx_CLOBBER (VOIDmode
,
7241 gen_rtx_SCRATCH (scratch_mode
)))));
7242 emit_move_insn (target
, const0_rtx
);
7243 emit_move_insn (tmp_reg
, const1_rtx
);
7245 emit_move_insn (target
,
7246 gen_rtx_IF_THEN_ELSE (SImode
,
7247 gen_rtx_fmt_ee (code
, VOIDmode
,
7248 gen_rtx_REG (cc_consumer_mode
, CC_REGNUM
),
7253 /* Invert the comparison CODE applied to a CC mode. This is only safe
7254 if we know whether there result was created by a floating point
7255 compare or not. For the CCV modes this is encoded as part of the
7258 s390_reverse_condition (machine_mode mode
, enum rtx_code code
)
7260 /* Reversal of FP compares takes care -- an ordered compare
7261 becomes an unordered compare and vice versa. */
7262 if (mode
== CCVFALLmode
|| mode
== CCVFANYmode
|| mode
== CCSFPSmode
)
7263 return reverse_condition_maybe_unordered (code
);
7264 else if (mode
== CCVIALLmode
|| mode
== CCVIANYmode
)
7265 return reverse_condition (code
);
7270 /* Generate a vector comparison expression loading either elements of
7271 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
7275 s390_expand_vcond (rtx target
, rtx then
, rtx els
,
7276 enum rtx_code cond
, rtx cmp_op1
, rtx cmp_op2
)
7279 machine_mode result_mode
;
7282 machine_mode target_mode
= GET_MODE (target
);
7283 machine_mode cmp_mode
= GET_MODE (cmp_op1
);
7284 rtx op
= (cond
== LT
) ? els
: then
;
7286 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
7287 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
7288 for short and byte (x >> 15 and x >> 7 respectively). */
7289 if ((cond
== LT
|| cond
== GE
)
7290 && target_mode
== cmp_mode
7291 && cmp_op2
== CONST0_RTX (cmp_mode
)
7292 && op
== CONST0_RTX (target_mode
)
7293 && s390_vector_mode_supported_p (target_mode
)
7294 && GET_MODE_CLASS (target_mode
) == MODE_VECTOR_INT
)
7296 rtx negop
= (cond
== LT
) ? then
: els
;
7298 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (target_mode
)) - 1;
7300 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
7301 if (negop
== CONST1_RTX (target_mode
))
7303 rtx res
= expand_simple_binop (cmp_mode
, LSHIFTRT
, cmp_op1
,
7304 GEN_INT (shift
), target
,
7307 emit_move_insn (target
, res
);
7311 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
7312 else if (all_ones_operand (negop
, target_mode
))
7314 rtx res
= expand_simple_binop (cmp_mode
, ASHIFTRT
, cmp_op1
,
7315 GEN_INT (shift
), target
,
7318 emit_move_insn (target
, res
);
7323 /* We always use an integral type vector to hold the comparison
7325 result_mode
= related_int_vector_mode (cmp_mode
).require ();
7326 result_target
= gen_reg_rtx (result_mode
);
7328 /* We allow vector immediates as comparison operands that
7329 can be handled by the optimization above but not by the
7330 following code. Hence, force them into registers here. */
7331 if (!REG_P (cmp_op1
))
7332 cmp_op1
= force_reg (GET_MODE (cmp_op1
), cmp_op1
);
7334 s390_expand_vec_compare (result_target
, cond
, cmp_op1
, cmp_op2
);
7336 /* If the results are supposed to be either -1 or 0 we are done
7337 since this is what our compare instructions generate anyway. */
7338 if (all_ones_operand (then
, GET_MODE (then
))
7339 && const0_operand (els
, GET_MODE (els
)))
7341 emit_move_insn (target
, gen_rtx_SUBREG (target_mode
,
7346 /* Otherwise we will do a vsel afterwards. */
7347 /* This gets triggered e.g.
7348 with gcc.c-torture/compile/pr53410-1.c */
7350 then
= force_reg (target_mode
, then
);
7353 els
= force_reg (target_mode
, els
);
7355 tmp
= gen_rtx_fmt_ee (EQ
, VOIDmode
,
7357 CONST0_RTX (result_mode
));
7359 /* We compared the result against zero above so we have to swap then
7361 tmp
= gen_rtx_IF_THEN_ELSE (target_mode
, tmp
, els
, then
);
7363 gcc_assert (target_mode
== GET_MODE (then
));
7364 emit_insn (gen_rtx_SET (target
, tmp
));
7367 /* Emit the RTX necessary to initialize the vector TARGET with values
7370 s390_expand_vec_init (rtx target
, rtx vals
)
7372 machine_mode mode
= GET_MODE (target
);
7373 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7374 int n_elts
= GET_MODE_NUNITS (mode
);
7375 bool all_same
= true, all_regs
= true, all_const_int
= true;
7379 for (i
= 0; i
< n_elts
; ++i
)
7381 x
= XVECEXP (vals
, 0, i
);
7383 if (!CONST_INT_P (x
))
7384 all_const_int
= false;
7386 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
7393 /* Use vector gen mask or vector gen byte mask if possible. */
7394 if (all_same
&& all_const_int
)
7396 rtx vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7397 if (XVECEXP (vals
, 0, 0) == const0_rtx
7398 || s390_contiguous_bitmask_vector_p (vec
, NULL
, NULL
)
7399 || s390_bytemask_vector_p (vec
, NULL
))
7401 emit_insn (gen_rtx_SET (target
, vec
));
7406 /* Use vector replicate instructions. vlrep/vrepi/vrep */
7409 rtx elem
= XVECEXP (vals
, 0, 0);
7411 /* vec_splats accepts general_operand as source. */
7412 if (!general_operand (elem
, GET_MODE (elem
)))
7413 elem
= force_reg (inner_mode
, elem
);
7415 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, elem
)));
7422 && GET_MODE_SIZE (inner_mode
) == 8)
7424 /* Use vector load pair. */
7425 emit_insn (gen_rtx_SET (target
,
7426 gen_rtx_VEC_CONCAT (mode
,
7427 XVECEXP (vals
, 0, 0),
7428 XVECEXP (vals
, 0, 1))));
7432 /* Use vector load logical element and zero. */
7433 if (TARGET_VXE
&& (mode
== V4SImode
|| mode
== V4SFmode
))
7437 x
= XVECEXP (vals
, 0, 0);
7438 if (memory_operand (x
, inner_mode
))
7440 for (i
= 1; i
< n_elts
; ++i
)
7441 found
= found
&& XVECEXP (vals
, 0, i
) == const0_rtx
;
7445 machine_mode half_mode
= (inner_mode
== SFmode
7446 ? V2SFmode
: V2SImode
);
7447 emit_insn (gen_rtx_SET (target
,
7448 gen_rtx_VEC_CONCAT (mode
,
7449 gen_rtx_VEC_CONCAT (half_mode
,
7452 gen_rtx_VEC_CONCAT (half_mode
,
7460 /* We are about to set the vector elements one by one. Zero out the
7461 full register first in order to help the data flow framework to
7462 detect it as full VR set. */
7463 emit_insn (gen_rtx_SET (target
, CONST0_RTX (mode
)));
7465 /* Unfortunately the vec_init expander is not allowed to fail. So
7466 we have to implement the fallback ourselves. */
7467 for (i
= 0; i
< n_elts
; i
++)
7469 rtx elem
= XVECEXP (vals
, 0, i
);
7470 if (!general_operand (elem
, GET_MODE (elem
)))
7471 elem
= force_reg (inner_mode
, elem
);
7473 if (elem
!= const0_rtx
)
7474 emit_insn (gen_rtx_SET (target
,
7475 gen_rtx_UNSPEC (mode
,
7477 GEN_INT (i
), target
),
7482 /* Return a parallel of constant integers to be used as permutation
7483 vector for a vector merge operation in MODE. If HIGH_P is true the
7484 left-most elements of the source vectors are merged otherwise the
7485 right-most elements. */
7487 s390_expand_merge_perm_const (machine_mode mode
, bool high_p
)
7489 int nelts
= GET_MODE_NUNITS (mode
);
7491 int addend
= high_p
? 0 : nelts
;
7493 for (int i
= 0; i
< nelts
; i
++)
7494 perm
[i
] = GEN_INT ((i
+ addend
) / 2 + (i
% 2) * nelts
);
7496 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nelts
, perm
));
7499 /* Emit RTL to implement a vector merge operation of SRC1 and SRC2
7500 which creates the result in TARGET. HIGH_P determines whether a
7501 merge hi or lo will be generated. */
7503 s390_expand_merge (rtx target
, rtx src1
, rtx src2
, bool high_p
)
7505 machine_mode mode
= GET_MODE (target
);
7506 opt_machine_mode opt_mode_2x
= mode_for_vector (GET_MODE_INNER (mode
),
7507 2 * GET_MODE_NUNITS (mode
));
7508 gcc_assert (opt_mode_2x
.exists ());
7509 machine_mode mode_double_nelts
= opt_mode_2x
.require ();
7510 rtx constv
= s390_expand_merge_perm_const (mode
, high_p
);
7511 src1
= force_reg (GET_MODE (src1
), src1
);
7512 src2
= force_reg (GET_MODE (src2
), src2
);
7513 rtx x
= gen_rtx_VEC_CONCAT (mode_double_nelts
, src1
, src2
);
7514 x
= gen_rtx_VEC_SELECT (mode
, x
, constv
);
7515 emit_insn (gen_rtx_SET (target
, x
));
7518 /* Emit a vector constant that contains 1s in each element's sign bit position
7519 and 0s in other positions. MODE is the desired constant's mode. */
7521 s390_build_signbit_mask (machine_mode mode
)
7523 if (mode
== TFmode
&& TARGET_VXE
)
7525 wide_int mask_val
= wi::set_bit_in_zero (127, 128);
7526 rtx mask
= immed_wide_int_const (mask_val
, TImode
);
7527 return gen_lowpart (TFmode
, mask
);
7530 /* Generate the integral element mask value. */
7531 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7532 int inner_bitsize
= GET_MODE_BITSIZE (inner_mode
);
7533 wide_int mask_val
= wi::set_bit_in_zero (inner_bitsize
- 1, inner_bitsize
);
7535 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
7536 value to the desired mode. */
7537 machine_mode int_mode
= related_int_vector_mode (mode
).require ();
7538 rtx mask
= immed_wide_int_const (mask_val
, GET_MODE_INNER (int_mode
));
7539 mask
= gen_lowpart (inner_mode
, mask
);
7541 /* Emit the vector mask rtx by mode the element mask rtx. */
7542 int nunits
= GET_MODE_NUNITS (mode
);
7543 rtvec v
= rtvec_alloc (nunits
);
7544 for (int i
= 0; i
< nunits
; i
++)
7545 RTVEC_ELT (v
, i
) = mask
;
7546 return gen_rtx_CONST_VECTOR (mode
, v
);
7549 /* Structure to hold the initial parameters for a compare_and_swap operation
7550 in HImode and QImode. */
7552 struct alignment_context
7554 rtx memsi
; /* SI aligned memory location. */
7555 rtx shift
; /* Bit offset with regard to lsb. */
7556 rtx modemask
; /* Mask of the HQImode shifted by SHIFT bits. */
7557 rtx modemaski
; /* ~modemask */
7558 bool aligned
; /* True if memory is aligned, false else. */
7561 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
7562 structure AC for transparent simplifying, if the memory alignment is known
7563 to be at least 32bit. MEM is the memory location for the actual operation
7564 and MODE its mode. */
7567 init_alignment_context (struct alignment_context
*ac
, rtx mem
,
7570 ac
->shift
= GEN_INT (GET_MODE_SIZE (SImode
) - GET_MODE_SIZE (mode
));
7571 ac
->aligned
= (MEM_ALIGN (mem
) >= GET_MODE_BITSIZE (SImode
));
7574 ac
->memsi
= adjust_address (mem
, SImode
, 0); /* Memory is aligned. */
7577 /* Alignment is unknown. */
7578 rtx byteoffset
, addr
, align
;
7580 /* Force the address into a register. */
7581 addr
= force_reg (Pmode
, XEXP (mem
, 0));
7583 /* Align it to SImode. */
7584 align
= expand_simple_binop (Pmode
, AND
, addr
,
7585 GEN_INT (-GET_MODE_SIZE (SImode
)),
7586 NULL_RTX
, 1, OPTAB_DIRECT
);
7588 ac
->memsi
= gen_rtx_MEM (SImode
, align
);
7589 MEM_VOLATILE_P (ac
->memsi
) = MEM_VOLATILE_P (mem
);
7590 set_mem_alias_set (ac
->memsi
, ALIAS_SET_MEMORY_BARRIER
);
7591 set_mem_align (ac
->memsi
, GET_MODE_BITSIZE (SImode
));
7593 /* Calculate shiftcount. */
7594 byteoffset
= expand_simple_binop (Pmode
, AND
, addr
,
7595 GEN_INT (GET_MODE_SIZE (SImode
) - 1),
7596 NULL_RTX
, 1, OPTAB_DIRECT
);
7597 /* As we already have some offset, evaluate the remaining distance. */
7598 ac
->shift
= expand_simple_binop (SImode
, MINUS
, ac
->shift
, byteoffset
,
7599 NULL_RTX
, 1, OPTAB_DIRECT
);
7602 /* Shift is the byte count, but we need the bitcount. */
7603 ac
->shift
= expand_simple_binop (SImode
, ASHIFT
, ac
->shift
, GEN_INT (3),
7604 NULL_RTX
, 1, OPTAB_DIRECT
);
7606 /* Calculate masks. */
7607 ac
->modemask
= expand_simple_binop (SImode
, ASHIFT
,
7608 GEN_INT (GET_MODE_MASK (mode
)),
7609 ac
->shift
, NULL_RTX
, 1, OPTAB_DIRECT
);
7610 ac
->modemaski
= expand_simple_unop (SImode
, NOT
, ac
->modemask
,
7614 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7615 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7616 perform the merge in SEQ2. */
7619 s390_two_part_insv (struct alignment_context
*ac
, rtx
*seq1
, rtx
*seq2
,
7620 machine_mode mode
, rtx val
, rtx ins
)
7627 tmp
= copy_to_mode_reg (SImode
, val
);
7628 if (s390_expand_insv (tmp
, GEN_INT (GET_MODE_BITSIZE (mode
)),
7632 *seq2
= get_insns ();
7639 /* Failed to use insv. Generate a two part shift and mask. */
7641 tmp
= s390_expand_mask_and_shift (ins
, mode
, ac
->shift
);
7642 *seq1
= get_insns ();
7646 tmp
= expand_simple_binop (SImode
, IOR
, tmp
, val
, NULL_RTX
, 1, OPTAB_DIRECT
);
7647 *seq2
= get_insns ();
7653 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7654 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7655 value to set if CMP == MEM. */
7658 s390_expand_cs_hqi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7659 rtx cmp
, rtx new_rtx
, bool is_weak
)
7661 struct alignment_context ac
;
7662 rtx cmpv
, newv
, val
, cc
, seq0
, seq1
, seq2
, seq3
;
7663 rtx res
= gen_reg_rtx (SImode
);
7664 rtx_code_label
*csloop
= NULL
, *csend
= NULL
;
7666 gcc_assert (MEM_P (mem
));
7668 init_alignment_context (&ac
, mem
, mode
);
7670 /* Load full word. Subsequent loads are performed by CS. */
7671 val
= expand_simple_binop (SImode
, AND
, ac
.memsi
, ac
.modemaski
,
7672 NULL_RTX
, 1, OPTAB_DIRECT
);
7674 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7675 possible, we try to use insv to make this happen efficiently. If
7676 that fails we'll generate code both inside and outside the loop. */
7677 cmpv
= s390_two_part_insv (&ac
, &seq0
, &seq2
, mode
, val
, cmp
);
7678 newv
= s390_two_part_insv (&ac
, &seq1
, &seq3
, mode
, val
, new_rtx
);
7685 /* Start CS loop. */
7688 /* Begin assuming success. */
7689 emit_move_insn (btarget
, const1_rtx
);
7691 csloop
= gen_label_rtx ();
7692 csend
= gen_label_rtx ();
7693 emit_label (csloop
);
7696 /* val = "<mem>00..0<mem>"
7697 * cmp = "00..0<cmp>00..0"
7698 * new = "00..0<new>00..0"
7704 cc
= s390_emit_compare_and_swap (EQ
, res
, ac
.memsi
, cmpv
, newv
, CCZ1mode
);
7706 emit_insn (gen_cstorecc4 (btarget
, cc
, XEXP (cc
, 0), XEXP (cc
, 1)));
7711 /* Jump to end if we're done (likely?). */
7712 s390_emit_jump (csend
, cc
);
7714 /* Check for changes outside mode, and loop internal if so.
7715 Arrange the moves so that the compare is adjacent to the
7716 branch so that we can generate CRJ. */
7717 tmp
= copy_to_reg (val
);
7718 force_expand_binop (SImode
, and_optab
, res
, ac
.modemaski
, val
,
7720 cc
= s390_emit_compare (NE
, val
, tmp
);
7721 s390_emit_jump (csloop
, cc
);
7724 emit_move_insn (btarget
, const0_rtx
);
7728 /* Return the correct part of the bitfield. */
7729 convert_move (vtarget
, expand_simple_binop (SImode
, LSHIFTRT
, res
, ac
.shift
,
7730 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7733 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7735 s390_expand_cs_tdsi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7736 rtx cmp
, rtx new_rtx
, bool is_weak
)
7738 rtx output
= vtarget
;
7739 rtx_code_label
*skip_cs_label
= NULL
;
7740 bool do_const_opt
= false;
7742 if (!register_operand (output
, mode
))
7743 output
= gen_reg_rtx (mode
);
7745 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7746 with the constant first and skip the compare_and_swap because its very
7747 expensive and likely to fail anyway.
7748 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7749 cause spurious in that case.
7750 Note 2: It may be useful to do this also for non-constant INPUT.
7751 Note 3: Currently only targets with "load on condition" are supported
7752 (z196 and newer). */
7755 && (mode
== SImode
|| mode
== DImode
))
7756 do_const_opt
= (is_weak
&& CONST_INT_P (cmp
));
7760 rtx cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7762 skip_cs_label
= gen_label_rtx ();
7763 emit_move_insn (btarget
, const0_rtx
);
7764 if (CONST_INT_P (cmp
) && INTVAL (cmp
) == 0)
7766 rtvec lt
= rtvec_alloc (2);
7768 /* Load-and-test + conditional jump. */
7770 = gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, mem
, cmp
));
7771 RTVEC_ELT (lt
, 1) = gen_rtx_SET (output
, mem
);
7772 emit_insn (gen_rtx_PARALLEL (VOIDmode
, lt
));
7776 emit_move_insn (output
, mem
);
7777 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, output
, cmp
)));
7779 s390_emit_jump (skip_cs_label
, gen_rtx_NE (VOIDmode
, cc
, const0_rtx
));
7780 add_reg_br_prob_note (get_last_insn (),
7781 profile_probability::very_unlikely ());
7782 /* If the jump is not taken, OUTPUT is the expected value. */
7784 /* Reload newval to a register manually, *after* the compare and jump
7785 above. Otherwise Reload might place it before the jump. */
7788 cmp
= force_reg (mode
, cmp
);
7789 new_rtx
= force_reg (mode
, new_rtx
);
7790 s390_emit_compare_and_swap (EQ
, output
, mem
, cmp
, new_rtx
,
7791 (do_const_opt
) ? CCZmode
: CCZ1mode
);
7792 if (skip_cs_label
!= NULL
)
7793 emit_label (skip_cs_label
);
7795 /* We deliberately accept non-register operands in the predicate
7796 to ensure the write back to the output operand happens *before*
7797 the store-flags code below. This makes it easier for combine
7798 to merge the store-flags code with a potential test-and-branch
7799 pattern following (immediately!) afterwards. */
7800 if (output
!= vtarget
)
7801 emit_move_insn (vtarget
, output
);
7807 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7808 btarget has already been initialized with 0 above. */
7809 cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7810 cond
= gen_rtx_EQ (VOIDmode
, cc
, const0_rtx
);
7811 ite
= gen_rtx_IF_THEN_ELSE (SImode
, cond
, const1_rtx
, btarget
);
7812 emit_insn (gen_rtx_SET (btarget
, ite
));
7818 cc
= gen_rtx_REG (CCZ1mode
, CC_REGNUM
);
7819 cond
= gen_rtx_EQ (SImode
, cc
, const0_rtx
);
7820 emit_insn (gen_cstorecc4 (btarget
, cond
, cc
, const0_rtx
));
7824 /* Expand an atomic compare and swap operation. MEM is the memory location,
7825 CMP the old value to compare MEM with and NEW_RTX the value to set if
7829 s390_expand_cs (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7830 rtx cmp
, rtx new_rtx
, bool is_weak
)
7837 s390_expand_cs_tdsi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7841 s390_expand_cs_hqi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7848 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7849 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7853 s390_expand_atomic_exchange_tdsi (rtx output
, rtx mem
, rtx input
)
7855 machine_mode mode
= GET_MODE (mem
);
7856 rtx_code_label
*csloop
;
7859 && (mode
== DImode
|| mode
== SImode
)
7860 && CONST_INT_P (input
) && INTVAL (input
) == 0)
7862 emit_move_insn (output
, const0_rtx
);
7864 emit_insn (gen_atomic_fetch_anddi (output
, mem
, const0_rtx
, input
));
7866 emit_insn (gen_atomic_fetch_andsi (output
, mem
, const0_rtx
, input
));
7870 input
= force_reg (mode
, input
);
7871 emit_move_insn (output
, mem
);
7872 csloop
= gen_label_rtx ();
7873 emit_label (csloop
);
7874 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, output
, mem
, output
,
7878 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7879 and VAL the value to play with. If AFTER is true then store the value
7880 MEM holds after the operation, if AFTER is false then store the value MEM
7881 holds before the operation. If TARGET is zero then discard that value, else
7882 store it to TARGET. */
7885 s390_expand_atomic (machine_mode mode
, enum rtx_code code
,
7886 rtx target
, rtx mem
, rtx val
, bool after
)
7888 struct alignment_context ac
;
7890 rtx new_rtx
= gen_reg_rtx (SImode
);
7891 rtx orig
= gen_reg_rtx (SImode
);
7892 rtx_code_label
*csloop
= gen_label_rtx ();
7894 gcc_assert (!target
|| register_operand (target
, VOIDmode
));
7895 gcc_assert (MEM_P (mem
));
7897 init_alignment_context (&ac
, mem
, mode
);
7899 /* Shift val to the correct bit positions.
7900 Preserve "icm", but prevent "ex icm". */
7901 if (!(ac
.aligned
&& code
== SET
&& MEM_P (val
)))
7902 val
= s390_expand_mask_and_shift (val
, mode
, ac
.shift
);
7904 /* Further preparation insns. */
7905 if (code
== PLUS
|| code
== MINUS
)
7906 emit_move_insn (orig
, val
);
7907 else if (code
== MULT
|| code
== AND
) /* val = "11..1<val>11..1" */
7908 val
= expand_simple_binop (SImode
, XOR
, val
, ac
.modemaski
,
7909 NULL_RTX
, 1, OPTAB_DIRECT
);
7911 /* Load full word. Subsequent loads are performed by CS. */
7912 cmp
= force_reg (SImode
, ac
.memsi
);
7914 /* Start CS loop. */
7915 emit_label (csloop
);
7916 emit_move_insn (new_rtx
, cmp
);
7918 /* Patch new with val at correct position. */
7923 val
= expand_simple_binop (SImode
, code
, new_rtx
, orig
,
7924 NULL_RTX
, 1, OPTAB_DIRECT
);
7925 val
= expand_simple_binop (SImode
, AND
, val
, ac
.modemask
,
7926 NULL_RTX
, 1, OPTAB_DIRECT
);
7929 if (ac
.aligned
&& MEM_P (val
))
7930 store_bit_field (new_rtx
, GET_MODE_BITSIZE (mode
), 0,
7931 0, 0, SImode
, val
, false, false);
7934 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, ac
.modemaski
,
7935 NULL_RTX
, 1, OPTAB_DIRECT
);
7936 new_rtx
= expand_simple_binop (SImode
, IOR
, new_rtx
, val
,
7937 NULL_RTX
, 1, OPTAB_DIRECT
);
7943 new_rtx
= expand_simple_binop (SImode
, code
, new_rtx
, val
,
7944 NULL_RTX
, 1, OPTAB_DIRECT
);
7946 case MULT
: /* NAND */
7947 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, val
,
7948 NULL_RTX
, 1, OPTAB_DIRECT
);
7949 new_rtx
= expand_simple_binop (SImode
, XOR
, new_rtx
, ac
.modemask
,
7950 NULL_RTX
, 1, OPTAB_DIRECT
);
7956 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, cmp
,
7957 ac
.memsi
, cmp
, new_rtx
,
7960 /* Return the correct part of the bitfield. */
7962 convert_move (target
, expand_simple_binop (SImode
, LSHIFTRT
,
7963 after
? new_rtx
: cmp
, ac
.shift
,
7964 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7967 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7968 We need to emit DTP-relative relocations. */
7970 static void s390_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
7973 s390_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7978 fputs ("\t.long\t", file
);
7981 fputs ("\t.quad\t", file
);
7986 output_addr_const (file
, x
);
7987 fputs ("@DTPOFF", file
);
7990 /* Return the proper mode for REGNO being represented in the dwarf
7993 s390_dwarf_frame_reg_mode (int regno
)
7995 machine_mode save_mode
= default_dwarf_frame_reg_mode (regno
);
7997 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7998 if (GENERAL_REGNO_P (regno
))
8001 /* The rightmost 64 bits of vector registers are call-clobbered. */
8002 if (GET_MODE_SIZE (save_mode
) > 8)
8008 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
8009 /* Implement TARGET_MANGLE_TYPE. */
8012 s390_mangle_type (const_tree type
)
8014 type
= TYPE_MAIN_VARIANT (type
);
8016 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
8017 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
8020 if (type
== s390_builtin_types
[BT_BV16QI
]) return "U6__boolc";
8021 if (type
== s390_builtin_types
[BT_BV8HI
]) return "U6__bools";
8022 if (type
== s390_builtin_types
[BT_BV4SI
]) return "U6__booli";
8023 if (type
== s390_builtin_types
[BT_BV2DI
]) return "U6__booll";
8025 if (type
== long_double_type_node
&& TARGET_LONG_DOUBLE_128
)
8028 /* For all other types, use normal C++ mangling. */
8033 /* In the name of slightly smaller debug output, and to cater to
8034 general assembler lossage, recognize various UNSPEC sequences
8035 and turn them back into a direct symbol reference. */
8038 s390_delegitimize_address (rtx orig_x
)
8042 orig_x
= delegitimize_mem_from_attrs (orig_x
);
8045 /* Extract the symbol ref from:
8046 (plus:SI (reg:SI 12 %r12)
8047 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
8048 UNSPEC_GOTOFF/PLTOFF)))
8050 (plus:SI (reg:SI 12 %r12)
8051 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
8052 UNSPEC_GOTOFF/PLTOFF)
8053 (const_int 4 [0x4])))) */
8054 if (GET_CODE (x
) == PLUS
8055 && REG_P (XEXP (x
, 0))
8056 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
8057 && GET_CODE (XEXP (x
, 1)) == CONST
)
8059 HOST_WIDE_INT offset
= 0;
8061 /* The const operand. */
8062 y
= XEXP (XEXP (x
, 1), 0);
8064 if (GET_CODE (y
) == PLUS
8065 && GET_CODE (XEXP (y
, 1)) == CONST_INT
)
8067 offset
= INTVAL (XEXP (y
, 1));
8071 if (GET_CODE (y
) == UNSPEC
8072 && (XINT (y
, 1) == UNSPEC_GOTOFF
8073 || XINT (y
, 1) == UNSPEC_PLTOFF
))
8074 return plus_constant (Pmode
, XVECEXP (y
, 0, 0), offset
);
8077 if (GET_CODE (x
) != MEM
)
8081 if (GET_CODE (x
) == PLUS
8082 && GET_CODE (XEXP (x
, 1)) == CONST
8083 && GET_CODE (XEXP (x
, 0)) == REG
8084 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8086 y
= XEXP (XEXP (x
, 1), 0);
8087 if (GET_CODE (y
) == UNSPEC
8088 && XINT (y
, 1) == UNSPEC_GOT
)
8089 y
= XVECEXP (y
, 0, 0);
8093 else if (GET_CODE (x
) == CONST
)
8095 /* Extract the symbol ref from:
8096 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
8097 UNSPEC_PLT/GOTENT))) */
8100 if (GET_CODE (y
) == UNSPEC
8101 && (XINT (y
, 1) == UNSPEC_GOTENT
8102 || XINT (y
, 1) == UNSPEC_PLT31
))
8103 y
= XVECEXP (y
, 0, 0);
8110 if (GET_MODE (orig_x
) != Pmode
)
8112 if (GET_MODE (orig_x
) == BLKmode
)
8114 y
= lowpart_subreg (GET_MODE (orig_x
), y
, Pmode
);
8121 /* Output operand OP to stdio stream FILE.
8122 OP is an address (register + offset) which is not used to address data;
8123 instead the rightmost bits are interpreted as the value. */
8126 print_addrstyle_operand (FILE *file
, rtx op
)
8128 HOST_WIDE_INT offset
;
8131 /* Extract base register and offset. */
8132 if (!s390_decompose_addrstyle_without_index (op
, &base
, &offset
))
8138 gcc_assert (GET_CODE (base
) == REG
);
8139 gcc_assert (REGNO (base
) < FIRST_PSEUDO_REGISTER
);
8140 gcc_assert (REGNO_REG_CLASS (REGNO (base
)) == ADDR_REGS
);
8143 /* Offsets are constricted to twelve bits. */
8144 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
& ((1 << 12) - 1));
8146 fprintf (file
, "(%s)", reg_names
[REGNO (base
)]);
8149 /* Print the shift count operand OP to FILE.
8150 OP is an address-style operand in a form which
8151 s390_valid_shift_count permits. Subregs and no-op
8152 and-masking of the operand are stripped. */
8155 print_shift_count_operand (FILE *file
, rtx op
)
8157 /* No checking of the and mask required here. */
8158 if (!s390_valid_shift_count (op
, 0))
8161 while (op
&& GET_CODE (op
) == SUBREG
)
8162 op
= SUBREG_REG (op
);
8164 if (GET_CODE (op
) == AND
)
8167 print_addrstyle_operand (file
, op
);
8170 /* Assigns the number of NOP halfwords to be emitted before and after the
8171 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
8172 If hotpatching is disabled for the function, the values are set to zero.
8176 s390_function_num_hotpatch_hw (tree decl
,
8182 attr
= lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl
));
8184 /* Handle the arguments of the hotpatch attribute. The values
8185 specified via attribute might override the cmdline argument
8189 tree args
= TREE_VALUE (attr
);
8191 *hw_before
= TREE_INT_CST_LOW (TREE_VALUE (args
));
8192 *hw_after
= TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args
)));
8196 /* Use the values specified by the cmdline arguments. */
8197 *hw_before
= s390_hotpatch_hw_before_label
;
8198 *hw_after
= s390_hotpatch_hw_after_label
;
8202 /* Write the current .machine and .machinemode specification to the assembler
8205 #ifdef HAVE_AS_MACHINE_MACHINEMODE
8207 s390_asm_output_machine_for_arch (FILE *asm_out_file
)
8209 fprintf (asm_out_file
, "\t.machinemode %s\n",
8210 (TARGET_ZARCH
) ? "zarch" : "esa");
8211 fprintf (asm_out_file
, "\t.machine \"%s",
8212 processor_table
[s390_arch
].binutils_name
);
8213 if (S390_USE_ARCHITECTURE_MODIFIERS
)
8217 cpu_flags
= processor_flags_table
[(int) s390_arch
];
8218 if (TARGET_HTM
&& !(cpu_flags
& PF_TX
))
8219 fprintf (asm_out_file
, "+htm");
8220 else if (!TARGET_HTM
&& (cpu_flags
& PF_TX
))
8221 fprintf (asm_out_file
, "+nohtm");
8222 if (TARGET_VX
&& !(cpu_flags
& PF_VX
))
8223 fprintf (asm_out_file
, "+vx");
8224 else if (!TARGET_VX
&& (cpu_flags
& PF_VX
))
8225 fprintf (asm_out_file
, "+novx");
8227 fprintf (asm_out_file
, "\"\n");
8230 /* Write an extra function header before the very start of the function. */
8233 s390_asm_output_function_prefix (FILE *asm_out_file
,
8234 const char *fnname ATTRIBUTE_UNUSED
)
8236 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
) == NULL
)
8238 /* Since only the function specific options are saved but not the indications
8239 which options are set, it's too much work here to figure out which options
8240 have actually changed. Thus, generate .machine and .machinemode whenever a
8241 function has the target attribute or pragma. */
8242 fprintf (asm_out_file
, "\t.machinemode push\n");
8243 fprintf (asm_out_file
, "\t.machine push\n");
8244 s390_asm_output_machine_for_arch (asm_out_file
);
8247 /* Write an extra function footer after the very end of the function. */
8250 s390_asm_declare_function_size (FILE *asm_out_file
,
8251 const char *fnname
, tree decl
)
8253 if (!flag_inhibit_size_directive
)
8254 ASM_OUTPUT_MEASURED_SIZE (asm_out_file
, fnname
);
8255 if (DECL_FUNCTION_SPECIFIC_TARGET (decl
) == NULL
)
8257 fprintf (asm_out_file
, "\t.machine pop\n");
8258 fprintf (asm_out_file
, "\t.machinemode pop\n");
8262 /* Write the extra assembler code needed to declare a function properly. */
8265 s390_asm_output_function_label (FILE *out_file
, const char *fname
,
8268 int hw_before
, hw_after
;
8270 s390_function_num_hotpatch_hw (decl
, &hw_before
, &hw_after
);
8273 unsigned int function_alignment
;
8276 /* Add a trampoline code area before the function label and initialize it
8277 with two-byte nop instructions. This area can be overwritten with code
8278 that jumps to a patched version of the function. */
8279 asm_fprintf (out_file
, "\tnopr\t%%r0"
8280 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
8282 for (i
= 1; i
< hw_before
; i
++)
8283 fputs ("\tnopr\t%r0\n", out_file
);
8285 /* Note: The function label must be aligned so that (a) the bytes of the
8286 following nop do not cross a cacheline boundary, and (b) a jump address
8287 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
8288 stored directly before the label without crossing a cacheline
8289 boundary. All this is necessary to make sure the trampoline code can
8290 be changed atomically.
8291 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
8292 if there are NOPs before the function label, the alignment is placed
8293 before them. So it is necessary to duplicate the alignment after the
8295 function_alignment
= MAX (8, DECL_ALIGN (decl
) / BITS_PER_UNIT
);
8296 if (! DECL_USER_ALIGN (decl
))
8298 = MAX (function_alignment
,
8299 (unsigned int) align_functions
.levels
[0].get_value ());
8300 fputs ("\t# alignment for hotpatch\n", out_file
);
8301 ASM_OUTPUT_ALIGN (out_file
, align_functions
.levels
[0].log
);
8304 if (S390_USE_TARGET_ATTRIBUTE
&& TARGET_DEBUG_ARG
)
8306 asm_fprintf (out_file
, "\t# fn:%s ar%d\n", fname
, s390_arch
);
8307 asm_fprintf (out_file
, "\t# fn:%s tu%d\n", fname
, s390_tune
);
8308 asm_fprintf (out_file
, "\t# fn:%s sg%d\n", fname
, s390_stack_guard
);
8309 asm_fprintf (out_file
, "\t# fn:%s ss%d\n", fname
, s390_stack_size
);
8310 asm_fprintf (out_file
, "\t# fn:%s bc%d\n", fname
, s390_branch_cost
);
8311 asm_fprintf (out_file
, "\t# fn:%s wf%d\n", fname
,
8312 s390_warn_framesize
);
8313 asm_fprintf (out_file
, "\t# fn:%s ba%d\n", fname
, TARGET_BACKCHAIN
);
8314 asm_fprintf (out_file
, "\t# fn:%s hd%d\n", fname
, TARGET_HARD_DFP
);
8315 asm_fprintf (out_file
, "\t# fn:%s hf%d\n", fname
, !TARGET_SOFT_FLOAT
);
8316 asm_fprintf (out_file
, "\t# fn:%s ht%d\n", fname
, TARGET_OPT_HTM
);
8317 asm_fprintf (out_file
, "\t# fn:%s vx%d\n", fname
, TARGET_OPT_VX
);
8318 asm_fprintf (out_file
, "\t# fn:%s ps%d\n", fname
,
8319 TARGET_PACKED_STACK
);
8320 asm_fprintf (out_file
, "\t# fn:%s se%d\n", fname
, TARGET_SMALL_EXEC
);
8321 asm_fprintf (out_file
, "\t# fn:%s mv%d\n", fname
, TARGET_MVCLE
);
8322 asm_fprintf (out_file
, "\t# fn:%s zv%d\n", fname
, TARGET_ZVECTOR
);
8323 asm_fprintf (out_file
, "\t# fn:%s wd%d\n", fname
,
8324 s390_warn_dynamicstack_p
);
8326 assemble_function_label_raw (out_file
, fname
);
8328 asm_fprintf (out_file
,
8329 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
8333 /* Output machine-dependent UNSPECs occurring in address constant X
8334 in assembler syntax to stdio stream FILE. Returns true if the
8335 constant X could be recognized, false otherwise. */
8338 s390_output_addr_const_extra (FILE *file
, rtx x
)
8340 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 1)
8341 switch (XINT (x
, 1))
8344 output_addr_const (file
, XVECEXP (x
, 0, 0));
8345 fprintf (file
, "@GOTENT");
8348 output_addr_const (file
, XVECEXP (x
, 0, 0));
8349 fprintf (file
, "@GOT");
8352 output_addr_const (file
, XVECEXP (x
, 0, 0));
8353 fprintf (file
, "@GOTOFF");
8356 output_addr_const (file
, XVECEXP (x
, 0, 0));
8357 fprintf (file
, "@PLT");
8360 output_addr_const (file
, XVECEXP (x
, 0, 0));
8361 fprintf (file
, "@PLTOFF");
8364 output_addr_const (file
, XVECEXP (x
, 0, 0));
8365 fprintf (file
, "@TLSGD");
8368 assemble_name (file
, get_some_local_dynamic_name ());
8369 fprintf (file
, "@TLSLDM");
8372 output_addr_const (file
, XVECEXP (x
, 0, 0));
8373 fprintf (file
, "@DTPOFF");
8376 output_addr_const (file
, XVECEXP (x
, 0, 0));
8377 fprintf (file
, "@NTPOFF");
8379 case UNSPEC_GOTNTPOFF
:
8380 output_addr_const (file
, XVECEXP (x
, 0, 0));
8381 fprintf (file
, "@GOTNTPOFF");
8383 case UNSPEC_INDNTPOFF
:
8384 output_addr_const (file
, XVECEXP (x
, 0, 0));
8385 fprintf (file
, "@INDNTPOFF");
8389 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 2)
8390 switch (XINT (x
, 1))
8392 case UNSPEC_POOL_OFFSET
:
8393 x
= gen_rtx_MINUS (GET_MODE (x
), XVECEXP (x
, 0, 0), XVECEXP (x
, 0, 1));
8394 output_addr_const (file
, x
);
8400 /* Output address operand ADDR in assembler syntax to
8401 stdio stream FILE. */
8404 print_operand_address (FILE *file
, rtx addr
)
8406 struct s390_address ad
;
8407 memset (&ad
, 0, sizeof (s390_address
));
8409 if (s390_loadrelative_operand_p (addr
, NULL
, NULL
))
8413 output_operand_lossage ("symbolic memory references are "
8414 "only supported on z10 or later");
8417 output_addr_const (file
, addr
);
8421 if (!s390_decompose_address (addr
, &ad
)
8422 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8423 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
8424 output_operand_lossage ("cannot decompose address");
8427 output_addr_const (file
, ad
.disp
);
8429 fprintf (file
, "0");
8431 if (ad
.base
&& ad
.indx
)
8432 fprintf (file
, "(%s,%s)", reg_names
[REGNO (ad
.indx
)],
8433 reg_names
[REGNO (ad
.base
)]);
8435 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
8438 /* Output operand X in assembler syntax to stdio stream FILE.
8439 CODE specified the format flag. The following format flags
8442 'A': On z14 or higher: If operand is a mem print the alignment
8443 hint usable with vl/vst prefixed by a comma.
8444 'C': print opcode suffix for branch condition.
8445 'D': print opcode suffix for inverse branch condition.
8446 'E': print opcode suffix for branch on index instruction.
8447 'G': print the size of the operand in bytes.
8448 'J': print tls_load/tls_gdcall/tls_ldcall suffix
8449 'K': print @PLT suffix for call targets and load address values.
8450 'M': print the second word of a TImode operand.
8451 'N': print the second word of a DImode operand.
8452 'O': print only the displacement of a memory reference or address.
8453 'R': print only the base register of a memory reference or address.
8454 'S': print S-type memory reference (base+displacement).
8455 'Y': print address style operand without index (e.g. shift count or setmem
8458 'b': print integer X as if it's an unsigned byte.
8459 'c': print integer X as if it's an signed byte.
8460 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
8461 'f': "end" contiguous bitmask X in SImode.
8462 'h': print integer X as if it's a signed halfword.
8463 'i': print the first nonzero HImode part of X.
8464 'j': print the first HImode part unequal to -1 of X.
8465 'k': print the first nonzero SImode part of X.
8466 'm': print the first SImode part unequal to -1 of X.
8467 'o': print integer X as if it's an unsigned 32bit word.
8468 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
8469 't': CONST_INT: "start" of contiguous bitmask X in SImode.
8470 CONST_VECTOR: Generate a bitmask for vgbm instruction.
8471 'x': print integer X as if it's an unsigned halfword.
8472 'v': print register number as vector register (v1 instead of f1).
8473 'V': print the second word of a TFmode operand as vector register.
8477 print_operand (FILE *file
, rtx x
, int code
)
8484 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS
&& MEM_P (x
))
8486 if (MEM_ALIGN (x
) >= 128)
8487 fprintf (file
, ",4");
8488 else if (MEM_ALIGN (x
) == 64)
8489 fprintf (file
, ",3");
8493 fprintf (file
, s390_branch_condition_mnemonic (x
, FALSE
));
8497 fprintf (file
, s390_branch_condition_mnemonic (x
, TRUE
));
8501 if (GET_CODE (x
) == LE
)
8502 fprintf (file
, "l");
8503 else if (GET_CODE (x
) == GT
)
8504 fprintf (file
, "h");
8506 output_operand_lossage ("invalid comparison operator "
8507 "for 'E' output modifier");
8511 if (GET_CODE (x
) == SYMBOL_REF
)
8513 fprintf (file
, "%s", ":tls_load:");
8514 output_addr_const (file
, x
);
8516 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
8518 fprintf (file
, "%s", ":tls_gdcall:");
8519 output_addr_const (file
, XVECEXP (x
, 0, 0));
8521 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSLDM
)
8523 fprintf (file
, "%s", ":tls_ldcall:");
8524 const char *name
= get_some_local_dynamic_name ();
8526 assemble_name (file
, name
);
8529 output_operand_lossage ("invalid reference for 'J' output modifier");
8533 fprintf (file
, "%u", GET_MODE_SIZE (GET_MODE (x
)));
8538 struct s390_address ad
;
8541 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
8544 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8547 output_operand_lossage ("invalid address for 'O' output modifier");
8552 output_addr_const (file
, ad
.disp
);
8554 fprintf (file
, "0");
8560 struct s390_address ad
;
8563 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
8566 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8569 output_operand_lossage ("invalid address for 'R' output modifier");
8574 fprintf (file
, "%s", reg_names
[REGNO (ad
.base
)]);
8576 fprintf (file
, "0");
8582 struct s390_address ad
;
8587 output_operand_lossage ("memory reference expected for "
8588 "'S' output modifier");
8591 ret
= s390_decompose_address (XEXP (x
, 0), &ad
);
8594 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8597 output_operand_lossage ("invalid address for 'S' output modifier");
8602 output_addr_const (file
, ad
.disp
);
8604 fprintf (file
, "0");
8607 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
8612 if (GET_CODE (x
) == REG
)
8613 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
8614 else if (GET_CODE (x
) == MEM
)
8615 x
= change_address (x
, VOIDmode
,
8616 plus_constant (Pmode
, XEXP (x
, 0), 4));
8618 output_operand_lossage ("register or memory expression expected "
8619 "for 'N' output modifier");
8623 if (GET_CODE (x
) == REG
)
8624 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
8625 else if (GET_CODE (x
) == MEM
)
8626 x
= change_address (x
, VOIDmode
,
8627 plus_constant (Pmode
, XEXP (x
, 0), 8));
8629 output_operand_lossage ("register or memory expression expected "
8630 "for 'M' output modifier");
8634 print_shift_count_operand (file
, x
);
8638 /* Append @PLT to both local and non-local symbols in order to support
8639 Linux Kernel livepatching: patches contain individual functions and
8640 are loaded further than 2G away from vmlinux, and therefore they must
8641 call even static functions via PLT. ld will optimize @PLT away for
8642 normal code, and keep it for patches.
8644 Do not indiscriminately add @PLT in 31-bit mode due to the %r12
8645 restriction, use UNSPEC_PLT31 instead.
8647 @PLT only makes sense for functions, data is taken care of by
8648 -mno-pic-data-is-text-relative.
8650 Adding @PLT interferes with handling of weak symbols in non-PIC code,
8651 since their addresses are loaded with larl, which then always produces
8652 a non-NULL result, so skip them here as well. */
8654 && GET_CODE (x
) == SYMBOL_REF
8655 && SYMBOL_REF_FUNCTION_P (x
)
8656 && !(SYMBOL_REF_WEAK (x
) && !flag_pic
))
8657 fprintf (file
, "@PLT");
8661 switch (GET_CODE (x
))
8664 /* Print FP regs as fx instead of vx when they are accessed
8665 through non-vector mode. */
8666 if ((code
== 'v' || code
== 'V')
8667 || VECTOR_NOFP_REG_P (x
)
8668 || (FP_REG_P (x
) && VECTOR_MODE_P (GET_MODE (x
)))
8669 || (VECTOR_REG_P (x
)
8670 && (GET_MODE_SIZE (GET_MODE (x
)) /
8671 s390_class_max_nregs (FP_REGS
, GET_MODE (x
))) > 8))
8672 fprintf (file
, "%%v%s", reg_names
[REGNO (x
) + (code
== 'V')] + 2);
8674 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
8678 output_address (GET_MODE (x
), XEXP (x
, 0));
8685 output_addr_const (file
, x
);
8698 ival
= ((ival
& 0xff) ^ 0x80) - 0x80;
8704 ival
= ((ival
& 0xffff) ^ 0x8000) - 0x8000;
8707 ival
= s390_extract_part (x
, HImode
, 0);
8710 ival
= s390_extract_part (x
, HImode
, -1);
8713 ival
= s390_extract_part (x
, SImode
, 0);
8716 ival
= s390_extract_part (x
, SImode
, -1);
8728 len
= (code
== 's' || code
== 'e' ? 64 : 32);
8729 ok
= s390_contiguous_bitmask_p (ival
, true, len
, &start
, &end
);
8731 if (code
== 's' || code
== 't')
8738 output_operand_lossage ("invalid constant for output modifier '%c'", code
);
8740 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8743 case CONST_WIDE_INT
:
8745 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8746 CONST_WIDE_INT_ELT (x
, 0) & 0xff);
8747 else if (code
== 'x')
8748 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8749 CONST_WIDE_INT_ELT (x
, 0) & 0xffff);
8750 else if (code
== 'h')
8751 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8752 ((CONST_WIDE_INT_ELT (x
, 0) & 0xffff) ^ 0x8000) - 0x8000);
8756 output_operand_lossage ("invalid constant - try using "
8757 "an output modifier");
8759 output_operand_lossage ("invalid constant for output modifier '%c'",
8767 gcc_assert (const_vec_duplicate_p (x
));
8768 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8769 ((INTVAL (XVECEXP (x
, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8777 ok
= s390_contiguous_bitmask_vector_p (x
, &start
, &end
);
8779 ival
= (code
== 's') ? start
: end
;
8780 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8786 bool ok
= s390_bytemask_vector_p (x
, &mask
);
8788 fprintf (file
, "%u", mask
);
8793 output_operand_lossage ("invalid constant vector for output "
8794 "modifier '%c'", code
);
8800 output_operand_lossage ("invalid expression - try using "
8801 "an output modifier");
8803 output_operand_lossage ("invalid expression for output "
8804 "modifier '%c'", code
);
8809 /* Target hook for assembling integer objects. We need to define it
8810 here to work a round a bug in some versions of GAS, which couldn't
8811 handle values smaller than INT_MIN when printed in decimal. */
8814 s390_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
8816 if (size
== 8 && aligned_p
8817 && GET_CODE (x
) == CONST_INT
&& INTVAL (x
) < INT_MIN
)
8819 fprintf (asm_out_file
, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX
"\n",
8823 return default_assemble_integer (x
, size
, aligned_p
);
8826 /* Returns true if register REGNO is used for forming
8827 a memory address in expression X. */
8830 reg_used_in_mem_p (int regno
, rtx x
)
8832 enum rtx_code code
= GET_CODE (x
);
8838 if (refers_to_regno_p (regno
, XEXP (x
, 0)))
8841 else if (code
== SET
8842 && GET_CODE (SET_DEST (x
)) == PC
)
8844 if (refers_to_regno_p (regno
, SET_SRC (x
)))
8848 fmt
= GET_RTX_FORMAT (code
);
8849 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8852 && reg_used_in_mem_p (regno
, XEXP (x
, i
)))
8855 else if (fmt
[i
] == 'E')
8856 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8857 if (reg_used_in_mem_p (regno
, XVECEXP (x
, i
, j
)))
8863 /* Returns true if expression DEP_RTX sets an address register
8864 used by instruction INSN to address memory. */
8867 addr_generation_dependency_p (rtx dep_rtx
, rtx_insn
*insn
)
8871 if (NONJUMP_INSN_P (dep_rtx
))
8872 dep_rtx
= PATTERN (dep_rtx
);
8874 if (GET_CODE (dep_rtx
) == SET
)
8876 target
= SET_DEST (dep_rtx
);
8877 if (GET_CODE (target
) == STRICT_LOW_PART
)
8878 target
= XEXP (target
, 0);
8879 while (GET_CODE (target
) == SUBREG
)
8880 target
= SUBREG_REG (target
);
8882 if (GET_CODE (target
) == REG
)
8884 int regno
= REGNO (target
);
8886 if (s390_safe_attr_type (insn
) == TYPE_LA
)
8888 pat
= PATTERN (insn
);
8889 if (GET_CODE (pat
) == PARALLEL
)
8891 gcc_assert (XVECLEN (pat
, 0) == 2);
8892 pat
= XVECEXP (pat
, 0, 0);
8894 gcc_assert (GET_CODE (pat
) == SET
);
8895 return refers_to_regno_p (regno
, SET_SRC (pat
));
8897 else if (get_attr_atype (insn
) == ATYPE_AGEN
)
8898 return reg_used_in_mem_p (regno
, PATTERN (insn
));
8904 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8907 s390_agen_dep_p (rtx_insn
*dep_insn
, rtx_insn
*insn
)
8909 rtx dep_rtx
= PATTERN (dep_insn
);
8912 if (GET_CODE (dep_rtx
) == SET
8913 && addr_generation_dependency_p (dep_rtx
, insn
))
8915 else if (GET_CODE (dep_rtx
) == PARALLEL
)
8917 for (i
= 0; i
< XVECLEN (dep_rtx
, 0); i
++)
8919 if (addr_generation_dependency_p (XVECEXP (dep_rtx
, 0, i
), insn
))
8927 /* A C statement (sans semicolon) to update the integer scheduling priority
8928 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8929 reduce the priority to execute INSN later. Do not define this macro if
8930 you do not need to adjust the scheduling priorities of insns.
8932 A STD instruction should be scheduled earlier,
8933 in order to use the bypass. */
8935 s390_adjust_priority (rtx_insn
*insn
, int priority
)
8937 if (! INSN_P (insn
))
8940 if (s390_tune
<= PROCESSOR_2064_Z900
)
8943 switch (s390_safe_attr_type (insn
))
8947 priority
= priority
<< 3;
8951 priority
= priority
<< 1;
8960 /* The number of instructions that can be issued per cycle. */
8963 s390_issue_rate (void)
8967 case PROCESSOR_2084_Z990
:
8968 case PROCESSOR_2094_Z9_109
:
8969 case PROCESSOR_2094_Z9_EC
:
8970 case PROCESSOR_2817_Z196
:
8972 case PROCESSOR_2097_Z10
:
8974 case PROCESSOR_2064_Z900
:
8975 /* Starting with EC12 we use the sched_reorder hook to take care
8976 of instruction dispatch constraints. The algorithm only
8977 picks the best instruction and assumes only a single
8978 instruction gets issued per cycle. */
8979 case PROCESSOR_2827_ZEC12
:
8980 case PROCESSOR_2964_Z13
:
8981 case PROCESSOR_3906_Z14
:
8982 case PROCESSOR_8561_Z15
:
8983 case PROCESSOR_3931_Z16
:
/* Scheduler hook: how many insns the multipass DFA lookahead should
   consider in the first cycle.  */

static int
s390_first_cycle_multipass_dfa_lookahead (void)
{
  /* NOTE(review): the return value was dropped by extraction; 4 is the
     value used by this hook in the upstream source — confirm.  */
  return 4;
}
8996 annotate_constant_pool_refs_1 (rtx
*x
)
9001 gcc_assert (GET_CODE (*x
) != SYMBOL_REF
9002 || !CONSTANT_POOL_ADDRESS_P (*x
));
9004 /* Literal pool references can only occur inside a MEM ... */
9005 if (GET_CODE (*x
) == MEM
)
9007 rtx memref
= XEXP (*x
, 0);
9009 if (GET_CODE (memref
) == SYMBOL_REF
9010 && CONSTANT_POOL_ADDRESS_P (memref
))
9012 rtx base
= cfun
->machine
->base_reg
;
9013 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, memref
, base
),
9016 *x
= replace_equiv_address (*x
, addr
);
9020 if (GET_CODE (memref
) == CONST
9021 && GET_CODE (XEXP (memref
, 0)) == PLUS
9022 && GET_CODE (XEXP (XEXP (memref
, 0), 1)) == CONST_INT
9023 && GET_CODE (XEXP (XEXP (memref
, 0), 0)) == SYMBOL_REF
9024 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref
, 0), 0)))
9026 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (memref
, 0), 1));
9027 rtx sym
= XEXP (XEXP (memref
, 0), 0);
9028 rtx base
= cfun
->machine
->base_reg
;
9029 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
9032 *x
= replace_equiv_address (*x
, plus_constant (Pmode
, addr
, off
));
9037 /* ... or a load-address type pattern. */
9038 if (GET_CODE (*x
) == SET
)
9040 rtx addrref
= SET_SRC (*x
);
9042 if (GET_CODE (addrref
) == SYMBOL_REF
9043 && CONSTANT_POOL_ADDRESS_P (addrref
))
9045 rtx base
= cfun
->machine
->base_reg
;
9046 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addrref
, base
),
9049 SET_SRC (*x
) = addr
;
9053 if (GET_CODE (addrref
) == CONST
9054 && GET_CODE (XEXP (addrref
, 0)) == PLUS
9055 && GET_CODE (XEXP (XEXP (addrref
, 0), 1)) == CONST_INT
9056 && GET_CODE (XEXP (XEXP (addrref
, 0), 0)) == SYMBOL_REF
9057 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref
, 0), 0)))
9059 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (addrref
, 0), 1));
9060 rtx sym
= XEXP (XEXP (addrref
, 0), 0);
9061 rtx base
= cfun
->machine
->base_reg
;
9062 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
9065 SET_SRC (*x
) = plus_constant (Pmode
, addr
, off
);
9070 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
9071 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
9075 annotate_constant_pool_refs_1 (&XEXP (*x
, i
));
9077 else if (fmt
[i
] == 'E')
9079 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
9080 annotate_constant_pool_refs_1 (&XVECEXP (*x
, i
, j
));
9085 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
9086 Fix up MEMs as required.
9087 Skip insns which support relative addressing, because they do not use a base
9091 annotate_constant_pool_refs (rtx_insn
*insn
)
9093 if (s390_safe_relative_long_p (insn
))
9095 annotate_constant_pool_refs_1 (&PATTERN (insn
));
9099 find_constant_pool_ref_1 (rtx x
, rtx
*ref
)
9104 /* Likewise POOL_ENTRY insns. */
9105 if (GET_CODE (x
) == UNSPEC_VOLATILE
9106 && XINT (x
, 1) == UNSPECV_POOL_ENTRY
)
9109 gcc_assert (GET_CODE (x
) != SYMBOL_REF
9110 || !CONSTANT_POOL_ADDRESS_P (x
));
9112 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_LTREF
)
9114 rtx sym
= XVECEXP (x
, 0, 0);
9115 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
9116 && CONSTANT_POOL_ADDRESS_P (sym
));
9118 if (*ref
== NULL_RTX
)
9121 gcc_assert (*ref
== sym
);
9126 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9127 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9131 find_constant_pool_ref_1 (XEXP (x
, i
), ref
);
9133 else if (fmt
[i
] == 'E')
9135 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
9136 find_constant_pool_ref_1 (XVECEXP (x
, i
, j
), ref
);
9141 /* Find an annotated literal pool symbol referenced in INSN,
9142 and store it at REF. Will abort if INSN contains references to
9143 more than one such pool symbol; multiple references to the same
9144 symbol are allowed, however.
9146 The rtx pointed to by REF must be initialized to NULL_RTX
9147 by the caller before calling this routine.
9149 Skip insns which support relative addressing, because they do not use a base
9153 find_constant_pool_ref (rtx_insn
*insn
, rtx
*ref
)
9155 if (s390_safe_relative_long_p (insn
))
9157 find_constant_pool_ref_1 (PATTERN (insn
), ref
);
9161 replace_constant_pool_ref_1 (rtx
*x
, rtx ref
, rtx offset
)
9166 gcc_assert (*x
!= ref
);
9168 if (GET_CODE (*x
) == UNSPEC
9169 && XINT (*x
, 1) == UNSPEC_LTREF
9170 && XVECEXP (*x
, 0, 0) == ref
)
9172 *x
= gen_rtx_PLUS (Pmode
, XVECEXP (*x
, 0, 1), offset
);
9176 if (GET_CODE (*x
) == PLUS
9177 && GET_CODE (XEXP (*x
, 1)) == CONST_INT
9178 && GET_CODE (XEXP (*x
, 0)) == UNSPEC
9179 && XINT (XEXP (*x
, 0), 1) == UNSPEC_LTREF
9180 && XVECEXP (XEXP (*x
, 0), 0, 0) == ref
)
9182 rtx addr
= gen_rtx_PLUS (Pmode
, XVECEXP (XEXP (*x
, 0), 0, 1), offset
);
9183 *x
= plus_constant (Pmode
, addr
, INTVAL (XEXP (*x
, 1)));
9187 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
9188 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
9192 replace_constant_pool_ref_1 (&XEXP (*x
, i
), ref
, offset
);
9194 else if (fmt
[i
] == 'E')
9196 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
9197 replace_constant_pool_ref_1 (&XVECEXP (*x
, i
, j
), ref
, offset
);
9202 /* Replace every reference to the annotated literal pool
9203 symbol REF in INSN by its base plus OFFSET.
9204 Skip insns which support relative addressing, because they do not use a base
9208 replace_constant_pool_ref (rtx_insn
*insn
, rtx ref
, rtx offset
)
9210 if (s390_safe_relative_long_p (insn
))
9212 replace_constant_pool_ref_1 (&PATTERN (insn
), ref
, offset
);
9215 /* We keep a list of constants which we have to add to internal
9216 constant tables in the middle of large functions. */
9218 static machine_mode constant_modes
[] =
9220 TFmode
, FPRX2mode
, TImode
, TDmode
,
9221 V16QImode
, V8HImode
, V4SImode
, V2DImode
, V1TImode
,
9222 V4SFmode
, V2DFmode
, V1TFmode
,
9223 DFmode
, DImode
, DDmode
,
9224 V8QImode
, V4HImode
, V2SImode
, V1DImode
, V2SFmode
, V1DFmode
,
9225 SFmode
, SImode
, SDmode
,
9226 V4QImode
, V2HImode
, V1SImode
, V1SFmode
,
9232 #define NR_C_MODES (ARRAY_SIZE (constant_modes))
9236 struct constant
*next
;
9238 rtx_code_label
*label
;
9241 struct constant_pool
9243 struct constant_pool
*next
;
9244 rtx_insn
*first_insn
;
9245 rtx_insn
*pool_insn
;
9247 rtx_insn
*emit_pool_after
;
9249 struct constant
*constants
[NR_C_MODES
];
9250 struct constant
*execute
;
9251 rtx_code_label
*label
;
9255 /* Allocate new constant_pool structure. */
9257 static struct constant_pool
*
9258 s390_alloc_pool (void)
9260 struct constant_pool
*pool
;
9263 pool
= (struct constant_pool
*) xmalloc (sizeof *pool
);
9265 for (i
= 0; i
< NR_C_MODES
; i
++)
9266 pool
->constants
[i
] = NULL
;
9268 pool
->execute
= NULL
;
9269 pool
->label
= gen_label_rtx ();
9270 pool
->first_insn
= NULL
;
9271 pool
->pool_insn
= NULL
;
9272 pool
->insns
= BITMAP_ALLOC (NULL
);
9274 pool
->emit_pool_after
= NULL
;
9279 /* Create new constant pool covering instructions starting at INSN
9280 and chain it to the end of POOL_LIST. */
9282 static struct constant_pool
*
9283 s390_start_pool (struct constant_pool
**pool_list
, rtx_insn
*insn
)
9285 struct constant_pool
*pool
, **prev
;
9287 pool
= s390_alloc_pool ();
9288 pool
->first_insn
= insn
;
9290 for (prev
= pool_list
; *prev
; prev
= &(*prev
)->next
)
9297 /* End range of instructions covered by POOL at INSN and emit
9298 placeholder insn representing the pool. */
9301 s390_end_pool (struct constant_pool
*pool
, rtx_insn
*insn
)
9303 rtx pool_size
= GEN_INT (pool
->size
+ 8 /* alignment slop */);
9306 insn
= get_last_insn ();
9308 pool
->pool_insn
= emit_insn_after (gen_pool (pool_size
), insn
);
9309 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9312 /* Add INSN to the list of insns covered by POOL. */
9315 s390_add_pool_insn (struct constant_pool
*pool
, rtx insn
)
9317 bitmap_set_bit (pool
->insns
, INSN_UID (insn
));
9320 /* Return pool out of POOL_LIST that covers INSN. */
9322 static struct constant_pool
*
9323 s390_find_pool (struct constant_pool
*pool_list
, rtx insn
)
9325 struct constant_pool
*pool
;
9327 for (pool
= pool_list
; pool
; pool
= pool
->next
)
9328 if (bitmap_bit_p (pool
->insns
, INSN_UID (insn
)))
9334 /* Add constant VAL of mode MODE to the constant pool POOL. */
9337 s390_add_constant (struct constant_pool
*pool
, rtx val
, machine_mode mode
)
9342 for (i
= 0; i
< NR_C_MODES
; i
++)
9343 if (constant_modes
[i
] == mode
)
9345 gcc_assert (i
!= NR_C_MODES
);
9347 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
9348 if (rtx_equal_p (val
, c
->value
))
9353 c
= (struct constant
*) xmalloc (sizeof *c
);
9355 c
->label
= gen_label_rtx ();
9356 c
->next
= pool
->constants
[i
];
9357 pool
->constants
[i
] = c
;
9358 pool
->size
+= GET_MODE_SIZE (mode
);
9362 /* Return an rtx that represents the offset of X from the start of
9366 s390_pool_offset (struct constant_pool
*pool
, rtx x
)
9370 label
= gen_rtx_LABEL_REF (GET_MODE (x
), pool
->label
);
9371 x
= gen_rtx_UNSPEC (GET_MODE (x
), gen_rtvec (2, x
, label
),
9372 UNSPEC_POOL_OFFSET
);
9373 return gen_rtx_CONST (GET_MODE (x
), x
);
9376 /* Find constant VAL of mode MODE in the constant pool POOL.
9377 Return an RTX describing the distance from the start of
9378 the pool to the location of the new constant. */
9381 s390_find_constant (struct constant_pool
*pool
, rtx val
,
9387 for (i
= 0; i
< NR_C_MODES
; i
++)
9388 if (constant_modes
[i
] == mode
)
9390 gcc_assert (i
!= NR_C_MODES
);
9392 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
9393 if (rtx_equal_p (val
, c
->value
))
9398 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
9401 /* Check whether INSN is an execute. Return the label_ref to its
9402 execute target template if so, NULL_RTX otherwise. */
9405 s390_execute_label (rtx insn
)
9408 && GET_CODE (PATTERN (insn
)) == PARALLEL
9409 && GET_CODE (XVECEXP (PATTERN (insn
), 0, 0)) == UNSPEC
9410 && (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
9411 || XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE_JUMP
))
9413 if (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
)
9414 return XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 2);
9417 gcc_assert (JUMP_P (insn
));
9418 /* For jump insns as execute target:
9419 - There is one operand less in the parallel (the
9420 modification register of the execute is always 0).
9421 - The execute target label is wrapped into an
9422 if_then_else in order to hide it from jump analysis. */
9423 return XEXP (XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 0), 0);
9430 /* Find execute target for INSN in the constant pool POOL.
9431 Return an RTX describing the distance from the start of
9432 the pool to the location of the execute target. */
9435 s390_find_execute (struct constant_pool
*pool
, rtx insn
)
9439 for (c
= pool
->execute
; c
!= NULL
; c
= c
->next
)
9440 if (INSN_UID (insn
) == INSN_UID (c
->value
))
9445 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
9448 /* For an execute INSN, extract the execute target template. */
9451 s390_execute_target (rtx insn
)
9453 rtx pattern
= PATTERN (insn
);
9454 gcc_assert (s390_execute_label (insn
));
9456 if (XVECLEN (pattern
, 0) == 2)
9458 pattern
= copy_rtx (XVECEXP (pattern
, 0, 1));
9462 rtvec vec
= rtvec_alloc (XVECLEN (pattern
, 0) - 1);
9465 for (i
= 0; i
< XVECLEN (pattern
, 0) - 1; i
++)
9466 RTVEC_ELT (vec
, i
) = copy_rtx (XVECEXP (pattern
, 0, i
+ 1));
9468 pattern
= gen_rtx_PARALLEL (VOIDmode
, vec
);
9474 /* Indicate that INSN cannot be duplicated. This is the case for
9475 execute insns that carry a unique label. */
9478 s390_cannot_copy_insn_p (rtx_insn
*insn
)
9480 rtx label
= s390_execute_label (insn
);
9481 return label
&& label
!= const0_rtx
;
9484 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
9485 do not emit the pool base label. */
9488 s390_dump_pool (struct constant_pool
*pool
, bool remote_label
)
9491 rtx_insn
*insn
= pool
->pool_insn
;
9494 /* Switch to rodata section. */
9495 insn
= emit_insn_after (gen_pool_section_start (), insn
);
9496 INSN_ADDRESSES_NEW (insn
, -1);
9498 /* Ensure minimum pool alignment. */
9499 insn
= emit_insn_after (gen_pool_align (GEN_INT (8)), insn
);
9500 INSN_ADDRESSES_NEW (insn
, -1);
9502 /* Emit pool base label. */
9505 insn
= emit_label_after (pool
->label
, insn
);
9506 INSN_ADDRESSES_NEW (insn
, -1);
9509 /* Dump constants in descending alignment requirement order,
9510 ensuring proper alignment for every constant. */
9511 for (i
= 0; i
< NR_C_MODES
; i
++)
9512 for (c
= pool
->constants
[i
]; c
; c
= c
->next
)
9514 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
9515 rtx value
= copy_rtx (c
->value
);
9516 if (GET_CODE (value
) == CONST
9517 && GET_CODE (XEXP (value
, 0)) == UNSPEC
9518 && XINT (XEXP (value
, 0), 1) == UNSPEC_LTREL_OFFSET
9519 && XVECLEN (XEXP (value
, 0), 0) == 1)
9520 value
= s390_pool_offset (pool
, XVECEXP (XEXP (value
, 0), 0, 0));
9522 insn
= emit_label_after (c
->label
, insn
);
9523 INSN_ADDRESSES_NEW (insn
, -1);
9525 value
= gen_rtx_UNSPEC_VOLATILE (constant_modes
[i
],
9526 gen_rtvec (1, value
),
9527 UNSPECV_POOL_ENTRY
);
9528 insn
= emit_insn_after (value
, insn
);
9529 INSN_ADDRESSES_NEW (insn
, -1);
9532 /* Ensure minimum alignment for instructions. */
9533 insn
= emit_insn_after (gen_pool_align (GEN_INT (2)), insn
);
9534 INSN_ADDRESSES_NEW (insn
, -1);
9536 /* Output in-pool execute template insns. */
9537 for (c
= pool
->execute
; c
; c
= c
->next
)
9539 insn
= emit_label_after (c
->label
, insn
);
9540 INSN_ADDRESSES_NEW (insn
, -1);
9542 insn
= emit_insn_after (s390_execute_target (c
->value
), insn
);
9543 INSN_ADDRESSES_NEW (insn
, -1);
9546 /* Switch back to previous section. */
9547 insn
= emit_insn_after (gen_pool_section_end (), insn
);
9548 INSN_ADDRESSES_NEW (insn
, -1);
9550 insn
= emit_barrier_after (insn
);
9551 INSN_ADDRESSES_NEW (insn
, -1);
9553 /* Remove placeholder insn. */
9554 remove_insn (pool
->pool_insn
);
9557 /* Free all memory used by POOL. */
9560 s390_free_pool (struct constant_pool
*pool
)
9562 struct constant
*c
, *next
;
9565 for (i
= 0; i
< NR_C_MODES
; i
++)
9566 for (c
= pool
->constants
[i
]; c
; c
= next
)
9572 for (c
= pool
->execute
; c
; c
= next
)
9578 BITMAP_FREE (pool
->insns
);
9583 /* Collect main literal pool. Return NULL on overflow. */
9585 static struct constant_pool
*
9586 s390_mainpool_start (void)
9588 struct constant_pool
*pool
;
9591 pool
= s390_alloc_pool ();
9593 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9595 if (NONJUMP_INSN_P (insn
)
9596 && GET_CODE (PATTERN (insn
)) == SET
9597 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC_VOLATILE
9598 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPECV_MAIN_POOL
)
9600 /* There might be two main_pool instructions if base_reg
9601 is call-clobbered; one for shrink-wrapped code and one
9602 for the rest. We want to keep the first. */
9603 if (pool
->pool_insn
)
9605 insn
= PREV_INSN (insn
);
9606 delete_insn (NEXT_INSN (insn
));
9609 pool
->pool_insn
= insn
;
9612 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9614 rtx pool_ref
= NULL_RTX
;
9615 find_constant_pool_ref (insn
, &pool_ref
);
9618 rtx constant
= get_pool_constant (pool_ref
);
9619 machine_mode mode
= get_pool_mode (pool_ref
);
9620 s390_add_constant (pool
, constant
, mode
);
9624 /* If hot/cold partitioning is enabled we have to make sure that
9625 the literal pool is emitted in the same section where the
9626 initialization of the literal pool base pointer takes place.
9627 emit_pool_after is only used in the non-overflow case on non
9628 Z cpus where we can emit the literal pool at the end of the
9629 function body within the text section. */
9631 && NOTE_KIND (insn
) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9632 && !pool
->emit_pool_after
)
9633 pool
->emit_pool_after
= PREV_INSN (insn
);
9636 gcc_assert (pool
->pool_insn
|| pool
->size
== 0);
9638 if (pool
->size
>= 4096)
9640 /* We're going to chunkify the pool, so remove the main
9641 pool placeholder insn. */
9642 remove_insn (pool
->pool_insn
);
9644 s390_free_pool (pool
);
9648 /* If the functions ends with the section where the literal pool
9649 should be emitted set the marker to its end. */
9650 if (pool
&& !pool
->emit_pool_after
)
9651 pool
->emit_pool_after
= get_last_insn ();
9656 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9657 Modify the current function to output the pool constants as well as
9658 the pool register setup instruction. */
9661 s390_mainpool_finish (struct constant_pool
*pool
)
9663 rtx base_reg
= cfun
->machine
->base_reg
;
9667 /* If the pool is empty, we're done. */
9668 if (pool
->size
== 0)
9670 /* We don't actually need a base register after all. */
9671 cfun
->machine
->base_reg
= NULL_RTX
;
9673 if (pool
->pool_insn
)
9674 remove_insn (pool
->pool_insn
);
9675 s390_free_pool (pool
);
9679 /* We need correct insn addresses. */
9680 shorten_branches (get_insns ());
9682 /* Use a LARL to load the pool register. The pool is
9683 located in the .rodata section, so we emit it after the function. */
9684 set
= gen_main_base_64 (base_reg
, pool
->label
);
9685 insn
= emit_insn_after (set
, pool
->pool_insn
);
9686 INSN_ADDRESSES_NEW (insn
, -1);
9687 remove_insn (pool
->pool_insn
);
9689 insn
= get_last_insn ();
9690 pool
->pool_insn
= emit_insn_after (gen_pool (const0_rtx
), insn
);
9691 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9693 s390_dump_pool (pool
, 0);
9695 /* Replace all literal pool references. */
9697 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9699 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9701 rtx addr
, pool_ref
= NULL_RTX
;
9702 find_constant_pool_ref (insn
, &pool_ref
);
9705 if (s390_execute_label (insn
))
9706 addr
= s390_find_execute (pool
, insn
);
9708 addr
= s390_find_constant (pool
, get_pool_constant (pool_ref
),
9709 get_pool_mode (pool_ref
));
9711 replace_constant_pool_ref (insn
, pool_ref
, addr
);
9712 INSN_CODE (insn
) = -1;
9718 /* Free the pool. */
9719 s390_free_pool (pool
);
9722 /* Chunkify the literal pool. */
9724 #define S390_POOL_CHUNK_MIN 0xc00
9725 #define S390_POOL_CHUNK_MAX 0xe00
9727 static struct constant_pool
*
9728 s390_chunkify_start (void)
9730 struct constant_pool
*curr_pool
= NULL
, *pool_list
= NULL
;
9734 /* We need correct insn addresses. */
9736 shorten_branches (get_insns ());
9738 /* Scan all insns and move literals to pool chunks. */
9740 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9742 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9744 rtx pool_ref
= NULL_RTX
;
9745 find_constant_pool_ref (insn
, &pool_ref
);
9748 rtx constant
= get_pool_constant (pool_ref
);
9749 machine_mode mode
= get_pool_mode (pool_ref
);
9752 curr_pool
= s390_start_pool (&pool_list
, insn
);
9754 s390_add_constant (curr_pool
, constant
, mode
);
9755 s390_add_pool_insn (curr_pool
, insn
);
9759 if (JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
) || LABEL_P (insn
))
9762 s390_add_pool_insn (curr_pool
, insn
);
9765 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_VAR_LOCATION
)
9769 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn
)
9770 || INSN_ADDRESSES (INSN_UID (insn
)) == -1)
9773 if (curr_pool
->size
< S390_POOL_CHUNK_MAX
)
9776 s390_end_pool (curr_pool
, NULL
);
9781 s390_end_pool (curr_pool
, NULL
);
9783 /* Find all labels that are branched into
9784 from an insn belonging to a different chunk. */
9786 far_labels
= BITMAP_ALLOC (NULL
);
9788 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9790 rtx_jump_table_data
*table
;
9792 /* Labels marked with LABEL_PRESERVE_P can be target
9793 of non-local jumps, so we have to mark them.
9794 The same holds for named labels.
9796 Don't do that, however, if it is the label before
9800 && (LABEL_PRESERVE_P (insn
) || LABEL_NAME (insn
)))
9802 rtx_insn
*vec_insn
= NEXT_INSN (insn
);
9803 if (! vec_insn
|| ! JUMP_TABLE_DATA_P (vec_insn
))
9804 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (insn
));
9806 /* Check potential targets in a table jump (casesi_jump). */
9807 else if (tablejump_p (insn
, NULL
, &table
))
9809 rtx vec_pat
= PATTERN (table
);
9810 int i
, diff_p
= GET_CODE (vec_pat
) == ADDR_DIFF_VEC
;
9812 for (i
= 0; i
< XVECLEN (vec_pat
, diff_p
); i
++)
9814 rtx label
= XEXP (XVECEXP (vec_pat
, diff_p
, i
), 0);
9816 if (s390_find_pool (pool_list
, label
)
9817 != s390_find_pool (pool_list
, insn
))
9818 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9821 /* If we have a direct jump (conditional or unconditional),
9822 check all potential targets. */
9823 else if (JUMP_P (insn
))
9825 rtx pat
= PATTERN (insn
);
9827 if (GET_CODE (pat
) == PARALLEL
)
9828 pat
= XVECEXP (pat
, 0, 0);
9830 if (GET_CODE (pat
) == SET
)
9832 rtx label
= JUMP_LABEL (insn
);
9833 if (label
&& !ANY_RETURN_P (label
))
9835 if (s390_find_pool (pool_list
, label
)
9836 != s390_find_pool (pool_list
, insn
))
9837 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9843 /* Insert base register reload insns before every pool. */
9845 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9847 rtx new_insn
= gen_reload_base_64 (cfun
->machine
->base_reg
,
9849 rtx_insn
*insn
= curr_pool
->first_insn
;
9850 INSN_ADDRESSES_NEW (emit_insn_before (new_insn
, insn
), -1);
9853 /* Insert base register reload insns at every far label. */
9855 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9857 && bitmap_bit_p (far_labels
, CODE_LABEL_NUMBER (insn
)))
9859 struct constant_pool
*pool
= s390_find_pool (pool_list
, insn
);
9862 rtx new_insn
= gen_reload_base_64 (cfun
->machine
->base_reg
,
9864 INSN_ADDRESSES_NEW (emit_insn_after (new_insn
, insn
), -1);
9869 BITMAP_FREE (far_labels
);
9872 /* Recompute insn addresses. */
9874 init_insn_lengths ();
9875 shorten_branches (get_insns ());
9880 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9881 After we have decided to use this list, finish implementing
9882 all changes to the current function as required. */
9885 s390_chunkify_finish (struct constant_pool
*pool_list
)
9887 struct constant_pool
*curr_pool
= NULL
;
9891 /* Replace all literal pool references. */
9893 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9895 curr_pool
= s390_find_pool (pool_list
, insn
);
9899 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9901 rtx addr
, pool_ref
= NULL_RTX
;
9902 find_constant_pool_ref (insn
, &pool_ref
);
9905 if (s390_execute_label (insn
))
9906 addr
= s390_find_execute (curr_pool
, insn
);
9908 addr
= s390_find_constant (curr_pool
,
9909 get_pool_constant (pool_ref
),
9910 get_pool_mode (pool_ref
));
9912 replace_constant_pool_ref (insn
, pool_ref
, addr
);
9913 INSN_CODE (insn
) = -1;
9918 /* Dump out all literal pools. */
9920 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9921 s390_dump_pool (curr_pool
, 0);
9923 /* Free pool list. */
9927 struct constant_pool
*next
= pool_list
->next
;
9928 s390_free_pool (pool_list
);
9933 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9936 s390_output_pool_entry (rtx exp
, machine_mode mode
, unsigned int align
)
9938 switch (GET_MODE_CLASS (mode
))
9941 case MODE_DECIMAL_FLOAT
:
9942 gcc_assert (GET_CODE (exp
) == CONST_DOUBLE
);
9944 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp
),
9945 as_a
<scalar_float_mode
> (mode
), align
);
9949 assemble_integer (exp
, GET_MODE_SIZE (mode
), align
, 1);
9950 mark_symbol_refs_as_used (exp
);
9953 case MODE_VECTOR_INT
:
9954 case MODE_VECTOR_FLOAT
:
9957 machine_mode inner_mode
;
9958 gcc_assert (GET_CODE (exp
) == CONST_VECTOR
);
9960 inner_mode
= GET_MODE_INNER (GET_MODE (exp
));
9961 for (i
= 0; i
< XVECLEN (exp
, 0); i
++)
9962 s390_output_pool_entry (XVECEXP (exp
, 0, i
),
9966 : GET_MODE_BITSIZE (inner_mode
));
9975 /* Return true if MEM refers to an integer constant in the literal pool. If
9976 VAL is not nullptr, then also fill it with the constant's value. */
9979 s390_const_int_pool_entry_p (rtx mem
, HOST_WIDE_INT
*val
)
9981 /* Try to match the following:
9982 - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
9983 - (mem (symbol_ref)). */
9988 rtx addr
= XEXP (mem
, 0);
9990 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LTREF
)
9991 sym
= XVECEXP (addr
, 0, 0);
9995 if (!SYMBOL_REF_P (sym
) || !CONSTANT_POOL_ADDRESS_P (sym
))
9998 rtx val_rtx
= get_pool_constant (sym
);
9999 if (!CONST_INT_P (val_rtx
))
10002 if (val
!= nullptr)
10003 *val
= INTVAL (val_rtx
);
10007 /* Return an RTL expression representing the value of the return address
10008 for the frame COUNT steps up from the current frame. FRAME is the
10009 frame pointer of that frame. */
10012 s390_return_addr_rtx (int count
, rtx frame ATTRIBUTE_UNUSED
)
10017 /* Without backchain, we fail for all but the current frame. */
10019 if (!TARGET_BACKCHAIN
&& count
> 0)
10022 /* For the current frame, we need to make sure the initial
10023 value of RETURN_REGNUM is actually saved. */
10026 return get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
);
10028 if (TARGET_PACKED_STACK
)
10029 offset
= -2 * UNITS_PER_LONG
;
10031 offset
= RETURN_REGNUM
* UNITS_PER_LONG
;
10033 addr
= plus_constant (Pmode
, frame
, offset
);
10034 addr
= memory_address (Pmode
, addr
);
10035 return gen_rtx_MEM (Pmode
, addr
);
10038 /* Return an RTL expression representing the back chain stored in
10039 the current stack frame. */
10042 s390_back_chain_rtx (void)
10046 gcc_assert (TARGET_BACKCHAIN
);
10048 if (TARGET_PACKED_STACK
)
10049 chain
= plus_constant (Pmode
, stack_pointer_rtx
,
10050 STACK_POINTER_OFFSET
- UNITS_PER_LONG
);
10052 chain
= stack_pointer_rtx
;
10054 chain
= gen_rtx_MEM (Pmode
, chain
);
10058 /* Find first call clobbered register unused in a function.
10059 This could be used as base register in a leaf function
10060 or for holding the return address before epilogue. */
10063 find_unused_clobbered_reg (void)
10066 for (i
= 0; i
< 6; i
++)
10067 if (!df_regs_ever_live_p (i
))
10073 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
10074 clobbered hard regs in SETREG. */
10077 s390_reg_clobbered_rtx (rtx setreg
, const_rtx set_insn ATTRIBUTE_UNUSED
, void *data
)
10079 char *regs_ever_clobbered
= (char *)data
;
10080 unsigned int i
, regno
;
10081 machine_mode mode
= GET_MODE (setreg
);
10083 if (GET_CODE (setreg
) == SUBREG
)
10085 rtx inner
= SUBREG_REG (setreg
);
10086 if (!GENERAL_REG_P (inner
) && !FP_REG_P (inner
))
10088 regno
= subreg_regno (setreg
);
10090 else if (GENERAL_REG_P (setreg
) || FP_REG_P (setreg
))
10091 regno
= REGNO (setreg
);
10096 i
< end_hard_regno (mode
, regno
);
10098 regs_ever_clobbered
[i
] = 1;
10101 /* Walks through all basic blocks of the current function looking
10102 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
10103 of the passed integer array REGS_EVER_CLOBBERED are set to one for
10104 each of those regs. */
10107 s390_regs_ever_clobbered (char regs_ever_clobbered
[])
10109 basic_block cur_bb
;
10110 rtx_insn
*cur_insn
;
10113 memset (regs_ever_clobbered
, 0, 32);
10115 /* For non-leaf functions we have to consider all call clobbered regs to be
10117 if (!crtl
->is_leaf
)
10119 for (i
= 0; i
< 32; i
++)
10120 regs_ever_clobbered
[i
] = call_used_regs
[i
];
10123 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
10124 this work is done by liveness analysis (mark_regs_live_at_end).
10125 Special care is needed for functions containing landing pads. Landing pads
10126 may use the eh registers, but the code which sets these registers is not
10127 contained in that function. Hence s390_regs_ever_clobbered is not able to
10128 deal with this automatically. */
10129 if (crtl
->calls_eh_return
|| cfun
->machine
->has_landing_pad_p
)
10130 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; i
++)
10131 if (crtl
->calls_eh_return
10132 || (cfun
->machine
->has_landing_pad_p
10133 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i
))))
10134 regs_ever_clobbered
[EH_RETURN_DATA_REGNO (i
)] = 1;
10136 /* For nonlocal gotos all call-saved registers have to be saved.
10137 This flag is also set for the unwinding code in libgcc.
10138 See expand_builtin_unwind_init. For regs_ever_live this is done by
10140 if (crtl
->saves_all_registers
)
10141 for (i
= 0; i
< 32; i
++)
10142 if (!call_used_regs
[i
])
10143 regs_ever_clobbered
[i
] = 1;
10145 FOR_EACH_BB_FN (cur_bb
, cfun
)
10147 FOR_BB_INSNS (cur_bb
, cur_insn
)
10151 if (!INSN_P (cur_insn
))
10154 pat
= PATTERN (cur_insn
);
10156 /* Ignore GPR restore insns. */
10157 if (epilogue_completed
&& RTX_FRAME_RELATED_P (cur_insn
))
10159 if (GET_CODE (pat
) == SET
10160 && GENERAL_REG_P (SET_DEST (pat
)))
10163 if (GET_MODE (SET_SRC (pat
)) == DImode
10164 && FP_REG_P (SET_SRC (pat
)))
10168 if (GET_CODE (SET_SRC (pat
)) == MEM
)
10173 if (GET_CODE (pat
) == PARALLEL
10174 && load_multiple_operation (pat
, VOIDmode
))
10178 note_stores (cur_insn
,
10179 s390_reg_clobbered_rtx
,
10180 regs_ever_clobbered
);
10185 /* Determine the frame area which actually has to be accessed
10186 in the function epilogue. The values are stored at the
10187 given pointers AREA_BOTTOM (address of the lowest used stack
10188 address) and AREA_TOP (address of the first item which does
10189 not belong to the stack frame). */
10192 s390_frame_area (int *area_bottom
, int *area_top
)
10199 if (cfun_frame_layout
.first_restore_gpr
!= -1)
10201 b
= (cfun_frame_layout
.gprs_offset
10202 + cfun_frame_layout
.first_restore_gpr
* UNITS_PER_LONG
);
10203 t
= b
+ (cfun_frame_layout
.last_restore_gpr
10204 - cfun_frame_layout
.first_restore_gpr
+ 1) * UNITS_PER_LONG
;
10207 if (TARGET_64BIT
&& cfun_save_high_fprs_p
)
10209 b
= MIN (b
, cfun_frame_layout
.f8_offset
);
10210 t
= MAX (t
, (cfun_frame_layout
.f8_offset
10211 + cfun_frame_layout
.high_fprs
* 8));
10216 if (cfun_fpr_save_p (FPR4_REGNUM
))
10218 b
= MIN (b
, cfun_frame_layout
.f4_offset
);
10219 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 8);
10221 if (cfun_fpr_save_p (FPR6_REGNUM
))
10223 b
= MIN (b
, cfun_frame_layout
.f4_offset
+ 8);
10224 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 16);
10230 /* Update gpr_save_slots in the frame layout trying to make use of
10231 FPRs as GPR save slots.
10232 This is a helper routine of s390_register_info. */
10235 s390_register_info_gprtofpr ()
10237 int save_reg_slot
= FPR0_REGNUM
;
10240 if (TARGET_TPF
|| !TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
10243 /* builtin_eh_return needs to be able to modify the return address
10244 on the stack. It could also adjust the FPR save slot instead but
10245 is it worth the trouble?! */
10246 if (crtl
->calls_eh_return
)
10249 for (i
= 15; i
>= 6; i
--)
10251 if (cfun_gpr_save_slot (i
) == SAVE_SLOT_NONE
)
10254 /* Advance to the next FP register which can be used as a
10256 while ((!call_used_regs
[save_reg_slot
]
10257 || df_regs_ever_live_p (save_reg_slot
)
10258 || cfun_fpr_save_p (save_reg_slot
))
10259 && FP_REGNO_P (save_reg_slot
))
10261 if (!FP_REGNO_P (save_reg_slot
))
10263 /* We only want to use ldgr/lgdr if we can get rid of
10264 stm/lm entirely. So undo the gpr slot allocation in
10265 case we ran out of FPR save slots. */
10266 for (j
= 6; j
<= 15; j
++)
10267 if (FP_REGNO_P (cfun_gpr_save_slot (j
)))
10268 cfun_gpr_save_slot (j
) = SAVE_SLOT_STACK
;
10271 cfun_gpr_save_slot (i
) = save_reg_slot
++;
10275 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
10276 stdarg or -mpreserve-args.
10277 This is a helper routine for s390_register_info. */
10279 s390_register_info_arg_fpr ()
10282 int min_stdarg_fpr
= INT_MAX
, max_stdarg_fpr
= -1;
10283 int min_preserve_fpr
= INT_MAX
, max_preserve_fpr
= -1;
10284 int min_fpr
, max_fpr
;
10286 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
10287 f0-f4 for 64 bit. */
10289 && TARGET_HARD_FLOAT
10290 && cfun
->va_list_fpr_size
10291 && crtl
->args
.info
.fprs
< FP_ARG_NUM_REG
)
10293 min_stdarg_fpr
= crtl
->args
.info
.fprs
;
10294 max_stdarg_fpr
= min_stdarg_fpr
+ cfun
->va_list_fpr_size
- 1;
10295 if (max_stdarg_fpr
>= FP_ARG_NUM_REG
)
10296 max_stdarg_fpr
= FP_ARG_NUM_REG
- 1;
10298 /* FPR argument regs start at f0. */
10299 min_stdarg_fpr
+= FPR0_REGNUM
;
10300 max_stdarg_fpr
+= FPR0_REGNUM
;
10303 if (s390_preserve_args_p
&& crtl
->args
.info
.fprs
)
10305 min_preserve_fpr
= FPR0_REGNUM
;
10306 max_preserve_fpr
= MIN (FPR0_REGNUM
+ FP_ARG_NUM_REG
- 1,
10307 FPR0_REGNUM
+ crtl
->args
.info
.fprs
- 1);
10310 min_fpr
= MIN (min_stdarg_fpr
, min_preserve_fpr
);
10311 max_fpr
= MAX (max_stdarg_fpr
, max_preserve_fpr
);
10316 for (i
= min_fpr
; i
<= max_fpr
; i
++)
10317 cfun_set_fpr_save (i
);
10321 /* Reserve the GPR save slots for GPRs which need to be saved due to
10322 stdarg or -mpreserve-args.
10323 This is a helper routine for s390_register_info. */
10326 s390_register_info_arg_gpr ()
10329 int min_stdarg_gpr
= INT_MAX
, max_stdarg_gpr
= -1;
10330 int min_preserve_gpr
= INT_MAX
, max_preserve_gpr
= -1;
10331 int min_gpr
, max_gpr
;
10334 && cfun
->va_list_gpr_size
10335 && crtl
->args
.info
.gprs
< GP_ARG_NUM_REG
)
10337 min_stdarg_gpr
= crtl
->args
.info
.gprs
;
10338 max_stdarg_gpr
= min_stdarg_gpr
+ cfun
->va_list_gpr_size
- 1;
10339 if (max_stdarg_gpr
>= GP_ARG_NUM_REG
)
10340 max_stdarg_gpr
= GP_ARG_NUM_REG
- 1;
10342 /* GPR argument regs start at r2. */
10343 min_stdarg_gpr
+= GPR2_REGNUM
;
10344 max_stdarg_gpr
+= GPR2_REGNUM
;
10347 if (s390_preserve_args_p
&& crtl
->args
.info
.gprs
)
10349 min_preserve_gpr
= GPR2_REGNUM
;
10350 max_preserve_gpr
= MIN (GPR6_REGNUM
,
10351 GPR2_REGNUM
+ crtl
->args
.info
.gprs
- 1);
10354 min_gpr
= MIN (min_stdarg_gpr
, min_preserve_gpr
);
10355 max_gpr
= MAX (max_stdarg_gpr
, max_preserve_gpr
);
10360 /* If r6 was supposed to be saved into an FPR and now needs to go to
10361 the stack for vararg we have to adjust the restore range to make
10362 sure that the restore is done from stack as well. */
10363 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM
))
10364 && min_gpr
<= GPR6_REGNUM
10365 && max_gpr
>= GPR6_REGNUM
)
10367 if (cfun_frame_layout
.first_restore_gpr
== -1
10368 || cfun_frame_layout
.first_restore_gpr
> GPR6_REGNUM
)
10369 cfun_frame_layout
.first_restore_gpr
= GPR6_REGNUM
;
10370 if (cfun_frame_layout
.last_restore_gpr
== -1
10371 || cfun_frame_layout
.last_restore_gpr
< GPR6_REGNUM
)
10372 cfun_frame_layout
.last_restore_gpr
= GPR6_REGNUM
;
10375 if (cfun_frame_layout
.first_save_gpr
== -1
10376 || cfun_frame_layout
.first_save_gpr
> min_gpr
)
10377 cfun_frame_layout
.first_save_gpr
= min_gpr
;
10379 if (cfun_frame_layout
.last_save_gpr
== -1
10380 || cfun_frame_layout
.last_save_gpr
< max_gpr
)
10381 cfun_frame_layout
.last_save_gpr
= max_gpr
;
10383 for (i
= min_gpr
; i
<= max_gpr
; i
++)
10384 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
10387 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
10388 prologue and epilogue. */
10391 s390_register_info_set_ranges ()
10395 /* Find the first and the last save slot supposed to use the stack
10396 to set the restore range.
10397 Vararg regs might be marked as save to stack but only the
10398 call-saved regs really need restoring (i.e. r6). This code
10399 assumes that the vararg regs have not yet been recorded in
10400 cfun_gpr_save_slot. */
10401 for (i
= 0; i
< 16 && cfun_gpr_save_slot (i
) != SAVE_SLOT_STACK
; i
++);
10402 for (j
= 15; j
> i
&& cfun_gpr_save_slot (j
) != SAVE_SLOT_STACK
; j
--);
10403 cfun_frame_layout
.first_restore_gpr
= (i
== 16) ? -1 : i
;
10404 cfun_frame_layout
.last_restore_gpr
= (i
== 16) ? -1 : j
;
10405 cfun_frame_layout
.first_save_gpr
= (i
== 16) ? -1 : i
;
10406 cfun_frame_layout
.last_save_gpr
= (i
== 16) ? -1 : j
;
10409 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10410 for registers which need to be saved in function prologue.
10411 This function can be used until the insns emitted for save/restore
10412 of the regs are visible in the RTL stream. */
10415 s390_register_info ()
10418 char clobbered_regs
[32];
10420 gcc_assert (!epilogue_completed
);
10422 if (reload_completed
)
10423 /* After reload we rely on our own routine to determine which
10424 registers need saving. */
10425 s390_regs_ever_clobbered (clobbered_regs
);
10427 /* During reload we use regs_ever_live as a base since reload
10428 does changes in there which we otherwise would not be aware
10430 for (i
= 0; i
< 32; i
++)
10431 clobbered_regs
[i
] = df_regs_ever_live_p (i
);
10433 for (i
= 0; i
< 32; i
++)
10434 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_regs
[i
];
10436 /* Mark the call-saved FPRs which need to be saved.
10437 This needs to be done before checking the special GPRs since the
10438 stack pointer usage depends on whether high FPRs have to be saved
10440 cfun_frame_layout
.fpr_bitmap
= 0;
10441 cfun_frame_layout
.high_fprs
= 0;
10442 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
10443 if (clobbered_regs
[i
] && !call_used_regs
[i
])
10445 cfun_set_fpr_save (i
);
10446 if (i
>= FPR8_REGNUM
)
10447 cfun_frame_layout
.high_fprs
++;
10450 /* Register 12 is used for GOT address, but also as temp in prologue
10451 for split-stack stdarg functions (unless r14 is available). */
10453 |= ((flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
10454 || (flag_split_stack
&& cfun
->stdarg
10455 && (crtl
->is_leaf
|| TARGET_TPF_PROFILING
10456 || has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
))));
10458 clobbered_regs
[BASE_REGNUM
]
10459 |= (cfun
->machine
->base_reg
10460 && REGNO (cfun
->machine
->base_reg
) == BASE_REGNUM
);
10462 clobbered_regs
[HARD_FRAME_POINTER_REGNUM
]
10463 |= !!frame_pointer_needed
;
10465 /* On pre z900 machines this might take until machine dependent
10467 save_return_addr_p will only be set on non-zarch machines so
10468 there is no risk that r14 goes into an FPR instead of a stack
10470 clobbered_regs
[RETURN_REGNUM
]
10472 || TARGET_TPF_PROFILING
10473 || cfun_frame_layout
.save_return_addr_p
10474 || crtl
->calls_eh_return
);
10476 clobbered_regs
[STACK_POINTER_REGNUM
]
10478 || TARGET_TPF_PROFILING
10479 || cfun_save_high_fprs_p
10480 || get_frame_size () > 0
10481 || (reload_completed
&& cfun_frame_layout
.frame_size
> 0)
10482 || cfun
->calls_alloca
);
10484 memset (cfun_frame_layout
.gpr_save_slots
, SAVE_SLOT_NONE
, 16);
10486 for (i
= 0; i
< 16; i
++)
10487 if (clobbered_regs
[i
] && !call_used_regs
[i
])
10488 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
10490 s390_register_info_arg_fpr ();
10491 s390_register_info_gprtofpr ();
10492 s390_register_info_set_ranges ();
10494 /* Forcing argument registers to be saved on the stack might
10495 override the GPR->FPR save decision for r6 so this must come
10497 s390_register_info_arg_gpr ();
10500 /* Return true if REGNO is a global register, but not one
10501 of the special ones that need to be saved/restored in anyway. */
10504 global_not_special_regno_p (int regno
)
10506 return (global_regs
[regno
]
10507 /* These registers are special and need to be
10508 restored in any case. */
10509 && !(regno
== STACK_POINTER_REGNUM
10510 || regno
== RETURN_REGNUM
10511 || regno
== BASE_REGNUM
10512 || (flag_pic
&& regno
== (int)PIC_OFFSET_TABLE_REGNUM
)));
10515 /* This function is called by s390_optimize_prologue in order to get
10516 rid of unnecessary GPR save/restore instructions. The register info
10517 for the GPRs is re-computed and the ranges are re-calculated. */
10520 s390_optimize_register_info ()
10522 char clobbered_regs
[32];
10525 gcc_assert (epilogue_completed
);
10527 s390_regs_ever_clobbered (clobbered_regs
);
10529 /* Global registers do not need to be saved and restored unless it
10530 is one of our special regs. (r12, r13, r14, or r15). */
10531 for (i
= 0; i
< 32; i
++)
10532 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_not_special_regno_p (i
);
10534 /* There is still special treatment needed for cases invisible to
10535 s390_regs_ever_clobbered. */
10536 clobbered_regs
[RETURN_REGNUM
]
10537 |= (TARGET_TPF_PROFILING
10538 /* When expanding builtin_return_addr in ESA mode we do not
10539 know whether r14 will later be needed as scratch reg when
10540 doing branch splitting. So the builtin always accesses the
10541 r14 save slot and we need to stick to the save/restore
10542 decision for r14 even if it turns out that it didn't get
10544 || cfun_frame_layout
.save_return_addr_p
10545 || crtl
->calls_eh_return
);
10547 for (i
= 0; i
< 16; i
++)
10548 if (!clobbered_regs
[i
] || call_used_regs
[i
])
10549 cfun_gpr_save_slot (i
) = SAVE_SLOT_NONE
;
10551 s390_register_info_set_ranges ();
10552 s390_register_info_arg_gpr ();
10555 /* Fill cfun->machine with info about frame of current function. */
10558 s390_frame_info (void)
10560 HOST_WIDE_INT lowest_offset
;
10562 cfun_frame_layout
.first_save_gpr_slot
= cfun_frame_layout
.first_save_gpr
;
10563 cfun_frame_layout
.last_save_gpr_slot
= cfun_frame_layout
.last_save_gpr
;
10565 /* The va_arg builtin uses a constant distance of 16 *
10566 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10567 pointer. So even if we are going to save the stack pointer in an
10568 FPR we need the stack space in order to keep the offsets
10570 if (cfun
->stdarg
&& cfun_save_arg_fprs_p
)
10572 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
10574 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
10575 cfun_frame_layout
.first_save_gpr_slot
= STACK_POINTER_REGNUM
;
10578 cfun_frame_layout
.frame_size
= get_frame_size ();
10579 if (!TARGET_64BIT
&& cfun_frame_layout
.frame_size
> 0x7fff0000)
10580 fatal_error (input_location
,
10581 "total size of local variables exceeds architecture limit");
10583 if (!TARGET_PACKED_STACK
)
10585 /* Fixed stack layout. */
10586 cfun_frame_layout
.backchain_offset
= 0;
10587 cfun_frame_layout
.f0_offset
= 16 * UNITS_PER_LONG
;
10588 cfun_frame_layout
.f4_offset
= cfun_frame_layout
.f0_offset
+ 2 * 8;
10589 cfun_frame_layout
.f8_offset
= -cfun_frame_layout
.high_fprs
* 8;
10590 cfun_frame_layout
.gprs_offset
= (cfun_frame_layout
.first_save_gpr_slot
10593 else if (TARGET_BACKCHAIN
)
10595 /* Kernel stack layout - packed stack, backchain, no float */
10596 gcc_assert (TARGET_SOFT_FLOAT
);
10597 cfun_frame_layout
.backchain_offset
= (STACK_POINTER_OFFSET
10600 /* The distance between the backchain and the return address
10601 save slot must not change. So we always need a slot for the
10602 stack pointer which resides in between. */
10603 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
10605 cfun_frame_layout
.gprs_offset
10606 = cfun_frame_layout
.backchain_offset
- cfun_gprs_save_area_size
;
10608 /* FPRs will not be saved. Nevertheless pick sane values to
10609 keep area calculations valid. */
10610 cfun_frame_layout
.f0_offset
=
10611 cfun_frame_layout
.f4_offset
=
10612 cfun_frame_layout
.f8_offset
= cfun_frame_layout
.gprs_offset
;
10618 /* Packed stack layout without backchain. */
10620 /* With stdarg FPRs need their dedicated slots. */
10621 num_fprs
= (TARGET_64BIT
&& cfun
->stdarg
? 2
10622 : (cfun_fpr_save_p (FPR4_REGNUM
) +
10623 cfun_fpr_save_p (FPR6_REGNUM
)));
10624 cfun_frame_layout
.f4_offset
= STACK_POINTER_OFFSET
- 8 * num_fprs
;
10626 num_fprs
= (cfun
->stdarg
? 2
10627 : (cfun_fpr_save_p (FPR0_REGNUM
)
10628 + cfun_fpr_save_p (FPR2_REGNUM
)));
10629 cfun_frame_layout
.f0_offset
= cfun_frame_layout
.f4_offset
- 8 * num_fprs
;
10631 cfun_frame_layout
.gprs_offset
10632 = cfun_frame_layout
.f0_offset
- cfun_gprs_save_area_size
;
10634 cfun_frame_layout
.f8_offset
= (cfun_frame_layout
.gprs_offset
10635 - cfun_frame_layout
.high_fprs
* 8);
10638 if (cfun_save_high_fprs_p
)
10639 cfun_frame_layout
.frame_size
+= cfun_frame_layout
.high_fprs
* 8;
10641 if (!crtl
->is_leaf
)
10642 cfun_frame_layout
.frame_size
+= crtl
->outgoing_args_size
;
10644 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10645 sized area at the bottom of the stack. This is required also for
10646 leaf functions. When GCC generates a local stack reference it
10647 will always add STACK_POINTER_OFFSET to all these references. */
10649 && !TARGET_TPF_PROFILING
10650 && cfun_frame_layout
.frame_size
== 0
10651 && !cfun
->calls_alloca
)
10654 /* Calculate the number of bytes we have used in our own register
10655 save area. With the packed stack layout we can re-use the
10656 remaining bytes for normal stack elements. */
10658 if (TARGET_PACKED_STACK
)
10659 lowest_offset
= MIN (MIN (cfun_frame_layout
.f0_offset
,
10660 cfun_frame_layout
.f4_offset
),
10661 cfun_frame_layout
.gprs_offset
);
10665 if (TARGET_BACKCHAIN
)
10666 lowest_offset
= MIN (lowest_offset
, cfun_frame_layout
.backchain_offset
);
10668 cfun_frame_layout
.frame_size
+= STACK_POINTER_OFFSET
- lowest_offset
;
10670 /* If under 31 bit an odd number of gprs has to be saved we have to
10671 adjust the frame size to sustain 8 byte alignment of stack
10673 cfun_frame_layout
.frame_size
= ((cfun_frame_layout
.frame_size
+
10674 STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
10675 & ~(STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
10678 /* Generate frame layout. Fills in register and frame data for the current
10679 function in cfun->machine. This routine can be called multiple times;
10680 it will re-do the complete frame layout every time. */
10683 s390_init_frame_layout (void)
10685 HOST_WIDE_INT frame_size
;
10688 /* After LRA the frame layout is supposed to be read-only and should
10689 not be re-computed. */
10690 if (reload_completed
)
10695 frame_size
= cfun_frame_layout
.frame_size
;
10697 /* Try to predict whether we'll need the base register. */
10698 base_used
= crtl
->uses_const_pool
10699 || (!DISP_IN_RANGE (frame_size
)
10700 && !CONST_OK_FOR_K (frame_size
));
10702 /* Decide which register to use as literal pool base. In small
10703 leaf functions, try to use an unused call-clobbered register
10704 as base register to avoid save/restore overhead. */
10706 cfun
->machine
->base_reg
= NULL_RTX
;
10712 /* Prefer r5 (most likely to be free). */
10713 for (br
= 5; br
>= 2 && df_regs_ever_live_p (br
); br
--)
10715 cfun
->machine
->base_reg
=
10716 gen_rtx_REG (Pmode
, (br
>= 2) ? br
: BASE_REGNUM
);
10719 s390_register_info ();
10720 s390_frame_info ();
10722 while (frame_size
!= cfun_frame_layout
.frame_size
);
10725 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10726 the TX is nonescaping. A transaction is considered escaping if
10727 there is at least one path from tbegin returning CC0 to the
10728 function exit block without an tend.
10730 The check so far has some limitations:
10731 - only single tbegin/tend BBs are supported
10732 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10733 - when CC is copied to a GPR and the CC0 check is done with the GPR
10734 this is not supported
10738 s390_optimize_nonescaping_tx (void)
10740 const unsigned int CC0
= 1 << 3;
10741 basic_block tbegin_bb
= NULL
;
10742 basic_block tend_bb
= NULL
;
10745 bool result
= true;
10747 rtx_insn
*tbegin_insn
= NULL
;
10749 if (!cfun
->machine
->tbegin_p
)
10752 for (bb_index
= 0; bb_index
< n_basic_blocks_for_fn (cfun
); bb_index
++)
10754 bb
= BASIC_BLOCK_FOR_FN (cfun
, bb_index
);
10759 FOR_BB_INSNS (bb
, insn
)
10761 rtx ite
, cc
, pat
, target
;
10762 unsigned HOST_WIDE_INT mask
;
10764 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
10767 pat
= PATTERN (insn
);
10769 if (GET_CODE (pat
) == PARALLEL
)
10770 pat
= XVECEXP (pat
, 0, 0);
10772 if (GET_CODE (pat
) != SET
10773 || GET_CODE (SET_SRC (pat
)) != UNSPEC_VOLATILE
)
10776 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TBEGIN
)
10780 tbegin_insn
= insn
;
10782 /* Just return if the tbegin doesn't have clobbers. */
10783 if (GET_CODE (PATTERN (insn
)) != PARALLEL
)
10786 if (tbegin_bb
!= NULL
)
10789 /* Find the next conditional jump. */
10790 for (tmp
= NEXT_INSN (insn
);
10792 tmp
= NEXT_INSN (tmp
))
10794 if (reg_set_p (gen_rtx_REG (CCmode
, CC_REGNUM
), tmp
))
10799 ite
= SET_SRC (PATTERN (tmp
));
10800 if (GET_CODE (ite
) != IF_THEN_ELSE
)
10803 cc
= XEXP (XEXP (ite
, 0), 0);
10804 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
))
10805 || GET_MODE (cc
) != CCRAWmode
10806 || GET_CODE (XEXP (XEXP (ite
, 0), 1)) != CONST_INT
)
10809 if (bb
->succs
->length () != 2)
10812 mask
= INTVAL (XEXP (XEXP (ite
, 0), 1));
10813 if (GET_CODE (XEXP (ite
, 0)) == NE
)
10817 target
= XEXP (ite
, 1);
10818 else if (mask
== (CC0
^ 0xf))
10819 target
= XEXP (ite
, 2);
10827 ei
= ei_start (bb
->succs
);
10828 e1
= ei_safe_edge (ei
);
10830 e2
= ei_safe_edge (ei
);
10832 if (e2
->flags
& EDGE_FALLTHRU
)
10835 e1
= ei_safe_edge (ei
);
10838 if (!(e1
->flags
& EDGE_FALLTHRU
))
10841 tbegin_bb
= (target
== pc_rtx
) ? e1
->dest
: e2
->dest
;
10843 if (tmp
== BB_END (bb
))
10848 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TEND
)
10850 if (tend_bb
!= NULL
)
10857 /* Either we successfully remove the FPR clobbers here or we are not
10858 able to do anything for this TX. Both cases don't qualify for
10860 cfun
->machine
->tbegin_p
= false;
10862 if (tbegin_bb
== NULL
|| tend_bb
== NULL
)
10865 calculate_dominance_info (CDI_POST_DOMINATORS
);
10866 result
= dominated_by_p (CDI_POST_DOMINATORS
, tbegin_bb
, tend_bb
);
10867 free_dominance_info (CDI_POST_DOMINATORS
);
10872 PATTERN (tbegin_insn
) = gen_rtx_PARALLEL (VOIDmode
,
10874 XVECEXP (PATTERN (tbegin_insn
), 0, 0),
10875 XVECEXP (PATTERN (tbegin_insn
), 0, 1)));
10876 INSN_CODE (tbegin_insn
) = -1;
10877 df_insn_rescan (tbegin_insn
);
10882 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10883 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10885 static unsigned int
10886 s390_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
10888 return s390_class_max_nregs (REGNO_REG_CLASS (regno
), mode
);
10891 /* Implement TARGET_HARD_REGNO_MODE_OK.
10893 Integer modes <= word size fit into any GPR.
10894 Integer modes > word size fit into successive GPRs, starting with
10895 an even-numbered register.
10896 SImode and DImode fit into FPRs as well.
10898 Floating point modes <= word size fit into any FPR or GPR.
10899 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10900 into any FPR, or an even-odd GPR pair.
10901 TFmode fits only into an even-odd FPR pair.
10903 Complex floating point modes fit either into two FPRs, or into
10904 successive GPRs (again starting with an even number).
10905 TCmode fits only into two successive even-odd FPR pairs.
10907 Condition code modes fit only into the CC register. */
10910 s390_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10912 if (!TARGET_VX
&& VECTOR_NOFP_REGNO_P (regno
))
10915 switch (REGNO_REG_CLASS (regno
))
10918 return ((GET_MODE_CLASS (mode
) == MODE_INT
10919 && s390_class_max_nregs (VEC_REGS
, mode
) == 1)
10921 || (TARGET_VXE
&& mode
== SFmode
)
10922 || s390_vector_mode_supported_p (mode
));
10926 && ((GET_MODE_CLASS (mode
) == MODE_INT
10927 && s390_class_max_nregs (FP_REGS
, mode
) == 1)
10929 || s390_vector_mode_supported_p (mode
)))
10932 if (REGNO_PAIR_OK (regno
, mode
))
10934 if (mode
== SImode
|| mode
== DImode
)
10937 if (FLOAT_MODE_P (mode
) && GET_MODE_CLASS (mode
) != MODE_VECTOR_FLOAT
)
10942 if (FRAME_REGNO_P (regno
) && mode
== Pmode
)
10947 if (REGNO_PAIR_OK (regno
, mode
))
10950 || (mode
!= TFmode
&& mode
!= TCmode
&& mode
!= TDmode
))
10955 if (GET_MODE_CLASS (mode
) == MODE_CC
)
10959 if (REGNO_PAIR_OK (regno
, mode
))
10961 if (mode
== SImode
|| mode
== Pmode
)
10972 /* Implement TARGET_MODES_TIEABLE_P. */
10975 s390_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10977 return ((mode1
== SFmode
|| mode1
== DFmode
)
10978 == (mode2
== SFmode
|| mode2
== DFmode
));
10981 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10984 s390_hard_regno_rename_ok (unsigned int old_reg
, unsigned int new_reg
)
10986 /* Once we've decided upon a register to use as base register, it must
10987 no longer be used for any other purpose. */
10988 if (cfun
->machine
->base_reg
)
10989 if (REGNO (cfun
->machine
->base_reg
) == old_reg
10990 || REGNO (cfun
->machine
->base_reg
) == new_reg
)
10993 /* Prevent regrename from using call-saved regs which haven't
10994 actually been saved. This is necessary since regrename assumes
10995 the backend save/restore decisions are based on
10996 df_regs_ever_live. Since we have our own routine we have to tell
10997 regrename manually about it. */
10998 if (GENERAL_REGNO_P (new_reg
)
10999 && !call_used_regs
[new_reg
]
11000 && cfun_gpr_save_slot (new_reg
) == SAVE_SLOT_NONE
)
11006 /* Return nonzero if register REGNO can be used as a scratch register
11010 s390_hard_regno_scratch_ok (unsigned int regno
)
11012 /* See s390_hard_regno_rename_ok. */
11013 if (GENERAL_REGNO_P (regno
)
11014 && !call_used_regs
[regno
]
11015 && cfun_gpr_save_slot (regno
) == SAVE_SLOT_NONE
)
11021 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
11022 code that runs in z/Architecture mode, but conforms to the 31-bit
11023 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
11024 bytes are saved across calls, however. */
11027 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
11030 /* For r12 we know that the only bits we actually care about are
11031 preserved across function calls. Since r12 is a fixed reg all
11032 accesses to r12 are generated by the backend.
11034 This workaround is necessary until gcse implements proper
11035 tracking of partially clobbered registers. */
11038 && GET_MODE_SIZE (mode
) > 4
11039 && (!flag_pic
|| regno
!= PIC_OFFSET_TABLE_REGNUM
)
11040 && ((regno
>= 6 && regno
<= 15) || regno
== 32))
11044 && GET_MODE_SIZE (mode
) > 8
11045 && (((TARGET_64BIT
&& regno
>= 24 && regno
<= 31))
11046 || (!TARGET_64BIT
&& (regno
== 18 || regno
== 19))))
11052 /* Maximum number of registers to represent a value of mode MODE
11053 in a register of class RCLASS. */
11056 s390_class_max_nregs (enum reg_class rclass
, machine_mode mode
)
11059 bool reg_pair_required_p
= false;
11065 reg_size
= TARGET_VX
? 16 : 8;
11067 /* TF and TD modes would fit into a VR but we put them into a
11068 register pair since we do not have 128bit FP instructions on
11071 && SCALAR_FLOAT_MODE_P (mode
)
11072 && GET_MODE_SIZE (mode
) >= 16
11073 && !(TARGET_VXE
&& mode
== TFmode
))
11074 reg_pair_required_p
= true;
11076 /* Even if complex types would fit into a single FPR/VR we force
11077 them into a register pair to deal with the parts more easily.
11078 (FIXME: What about complex ints?) */
11079 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
11080 reg_pair_required_p
= true;
11086 reg_size
= UNITS_PER_WORD
;
11090 if (reg_pair_required_p
)
11091 return 2 * ((GET_MODE_SIZE (mode
) / 2 + reg_size
- 1) / reg_size
);
11093 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
11096 /* Return nonzero if mode M describes a 128-bit float in a floating point
11100 s390_is_fpr128 (machine_mode m
)
11102 return m
== FPRX2mode
|| (!TARGET_VXE
&& m
== TFmode
);
11105 /* Return nonzero if mode M describes a 128-bit float in a vector
11109 s390_is_vr128 (machine_mode m
)
11111 return m
== V1TFmode
|| (TARGET_VXE
&& m
== TFmode
);
11114 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
11117 s390_can_change_mode_class (machine_mode from_mode
,
11118 machine_mode to_mode
,
11119 reg_class_t rclass
)
11121 machine_mode small_mode
;
11122 machine_mode big_mode
;
11124 /* 128-bit values have different representations in floating point and
11125 vector registers. */
11126 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
11127 && ((s390_is_fpr128 (from_mode
) && s390_is_vr128 (to_mode
))
11128 || (s390_is_vr128 (from_mode
) && s390_is_fpr128 (to_mode
))))
11131 if (GET_MODE_SIZE (from_mode
) == GET_MODE_SIZE (to_mode
))
11134 if (GET_MODE_SIZE (from_mode
) < GET_MODE_SIZE (to_mode
))
11136 small_mode
= from_mode
;
11137 big_mode
= to_mode
;
11141 small_mode
= to_mode
;
11142 big_mode
= from_mode
;
11145 /* Values residing in VRs are little-endian style. All modes are
11146 placed left-aligned in an VR. This means that we cannot allow
11147 switching between modes with differing sizes. Also if the vector
11148 facility is available we still place TFmode values in VR register
11149 pairs, since the only instructions we have operating on TFmodes
11150 only deal with register pairs. Therefore we have to allow DFmode
11151 subregs of TFmodes to enable the TFmode splitters. */
11152 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
11153 && (GET_MODE_SIZE (small_mode
) < 8
11154 || s390_class_max_nregs (VEC_REGS
, big_mode
) == 1))
11157 /* Likewise for access registers, since they have only half the
11158 word size on 64-bit. */
11159 if (reg_classes_intersect_p (ACCESS_REGS
, rclass
))
11165 /* Return true if we use LRA instead of reload pass. */
11169 return s390_lra_flag
;
11172 /* Return true if register FROM can be eliminated via register TO. */
11175 s390_can_eliminate (const int from
, const int to
)
11177 /* We have not marked the base register as fixed.
11178 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
11179 If a function requires the base register, we say here that this
11180 elimination cannot be performed. This will cause reload to free
11181 up the base register (as if it were fixed). On the other hand,
11182 if the current function does *not* require the base register, we
11183 say here the elimination succeeds, which in turn allows reload
11184 to allocate the base register for any other purpose. */
11185 if (from
== BASE_REGNUM
&& to
== BASE_REGNUM
)
11187 s390_init_frame_layout ();
11188 return cfun
->machine
->base_reg
== NULL_RTX
;
11191 /* Everything else must point into the stack frame. */
11192 gcc_assert (to
== STACK_POINTER_REGNUM
11193 || to
== HARD_FRAME_POINTER_REGNUM
);
11195 gcc_assert (from
== FRAME_POINTER_REGNUM
11196 || from
== ARG_POINTER_REGNUM
11197 || from
== RETURN_ADDRESS_POINTER_REGNUM
);
11199 /* Make sure we actually saved the return address. */
11200 if (from
== RETURN_ADDRESS_POINTER_REGNUM
)
11201 if (!crtl
->calls_eh_return
11203 && !cfun_frame_layout
.save_return_addr_p
)
11209 /* Return offset between register FROM and TO initially after prolog. */
11212 s390_initial_elimination_offset (int from
, int to
)
11214 HOST_WIDE_INT offset
;
11216 /* ??? Why are we called for non-eliminable pairs? */
11217 if (!s390_can_eliminate (from
, to
))
11222 case FRAME_POINTER_REGNUM
:
11223 offset
= (get_frame_size()
11224 + STACK_POINTER_OFFSET
11225 + crtl
->outgoing_args_size
);
11228 case ARG_POINTER_REGNUM
:
11229 s390_init_frame_layout ();
11230 offset
= cfun_frame_layout
.frame_size
+ STACK_POINTER_OFFSET
;
11233 case RETURN_ADDRESS_POINTER_REGNUM
:
11234 s390_init_frame_layout ();
11236 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
11238 /* If it turns out that for stdarg nothing went into the reg
11239 save area we also do not need the return address
11241 if (cfun
->stdarg
&& !cfun_save_arg_fprs_p
)
11244 gcc_unreachable ();
11247 /* In order to make the following work it is not necessary for
11248 r14 to have a save slot. It is sufficient if one other GPR
11249 got one. Since the GPRs are always stored without gaps we
11250 are able to calculate where the r14 save slot would
11252 offset
= (cfun_frame_layout
.frame_size
+ cfun_frame_layout
.gprs_offset
+
11253 (RETURN_REGNUM
- cfun_frame_layout
.first_save_gpr_slot
) *
11262 gcc_unreachable ();
11268 /* Emit insn to save fpr REGNUM at offset OFFSET relative
11269 to register BASE. Return generated insn. */
11272 save_fpr (rtx base
, int offset
, int regnum
)
11277 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
11279 if (regnum
>= FPR0_REGNUM
&& regnum
<= (FPR0_REGNUM
+ FP_ARG_NUM_REG
))
11280 set_mem_alias_set (addr
, get_varargs_alias_set ());
11282 set_mem_alias_set (addr
, get_frame_alias_set ());
11284 insn
= emit_move_insn (addr
, gen_rtx_REG (DFmode
, regnum
));
11286 if (!call_used_regs
[regnum
] || s390_preserve_fpr_arg_p (regnum
))
11287 RTX_FRAME_RELATED_P (insn
) = 1;
11289 if (s390_preserve_fpr_arg_p (regnum
) && !cfun_fpr_save_p (regnum
))
11291 rtx reg
= gen_rtx_REG (DFmode
, regnum
);
11292 add_reg_note (insn
, REG_CFA_NO_RESTORE
, reg
);
11293 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (addr
, reg
));
11299 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
11300 to register BASE. Return generated insn. */
11303 restore_fpr (rtx base
, int offset
, int regnum
)
11306 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
11307 set_mem_alias_set (addr
, get_frame_alias_set ());
11309 return emit_move_insn (gen_rtx_REG (DFmode
, regnum
), addr
);
11312 /* Generate insn to save registers FIRST to LAST into
11313 the register save area located at offset OFFSET
11314 relative to register BASE. */
11317 save_gprs (rtx base
, int offset
, int first
, int last
, rtx_insn
*before
= NULL
)
11319 rtx addr
, insn
, note
;
11320 rtx_insn
*out_insn
;
11323 addr
= plus_constant (Pmode
, base
, offset
);
11324 addr
= gen_frame_mem (Pmode
, addr
);
11326 /* Special-case single register. */
11330 insn
= gen_movdi (addr
, gen_rtx_REG (Pmode
, first
));
11332 insn
= gen_movsi (addr
, gen_rtx_REG (Pmode
, first
));
11334 if (!global_not_special_regno_p (first
))
11335 RTX_FRAME_RELATED_P (insn
) = 1;
11337 if (s390_preserve_gpr_arg_p (first
) && !s390_restore_gpr_p (first
))
11339 rtx reg
= gen_rtx_REG (Pmode
, first
);
11340 add_reg_note (insn
, REG_CFA_NO_RESTORE
, reg
);
11341 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (addr
, reg
));
11348 insn
= gen_store_multiple (addr
,
11349 gen_rtx_REG (Pmode
, first
),
11350 GEN_INT (last
- first
+ 1));
11352 if (first
<= 6 && cfun
->stdarg
)
11353 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
11355 rtx mem
= XEXP (XVECEXP (PATTERN (insn
), 0, i
), 0);
11357 if (first
+ i
<= 6)
11358 set_mem_alias_set (mem
, get_varargs_alias_set ());
11361 /* We need to set the FRAME_RELATED flag on all SETs
11362 inside the store-multiple pattern.
11364 However, we must not emit DWARF records for registers 2..5
11365 if they are stored for use by variable arguments ...
11367 ??? Unfortunately, it is not enough to simply not the
11368 FRAME_RELATED flags for those SETs, because the first SET
11369 of the PARALLEL is always treated as if it had the flag
11370 set, even if it does not. Therefore we emit a new pattern
11371 without those registers as REG_FRAME_RELATED_EXPR note. */
11373 /* In these cases all of the sets are marked as frame related:
11374 1. call-save GPR saved and restored
11375 2. argument GPR saved because of -mpreserve-args */
11376 if ((first
>= GPR6_REGNUM
&& !global_not_special_regno_p (first
))
11377 || s390_preserve_gpr_arg_in_range_p (first
, last
))
11380 rtx pat
= PATTERN (insn
);
11382 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
11383 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
11384 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat
,
11386 RTX_FRAME_RELATED_P (XVECEXP (pat
, 0, i
)) = 1;
11388 RTX_FRAME_RELATED_P (insn
) = 1;
11390 /* For the -mpreserve-args register saves no restore operations
11391 will be emitted. CFI checking would complain about this. We
11392 manually generate the REG_CFA notes here to be able to mark
11393 those operations with REG_CFA_NO_RESTORE. */
11394 if (s390_preserve_gpr_arg_in_range_p (first
, last
))
11396 for (int regno
= first
; regno
<= last
; regno
++)
11398 rtx reg
= gen_rtx_REG (Pmode
, regno
);
11399 rtx reg_addr
= plus_constant (Pmode
, base
,
11400 offset
+ (regno
- first
) * UNITS_PER_LONG
);
11401 if (!s390_restore_gpr_p (regno
))
11402 add_reg_note (insn
, REG_CFA_NO_RESTORE
, reg
);
11403 add_reg_note (insn
, REG_CFA_OFFSET
,
11404 gen_rtx_SET (gen_frame_mem (Pmode
, reg_addr
), reg
));
11408 else if (last
>= 6)
11412 for (start
= first
>= 6 ? first
: 6; start
<= last
; start
++)
11413 if (!global_not_special_regno_p (start
))
11419 addr
= plus_constant (Pmode
, base
,
11420 offset
+ (start
- first
) * UNITS_PER_LONG
);
11425 note
= gen_movdi (gen_rtx_MEM (Pmode
, addr
),
11426 gen_rtx_REG (Pmode
, start
));
11428 note
= gen_movsi (gen_rtx_MEM (Pmode
, addr
),
11429 gen_rtx_REG (Pmode
, start
));
11430 note
= PATTERN (note
);
11432 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
11433 RTX_FRAME_RELATED_P (insn
) = 1;
11438 note
= gen_store_multiple (gen_rtx_MEM (Pmode
, addr
),
11439 gen_rtx_REG (Pmode
, start
),
11440 GEN_INT (last
- start
+ 1));
11441 note
= PATTERN (note
);
11443 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
11445 for (i
= 0; i
< XVECLEN (note
, 0); i
++)
11446 if (GET_CODE (XVECEXP (note
, 0, i
)) == SET
11447 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note
,
11449 RTX_FRAME_RELATED_P (XVECEXP (note
, 0, i
)) = 1;
11451 RTX_FRAME_RELATED_P (insn
) = 1;
11455 if (before
!= NULL_RTX
)
11456 out_insn
= emit_insn_before (insn
, before
);
11458 out_insn
= emit_insn (insn
);
11459 INSN_ADDRESSES_NEW (out_insn
, -1);
11463 /* Generate insn to restore registers FIRST to LAST from
11464 the register save area located at offset OFFSET
11465 relative to register BASE. */
11468 restore_gprs (rtx base
, int offset
, int first
, int last
)
11472 addr
= plus_constant (Pmode
, base
, offset
);
11473 addr
= gen_frame_mem (Pmode
, addr
);
11475 /* Special-case single register. */
11479 insn
= gen_movdi (gen_rtx_REG (Pmode
, first
), addr
);
11481 insn
= gen_movsi (gen_rtx_REG (Pmode
, first
), addr
);
11483 RTX_FRAME_RELATED_P (insn
) = 1;
11487 insn
= gen_load_multiple (gen_rtx_REG (Pmode
, first
),
11489 GEN_INT (last
- first
+ 1));
11490 RTX_FRAME_RELATED_P (insn
) = 1;
11494 /* Return insn sequence to load the GOT register. */
11497 s390_load_got (void)
11501 /* We cannot use pic_offset_table_rtx here since we use this
11502 function also for non-pic if __tls_get_offset is called and in
11503 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
11505 rtx got_rtx
= gen_rtx_REG (Pmode
, 12);
11509 emit_move_insn (got_rtx
, s390_got_symbol ());
11511 insns
= get_insns ();
11516 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11517 and the change to the stack pointer. */
11520 s390_emit_stack_tie (void)
11522 rtx mem
= gen_frame_mem (BLKmode
, stack_pointer_rtx
);
11523 if (frame_pointer_needed
)
11524 emit_insn (gen_stack_tie (Pmode
, mem
, hard_frame_pointer_rtx
));
11526 emit_insn (gen_stack_tie (Pmode
, mem
, stack_pointer_rtx
));
11529 /* Copy GPRS into FPR save slots. */
11532 s390_save_gprs_to_fprs (void)
11536 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
11539 for (i
= 6; i
< 16; i
++)
11541 if (FP_REGNO_P (cfun_gpr_save_slot (i
)))
11544 emit_move_insn (gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
)),
11545 gen_rtx_REG (DImode
, i
));
11546 RTX_FRAME_RELATED_P (insn
) = 1;
11547 /* This prevents dwarf2cfi from interpreting the set. Doing
11548 so it might emit def_cfa_register infos setting an FPR as
11550 add_reg_note (insn
, REG_CFA_REGISTER
, copy_rtx (PATTERN (insn
)));
11555 /* Restore GPRs from FPR save slots. */
11558 s390_restore_gprs_from_fprs (void)
11562 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
11565 /* Restore the GPRs starting with the stack pointer. That way the
11566 stack pointer already has its original value when it comes to
11567 restoring the hard frame pointer. So we can set the cfa reg back
11568 to the stack pointer. */
11569 for (i
= STACK_POINTER_REGNUM
; i
>= 6; i
--)
11573 if (!FP_REGNO_P (cfun_gpr_save_slot (i
)))
11576 rtx fpr
= gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
));
11578 if (i
== STACK_POINTER_REGNUM
)
11579 insn
= emit_insn (gen_stack_restore_from_fpr (fpr
));
11581 insn
= emit_move_insn (gen_rtx_REG (DImode
, i
), fpr
);
11583 df_set_regs_ever_live (i
, true);
11584 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, i
));
11586 /* If either the stack pointer or the frame pointer get restored
11587 set the CFA value to its value at function start. Doing this
11588 for the frame pointer results in .cfi_def_cfa_register 15
11589 what is ok since if the stack pointer got modified it has
11590 been restored already. */
11591 if (i
== STACK_POINTER_REGNUM
|| i
== HARD_FRAME_POINTER_REGNUM
)
11592 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11593 plus_constant (Pmode
, stack_pointer_rtx
,
11594 STACK_POINTER_OFFSET
));
11595 RTX_FRAME_RELATED_P (insn
) = 1;
11600 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11605 const pass_data pass_data_s390_early_mach
=
11607 RTL_PASS
, /* type */
11608 "early_mach", /* name */
11609 OPTGROUP_NONE
, /* optinfo_flags */
11610 TV_MACH_DEP
, /* tv_id */
11611 0, /* properties_required */
11612 0, /* properties_provided */
11613 0, /* properties_destroyed */
11614 0, /* todo_flags_start */
11615 ( TODO_df_verify
| TODO_df_finish
), /* todo_flags_finish */
11618 class pass_s390_early_mach
: public rtl_opt_pass
11621 pass_s390_early_mach (gcc::context
*ctxt
)
11622 : rtl_opt_pass (pass_data_s390_early_mach
, ctxt
)
11625 /* opt_pass methods: */
11626 virtual unsigned int execute (function
*);
11628 }; // class pass_s390_early_mach
11631 pass_s390_early_mach::execute (function
*fun
)
11635 /* Try to get rid of the FPR clobbers. */
11636 s390_optimize_nonescaping_tx ();
11638 /* Re-compute register info. */
11639 s390_register_info ();
11641 /* If we're using a base register, ensure that it is always valid for
11642 the first non-prologue instruction. */
11643 if (fun
->machine
->base_reg
)
11644 emit_insn_at_entry (gen_main_pool (fun
->machine
->base_reg
));
11646 /* Annotate all constant pool references to let the scheduler know
11647 they implicitly use the base register. */
11648 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
11651 annotate_constant_pool_refs (insn
);
11652 df_insn_rescan (insn
);
11657 } // anon namespace
11660 make_pass_s390_early_mach (gcc::context
*ctxt
)
11662 return new pass_s390_early_mach (ctxt
);
11665 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11666 - push too big immediates to the literal pool and annotate the refs
11667 - emit frame related notes for stack pointer changes. */
11670 s390_prologue_plus_offset (rtx target
, rtx reg
, rtx offset
, bool frame_related_p
)
11673 rtx orig_offset
= offset
;
11675 gcc_assert (REG_P (target
));
11676 gcc_assert (REG_P (reg
));
11677 gcc_assert (CONST_INT_P (offset
));
11679 if (offset
== const0_rtx
) /* lr/lgr */
11681 insn
= emit_move_insn (target
, reg
);
11683 else if (DISP_IN_RANGE (INTVAL (offset
))) /* la */
11685 insn
= emit_move_insn (target
, gen_rtx_PLUS (Pmode
, reg
,
11690 if (!satisfies_constraint_K (offset
) /* ahi/aghi */
11692 || (!satisfies_constraint_Op (offset
) /* alfi/algfi */
11693 && !satisfies_constraint_On (offset
)))) /* slfi/slgfi */
11694 offset
= force_const_mem (Pmode
, offset
);
11698 insn
= emit_move_insn (target
, reg
);
11699 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
11702 insn
= emit_insn (gen_add2_insn (target
, offset
));
11704 if (!CONST_INT_P (offset
))
11706 annotate_constant_pool_refs (insn
);
11708 if (frame_related_p
)
11709 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
11710 gen_rtx_SET (target
,
11711 gen_rtx_PLUS (Pmode
, target
,
11716 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
11718 /* If this is a stack adjustment and we are generating a stack clash
11719 prologue, then add a REG_STACK_CHECK note to signal that this insn
11720 should be left alone. */
11721 if (flag_stack_clash_protection
&& target
== stack_pointer_rtx
)
11722 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
11727 /* Emit a compare instruction with a volatile memory access as stack
11728 probe. It does not waste store tags and does not clobber any
11729 registers apart from the condition code. */
11731 s390_emit_stack_probe (rtx addr
)
11733 rtx mem
= gen_rtx_MEM (word_mode
, addr
);
11734 MEM_VOLATILE_P (mem
) = 1;
11735 emit_insn (gen_probe_stack (mem
));
11738 /* Use a runtime loop if we have to emit more probes than this. */
11739 #define MIN_UNROLL_PROBES 3
11741 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11742 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11743 probe relative to the stack pointer.
11745 Note that SIZE is negative.
11747 The return value is true if TEMP_REG has been clobbered. */
11749 allocate_stack_space (rtx size
, HOST_WIDE_INT last_probe_offset
,
11752 bool temp_reg_clobbered_p
= false;
11753 HOST_WIDE_INT probe_interval
11754 = 1 << param_stack_clash_protection_probe_interval
;
11755 HOST_WIDE_INT guard_size
11756 = 1 << param_stack_clash_protection_guard_size
;
11758 if (flag_stack_clash_protection
)
11760 if (last_probe_offset
+ -INTVAL (size
) < guard_size
)
11761 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
11764 rtx offset
= GEN_INT (probe_interval
- UNITS_PER_LONG
);
11765 HOST_WIDE_INT rounded_size
= -INTVAL (size
) & -probe_interval
;
11766 HOST_WIDE_INT num_probes
= rounded_size
/ probe_interval
;
11767 HOST_WIDE_INT residual
= -INTVAL (size
) - rounded_size
;
11769 if (num_probes
< MIN_UNROLL_PROBES
)
11771 /* Emit unrolled probe statements. */
11773 for (unsigned int i
= 0; i
< num_probes
; i
++)
11775 s390_prologue_plus_offset (stack_pointer_rtx
,
11777 GEN_INT (-probe_interval
), true);
11778 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11782 if (num_probes
> 0)
11783 last_probe_offset
= INTVAL (offset
);
11784 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
11788 /* Emit a loop probing the pages. */
11790 rtx_code_label
*loop_start_label
= gen_label_rtx ();
11792 /* From now on temp_reg will be the CFA register. */
11793 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11794 GEN_INT (-rounded_size
), true);
11795 emit_label (loop_start_label
);
11797 s390_prologue_plus_offset (stack_pointer_rtx
,
11799 GEN_INT (-probe_interval
), false);
11800 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11803 emit_cmp_and_jump_insns (stack_pointer_rtx
, temp_reg
,
11805 Pmode
, 1, loop_start_label
);
11807 /* Without this make_edges ICEes. */
11808 JUMP_LABEL (get_last_insn ()) = loop_start_label
;
11809 LABEL_NUSES (loop_start_label
) = 1;
11811 /* That's going to be a NOP since stack pointer and
11812 temp_reg are supposed to be the same here. We just
11813 emit it to set the CFA reg back to r15. */
11814 s390_prologue_plus_offset (stack_pointer_rtx
, temp_reg
,
11816 temp_reg_clobbered_p
= true;
11817 last_probe_offset
= INTVAL (offset
);
11818 dump_stack_clash_frame_info (PROBE_LOOP
, residual
!= 0);
11821 /* Handle any residual allocation request. */
11822 s390_prologue_plus_offset (stack_pointer_rtx
,
11824 GEN_INT (-residual
), true);
11825 last_probe_offset
+= residual
;
11826 if (last_probe_offset
>= probe_interval
)
11827 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11830 - UNITS_PER_LONG
)));
11832 return temp_reg_clobbered_p
;
11836 /* Subtract frame size from stack pointer. */
11837 s390_prologue_plus_offset (stack_pointer_rtx
,
11841 return temp_reg_clobbered_p
;
11844 /* Expand the prologue into a bunch of separate insns. */
11847 s390_emit_prologue (void)
11855 /* Choose best register to use for temp use within prologue.
11856 TPF with profiling must avoid the register 14 - the tracing function
11857 needs the original contents of r14 to be preserved. */
11859 if (!has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
)
11861 && !TARGET_TPF_PROFILING
)
11862 temp_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11863 else if (flag_split_stack
&& cfun
->stdarg
)
11864 temp_reg
= gen_rtx_REG (Pmode
, 12);
11866 temp_reg
= gen_rtx_REG (Pmode
, 1);
11868 /* When probing for stack-clash mitigation, we have to track the distance
11869 between the stack pointer and closest known reference.
11871 Most of the time we have to make a worst case assumption. The
11872 only exception is when TARGET_BACKCHAIN is active, in which case
11873 we know *sp (offset 0) was written. */
11874 HOST_WIDE_INT probe_interval
11875 = 1 << param_stack_clash_protection_probe_interval
;
11876 HOST_WIDE_INT last_probe_offset
11877 = (TARGET_BACKCHAIN
11878 ? (TARGET_PACKED_STACK
? STACK_POINTER_OFFSET
- UNITS_PER_LONG
: 0)
11879 : probe_interval
- (STACK_BOUNDARY
/ UNITS_PER_WORD
));
11881 s390_save_gprs_to_fprs ();
11883 /* Save call saved gprs. */
11884 if (cfun_frame_layout
.first_save_gpr
!= -1)
11886 save_gprs (stack_pointer_rtx
,
11887 cfun_frame_layout
.gprs_offset
+
11888 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11889 - cfun_frame_layout
.first_save_gpr_slot
),
11890 cfun_frame_layout
.first_save_gpr
,
11891 cfun_frame_layout
.last_save_gpr
);
11893 /* This is not 100% correct. If we have more than one register saved,
11894 then LAST_PROBE_OFFSET can move even closer to sp. */
11896 = (cfun_frame_layout
.gprs_offset
+
11897 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11898 - cfun_frame_layout
.first_save_gpr_slot
));
11901 /* Dummy insn to mark literal pool slot. */
11903 if (cfun
->machine
->base_reg
)
11904 emit_insn (gen_main_pool (cfun
->machine
->base_reg
));
11906 offset
= cfun_frame_layout
.f0_offset
;
11908 /* Save f0 and f2. */
11909 for (i
= FPR0_REGNUM
; i
<= FPR0_REGNUM
+ 1; i
++)
11911 if (cfun_fpr_save_p (i
))
11913 save_fpr (stack_pointer_rtx
, offset
, i
);
11914 if (offset
< last_probe_offset
)
11915 last_probe_offset
= offset
;
11918 else if (!TARGET_PACKED_STACK
|| cfun
->stdarg
)
11922 /* Save f4 and f6. */
11923 offset
= cfun_frame_layout
.f4_offset
;
11924 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11926 if (cfun_fpr_save_p (i
))
11928 save_fpr (stack_pointer_rtx
, offset
, i
);
11929 if (offset
< last_probe_offset
)
11930 last_probe_offset
= offset
;
11933 else if (!TARGET_PACKED_STACK
|| call_used_regs
[i
])
11937 if (TARGET_PACKED_STACK
11938 && cfun_save_high_fprs_p
11939 && cfun_frame_layout
.f8_offset
+ cfun_frame_layout
.high_fprs
* 8 > 0)
11941 offset
= (cfun_frame_layout
.f8_offset
11942 + (cfun_frame_layout
.high_fprs
- 1) * 8);
11944 for (i
= FPR15_REGNUM
; i
>= FPR8_REGNUM
&& offset
>= 0; i
--)
11945 if (cfun_fpr_save_p (i
))
11947 save_fpr (stack_pointer_rtx
, offset
, i
);
11948 if (offset
< last_probe_offset
)
11949 last_probe_offset
= offset
;
11953 if (offset
>= cfun_frame_layout
.f8_offset
)
11957 if (!TARGET_PACKED_STACK
)
11958 next_fpr
= cfun_save_high_fprs_p
? FPR15_REGNUM
: 0;
11960 if (flag_stack_usage_info
)
11961 current_function_static_stack_size
= cfun_frame_layout
.frame_size
;
11963 /* Decrement stack pointer. */
11965 if (cfun_frame_layout
.frame_size
> 0)
11967 rtx frame_off
= GEN_INT (-cfun_frame_layout
.frame_size
);
11968 rtx_insn
*stack_pointer_backup_loc
;
11969 bool temp_reg_clobbered_p
;
11971 if (s390_stack_size
)
11973 HOST_WIDE_INT stack_guard
;
11975 if (s390_stack_guard
)
11976 stack_guard
= s390_stack_guard
;
11979 /* If no value for stack guard is provided the smallest power of 2
11980 larger than the current frame size is chosen. */
11982 while (stack_guard
< cfun_frame_layout
.frame_size
)
11986 if (cfun_frame_layout
.frame_size
>= s390_stack_size
)
11988 warning (0, "frame size of function %qs is %wd"
11989 " bytes exceeding user provided stack limit of "
11991 "an unconditional trap is added",
11992 current_function_name(), cfun_frame_layout
.frame_size
,
11994 emit_insn (gen_trap ());
11999 /* stack_guard has to be smaller than s390_stack_size.
12000 Otherwise we would emit an AND with zero which would
12001 not match the test under mask pattern. */
12002 if (stack_guard
>= s390_stack_size
)
12004 warning (0, "frame size of function %qs is %wd"
12005 " bytes which is more than half the stack size; "
12006 "the dynamic check would not be reliable; "
12007 "no check emitted for this function",
12008 current_function_name(),
12009 cfun_frame_layout
.frame_size
);
12013 HOST_WIDE_INT stack_check_mask
= ((s390_stack_size
- 1)
12014 & ~(stack_guard
- 1));
12016 rtx t
= gen_rtx_AND (Pmode
, stack_pointer_rtx
,
12017 GEN_INT (stack_check_mask
));
12019 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode
,
12021 t
, const0_rtx
, const0_rtx
));
12023 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode
,
12025 t
, const0_rtx
, const0_rtx
));
12030 if (s390_warn_framesize
> 0
12031 && cfun_frame_layout
.frame_size
>= s390_warn_framesize
)
12032 warning (0, "frame size of %qs is %wd bytes",
12033 current_function_name (), cfun_frame_layout
.frame_size
);
12035 if (s390_warn_dynamicstack_p
&& cfun
->calls_alloca
)
12036 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
12038 /* Save the location where we could backup the incoming stack
12040 stack_pointer_backup_loc
= get_last_insn ();
12042 temp_reg_clobbered_p
= allocate_stack_space (frame_off
, last_probe_offset
,
12045 if (TARGET_BACKCHAIN
|| next_fpr
)
12047 if (temp_reg_clobbered_p
)
12049 /* allocate_stack_space had to make use of temp_reg and
12050 we need it to hold a backup of the incoming stack
12051 pointer. Calculate back that value from the current
12053 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
12054 GEN_INT (cfun_frame_layout
.frame_size
),
12059 /* allocate_stack_space didn't actually required
12060 temp_reg. Insert the stack pointer backup insn
12061 before the stack pointer decrement code - knowing now
12062 that the value will survive. */
12063 emit_insn_after (gen_move_insn (temp_reg
, stack_pointer_rtx
),
12064 stack_pointer_backup_loc
);
12068 /* Set backchain. */
12070 if (TARGET_BACKCHAIN
)
12072 if (cfun_frame_layout
.backchain_offset
)
12073 addr
= gen_rtx_MEM (Pmode
,
12074 plus_constant (Pmode
, stack_pointer_rtx
,
12075 cfun_frame_layout
.backchain_offset
));
12077 addr
= gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
12078 set_mem_alias_set (addr
, get_frame_alias_set ());
12079 insn
= emit_insn (gen_move_insn (addr
, temp_reg
));
12082 /* If we support non-call exceptions (e.g. for Java),
12083 we need to make sure the backchain pointer is set up
12084 before any possibly trapping memory access. */
12085 if (TARGET_BACKCHAIN
&& cfun
->can_throw_non_call_exceptions
)
12087 addr
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
12088 emit_clobber (addr
);
12091 else if (flag_stack_clash_protection
)
12092 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
12094 /* Save fprs 8 - 15 (64 bit ABI). */
12096 if (cfun_save_high_fprs_p
&& next_fpr
)
12098 /* If the stack might be accessed through a different register
12099 we have to make sure that the stack pointer decrement is not
12100 moved below the use of the stack slots. */
12101 s390_emit_stack_tie ();
12103 insn
= emit_insn (gen_add2_insn (temp_reg
,
12104 GEN_INT (cfun_frame_layout
.f8_offset
)));
12108 for (i
= FPR8_REGNUM
; i
<= next_fpr
; i
++)
12109 if (cfun_fpr_save_p (i
))
12111 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
,
12112 cfun_frame_layout
.frame_size
12113 + cfun_frame_layout
.f8_offset
12116 insn
= save_fpr (temp_reg
, offset
, i
);
12118 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
12119 gen_rtx_SET (gen_rtx_MEM (DFmode
, addr
),
12120 gen_rtx_REG (DFmode
, i
)));
12124 /* Set frame pointer, if needed. */
12126 if (frame_pointer_needed
)
12128 s390_emit_stack_tie ();
12129 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
12130 RTX_FRAME_RELATED_P (insn
) = 1;
12133 /* Set up got pointer, if needed. */
12135 if (flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
12137 rtx_insn
*insns
= s390_load_got ();
12139 for (rtx_insn
*insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
12140 annotate_constant_pool_refs (insn
);
12145 #if TARGET_TPF != 0
12146 if (TARGET_TPF_PROFILING
)
12148 /* Generate a BAS instruction to serve as a function entry
12149 intercept to facilitate the use of tracing algorithms located
12150 at the branch target. */
12151 emit_insn (gen_prologue_tpf (
12152 GEN_INT (s390_tpf_trace_hook_prologue_check
),
12153 GEN_INT (s390_tpf_trace_hook_prologue_target
)));
12155 /* Emit a blockage here so that all code lies between the
12156 profiling mechanisms. */
12157 emit_insn (gen_blockage ());
12162 /* Expand the epilogue into a bunch of separate insns. */
12165 s390_emit_epilogue (bool sibcall
)
12167 rtx frame_pointer
, return_reg
= NULL_RTX
, cfa_restores
= NULL_RTX
;
12168 int area_bottom
, area_top
, offset
= 0;
12172 #if TARGET_TPF != 0
12173 if (TARGET_TPF_PROFILING
)
12175 /* Generate a BAS instruction to serve as a function entry
12176 intercept to facilitate the use of tracing algorithms located
12177 at the branch target. */
12179 /* Emit a blockage here so that all code lies between the
12180 profiling mechanisms. */
12181 emit_insn (gen_blockage ());
12183 emit_insn (gen_epilogue_tpf (
12184 GEN_INT (s390_tpf_trace_hook_epilogue_check
),
12185 GEN_INT (s390_tpf_trace_hook_epilogue_target
)));
12189 /* Check whether to use frame or stack pointer for restore. */
12191 frame_pointer
= (frame_pointer_needed
12192 ? hard_frame_pointer_rtx
: stack_pointer_rtx
);
12194 s390_frame_area (&area_bottom
, &area_top
);
12196 /* Check whether we can access the register save area.
12197 If not, increment the frame pointer as required. */
12199 if (area_top
<= area_bottom
)
12201 /* Nothing to restore. */
12203 else if (DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_bottom
)
12204 && DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_top
- 1))
12206 /* Area is in range. */
12207 offset
= cfun_frame_layout
.frame_size
;
12212 rtx frame_off
, cfa
;
12214 offset
= area_bottom
< 0 ? -area_bottom
: 0;
12215 frame_off
= GEN_INT (cfun_frame_layout
.frame_size
- offset
);
12217 cfa
= gen_rtx_SET (frame_pointer
,
12218 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
12219 if (DISP_IN_RANGE (INTVAL (frame_off
)))
12223 set
= gen_rtx_SET (frame_pointer
,
12224 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
12225 insn
= emit_insn (set
);
12229 if (!CONST_OK_FOR_K (INTVAL (frame_off
)))
12230 frame_off
= force_const_mem (Pmode
, frame_off
);
12232 insn
= emit_insn (gen_add2_insn (frame_pointer
, frame_off
));
12233 annotate_constant_pool_refs (insn
);
12235 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, cfa
);
12236 RTX_FRAME_RELATED_P (insn
) = 1;
12239 /* Restore call saved fprs. */
12243 if (cfun_save_high_fprs_p
)
12245 next_offset
= cfun_frame_layout
.f8_offset
;
12246 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
12248 if (cfun_fpr_save_p (i
))
12250 restore_fpr (frame_pointer
,
12251 offset
+ next_offset
, i
);
12253 = alloc_reg_note (REG_CFA_RESTORE
,
12254 gen_rtx_REG (DFmode
, i
), cfa_restores
);
12263 next_offset
= cfun_frame_layout
.f4_offset
;
12265 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
12267 if (cfun_fpr_save_p (i
))
12269 restore_fpr (frame_pointer
,
12270 offset
+ next_offset
, i
);
12272 = alloc_reg_note (REG_CFA_RESTORE
,
12273 gen_rtx_REG (DFmode
, i
), cfa_restores
);
12276 else if (!TARGET_PACKED_STACK
)
12282 /* Restore call saved gprs. */
12284 if (cfun_frame_layout
.first_restore_gpr
!= -1)
12289 /* Check for global register and save them
12290 to stack location from where they get restored. */
12292 for (i
= cfun_frame_layout
.first_restore_gpr
;
12293 i
<= cfun_frame_layout
.last_restore_gpr
;
12296 if (global_not_special_regno_p (i
))
12298 addr
= plus_constant (Pmode
, frame_pointer
,
12299 offset
+ cfun_frame_layout
.gprs_offset
12300 + (i
- cfun_frame_layout
.first_save_gpr_slot
)
12302 addr
= gen_rtx_MEM (Pmode
, addr
);
12303 set_mem_alias_set (addr
, get_frame_alias_set ());
12304 emit_move_insn (addr
, gen_rtx_REG (Pmode
, i
));
12308 = alloc_reg_note (REG_CFA_RESTORE
,
12309 gen_rtx_REG (Pmode
, i
), cfa_restores
);
12312 /* Fetch return address from stack before load multiple,
12313 this will do good for scheduling.
12315 Only do this if we already decided that r14 needs to be
12316 saved to a stack slot. (And not just because r14 happens to
12317 be in between two GPRs which need saving.) Otherwise it
12318 would be difficult to take that decision back in
12319 s390_optimize_prologue.
12321 This optimization is only helpful on in-order machines. */
12323 && cfun_gpr_save_slot (RETURN_REGNUM
) == SAVE_SLOT_STACK
12324 && s390_tune
<= PROCESSOR_2097_Z10
)
12326 int return_regnum
= find_unused_clobbered_reg();
12328 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
12330 && return_regnum
== INDIRECT_BRANCH_THUNK_REGNUM
))
12332 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM
!= 4);
12335 return_reg
= gen_rtx_REG (Pmode
, return_regnum
);
12337 addr
= plus_constant (Pmode
, frame_pointer
,
12338 offset
+ cfun_frame_layout
.gprs_offset
12340 - cfun_frame_layout
.first_save_gpr_slot
)
12342 addr
= gen_rtx_MEM (Pmode
, addr
);
12343 set_mem_alias_set (addr
, get_frame_alias_set ());
12344 emit_move_insn (return_reg
, addr
);
12346 /* Once we did that optimization we have to make sure
12347 s390_optimize_prologue does not try to remove the store
12348 of r14 since we will not be able to find the load issued
12350 cfun_frame_layout
.save_return_addr_p
= true;
12353 insn
= restore_gprs (frame_pointer
,
12354 offset
+ cfun_frame_layout
.gprs_offset
12355 + (cfun_frame_layout
.first_restore_gpr
12356 - cfun_frame_layout
.first_save_gpr_slot
)
12358 cfun_frame_layout
.first_restore_gpr
,
12359 cfun_frame_layout
.last_restore_gpr
);
12360 insn
= emit_insn (insn
);
12361 REG_NOTES (insn
) = cfa_restores
;
12362 add_reg_note (insn
, REG_CFA_DEF_CFA
,
12363 plus_constant (Pmode
, stack_pointer_rtx
,
12364 STACK_POINTER_OFFSET
));
12365 RTX_FRAME_RELATED_P (insn
) = 1;
12368 s390_restore_gprs_from_fprs ();
12372 if (!return_reg
&& !s390_can_use_return_insn ())
12373 /* We planned to emit (return), be we are not allowed to. */
12374 return_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
12377 /* Emit (return) and (use). */
12378 emit_jump_insn (gen_return_use (return_reg
));
12380 /* The fact that RETURN_REGNUM is used is already reflected by
12381 EPILOGUE_USES. Emit plain (return). */
12382 emit_jump_insn (gen_return ());
12386 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
12389 s300_set_up_by_prologue (hard_reg_set_container
*regs
)
12391 if (cfun
->machine
->base_reg
12392 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
12393 SET_HARD_REG_BIT (regs
->set
, REGNO (cfun
->machine
->base_reg
));
/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  Created lazily (on first use) by
   s390_expand_split_stack_prologue; GTY-marked so it survives garbage
   collection between functions.  */
static GTY(()) rtx morestack_ref;

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  Frames smaller than this can skip the explicit addition
   when comparing against the guard (see s390_expand_split_stack_prologue).  */
#define SPLIT_STACK_AVAILABLE 1024
12407 /* Emit the parmblock for __morestack into .rodata section. It
12408 consists of 3 pointer size entries:
12410 - size of stack arguments
12411 - offset between parm block and __morestack return label */
12414 s390_output_split_stack_data (rtx parm_block
, rtx call_done
,
12415 rtx frame_size
, rtx args_size
)
12417 rtx ops
[] = { parm_block
, call_done
};
12419 switch_to_section (targetm
.asm_out
.function_rodata_section
12420 (current_function_decl
, false));
12423 output_asm_insn (".align\t8", NULL
);
12425 output_asm_insn (".align\t4", NULL
);
12427 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
12428 CODE_LABEL_NUMBER (parm_block
));
12431 output_asm_insn (".quad\t%0", &frame_size
);
12432 output_asm_insn (".quad\t%0", &args_size
);
12433 output_asm_insn (".quad\t%1-%0", ops
);
12437 output_asm_insn (".long\t%0", &frame_size
);
12438 output_asm_insn (".long\t%0", &args_size
);
12439 output_asm_insn (".long\t%1-%0", ops
);
12442 switch_to_section (current_function_section ());
12445 /* Emit -fsplit-stack prologue, which goes before the regular function
12449 s390_expand_split_stack_prologue (void)
12451 rtx r1
, guard
, cc
= NULL
;
12453 /* Offset from thread pointer to __private_ss. */
12454 int psso
= TARGET_64BIT
? 0x38 : 0x20;
12455 /* Pointer size in bytes. */
12456 /* Frame size and argument size - the two parameters to __morestack. */
12457 HOST_WIDE_INT frame_size
= cfun_frame_layout
.frame_size
;
12458 /* Align argument size to 8 bytes - simplifies __morestack code. */
12459 HOST_WIDE_INT args_size
= crtl
->args
.size
>= 0
12460 ? ((crtl
->args
.size
+ 7) & ~7)
12462 /* Label to be called by __morestack. */
12463 rtx_code_label
*call_done
= NULL
;
12464 rtx_code_label
*parm_base
= NULL
;
12467 gcc_assert (flag_split_stack
&& reload_completed
);
12469 r1
= gen_rtx_REG (Pmode
, 1);
12471 /* If no stack frame will be allocated, don't do anything. */
12474 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
12476 /* If va_start is used, just use r15. */
12477 emit_move_insn (r1
,
12478 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
12479 GEN_INT (STACK_POINTER_OFFSET
)));
12485 if (morestack_ref
== NULL_RTX
)
12487 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
12488 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
12489 | SYMBOL_FLAG_FUNCTION
);
12492 if (CONST_OK_FOR_K (frame_size
) || CONST_OK_FOR_Op (frame_size
))
12494 /* If frame_size will fit in an add instruction, do a stack space
12495 check, and only call __morestack if there's not enough space. */
12497 /* Get thread pointer. r1 is the only register we can always destroy - r0
12498 could contain a static chain (and cannot be used to address memory
12499 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
12500 emit_insn (gen_get_thread_pointer (Pmode
, r1
));
12501 /* Aim at __private_ss. */
12502 guard
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, r1
, psso
));
12504 /* If less that 1kiB used, skip addition and compare directly with
12506 if (frame_size
> SPLIT_STACK_AVAILABLE
)
12508 emit_move_insn (r1
, guard
);
12510 emit_insn (gen_adddi3 (r1
, r1
, GEN_INT (frame_size
)));
12512 emit_insn (gen_addsi3 (r1
, r1
, GEN_INT (frame_size
)));
12516 /* Compare the (maybe adjusted) guard with the stack pointer. */
12517 cc
= s390_emit_compare (LT
, stack_pointer_rtx
, guard
);
12520 call_done
= gen_label_rtx ();
12521 parm_base
= gen_label_rtx ();
12522 LABEL_NUSES (parm_base
)++;
12523 LABEL_NUSES (call_done
)++;
12525 /* %r1 = litbase. */
12526 insn
= emit_move_insn (r1
, gen_rtx_LABEL_REF (VOIDmode
, parm_base
));
12527 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
12528 LABEL_NUSES (parm_base
)++;
12530 /* Now, we need to call __morestack. It has very special calling
12531 conventions: it preserves param/return/static chain registers for
12532 calling main function body, and looks for its own parameters at %r1. */
12534 tmp
= gen_split_stack_cond_call (Pmode
,
12538 GEN_INT (frame_size
),
12539 GEN_INT (args_size
),
12542 tmp
= gen_split_stack_call (Pmode
,
12546 GEN_INT (frame_size
),
12547 GEN_INT (args_size
));
12549 insn
= emit_jump_insn (tmp
);
12550 JUMP_LABEL (insn
) = call_done
;
12551 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
12552 add_reg_note (insn
, REG_LABEL_OPERAND
, call_done
);
12556 /* Mark the jump as very unlikely to be taken. */
12557 add_reg_br_prob_note (insn
,
12558 profile_probability::very_unlikely ());
12560 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
12562 /* If va_start is used, and __morestack was not called, just use
12564 emit_move_insn (r1
,
12565 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
12566 GEN_INT (STACK_POINTER_OFFSET
)));
12574 /* __morestack will call us here. */
12576 emit_label (call_done
);
12579 /* We may have to tell the dataflow pass that the split stack prologue
12580 is initializing a register. */
12583 s390_live_on_entry (bitmap regs
)
12585 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
12587 gcc_assert (flag_split_stack
);
12588 bitmap_set_bit (regs
, 1);
/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}
12602 /* Return true if the epilogue is guaranteed to contain only a return
12603 instruction and if a direct return can therefore be used instead.
12604 One of the main advantages of using direct return instructions
12605 is that we can then use conditional returns. */
12608 s390_can_use_return_insn (void)
12612 if (!reload_completed
)
12618 if (TARGET_TPF_PROFILING
)
12621 for (i
= 0; i
< 16; i
++)
12622 if (cfun_gpr_save_slot (i
) != SAVE_SLOT_NONE
)
12625 /* For 31 bit this is not covered by the frame_size check below
12626 since f4, f6 are saved in the register save area without needing
12627 additional stack space. */
12629 && (cfun_fpr_save_p (FPR4_REGNUM
) || cfun_fpr_save_p (FPR6_REGNUM
)))
12632 if (cfun
->machine
->base_reg
12633 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
12636 return cfun_frame_layout
.frame_size
== 0;
12639 /* The VX ABI differs for vararg functions. Therefore we need the
12640 prototype of the callee to be available when passing vector type
12642 static const char *
12643 s390_invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
12645 return ((TARGET_VX_ABI
12647 && VECTOR_TYPE_P (TREE_TYPE (val
))
12648 && (funcdecl
== NULL_TREE
12649 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
12650 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
12651 && !fndecl_built_in_p (funcdecl
, BUILT_IN_CLASSIFY_TYPE
))))
12652 ? N_("vector argument passed to unprototyped function")
12657 /* Return the size in bytes of a function argument of
12658 type TYPE and/or mode MODE. At least one of TYPE or
12659 MODE must be specified. */
12662 s390_function_arg_size (machine_mode mode
, const_tree type
)
12665 return int_size_in_bytes (type
);
12667 /* No type info available for some library calls ... */
12668 if (mode
!= BLKmode
)
12669 return GET_MODE_SIZE (mode
);
12671 /* If we have neither type nor mode, abort */
12672 gcc_unreachable ();
12675 /* Return true if a variable of TYPE should be passed as single value
12676 with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
12677 record type and the field type must match.
12679 The ABI says that record types with a single member are treated
12680 just like that member would be. This function is a helper to
12681 detect such cases. The function also produces the proper
12682 diagnostics for cases where the outcome might be different
12683 depending on the GCC version. */
12685 s390_single_field_struct_p (enum tree_code code
, const_tree type
,
12686 bool strict_size_check_p
)
12688 int empty_base_seen
= 0;
12689 bool zero_width_bf_skipped_p
= false;
12690 const_tree orig_type
= type
;
12692 while (TREE_CODE (type
) == RECORD_TYPE
)
12694 tree field
, single_type
= NULL_TREE
;
12696 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
12698 if (TREE_CODE (field
) != FIELD_DECL
)
12701 if (DECL_FIELD_ABI_IGNORED (field
))
12703 if (lookup_attribute ("no_unique_address",
12704 DECL_ATTRIBUTES (field
)))
12705 empty_base_seen
|= 2;
12707 empty_base_seen
|= 1;
12711 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
12713 zero_width_bf_skipped_p
= true;
12717 if (single_type
== NULL_TREE
)
12718 single_type
= TREE_TYPE (field
);
12723 if (single_type
== NULL_TREE
)
12726 /* Reaching this point we have a struct with a single member and
12727 zero or more zero-sized bit-fields which have been skipped in the
12730 /* If ZERO_WIDTH_BF_SKIPPED_P then the struct will not be accepted. In case
12731 we are not supposed to emit a warning exit early. */
12732 if (zero_width_bf_skipped_p
&& !warn_psabi
)
12735 /* If the field declaration adds extra bytes due to padding this
12736 is not accepted with STRICT_SIZE_CHECK_P. */
12737 if (strict_size_check_p
12738 && (int_size_in_bytes (single_type
) <= 0
12739 || int_size_in_bytes (single_type
) != int_size_in_bytes (type
)))
12742 type
= single_type
;
12745 if (TREE_CODE (type
) != code
)
12750 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (orig_type
));
12752 if (empty_base_seen
)
12754 static unsigned last_reported_type_uid_empty_base
;
12755 if (uid
!= last_reported_type_uid_empty_base
)
12757 last_reported_type_uid_empty_base
= uid
;
12758 const char *url
= CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
12759 if (empty_base_seen
& 1)
12760 inform (input_location
,
12761 "parameter passing for argument of type %qT when C++17 "
12762 "is enabled changed to match C++14 %{in GCC 10.1%}",
12765 inform (input_location
,
12766 "parameter passing for argument of type %qT with "
12767 "%<[[no_unique_address]]%> members changed "
12768 "%{in GCC 10.1%}", orig_type
, url
);
12772 /* For C++ older GCCs ignored zero width bitfields and therefore
12773 passed structs more often as single values than GCC 12 does.
12774 So diagnostics are only required in cases where we do NOT
12775 accept the struct to be passed as single value. */
12776 if (zero_width_bf_skipped_p
)
12778 static unsigned last_reported_type_uid_zero_width
;
12779 if (uid
!= last_reported_type_uid_zero_width
)
12781 last_reported_type_uid_zero_width
= uid
;
12782 inform (input_location
,
12783 "parameter passing for argument of type %qT with "
12784 "zero-width bit fields members changed in GCC 12",
12790 return !zero_width_bf_skipped_p
;
12794 /* Return true if a function argument of type TYPE and mode MODE
12795 is to be passed in a vector register, if available. */
12798 s390_function_arg_vector (machine_mode mode
, const_tree type
)
12800 if (!TARGET_VX_ABI
)
12803 if (s390_function_arg_size (mode
, type
) > 16)
12806 /* No type info available for some library calls ... */
12808 return VECTOR_MODE_P (mode
);
12810 if (!s390_single_field_struct_p (VECTOR_TYPE
, type
, true))
12816 /* Return true if a function argument of type TYPE and mode MODE
12817 is to be passed in a floating-point register, if available. */
12820 s390_function_arg_float (machine_mode mode
, const_tree type
)
12822 if (s390_function_arg_size (mode
, type
) > 8)
12825 /* Soft-float changes the ABI: no floating-point registers are used. */
12826 if (TARGET_SOFT_FLOAT
)
12829 /* No type info available for some library calls ... */
12831 return mode
== SFmode
|| mode
== DFmode
|| mode
== SDmode
|| mode
== DDmode
;
12833 if (!s390_single_field_struct_p (REAL_TYPE
, type
, false))
12839 /* Return true if a function argument of type TYPE and mode MODE
12840 is to be passed in an integer register, or a pair of integer
12841 registers, if available. */
12844 s390_function_arg_integer (machine_mode mode
, const_tree type
)
12846 int size
= s390_function_arg_size (mode
, type
);
12850 /* No type info available for some library calls ... */
12852 return GET_MODE_CLASS (mode
) == MODE_INT
12853 || (TARGET_SOFT_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
));
12855 /* We accept small integral (and similar) types. */
12856 if (INTEGRAL_TYPE_P (type
)
12857 || POINTER_TYPE_P (type
)
12858 || TREE_CODE (type
) == NULLPTR_TYPE
12859 || TREE_CODE (type
) == OFFSET_TYPE
12860 || (TARGET_SOFT_FLOAT
&& SCALAR_FLOAT_TYPE_P (type
)))
12863 /* We also accept structs of size 1, 2, 4, 8 that are not
12864 passed in floating-point registers. */
12865 if (AGGREGATE_TYPE_P (type
)
12866 && exact_log2 (size
) >= 0
12867 && !s390_function_arg_float (mode
, type
))
12873 /* Return 1 if a function argument ARG is to be passed by reference.
12874 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12875 are passed by value, all other structures (and complex numbers) are
12876 passed by reference. */
12879 s390_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
12881 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12883 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12889 if (tree type
= arg
.type
)
12891 if (AGGREGATE_TYPE_P (type
) && exact_log2 (size
) < 0)
12894 if (TREE_CODE (type
) == COMPLEX_TYPE
12895 || TREE_CODE (type
) == VECTOR_TYPE
)
12902 /* Update the data in CUM to advance over argument ARG. */
12905 s390_function_arg_advance (cumulative_args_t cum_v
,
12906 const function_arg_info
&arg
)
12908 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12910 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12912 /* We are called for unnamed vector stdarg arguments which are
12913 passed on the stack. In this case this hook does not have to
12914 do anything since stack arguments are tracked by common
12920 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12924 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12926 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12927 cum
->gprs
+= ((size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
);
12930 gcc_unreachable ();
12933 /* Define where to put the arguments to a function.
12934 Value is zero to push the argument on the stack,
12935 or a hard register in which to store the argument.
12937 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12938 the preceding args and about the function being called.
12939 ARG is a description of the argument.
12941 On S/390, we use general purpose registers 2 through 6 to
12942 pass integer, pointer, and certain structure arguments, and
12943 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12944 to pass floating point arguments. All remaining arguments
12945 are pushed to the stack. */
12948 s390_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
12950 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12953 s390_check_type_for_vector_abi (arg
.type
, true, false);
12955 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12957 /* Vector arguments being part of the ellipsis are passed on the
12959 if (!arg
.named
|| (cum
->vrs
+ 1 > VEC_ARG_NUM_REG
))
12962 return gen_rtx_REG (arg
.mode
, cum
->vrs
+ FIRST_VEC_ARG_REGNO
);
12964 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12966 if (cum
->fprs
+ 1 > FP_ARG_NUM_REG
)
12969 return gen_rtx_REG (arg
.mode
, cum
->fprs
+ 16);
12971 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12973 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12974 int n_gprs
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12976 if (cum
->gprs
+ n_gprs
> GP_ARG_NUM_REG
)
12978 else if (n_gprs
== 1 || UNITS_PER_WORD
== UNITS_PER_LONG
)
12979 return gen_rtx_REG (arg
.mode
, cum
->gprs
+ 2);
12980 else if (n_gprs
== 2)
12982 rtvec p
= rtvec_alloc (2);
12985 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 2),
12988 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 3),
12991 return gen_rtx_PARALLEL (arg
.mode
, p
);
12995 /* After the real arguments, expand_call calls us once again with an
12996 end marker. Whatever we return here is passed as operand 2 to the
12999 We don't need this feature ... */
13000 else if (arg
.end_marker_p ())
13003 gcc_unreachable ();
13006 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
13007 left-justified when placed on the stack during parameter passing. */
13009 static pad_direction
13010 s390_function_arg_padding (machine_mode mode
, const_tree type
)
13012 if (s390_function_arg_vector (mode
, type
))
13015 return default_function_arg_padding (mode
, type
);
13018 /* Return true if return values of type TYPE should be returned
13019 in a memory buffer whose address is passed by the caller as
13020 hidden first argument. */
13023 s390_return_in_memory (const_tree type
, const_tree fundecl ATTRIBUTE_UNUSED
)
13025 /* We accept small integral (and similar) types. */
13026 if (INTEGRAL_TYPE_P (type
)
13027 || POINTER_TYPE_P (type
)
13028 || TREE_CODE (type
) == OFFSET_TYPE
13029 || SCALAR_FLOAT_TYPE_P (type
))
13030 return int_size_in_bytes (type
) > 8;
13032 /* vector types which fit into a VR. */
13034 && VECTOR_TYPE_P (type
)
13035 && int_size_in_bytes (type
) <= 16)
13038 /* Aggregates and similar constructs are always returned
13040 if (AGGREGATE_TYPE_P (type
)
13041 || TREE_CODE (type
) == COMPLEX_TYPE
13042 || VECTOR_TYPE_P (type
))
13045 /* ??? We get called on all sorts of random stuff from
13046 aggregate_value_p. We can't abort, but it's not clear
13047 what's safe to return. Pretend it's a struct I guess. */
13051 /* Function arguments and return values are promoted to word size. */
13053 static machine_mode
13054 s390_promote_function_mode (const_tree type
, machine_mode mode
,
13056 const_tree fntype ATTRIBUTE_UNUSED
,
13057 int for_return ATTRIBUTE_UNUSED
)
13059 if (INTEGRAL_MODE_P (mode
)
13060 && GET_MODE_SIZE (mode
) < UNITS_PER_LONG
)
13062 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
13063 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
13070 /* Define where to return a (scalar) value of type RET_TYPE.
13071 If RET_TYPE is null, define where to return a (scalar)
13072 value of mode MODE from a libcall. */
13075 s390_function_and_libcall_value (machine_mode mode
,
13076 const_tree ret_type
,
13077 const_tree fntype_or_decl
,
13078 bool outgoing ATTRIBUTE_UNUSED
)
13080 /* For vector return types it is important to use the RET_TYPE
13081 argument whenever available since the middle-end might have
13082 changed the mode to a scalar mode. */
13083 bool vector_ret_type_p
= ((ret_type
&& VECTOR_TYPE_P (ret_type
))
13084 || (!ret_type
&& VECTOR_MODE_P (mode
)));
13086 /* For normal functions perform the promotion as
13087 promote_function_mode would do. */
13090 int unsignedp
= TYPE_UNSIGNED (ret_type
);
13091 mode
= promote_function_mode (ret_type
, mode
, &unsignedp
,
13092 fntype_or_decl
, 1);
13095 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
13096 || SCALAR_FLOAT_MODE_P (mode
)
13097 || (TARGET_VX_ABI
&& vector_ret_type_p
));
13098 gcc_assert (GET_MODE_SIZE (mode
) <= (TARGET_VX_ABI
? 16 : 8));
13100 if (TARGET_VX_ABI
&& vector_ret_type_p
)
13101 return gen_rtx_REG (mode
, FIRST_VEC_ARG_REGNO
);
13102 else if (TARGET_HARD_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
))
13103 return gen_rtx_REG (mode
, 16);
13104 else if (GET_MODE_SIZE (mode
) <= UNITS_PER_LONG
13105 || UNITS_PER_LONG
== UNITS_PER_WORD
)
13106 return gen_rtx_REG (mode
, 2);
13107 else if (GET_MODE_SIZE (mode
) == 2 * UNITS_PER_LONG
)
13109 /* This case is triggered when returning a 64 bit value with
13110 -m31 -mzarch. Although the value would fit into a single
13111 register it has to be forced into a 32 bit register pair in
13112 order to match the ABI. */
13113 rtvec p
= rtvec_alloc (2);
13116 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 2), const0_rtx
);
13118 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 3), GEN_INT (4));
13120 return gen_rtx_PARALLEL (mode
, p
);
13123 gcc_unreachable ();
13126 /* Define where to return a scalar return value of type RET_TYPE. */
13129 s390_function_value (const_tree ret_type
, const_tree fn_decl_or_type
,
13132 return s390_function_and_libcall_value (TYPE_MODE (ret_type
), ret_type
,
13133 fn_decl_or_type
, outgoing
);
13136 /* Define where to return a scalar libcall return value of mode
13140 s390_libcall_value (machine_mode mode
, const_rtx fun ATTRIBUTE_UNUSED
)
13142 return s390_function_and_libcall_value (mode
, NULL_TREE
,
13147 /* Create and return the va_list datatype.
13149 On S/390, va_list is an array type equivalent to
13151 typedef struct __va_list_tag
13155 void *__overflow_arg_area;
13156 void *__reg_save_area;
13159 where __gpr and __fpr hold the number of general purpose
13160 or floating point arguments used up to now, respectively,
13161 __overflow_arg_area points to the stack location of the
13162 next argument passed on the stack, and __reg_save_area
13163 always points to the start of the register area in the
13164 call frame of the current function. The function prologue
13165 saves all registers used for argument passing into this
13166 area if the function uses variable arguments. */
13169 s390_build_builtin_va_list (void)
13171 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
13173 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
13176 build_decl (BUILTINS_LOCATION
,
13177 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
13179 f_gpr
= build_decl (BUILTINS_LOCATION
,
13180 FIELD_DECL
, get_identifier ("__gpr"),
13181 long_integer_type_node
);
13182 f_fpr
= build_decl (BUILTINS_LOCATION
,
13183 FIELD_DECL
, get_identifier ("__fpr"),
13184 long_integer_type_node
);
13185 f_ovf
= build_decl (BUILTINS_LOCATION
,
13186 FIELD_DECL
, get_identifier ("__overflow_arg_area"),
13188 f_sav
= build_decl (BUILTINS_LOCATION
,
13189 FIELD_DECL
, get_identifier ("__reg_save_area"),
13192 va_list_gpr_counter_field
= f_gpr
;
13193 va_list_fpr_counter_field
= f_fpr
;
13195 DECL_FIELD_CONTEXT (f_gpr
) = record
;
13196 DECL_FIELD_CONTEXT (f_fpr
) = record
;
13197 DECL_FIELD_CONTEXT (f_ovf
) = record
;
13198 DECL_FIELD_CONTEXT (f_sav
) = record
;
13200 TYPE_STUB_DECL (record
) = type_decl
;
13201 TYPE_NAME (record
) = type_decl
;
13202 TYPE_FIELDS (record
) = f_gpr
;
13203 DECL_CHAIN (f_gpr
) = f_fpr
;
13204 DECL_CHAIN (f_fpr
) = f_ovf
;
13205 DECL_CHAIN (f_ovf
) = f_sav
;
13207 layout_type (record
);
13209 /* The correct type is an array type of one element. */
13210 return build_array_type (record
, build_index_type (size_zero_node
));
13213 /* Implement va_start by filling the va_list structure VALIST.
13214 STDARG_P is always true, and ignored.
13215 NEXTARG points to the first anonymous stack argument.
13217 The following global variables are used to initialize
13218 the va_list structure:
13221 holds number of gprs and fprs used for named arguments.
13222 crtl->args.arg_offset_rtx:
13223 holds the offset of the first anonymous stack argument
13224 (relative to the virtual arg pointer). */
13227 s390_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
13229 HOST_WIDE_INT n_gpr
, n_fpr
;
13231 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
13232 tree gpr
, fpr
, ovf
, sav
, t
;
13234 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13235 f_fpr
= DECL_CHAIN (f_gpr
);
13236 f_ovf
= DECL_CHAIN (f_fpr
);
13237 f_sav
= DECL_CHAIN (f_ovf
);
13239 valist
= build_simple_mem_ref (valist
);
13240 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13241 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
13242 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
13243 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
13245 /* Count number of gp and fp argument registers used. */
13247 n_gpr
= crtl
->args
.info
.gprs
;
13248 n_fpr
= crtl
->args
.info
.fprs
;
13250 if (cfun
->va_list_gpr_size
)
13252 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
13253 build_int_cst (NULL_TREE
, n_gpr
));
13254 TREE_SIDE_EFFECTS (t
) = 1;
13255 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13258 if (cfun
->va_list_fpr_size
)
13260 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
13261 build_int_cst (NULL_TREE
, n_fpr
));
13262 TREE_SIDE_EFFECTS (t
) = 1;
13263 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13266 if (flag_split_stack
13267 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
13269 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
13274 reg
= gen_reg_rtx (Pmode
);
13275 cfun
->machine
->split_stack_varargs_pointer
= reg
;
13278 emit_move_insn (reg
, gen_rtx_REG (Pmode
, 1));
13279 seq
= get_insns ();
13282 push_topmost_sequence ();
13283 emit_insn_after (seq
, entry_of_function ());
13284 pop_topmost_sequence ();
13287 /* Find the overflow area.
13288 FIXME: This currently is too pessimistic when the vector ABI is
13289 enabled. In that case we *always* set up the overflow area
13291 if (n_gpr
+ cfun
->va_list_gpr_size
> GP_ARG_NUM_REG
13292 || n_fpr
+ cfun
->va_list_fpr_size
> FP_ARG_NUM_REG
13295 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
13296 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
13298 t
= make_tree (TREE_TYPE (ovf
), cfun
->machine
->split_stack_varargs_pointer
);
13300 off
= INTVAL (crtl
->args
.arg_offset_rtx
);
13301 off
= off
< 0 ? 0 : off
;
13302 if (TARGET_DEBUG_ARG
)
13303 fprintf (stderr
, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
13304 (int)n_gpr
, (int)n_fpr
, off
);
13306 t
= fold_build_pointer_plus_hwi (t
, off
);
13308 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
13309 TREE_SIDE_EFFECTS (t
) = 1;
13310 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13313 /* Find the register save area. */
13314 if ((cfun
->va_list_gpr_size
&& n_gpr
< GP_ARG_NUM_REG
)
13315 || (cfun
->va_list_fpr_size
&& n_fpr
< FP_ARG_NUM_REG
))
13317 t
= make_tree (TREE_TYPE (sav
), return_address_pointer_rtx
);
13318 t
= fold_build_pointer_plus_hwi (t
, -RETURN_REGNUM
* UNITS_PER_LONG
);
13320 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
13321 TREE_SIDE_EFFECTS (t
) = 1;
13322 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13326 /* Implement va_arg by updating the va_list structure
13327 VALIST as required to retrieve an argument of type
13328 TYPE, and returning that argument.
13330 Generates code equivalent to:
13332 if (integral value) {
13333 if (size <= 4 && args.gpr < 5 ||
13334 size > 4 && args.gpr < 4 )
13335 ret = args.reg_save_area[args.gpr+8]
13337 ret = *args.overflow_arg_area++;
13338 } else if (vector value) {
13339 ret = *args.overflow_arg_area;
13340 args.overflow_arg_area += size / 8;
13341 } else if (float value) {
13343 ret = args.reg_save_area[args.fpr+64]
13345 ret = *args.overflow_arg_area++;
13346 } else if (aggregate value) {
13348 ret = *args.reg_save_area[args.gpr]
13350 ret = **args.overflow_arg_area++;
13354 s390_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
13355 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
13357 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
13358 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
13359 int indirect_p
, size
, n_reg
, sav_ofs
, sav_scale
, max_reg
;
13360 tree lab_false
, lab_over
= NULL_TREE
;
13361 tree addr
= create_tmp_var (ptr_type_node
, "addr");
13362 bool left_align_p
; /* How a value < UNITS_PER_LONG is aligned within
13365 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13366 f_fpr
= DECL_CHAIN (f_gpr
);
13367 f_ovf
= DECL_CHAIN (f_fpr
);
13368 f_sav
= DECL_CHAIN (f_ovf
);
13370 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13371 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
13372 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
13374 /* The tree for args* cannot be shared between gpr/fpr and ovf since
13375 both appear on a lhs. */
13376 valist
= unshare_expr (valist
);
13377 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
13379 size
= int_size_in_bytes (type
);
13381 s390_check_type_for_vector_abi (type
, true, false);
13383 if (pass_va_arg_by_reference (type
))
13385 if (TARGET_DEBUG_ARG
)
13387 fprintf (stderr
, "va_arg: aggregate type");
13391 /* Aggregates are passed by reference. */
13396 /* kernel stack layout on 31 bit: It is assumed here that no padding
13397 will be added by s390_frame_info because for va_args always an even
13398 number of gprs has to be saved r15-r2 = 14 regs. */
13399 sav_ofs
= 2 * UNITS_PER_LONG
;
13400 sav_scale
= UNITS_PER_LONG
;
13401 size
= UNITS_PER_LONG
;
13402 max_reg
= GP_ARG_NUM_REG
- n_reg
;
13403 left_align_p
= false;
13405 else if (s390_function_arg_vector (TYPE_MODE (type
), type
))
13407 if (TARGET_DEBUG_ARG
)
13409 fprintf (stderr
, "va_arg: vector type");
13419 left_align_p
= true;
13421 else if (s390_function_arg_float (TYPE_MODE (type
), type
))
13423 if (TARGET_DEBUG_ARG
)
13425 fprintf (stderr
, "va_arg: float type");
13429 /* FP args go in FP registers, if present. */
13433 sav_ofs
= 16 * UNITS_PER_LONG
;
13435 max_reg
= FP_ARG_NUM_REG
- n_reg
;
13436 left_align_p
= false;
13440 if (TARGET_DEBUG_ARG
)
13442 fprintf (stderr
, "va_arg: other type");
13446 /* Otherwise into GP registers. */
13449 n_reg
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
13451 /* kernel stack layout on 31 bit: It is assumed here that no padding
13452 will be added by s390_frame_info because for va_args always an even
13453 number of gprs has to be saved r15-r2 = 14 regs. */
13454 sav_ofs
= 2 * UNITS_PER_LONG
;
13456 if (size
< UNITS_PER_LONG
)
13457 sav_ofs
+= UNITS_PER_LONG
- size
;
13459 sav_scale
= UNITS_PER_LONG
;
13460 max_reg
= GP_ARG_NUM_REG
- n_reg
;
13461 left_align_p
= false;
13464 /* Pull the value out of the saved registers ... */
13466 if (reg
!= NULL_TREE
)
13469 if (reg > ((typeof (reg))max_reg))
13472 addr = sav + sav_ofs + reg * save_scale;
13479 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
13480 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
13482 t
= fold_convert (TREE_TYPE (reg
), size_int (max_reg
));
13483 t
= build2 (GT_EXPR
, boolean_type_node
, reg
, t
);
13484 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
13485 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
13486 gimplify_and_add (t
, pre_p
);
13488 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
13489 u
= build2 (MULT_EXPR
, TREE_TYPE (reg
), reg
,
13490 fold_convert (TREE_TYPE (reg
), size_int (sav_scale
)));
13491 t
= fold_build_pointer_plus (t
, u
);
13493 gimplify_assign (addr
, t
, pre_p
);
13495 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
13497 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
13500 /* ... Otherwise out of the overflow area. */
13503 if (size
< UNITS_PER_LONG
&& !left_align_p
)
13504 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
- size
);
13506 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
13508 gimplify_assign (addr
, t
, pre_p
);
13510 if (size
< UNITS_PER_LONG
&& left_align_p
)
13511 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
);
13513 t
= fold_build_pointer_plus_hwi (t
, size
);
13515 gimplify_assign (ovf
, t
, pre_p
);
13517 if (reg
!= NULL_TREE
)
13518 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
13521 /* Increment register save count. */
13525 u
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (reg
), reg
,
13526 fold_convert (TREE_TYPE (reg
), size_int (n_reg
)));
13527 gimplify_and_add (u
, pre_p
);
13532 t
= build_pointer_type_for_mode (build_pointer_type (type
),
13534 addr
= fold_convert (t
, addr
);
13535 addr
= build_va_arg_indirect_ref (addr
);
13539 t
= build_pointer_type_for_mode (type
, ptr_mode
, true);
13540 addr
= fold_convert (t
, addr
);
13543 return build_va_arg_indirect_ref (addr
);
13546 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
13548 DEST - Register location where CC will be stored.
13549 TDB - Pointer to a 256 byte area where to store the transaction.
13550 diagnostic block. NULL if TDB is not needed.
13551 RETRY - Retry count value. If non-NULL a retry loop for CC2
13553 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
13554 of the tbegin instruction pattern. */
13557 s390_expand_tbegin (rtx dest
, rtx tdb
, rtx retry
, bool clobber_fprs_p
)
13559 rtx retry_plus_two
= gen_reg_rtx (SImode
);
13560 rtx retry_reg
= gen_reg_rtx (SImode
);
13561 rtx_code_label
*retry_label
= NULL
;
13563 if (retry
!= NULL_RTX
)
13565 emit_move_insn (retry_reg
, retry
);
13566 emit_insn (gen_addsi3 (retry_plus_two
, retry_reg
, const2_rtx
));
13567 emit_insn (gen_addsi3 (retry_reg
, retry_reg
, const1_rtx
));
13568 retry_label
= gen_label_rtx ();
13569 emit_label (retry_label
);
13572 if (clobber_fprs_p
)
13575 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
13578 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
13582 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
13585 emit_move_insn (dest
, gen_rtx_UNSPEC (SImode
,
13586 gen_rtvec (1, gen_rtx_REG (CCRAWmode
,
13588 UNSPEC_CC_TO_INT
));
13589 if (retry
!= NULL_RTX
)
13591 const int CC0
= 1 << 3;
13592 const int CC1
= 1 << 2;
13593 const int CC3
= 1 << 0;
13595 rtx count
= gen_reg_rtx (SImode
);
13596 rtx_code_label
*leave_label
= gen_label_rtx ();
13598 /* Exit for success and permanent failures. */
13599 jump
= s390_emit_jump (leave_label
,
13600 gen_rtx_EQ (VOIDmode
,
13601 gen_rtx_REG (CCRAWmode
, CC_REGNUM
),
13602 gen_rtx_CONST_INT (VOIDmode
, CC0
| CC1
| CC3
)));
13603 LABEL_NUSES (leave_label
) = 1;
13605 /* CC2 - transient failure. Perform retry with ppa. */
13606 emit_move_insn (count
, retry_plus_two
);
13607 emit_insn (gen_subsi3 (count
, count
, retry_reg
));
13608 emit_insn (gen_tx_assist (count
));
13609 jump
= emit_jump_insn (gen_doloop_si64 (retry_label
,
13612 JUMP_LABEL (jump
) = retry_label
;
13613 LABEL_NUSES (retry_label
) = 1;
13614 emit_label (leave_label
);
13619 /* Return the decl for the target specific builtin with the function
13623 s390_builtin_decl (unsigned fcode
, bool initialized_p ATTRIBUTE_UNUSED
)
13625 if (fcode
>= S390_BUILTIN_MAX
)
13626 return error_mark_node
;
13628 return s390_builtin_decls
[fcode
];
13631 /* We call mcount before the function prologue. So a profiled leaf
13632 function should stay a leaf function. */
13635 s390_keep_leaf_when_profiled ()
13640 /* Output assembly code for the trampoline template to
13643 On S/390, we use gpr 1 internally in the trampoline code;
13644 gpr 0 is used to hold the static chain. */
13647 s390_asm_trampoline_template (FILE *file
)
13650 op
[0] = gen_rtx_REG (Pmode
, 0);
13651 op
[1] = gen_rtx_REG (Pmode
, 1);
13655 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
13656 output_asm_insn ("lmg\t%0,%1,14(%1)", op
); /* 6 byte */
13657 output_asm_insn ("br\t%1", op
); /* 2 byte */
13658 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 10));
13662 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
13663 output_asm_insn ("lm\t%0,%1,6(%1)", op
); /* 4 byte */
13664 output_asm_insn ("br\t%1", op
); /* 2 byte */
13665 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 8));
13669 /* Emit RTL insns to initialize the variable parts of a trampoline.
13670 FNADDR is an RTX for the address of the function's pure code.
13671 CXT is an RTX for the static chain value for the function. */
13674 s390_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
13676 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
13679 emit_block_move (m_tramp
, assemble_trampoline_template (),
13680 GEN_INT (2 * UNITS_PER_LONG
), BLOCK_OP_NORMAL
);
13682 mem
= adjust_address (m_tramp
, Pmode
, 2 * UNITS_PER_LONG
);
13683 emit_move_insn (mem
, cxt
);
13684 mem
= adjust_address (m_tramp
, Pmode
, 3 * UNITS_PER_LONG
);
13685 emit_move_insn (mem
, fnaddr
);
13689 output_asm_nops (const char *user
, int hw
)
13691 asm_fprintf (asm_out_file
, "\t# NOPs for %s (%d halfwords)\n", user
, hw
);
13696 output_asm_insn ("brcl\t0,0", NULL
);
13701 output_asm_insn ("bc\t0,0", NULL
);
13706 output_asm_insn ("bcr\t0,0", NULL
);
13712 /* Output assembler code to FILE to call a profiler hook. */
13715 s390_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
13719 fprintf (file
, "# function profiler \n");
13721 op
[0] = gen_rtx_REG (Pmode
, RETURN_REGNUM
);
13722 op
[1] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
13723 op
[1] = gen_rtx_MEM (Pmode
, plus_constant (Pmode
, op
[1], UNITS_PER_LONG
));
13724 op
[3] = GEN_INT (UNITS_PER_LONG
);
13726 op
[2] = gen_rtx_SYMBOL_REF (Pmode
, flag_fentry
? "__fentry__" : "_mcount");
13727 SYMBOL_REF_FLAGS (op
[2]) |= SYMBOL_FLAG_FUNCTION
;
13728 if (flag_pic
&& !TARGET_64BIT
)
13730 op
[2] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[2]), UNSPEC_PLT31
);
13731 op
[2] = gen_rtx_CONST (Pmode
, op
[2]);
13734 if (flag_record_mcount
)
13735 fprintf (file
, "1:\n");
13739 if (flag_nop_mcount
)
13740 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13741 else if (cfun
->static_chain_decl
)
13742 warning (OPT_Wcannot_profile
, "nested functions cannot be profiled "
13743 "with %<-mfentry%> on s390");
13745 output_asm_insn ("brasl\t0,%2%K2", op
);
13747 else if (TARGET_64BIT
)
13749 if (flag_nop_mcount
)
13750 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3 +
13754 output_asm_insn ("stg\t%0,%1", op
);
13755 if (flag_dwarf2_cfi_asm
)
13756 output_asm_insn (".cfi_rel_offset\t%0,%3", op
);
13757 output_asm_insn ("brasl\t%0,%2%K2", op
);
13758 output_asm_insn ("lg\t%0,%1", op
);
13759 if (flag_dwarf2_cfi_asm
)
13760 output_asm_insn (".cfi_restore\t%0", op
);
13765 if (flag_nop_mcount
)
13766 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3 +
13770 output_asm_insn ("st\t%0,%1", op
);
13771 if (flag_dwarf2_cfi_asm
)
13772 output_asm_insn (".cfi_rel_offset\t%0,%3", op
);
13773 output_asm_insn ("brasl\t%0,%2%K2", op
);
13774 output_asm_insn ("l\t%0,%1", op
);
13775 if (flag_dwarf2_cfi_asm
)
13776 output_asm_insn (".cfi_restore\t%0", op
);
13780 if (flag_record_mcount
)
13782 fprintf (file
, "\t.section __mcount_loc, \"a\",@progbits\n");
13783 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
13784 fprintf (file
, "\t.previous\n");
13788 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13789 into its SYMBOL_REF_FLAGS. */
13792 s390_encode_section_info (tree decl
, rtx rtl
, int first
)
13794 default_encode_section_info (decl
, rtl
, first
);
13798 /* Store the alignment to be able to check if we can use
13799 a larl/load-relative instruction. We only handle the cases
13800 that can go wrong (i.e. no FUNC_DECLs).
13801 All symbols without an explicit alignment are assumed to be 2
13802 byte aligned as mandated by our ABI. This behavior can be
13803 overridden for external symbols with the -munaligned-symbols
13805 if ((DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) % 16)
13806 || (s390_unaligned_symbols_p
&& !decl_binds_to_current_def_p (decl
)))
13807 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13808 else if (DECL_ALIGN (decl
) % 32)
13809 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13810 else if (DECL_ALIGN (decl
) % 64)
13811 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13814 /* Literal pool references don't have a decl so they are handled
13815 differently here. We rely on the information in the MEM_ALIGN
13816 entry to decide upon the alignment. */
13818 && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
13819 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl
, 0)))
13821 if (MEM_ALIGN (rtl
) == 0 || MEM_ALIGN (rtl
) % 16)
13822 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13823 else if (MEM_ALIGN (rtl
) % 32)
13824 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13825 else if (MEM_ALIGN (rtl
) % 64)
13826 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13830 /* Output thunk to FILE that implements a C++ virtual function call (with
13831 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13832 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13833 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13834 relative to the resulting this pointer. */
13837 s390_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
13838 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
13841 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
13845 assemble_start_function (thunk
, fnname
);
13846 /* Make sure unwind info is emitted for the thunk if needed. */
13847 final_start_function (emit_barrier (), file
, 1);
13849 /* Operand 0 is the target function. */
13850 op
[0] = XEXP (DECL_RTL (function
), 0);
13851 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (op
[0]))
13856 op
[0] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[0]), UNSPEC_GOT
);
13857 op
[0] = gen_rtx_CONST (Pmode
, op
[0]);
13861 /* Operand 1 is the 'this' pointer. */
13862 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
13863 op
[1] = gen_rtx_REG (Pmode
, 3);
13865 op
[1] = gen_rtx_REG (Pmode
, 2);
13867 /* Operand 2 is the delta. */
13868 op
[2] = GEN_INT (delta
);
13870 /* Operand 3 is the vcall_offset. */
13871 op
[3] = GEN_INT (vcall_offset
);
13873 /* Operand 4 is the temporary register. */
13874 op
[4] = gen_rtx_REG (Pmode
, 1);
13876 /* Operands 5 to 8 can be used as labels. */
13882 /* Operand 9 can be used for temporary register. */
13885 /* Generate code. */
13888 /* Setup literal pool pointer if required. */
13889 if ((!DISP_IN_RANGE (delta
)
13890 && !CONST_OK_FOR_K (delta
)
13891 && !CONST_OK_FOR_Os (delta
))
13892 || (!DISP_IN_RANGE (vcall_offset
)
13893 && !CONST_OK_FOR_K (vcall_offset
)
13894 && !CONST_OK_FOR_Os (vcall_offset
)))
13896 op
[5] = gen_label_rtx ();
13897 output_asm_insn ("larl\t%4,%5", op
);
13900 /* Add DELTA to this pointer. */
13903 if (CONST_OK_FOR_J (delta
))
13904 output_asm_insn ("la\t%1,%2(%1)", op
);
13905 else if (DISP_IN_RANGE (delta
))
13906 output_asm_insn ("lay\t%1,%2(%1)", op
);
13907 else if (CONST_OK_FOR_K (delta
))
13908 output_asm_insn ("aghi\t%1,%2", op
);
13909 else if (CONST_OK_FOR_Os (delta
))
13910 output_asm_insn ("agfi\t%1,%2", op
);
13913 op
[6] = gen_label_rtx ();
13914 output_asm_insn ("agf\t%1,%6-%5(%4)", op
);
13918 /* Perform vcall adjustment. */
13921 if (DISP_IN_RANGE (vcall_offset
))
13923 output_asm_insn ("lg\t%4,0(%1)", op
);
13924 output_asm_insn ("ag\t%1,%3(%4)", op
);
13926 else if (CONST_OK_FOR_K (vcall_offset
))
13928 output_asm_insn ("lghi\t%4,%3", op
);
13929 output_asm_insn ("ag\t%4,0(%1)", op
);
13930 output_asm_insn ("ag\t%1,0(%4)", op
);
13932 else if (CONST_OK_FOR_Os (vcall_offset
))
13934 output_asm_insn ("lgfi\t%4,%3", op
);
13935 output_asm_insn ("ag\t%4,0(%1)", op
);
13936 output_asm_insn ("ag\t%1,0(%4)", op
);
13940 op
[7] = gen_label_rtx ();
13941 output_asm_insn ("llgf\t%4,%7-%5(%4)", op
);
13942 output_asm_insn ("ag\t%4,0(%1)", op
);
13943 output_asm_insn ("ag\t%1,0(%4)", op
);
13947 /* Jump to target. */
13948 output_asm_insn ("jg\t%0%K0", op
);
13950 /* Output literal pool if required. */
13953 output_asm_insn (".align\t4", op
);
13954 targetm
.asm_out
.internal_label (file
, "L",
13955 CODE_LABEL_NUMBER (op
[5]));
13959 targetm
.asm_out
.internal_label (file
, "L",
13960 CODE_LABEL_NUMBER (op
[6]));
13961 output_asm_insn (".long\t%2", op
);
13965 targetm
.asm_out
.internal_label (file
, "L",
13966 CODE_LABEL_NUMBER (op
[7]));
13967 output_asm_insn (".long\t%3", op
);
13972 /* Setup base pointer if required. */
13974 || (!DISP_IN_RANGE (delta
)
13975 && !CONST_OK_FOR_K (delta
)
13976 && !CONST_OK_FOR_Os (delta
))
13977 || (!DISP_IN_RANGE (delta
)
13978 && !CONST_OK_FOR_K (vcall_offset
)
13979 && !CONST_OK_FOR_Os (vcall_offset
)))
13981 op
[5] = gen_label_rtx ();
13982 output_asm_insn ("basr\t%4,0", op
);
13983 targetm
.asm_out
.internal_label (file
, "L",
13984 CODE_LABEL_NUMBER (op
[5]));
13987 /* Add DELTA to this pointer. */
13990 if (CONST_OK_FOR_J (delta
))
13991 output_asm_insn ("la\t%1,%2(%1)", op
);
13992 else if (DISP_IN_RANGE (delta
))
13993 output_asm_insn ("lay\t%1,%2(%1)", op
);
13994 else if (CONST_OK_FOR_K (delta
))
13995 output_asm_insn ("ahi\t%1,%2", op
);
13996 else if (CONST_OK_FOR_Os (delta
))
13997 output_asm_insn ("afi\t%1,%2", op
);
14000 op
[6] = gen_label_rtx ();
14001 output_asm_insn ("a\t%1,%6-%5(%4)", op
);
14005 /* Perform vcall adjustment. */
14008 if (CONST_OK_FOR_J (vcall_offset
))
14010 output_asm_insn ("l\t%4,0(%1)", op
);
14011 output_asm_insn ("a\t%1,%3(%4)", op
);
14013 else if (DISP_IN_RANGE (vcall_offset
))
14015 output_asm_insn ("l\t%4,0(%1)", op
);
14016 output_asm_insn ("ay\t%1,%3(%4)", op
);
14018 else if (CONST_OK_FOR_K (vcall_offset
))
14020 output_asm_insn ("lhi\t%4,%3", op
);
14021 output_asm_insn ("a\t%4,0(%1)", op
);
14022 output_asm_insn ("a\t%1,0(%4)", op
);
14024 else if (CONST_OK_FOR_Os (vcall_offset
))
14026 output_asm_insn ("iilf\t%4,%3", op
);
14027 output_asm_insn ("a\t%4,0(%1)", op
);
14028 output_asm_insn ("a\t%1,0(%4)", op
);
14032 op
[7] = gen_label_rtx ();
14033 output_asm_insn ("l\t%4,%7-%5(%4)", op
);
14034 output_asm_insn ("a\t%4,0(%1)", op
);
14035 output_asm_insn ("a\t%1,0(%4)", op
);
14038 /* We had to clobber the base pointer register.
14039 Re-setup the base pointer (with a different base). */
14040 op
[5] = gen_label_rtx ();
14041 output_asm_insn ("basr\t%4,0", op
);
14042 targetm
.asm_out
.internal_label (file
, "L",
14043 CODE_LABEL_NUMBER (op
[5]));
14046 /* Jump to target. */
14047 op
[8] = gen_label_rtx ();
14050 output_asm_insn ("l\t%4,%8-%5(%4)", op
);
14051 else if (!nonlocal
)
14052 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
14053 /* We cannot call through .plt, since .plt requires %r12 loaded. */
14054 else if (flag_pic
== 1)
14056 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
14057 output_asm_insn ("l\t%4,%0(%4)", op
);
14059 else if (flag_pic
== 2)
14061 op
[9] = gen_rtx_REG (Pmode
, 0);
14062 output_asm_insn ("l\t%9,%8-4-%5(%4)", op
);
14063 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
14064 output_asm_insn ("ar\t%4,%9", op
);
14065 output_asm_insn ("l\t%4,0(%4)", op
);
14068 output_asm_insn ("br\t%4", op
);
14070 /* Output literal pool. */
14071 output_asm_insn (".align\t4", op
);
14073 if (nonlocal
&& flag_pic
== 2)
14074 output_asm_insn (".long\t%0", op
);
14077 op
[0] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
14078 SYMBOL_REF_FLAGS (op
[0]) = SYMBOL_FLAG_LOCAL
;
14081 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[8]));
14083 output_asm_insn (".long\t%0", op
);
14085 output_asm_insn (".long\t%0-%5", op
);
14089 targetm
.asm_out
.internal_label (file
, "L",
14090 CODE_LABEL_NUMBER (op
[6]));
14091 output_asm_insn (".long\t%2", op
);
14095 targetm
.asm_out
.internal_label (file
, "L",
14096 CODE_LABEL_NUMBER (op
[7]));
14097 output_asm_insn (".long\t%3", op
);
14100 final_end_function ();
14101 assemble_end_function (thunk
, fnname
);
14104 /* Output either an indirect jump or an indirect call
14105 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
14106 using a branch trampoline disabling branch target prediction. */
14109 s390_indirect_branch_via_thunk (unsigned int regno
,
14110 unsigned int return_addr_regno
,
14111 rtx comparison_operator
,
14112 enum s390_indirect_branch_type type
)
14114 enum s390_indirect_branch_option option
;
14116 if (type
== s390_indirect_branch_type_return
)
14118 if (s390_return_addr_from_memory ())
14119 option
= s390_opt_function_return_mem
;
14121 option
= s390_opt_function_return_reg
;
14123 else if (type
== s390_indirect_branch_type_jump
)
14124 option
= s390_opt_indirect_branch_jump
;
14125 else if (type
== s390_indirect_branch_type_call
)
14126 option
= s390_opt_indirect_branch_call
;
14128 gcc_unreachable ();
14130 if (TARGET_INDIRECT_BRANCH_TABLE
)
14134 ASM_GENERATE_INTERNAL_LABEL (label
,
14135 indirect_branch_table_label
[option
],
14136 indirect_branch_table_label_no
[option
]++);
14137 ASM_OUTPUT_LABEL (asm_out_file
, label
);
14140 if (return_addr_regno
!= INVALID_REGNUM
)
14142 gcc_assert (comparison_operator
== NULL_RTX
);
14143 fprintf (asm_out_file
, " \tbrasl\t%%r%d,", return_addr_regno
);
14147 fputs (" \tjg", asm_out_file
);
14148 if (comparison_operator
!= NULL_RTX
)
14149 print_operand (asm_out_file
, comparison_operator
, 'C');
14151 fputs ("\t", asm_out_file
);
14154 if (TARGET_CPU_Z10
)
14155 fprintf (asm_out_file
,
14156 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
"\n",
14159 fprintf (asm_out_file
,
14160 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
"\n",
14161 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
14163 if ((option
== s390_opt_indirect_branch_jump
14164 && cfun
->machine
->indirect_branch_jump
== indirect_branch_thunk
)
14165 || (option
== s390_opt_indirect_branch_call
14166 && cfun
->machine
->indirect_branch_call
== indirect_branch_thunk
)
14167 || (option
== s390_opt_function_return_reg
14168 && cfun
->machine
->function_return_reg
== indirect_branch_thunk
)
14169 || (option
== s390_opt_function_return_mem
14170 && cfun
->machine
->function_return_mem
== indirect_branch_thunk
))
14172 if (TARGET_CPU_Z10
)
14173 indirect_branch_z10thunk_mask
|= (1 << regno
);
14175 indirect_branch_prez10thunk_mask
|= (1 << regno
);
14179 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
14180 either be an address register or a label pointing to the location
14181 of the jump instruction. */
14184 s390_indirect_branch_via_inline_thunk (rtx execute_target
)
14186 if (TARGET_INDIRECT_BRANCH_TABLE
)
14190 ASM_GENERATE_INTERNAL_LABEL (label
,
14191 indirect_branch_table_label
[s390_opt_indirect_branch_jump
],
14192 indirect_branch_table_label_no
[s390_opt_indirect_branch_jump
]++);
14193 ASM_OUTPUT_LABEL (asm_out_file
, label
);
14197 fputs ("\t.machinemode zarch\n", asm_out_file
);
14199 if (REG_P (execute_target
))
14200 fprintf (asm_out_file
, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target
));
14202 output_asm_insn ("\texrl\t%%r0,%0", &execute_target
);
14205 fputs ("\t.machinemode esa\n", asm_out_file
);
14207 fputs ("0:\tj\t0b\n", asm_out_file
);
14211 s390_valid_pointer_mode (scalar_int_mode mode
)
14213 return (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
));
14216 /* Checks whether the given CALL_EXPR would use a caller
14217 saved register. This is used to decide whether sibling call
14218 optimization could be performed on the respective function
14222 s390_call_saved_register_used (tree call_expr
)
14224 CUMULATIVE_ARGS cum_v
;
14225 cumulative_args_t cum
;
14230 INIT_CUMULATIVE_ARGS (cum_v
, NULL
, NULL
, 0, 0);
14231 cum
= pack_cumulative_args (&cum_v
);
14233 for (i
= 0; i
< call_expr_nargs (call_expr
); i
++)
14235 parameter
= CALL_EXPR_ARG (call_expr
, i
);
14236 gcc_assert (parameter
);
14238 /* For an undeclared variable passed as parameter we will get
14239 an ERROR_MARK node here. */
14240 if (TREE_CODE (parameter
) == ERROR_MARK
)
14243 /* We assume that in the target function all parameters are
14244 named. This only has an impact on vector argument register
14245 usage none of which is call-saved. */
14246 function_arg_info
arg (TREE_TYPE (parameter
), /*named=*/true);
14247 apply_pass_by_reference_rules (&cum_v
, arg
);
14249 parm_rtx
= s390_function_arg (cum
, arg
);
14251 s390_function_arg_advance (cum
, arg
);
14256 if (REG_P (parm_rtx
))
14258 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
14259 int nregs
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
14261 for (reg
= 0; reg
< nregs
; reg
++)
14262 if (!call_used_or_fixed_reg_p (reg
+ REGNO (parm_rtx
)))
14265 else if (GET_CODE (parm_rtx
) == PARALLEL
)
14269 for (i
= 0; i
< XVECLEN (parm_rtx
, 0); i
++)
14271 rtx r
= XEXP (XVECEXP (parm_rtx
, 0, i
), 0);
14273 gcc_assert (REG_P (r
));
14274 gcc_assert (REG_NREGS (r
) == 1);
14276 if (!call_used_or_fixed_reg_p (REGNO (r
)))
14284 /* Return true if the given call expression can be
14285 turned into a sibling call.
14286 DECL holds the declaration of the function to be called whereas
14287 EXP is the call expression itself. */
14290 s390_function_ok_for_sibcall (tree decl
, tree exp
)
14292 /* The TPF epilogue uses register 1. */
14293 if (TARGET_TPF_PROFILING
)
14296 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
14297 which would have to be restored before the sibcall. */
14298 if (!TARGET_64BIT
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
14301 /* The thunks for indirect branches require r1 if no exrl is
14302 available. r1 might not be available when doing a sibling
14304 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
14309 /* Register 6 on s390 is available as an argument register but unfortunately
14310 "caller saved". This makes functions needing this register for arguments
14311 not suitable for sibcalls. */
14312 return !s390_call_saved_register_used (exp
);
14315 /* Return the fixed registers used for condition codes. */
14318 s390_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
14321 *p2
= INVALID_REGNUM
;
14326 /* This function is used by the call expanders of the machine description.
14327 It emits the call insn itself together with the necessary operations
14328 to adjust the target address and returns the emitted insn.
14329 ADDR_LOCATION is the target address rtx
14330 TLS_CALL the location of the thread-local symbol
14331 RESULT_REG the register where the result of the call should be stored
14332 RETADDR_REG the register where the return address should be stored
14333 If this parameter is NULL_RTX the call is considered
14334 to be a sibling call. */
14337 s390_emit_call (rtx addr_location
, rtx tls_call
, rtx result_reg
,
14340 bool plt31_call_p
= false;
14342 rtx vec
[4] = { NULL_RTX
};
14344 rtx
*call
= &vec
[0];
14345 rtx
*clobber_ret_reg
= &vec
[1];
14346 rtx
*use
= &vec
[2];
14347 rtx
*clobber_thunk_reg
= &vec
[3];
14350 /* Direct function calls need special treatment. */
14351 if (GET_CODE (addr_location
) == SYMBOL_REF
)
14353 /* When calling a global routine in PIC mode, we must
14354 replace the symbol itself with the PLT stub. */
14355 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (addr_location
) && !TARGET_64BIT
)
14357 if (retaddr_reg
!= NULL_RTX
)
14359 addr_location
= gen_rtx_UNSPEC (Pmode
,
14360 gen_rtvec (1, addr_location
),
14362 addr_location
= gen_rtx_CONST (Pmode
, addr_location
);
14363 plt31_call_p
= true;
14366 /* For -fpic code the PLT entries might use r12 which is
14367 call-saved. Therefore we cannot do a sibcall when
14368 calling directly using a symbol ref. When reaching
14369 this point we decided (in s390_function_ok_for_sibcall)
14370 to do a sibcall for a function pointer but one of the
14371 optimizers was able to get rid of the function pointer
14372 by propagating the symbol ref into the call. This
14373 optimization is illegal for S/390 so we turn the direct
14374 call into a indirect call again. */
14375 addr_location
= force_reg (Pmode
, addr_location
);
14379 /* If it is already an indirect call or the code above moved the
14380 SYMBOL_REF to somewhere else make sure the address can be found in
14382 if (retaddr_reg
== NULL_RTX
14383 && GET_CODE (addr_location
) != SYMBOL_REF
14386 emit_move_insn (gen_rtx_REG (Pmode
, SIBCALL_REGNUM
), addr_location
);
14387 addr_location
= gen_rtx_REG (Pmode
, SIBCALL_REGNUM
);
14390 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
14391 && GET_CODE (addr_location
) != SYMBOL_REF
14394 /* Indirect branch thunks require the target to be a single GPR. */
14395 addr_location
= force_reg (Pmode
, addr_location
);
14397 /* Without exrl the indirect branch thunks need an additional
14398 register for larl;ex */
14399 if (!TARGET_CPU_Z10
)
14401 *clobber_thunk_reg
= gen_rtx_REG (Pmode
, INDIRECT_BRANCH_THUNK_REGNUM
);
14402 *clobber_thunk_reg
= gen_rtx_CLOBBER (VOIDmode
, *clobber_thunk_reg
);
14406 addr_location
= gen_rtx_MEM (QImode
, addr_location
);
14407 *call
= gen_rtx_CALL (VOIDmode
, addr_location
, const0_rtx
);
14409 if (result_reg
!= NULL_RTX
)
14410 *call
= gen_rtx_SET (result_reg
, *call
);
14412 if (retaddr_reg
!= NULL_RTX
)
14414 *clobber_ret_reg
= gen_rtx_CLOBBER (VOIDmode
, retaddr_reg
);
14416 if (tls_call
!= NULL_RTX
)
14417 *use
= gen_rtx_USE (VOIDmode
, tls_call
);
14421 for (i
= 0; i
< 4; i
++)
14422 if (vec
[i
] != NULL_RTX
)
14430 v
= rtvec_alloc (elts
);
14431 for (i
= 0; i
< 4; i
++)
14432 if (vec
[i
] != NULL_RTX
)
14434 RTVEC_ELT (v
, e
) = vec
[i
];
14438 *call
= gen_rtx_PARALLEL (VOIDmode
, v
);
14441 insn
= emit_call_insn (*call
);
14443 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
14444 if (plt31_call_p
|| tls_call
!= NULL_RTX
)
14446 /* s390_function_ok_for_sibcall should
14447 have denied sibcalls in this case. */
14448 gcc_assert (retaddr_reg
!= NULL_RTX
);
14449 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, 12));
14454 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
14457 s390_conditional_register_usage (void)
14462 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
14463 fixed_regs
[BASE_REGNUM
] = 0;
14464 fixed_regs
[RETURN_REGNUM
] = 0;
14467 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
14468 call_used_regs
[i
] = 0;
14472 call_used_regs
[FPR4_REGNUM
] = 0;
14473 call_used_regs
[FPR6_REGNUM
] = 0;
14476 if (TARGET_SOFT_FLOAT
)
14478 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
14482 /* Disable v16 - v31 for non-vector target. */
14485 for (i
= VR16_REGNUM
; i
<= VR31_REGNUM
; i
++)
14486 fixed_regs
[i
] = call_used_regs
[i
] = 1;
14490 /* Corresponding function to eh_return expander. */
14492 static GTY(()) rtx s390_tpf_eh_return_symbol
;
14494 s390_emit_tpf_eh_return (rtx target
)
14499 if (!s390_tpf_eh_return_symbol
)
14501 s390_tpf_eh_return_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tpf_eh_return");
14502 SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol
) |= SYMBOL_FLAG_FUNCTION
;
14505 reg
= gen_rtx_REG (Pmode
, 2);
14506 orig_ra
= gen_rtx_REG (Pmode
, 3);
14508 emit_move_insn (reg
, target
);
14509 emit_move_insn (orig_ra
, get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
));
14510 insn
= s390_emit_call (s390_tpf_eh_return_symbol
, NULL_RTX
, reg
,
14511 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
14512 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), reg
);
14513 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), orig_ra
);
14515 emit_move_insn (EH_RETURN_HANDLER_RTX
, reg
);
14518 /* Rework the prologue/epilogue to avoid saving/restoring
14519 registers unnecessarily. */
14522 s390_optimize_prologue (void)
14524 rtx_insn
*insn
, *new_insn
, *next_insn
;
14526 /* Do a final recompute of the frame-related data. */
14527 s390_optimize_register_info ();
14529 /* If all special registers are in fact used, there's nothing we
14530 can do, so no point in walking the insn list. */
14532 if (cfun_frame_layout
.first_save_gpr
<= BASE_REGNUM
14533 && cfun_frame_layout
.last_save_gpr
>= BASE_REGNUM
)
14536 /* Search for prologue/epilogue insns and replace them. */
14537 for (insn
= get_insns (); insn
; insn
= next_insn
)
14539 int first
, last
, off
;
14540 rtx set
, base
, offset
;
14543 next_insn
= NEXT_INSN (insn
);
14545 if (! NONJUMP_INSN_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
14548 pat
= PATTERN (insn
);
14550 /* Remove ldgr/lgdr instructions used for saving and restore
14551 GPRs if possible. */
14556 if (INSN_CODE (insn
) == CODE_FOR_stack_restore_from_fpr
)
14557 tmp_pat
= XVECEXP (pat
, 0, 0);
14559 if (GET_CODE (tmp_pat
) == SET
14560 && GET_MODE (SET_SRC (tmp_pat
)) == DImode
14561 && REG_P (SET_SRC (tmp_pat
))
14562 && REG_P (SET_DEST (tmp_pat
)))
14564 int src_regno
= REGNO (SET_SRC (tmp_pat
));
14565 int dest_regno
= REGNO (SET_DEST (tmp_pat
));
14569 if (!((GENERAL_REGNO_P (src_regno
)
14570 && FP_REGNO_P (dest_regno
))
14571 || (FP_REGNO_P (src_regno
)
14572 && GENERAL_REGNO_P (dest_regno
))))
14575 gpr_regno
= GENERAL_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
14576 fpr_regno
= FP_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
14578 /* GPR must be call-saved, FPR must be call-clobbered. */
14579 if (!call_used_regs
[fpr_regno
]
14580 || call_used_regs
[gpr_regno
])
14583 /* It must not happen that what we once saved in an FPR now
14584 needs a stack slot. */
14585 gcc_assert (cfun_gpr_save_slot (gpr_regno
) != SAVE_SLOT_STACK
);
14587 if (cfun_gpr_save_slot (gpr_regno
) == SAVE_SLOT_NONE
)
14589 remove_insn (insn
);
14595 if (GET_CODE (pat
) == PARALLEL
14596 && store_multiple_operation (pat
, VOIDmode
))
14598 set
= XVECEXP (pat
, 0, 0);
14599 first
= REGNO (SET_SRC (set
));
14600 last
= first
+ XVECLEN (pat
, 0) - 1;
14601 offset
= const0_rtx
;
14602 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
14603 off
= INTVAL (offset
);
14605 if (GET_CODE (base
) != REG
|| off
< 0)
14607 if (cfun_frame_layout
.first_save_gpr
!= -1
14608 && (cfun_frame_layout
.first_save_gpr
< first
14609 || cfun_frame_layout
.last_save_gpr
> last
))
14611 if (REGNO (base
) != STACK_POINTER_REGNUM
14612 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
14614 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
14617 if (cfun_frame_layout
.first_save_gpr
!= -1)
14619 off
+ (cfun_frame_layout
.first_save_gpr
14620 - first
) * UNITS_PER_LONG
,
14621 cfun_frame_layout
.first_save_gpr
,
14622 cfun_frame_layout
.last_save_gpr
, insn
);
14624 remove_insn (insn
);
14628 if (cfun_frame_layout
.first_save_gpr
== -1
14629 && GET_CODE (pat
) == SET
14630 && GENERAL_REG_P (SET_SRC (pat
))
14631 && GET_CODE (SET_DEST (pat
)) == MEM
)
14634 first
= REGNO (SET_SRC (set
));
14635 offset
= const0_rtx
;
14636 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
14637 off
= INTVAL (offset
);
14639 if (GET_CODE (base
) != REG
|| off
< 0)
14641 if (REGNO (base
) != STACK_POINTER_REGNUM
14642 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
14645 remove_insn (insn
);
14649 if (GET_CODE (pat
) == PARALLEL
14650 && load_multiple_operation (pat
, VOIDmode
))
14652 set
= XVECEXP (pat
, 0, 0);
14653 first
= REGNO (SET_DEST (set
));
14654 last
= first
+ XVECLEN (pat
, 0) - 1;
14655 offset
= const0_rtx
;
14656 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
14657 off
= INTVAL (offset
);
14659 if (GET_CODE (base
) != REG
|| off
< 0)
14662 if (cfun_frame_layout
.first_restore_gpr
!= -1
14663 && (cfun_frame_layout
.first_restore_gpr
< first
14664 || cfun_frame_layout
.last_restore_gpr
> last
))
14666 if (REGNO (base
) != STACK_POINTER_REGNUM
14667 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
14669 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
14672 if (cfun_frame_layout
.first_restore_gpr
!= -1)
14674 rtx rpat
= restore_gprs (base
,
14675 off
+ (cfun_frame_layout
.first_restore_gpr
14676 - first
) * UNITS_PER_LONG
,
14677 cfun_frame_layout
.first_restore_gpr
,
14678 cfun_frame_layout
.last_restore_gpr
);
14680 /* Remove REG_CFA_RESTOREs for registers that we no
14681 longer need to save. */
14682 REG_NOTES (rpat
) = REG_NOTES (insn
);
14683 for (rtx
*ptr
= ®_NOTES (rpat
); *ptr
; )
14684 if (REG_NOTE_KIND (*ptr
) == REG_CFA_RESTORE
14685 && ((int) REGNO (XEXP (*ptr
, 0))
14686 < cfun_frame_layout
.first_restore_gpr
))
14687 *ptr
= XEXP (*ptr
, 1);
14689 ptr
= &XEXP (*ptr
, 1);
14690 new_insn
= emit_insn_before (rpat
, insn
);
14691 RTX_FRAME_RELATED_P (new_insn
) = 1;
14692 INSN_ADDRESSES_NEW (new_insn
, -1);
14695 remove_insn (insn
);
14699 if (cfun_frame_layout
.first_restore_gpr
== -1
14700 && GET_CODE (pat
) == SET
14701 && GENERAL_REG_P (SET_DEST (pat
))
14702 && GET_CODE (SET_SRC (pat
)) == MEM
)
14705 first
= REGNO (SET_DEST (set
));
14706 offset
= const0_rtx
;
14707 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
14708 off
= INTVAL (offset
);
14710 if (GET_CODE (base
) != REG
|| off
< 0)
14713 if (REGNO (base
) != STACK_POINTER_REGNUM
14714 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
14717 remove_insn (insn
);
14723 /* On z10 and later the dynamic branch prediction must see the
14724 backward jump within a certain windows. If not it falls back to
14725 the static prediction. This function rearranges the loop backward
14726 branch in a way which makes the static prediction always correct.
14727 The function returns true if it added an instruction. */
14729 s390_fix_long_loop_prediction (rtx_insn
*insn
)
14731 rtx set
= single_set (insn
);
14732 rtx code_label
, label_ref
;
14733 rtx_insn
*uncond_jump
;
14734 rtx_insn
*cur_insn
;
14738 /* This will exclude branch on count and branch on index patterns
14739 since these are correctly statically predicted.
14741 The additional check for a PARALLEL is required here since
14742 single_set might be != NULL for PARALLELs where the set of the
14743 iteration variable is dead. */
14744 if (GET_CODE (PATTERN (insn
)) == PARALLEL
14746 || SET_DEST (set
) != pc_rtx
14747 || GET_CODE (SET_SRC(set
)) != IF_THEN_ELSE
)
14750 /* Skip conditional returns. */
14751 if (ANY_RETURN_P (XEXP (SET_SRC (set
), 1))
14752 && XEXP (SET_SRC (set
), 2) == pc_rtx
)
14755 label_ref
= (GET_CODE (XEXP (SET_SRC (set
), 1)) == LABEL_REF
?
14756 XEXP (SET_SRC (set
), 1) : XEXP (SET_SRC (set
), 2));
14758 gcc_assert (GET_CODE (label_ref
) == LABEL_REF
);
14760 code_label
= XEXP (label_ref
, 0);
14762 if (INSN_ADDRESSES (INSN_UID (code_label
)) == -1
14763 || INSN_ADDRESSES (INSN_UID (insn
)) == -1
14764 || (INSN_ADDRESSES (INSN_UID (insn
))
14765 - INSN_ADDRESSES (INSN_UID (code_label
)) < PREDICT_DISTANCE
))
14768 for (distance
= 0, cur_insn
= PREV_INSN (insn
);
14769 distance
< PREDICT_DISTANCE
- 6;
14770 distance
+= get_attr_length (cur_insn
), cur_insn
= PREV_INSN (cur_insn
))
14771 if (!cur_insn
|| JUMP_P (cur_insn
) || LABEL_P (cur_insn
))
14774 rtx_code_label
*new_label
= gen_label_rtx ();
14775 uncond_jump
= emit_jump_insn_after (
14776 gen_rtx_SET (pc_rtx
,
14777 gen_rtx_LABEL_REF (VOIDmode
, code_label
)),
14779 emit_label_after (new_label
, uncond_jump
);
14781 tmp
= XEXP (SET_SRC (set
), 1);
14782 XEXP (SET_SRC (set
), 1) = XEXP (SET_SRC (set
), 2);
14783 XEXP (SET_SRC (set
), 2) = tmp
;
14784 INSN_CODE (insn
) = -1;
14786 XEXP (label_ref
, 0) = new_label
;
14787 JUMP_LABEL (insn
) = new_label
;
14788 JUMP_LABEL (uncond_jump
) = code_label
;
14793 /* Returns 1 if INSN reads the value of REG for purposes not related
14794 to addressing of memory, and 0 otherwise. */
14796 s390_non_addr_reg_read_p (rtx reg
, rtx_insn
*insn
)
14798 return reg_referenced_p (reg
, PATTERN (insn
))
14799 && !reg_used_in_mem_p (REGNO (reg
), PATTERN (insn
));
14802 /* Starting from INSN find_cond_jump looks downwards in the insn
14803 stream for a single jump insn which is the last user of the
14804 condition code set in INSN. */
14806 find_cond_jump (rtx_insn
*insn
)
14808 for (; insn
; insn
= NEXT_INSN (insn
))
14812 if (LABEL_P (insn
))
14815 if (!JUMP_P (insn
))
14817 if (reg_mentioned_p (gen_rtx_REG (CCmode
, CC_REGNUM
), insn
))
14822 /* This will be triggered by a return. */
14823 if (GET_CODE (PATTERN (insn
)) != SET
)
14826 gcc_assert (SET_DEST (PATTERN (insn
)) == pc_rtx
);
14827 ite
= SET_SRC (PATTERN (insn
));
14829 if (GET_CODE (ite
) != IF_THEN_ELSE
)
14832 cc
= XEXP (XEXP (ite
, 0), 0);
14833 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
)))
14836 if (find_reg_note (insn
, REG_DEAD
, cc
))
14844 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14845 the semantics does not change. If NULL_RTX is passed as COND the
14846 function tries to find the conditional jump starting with INSN. */
14848 s390_swap_cmp (rtx cond
, rtx
*op0
, rtx
*op1
, rtx_insn
*insn
)
14852 if (cond
== NULL_RTX
)
14854 rtx_insn
*jump
= find_cond_jump (NEXT_INSN (insn
));
14855 rtx set
= jump
? single_set (jump
) : NULL_RTX
;
14857 if (set
== NULL_RTX
)
14860 cond
= XEXP (SET_SRC (set
), 0);
14865 PUT_CODE (cond
, swap_condition (GET_CODE (cond
)));
14868 /* On z10, instructions of the compare-and-branch family have the
14869 property to access the register occurring as second operand with
14870 its bits complemented. If such a compare is grouped with a second
14871 instruction that accesses the same register non-complemented, and
14872 if that register's value is delivered via a bypass, then the
14873 pipeline recycles, thereby causing significant performance decline.
14874 This function locates such situations and exchanges the two
14875 operands of the compare. The function return true whenever it
14878 s390_z10_optimize_cmp (rtx_insn
*insn
)
14880 rtx_insn
*prev_insn
, *next_insn
;
14881 bool insn_added_p
= false;
14882 rtx cond
, *op0
, *op1
;
14884 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
14886 /* Handle compare and branch and branch on count
14888 rtx pattern
= single_set (insn
);
14891 || SET_DEST (pattern
) != pc_rtx
14892 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
)
14895 cond
= XEXP (SET_SRC (pattern
), 0);
14896 op0
= &XEXP (cond
, 0);
14897 op1
= &XEXP (cond
, 1);
14899 else if (GET_CODE (PATTERN (insn
)) == SET
)
14903 /* Handle normal compare instructions. */
14904 src
= SET_SRC (PATTERN (insn
));
14905 dest
= SET_DEST (PATTERN (insn
));
14908 || !CC_REGNO_P (REGNO (dest
))
14909 || GET_CODE (src
) != COMPARE
)
14912 /* s390_swap_cmp will try to find the conditional
14913 jump when passing NULL_RTX as condition. */
14915 op0
= &XEXP (src
, 0);
14916 op1
= &XEXP (src
, 1);
14921 if (!REG_P (*op0
) || !REG_P (*op1
))
14924 if (GET_MODE_CLASS (GET_MODE (*op0
)) != MODE_INT
)
14927 /* Swap the COMPARE arguments and its mask if there is a
14928 conflicting access in the previous insn. */
14929 prev_insn
= prev_active_insn (insn
);
14930 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14931 && reg_referenced_p (*op1
, PATTERN (prev_insn
)))
14932 s390_swap_cmp (cond
, op0
, op1
, insn
);
14934 /* Check if there is a conflict with the next insn. If there
14935 was no conflict with the previous insn, then swap the
14936 COMPARE arguments and its mask. If we already swapped
14937 the operands, or if swapping them would cause a conflict
14938 with the previous insn, issue a NOP after the COMPARE in
14939 order to separate the two instuctions. */
14940 next_insn
= next_active_insn (insn
);
14941 if (next_insn
!= NULL_RTX
&& INSN_P (next_insn
)
14942 && s390_non_addr_reg_read_p (*op1
, next_insn
))
14944 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14945 && s390_non_addr_reg_read_p (*op0
, prev_insn
))
14947 if (REGNO (*op1
) == 0)
14948 emit_insn_after (gen_nop_lr1 (), insn
);
14950 emit_insn_after (gen_nop_lr0 (), insn
);
14951 insn_added_p
= true;
14954 s390_swap_cmp (cond
, op0
, op1
, insn
);
14956 return insn_added_p
;
14959 /* Number of INSNs to be scanned backward in the last BB of the loop
14960 and forward in the first BB of the loop. This usually should be a
14961 bit more than the number of INSNs which could go into one
14963 #define S390_OSC_SCAN_INSN_NUM 5
14965 /* Scan LOOP for static OSC collisions and return true if a osc_break
14966 should be issued for this loop. */
14968 s390_adjust_loop_scan_osc (struct loop
* loop
)
14971 HARD_REG_SET modregs
, newregs
;
14972 rtx_insn
*insn
, *store_insn
= NULL
;
14974 struct s390_address addr_store
, addr_load
;
14975 subrtx_iterator::array_type array
;
14978 CLEAR_HARD_REG_SET (modregs
);
14981 FOR_BB_INSNS_REVERSE (loop
->latch
, insn
)
14983 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14987 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14990 find_all_hard_reg_sets (insn
, &newregs
, true);
14991 modregs
|= newregs
;
14993 set
= single_set (insn
);
14997 if (MEM_P (SET_DEST (set
))
14998 && s390_decompose_address (XEXP (SET_DEST (set
), 0), &addr_store
))
15005 if (store_insn
== NULL_RTX
)
15009 FOR_BB_INSNS (loop
->header
, insn
)
15011 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
15014 if (insn
== store_insn
)
15018 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
15021 find_all_hard_reg_sets (insn
, &newregs
, true);
15022 modregs
|= newregs
;
15024 set
= single_set (insn
);
15028 /* An intermediate store disrupts static OSC checking
15030 if (MEM_P (SET_DEST (set
))
15031 && s390_decompose_address (XEXP (SET_DEST (set
), 0), NULL
))
15034 FOR_EACH_SUBRTX (iter
, array
, SET_SRC (set
), NONCONST
)
15036 && s390_decompose_address (XEXP (*iter
, 0), &addr_load
)
15037 && rtx_equal_p (addr_load
.base
, addr_store
.base
)
15038 && rtx_equal_p (addr_load
.indx
, addr_store
.indx
)
15039 && rtx_equal_p (addr_load
.disp
, addr_store
.disp
))
15041 if ((addr_load
.base
!= NULL_RTX
15042 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.base
)))
15043 || (addr_load
.indx
!= NULL_RTX
15044 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.indx
))))
15051 /* Look for adjustments which can be done on simple innermost
15054 s390_adjust_loops ()
15057 compute_bb_for_insn ();
15059 /* Find the loops. */
15060 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
15062 for (auto loop
: loops_list (cfun
, LI_ONLY_INNERMOST
))
15066 flow_loop_dump (loop
, dump_file
, NULL
, 0);
15067 fprintf (dump_file
, ";; OSC loop scan Loop: ");
15069 if (loop
->latch
== NULL
15070 || pc_set (BB_END (loop
->latch
)) == NULL_RTX
15071 || !s390_adjust_loop_scan_osc (loop
))
15075 if (loop
->latch
== NULL
)
15076 fprintf (dump_file
, " muliple backward jumps\n");
15079 fprintf (dump_file
, " header insn: %d latch insn: %d ",
15080 INSN_UID (BB_HEAD (loop
->header
)),
15081 INSN_UID (BB_END (loop
->latch
)));
15082 if (pc_set (BB_END (loop
->latch
)) == NULL_RTX
)
15083 fprintf (dump_file
, " loop does not end with jump\n");
15085 fprintf (dump_file
, " not instrumented\n");
15091 rtx_insn
*new_insn
;
15094 fprintf (dump_file
, " adding OSC break insn: ");
15095 new_insn
= emit_insn_before (gen_osc_break (),
15096 BB_END (loop
->latch
));
15097 INSN_ADDRESSES_NEW (new_insn
, -1);
15101 loop_optimizer_finalize ();
15103 df_finish_pass (false);
15106 /* Perform machine-dependent processing. */
15111 struct constant_pool
*pool
;
15113 int hw_before
, hw_after
;
15115 if (s390_tune
== PROCESSOR_2964_Z13
)
15116 s390_adjust_loops ();
15118 /* Make sure all splits have been performed; splits after
15119 machine_dependent_reorg might confuse insn length counts. */
15120 split_all_insns_noflow ();
15122 /* Install the main literal pool and the associated base
15123 register load insns. The literal pool might be > 4096 bytes in
15124 size, so that some of its elements cannot be directly accessed.
15126 To fix this, we split the single literal pool into multiple
15127 pool chunks, reloading the pool base register at various
15128 points throughout the function to ensure it always points to
15129 the pool chunk the following code expects. */
15131 /* Collect the literal pool. */
15132 pool
= s390_mainpool_start ();
15135 /* Finish up literal pool related changes. */
15136 s390_mainpool_finish (pool
);
15140 /* If literal pool overflowed, chunkify it. */
15141 pool
= s390_chunkify_start ();
15142 s390_chunkify_finish (pool
);
15145 /* Generate out-of-pool execute target insns. */
15146 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15151 label
= s390_execute_label (insn
);
15155 gcc_assert (label
!= const0_rtx
);
15157 target
= emit_label (XEXP (label
, 0));
15158 INSN_ADDRESSES_NEW (target
, -1);
15162 target
= emit_jump_insn (s390_execute_target (insn
));
15163 /* This is important in order to keep a table jump
15164 pointing at the jump table label. Only this makes it
15165 being recognized as table jump. */
15166 JUMP_LABEL (target
) = JUMP_LABEL (insn
);
15169 target
= emit_insn (s390_execute_target (insn
));
15170 INSN_ADDRESSES_NEW (target
, -1);
15173 /* Try to optimize prologue and epilogue further. */
15174 s390_optimize_prologue ();
15176 /* Walk over the insns and do some >=z10 specific changes. */
15177 if (s390_tune
>= PROCESSOR_2097_Z10
)
15180 bool insn_added_p
= false;
15182 /* The insn lengths and addresses have to be up to date for the
15183 following manipulations. */
15184 shorten_branches (get_insns ());
15186 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15188 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
15192 insn_added_p
|= s390_fix_long_loop_prediction (insn
);
15194 if ((GET_CODE (PATTERN (insn
)) == PARALLEL
15195 || GET_CODE (PATTERN (insn
)) == SET
)
15196 && s390_tune
== PROCESSOR_2097_Z10
)
15197 insn_added_p
|= s390_z10_optimize_cmp (insn
);
15200 /* Adjust branches if we added new instructions. */
15202 shorten_branches (get_insns ());
15205 s390_function_num_hotpatch_hw (current_function_decl
, &hw_before
, &hw_after
);
15210 /* Insert NOPs for hotpatching. */
15211 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15213 1. inside the area covered by debug information to allow setting
15214 breakpoints at the NOPs,
15215 2. before any insn which results in an asm instruction,
15216 3. before in-function labels to avoid jumping to the NOPs, for
15217 example as part of a loop,
15218 4. before any barrier in case the function is completely empty
15219 (__builtin_unreachable ()) and has neither internal labels nor
15222 if (active_insn_p (insn
) || BARRIER_P (insn
) || LABEL_P (insn
))
15224 /* Output a series of NOPs before the first active insn. */
15225 while (insn
&& hw_after
> 0)
15229 emit_insn_before (gen_nop_6_byte (), insn
);
15232 else if (hw_after
>= 2)
15234 emit_insn_before (gen_nop_4_byte (), insn
);
15239 emit_insn_before (gen_nop_2_byte (), insn
);
15246 /* Return true if INSN is a fp load insn writing register REGNO. */
15248 s390_fpload_toreg (rtx_insn
*insn
, unsigned int regno
)
15251 enum attr_type flag
= s390_safe_attr_type (insn
);
15253 if (flag
!= TYPE_FLOADSF
&& flag
!= TYPE_FLOADDF
)
15256 set
= single_set (insn
);
15258 if (set
== NULL_RTX
)
15261 if (!REG_P (SET_DEST (set
)) || !MEM_P (SET_SRC (set
)))
15264 if (REGNO (SET_DEST (set
)) != regno
)
15270 /* This value describes the distance to be avoided between an
15271 arithmetic fp instruction and an fp load writing the same register.
15272 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
15273 fine but the exact value has to be avoided. Otherwise the FP
15274 pipeline will throw an exception causing a major penalty. */
15275 #define Z10_EARLYLOAD_DISTANCE 7
15277 /* Rearrange the ready list in order to avoid the situation described
15278 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
15279 moved to the very end of the ready list. */
15281 s390_z10_prevent_earlyload_conflicts (rtx_insn
**ready
, int *nready_p
)
15283 unsigned int regno
;
15284 int nready
= *nready_p
;
15289 enum attr_type flag
;
15292 /* Skip DISTANCE - 1 active insns. */
15293 for (insn
= last_scheduled_insn
, distance
= Z10_EARLYLOAD_DISTANCE
- 1;
15294 distance
> 0 && insn
!= NULL_RTX
;
15295 distance
--, insn
= prev_active_insn (insn
))
15296 if (CALL_P (insn
) || JUMP_P (insn
))
15299 if (insn
== NULL_RTX
)
15302 set
= single_set (insn
);
15304 if (set
== NULL_RTX
|| !REG_P (SET_DEST (set
))
15305 || GET_MODE_CLASS (GET_MODE (SET_DEST (set
))) != MODE_FLOAT
)
15308 flag
= s390_safe_attr_type (insn
);
15310 if (flag
== TYPE_FLOADSF
|| flag
== TYPE_FLOADDF
)
15313 regno
= REGNO (SET_DEST (set
));
15316 while (!s390_fpload_toreg (ready
[i
], regno
) && i
> 0)
15323 memmove (&ready
[1], &ready
[0], sizeof (rtx_insn
*) * i
);
15327 struct s390_sched_state
15329 /* Number of insns in the group. */
15331 /* Execution side of the group. */
15333 /* Group can only hold two insns. */
15335 } s390_sched_state
;
15337 static struct s390_sched_state sched_state
;
15339 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
15340 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
15341 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
15342 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
15343 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
15345 static unsigned int
15346 s390_get_sched_attrmask (rtx_insn
*insn
)
15348 unsigned int mask
= 0;
15352 case PROCESSOR_2827_ZEC12
:
15353 if (get_attr_zEC12_cracked (insn
))
15354 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
15355 if (get_attr_zEC12_expanded (insn
))
15356 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
15357 if (get_attr_zEC12_endgroup (insn
))
15358 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
15359 if (get_attr_zEC12_groupalone (insn
))
15360 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
15362 case PROCESSOR_2964_Z13
:
15363 if (get_attr_z13_cracked (insn
))
15364 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
15365 if (get_attr_z13_expanded (insn
))
15366 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
15367 if (get_attr_z13_endgroup (insn
))
15368 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
15369 if (get_attr_z13_groupalone (insn
))
15370 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
15371 if (get_attr_z13_groupoftwo (insn
))
15372 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
15374 case PROCESSOR_3906_Z14
:
15375 if (get_attr_z14_cracked (insn
))
15376 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
15377 if (get_attr_z14_expanded (insn
))
15378 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
15379 if (get_attr_z14_endgroup (insn
))
15380 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
15381 if (get_attr_z14_groupalone (insn
))
15382 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
15383 if (get_attr_z14_groupoftwo (insn
))
15384 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
15386 case PROCESSOR_8561_Z15
:
15387 if (get_attr_z15_cracked (insn
))
15388 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
15389 if (get_attr_z15_expanded (insn
))
15390 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
15391 if (get_attr_z15_endgroup (insn
))
15392 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
15393 if (get_attr_z15_groupalone (insn
))
15394 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
15395 if (get_attr_z15_groupoftwo (insn
))
15396 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
15398 case PROCESSOR_3931_Z16
:
15399 if (get_attr_z16_cracked (insn
))
15400 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
15401 if (get_attr_z16_expanded (insn
))
15402 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
15403 if (get_attr_z16_endgroup (insn
))
15404 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
15405 if (get_attr_z16_groupalone (insn
))
15406 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
15407 if (get_attr_z16_groupoftwo (insn
))
15408 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
15411 gcc_unreachable ();
15416 static unsigned int
15417 s390_get_unit_mask (rtx_insn
*insn
, int *units
)
15419 unsigned int mask
= 0;
15423 case PROCESSOR_2964_Z13
:
15425 if (get_attr_z13_unit_lsu (insn
))
15427 if (get_attr_z13_unit_fxa (insn
))
15429 if (get_attr_z13_unit_fxb (insn
))
15431 if (get_attr_z13_unit_vfu (insn
))
15434 case PROCESSOR_3906_Z14
:
15436 if (get_attr_z14_unit_lsu (insn
))
15438 if (get_attr_z14_unit_fxa (insn
))
15440 if (get_attr_z14_unit_fxb (insn
))
15442 if (get_attr_z14_unit_vfu (insn
))
15445 case PROCESSOR_8561_Z15
:
15447 if (get_attr_z15_unit_lsu (insn
))
15449 if (get_attr_z15_unit_fxa (insn
))
15451 if (get_attr_z15_unit_fxb (insn
))
15453 if (get_attr_z15_unit_vfu (insn
))
15456 case PROCESSOR_3931_Z16
:
15458 if (get_attr_z16_unit_lsu (insn
))
15460 if (get_attr_z16_unit_fxa (insn
))
15462 if (get_attr_z16_unit_fxb (insn
))
15464 if (get_attr_z16_unit_vfu (insn
))
15468 gcc_unreachable ();
15474 s390_is_fpd (rtx_insn
*insn
)
15476 if (insn
== NULL_RTX
)
15479 return get_attr_z13_unit_fpd (insn
) || get_attr_z14_unit_fpd (insn
)
15480 || get_attr_z15_unit_fpd (insn
) || get_attr_z16_unit_fpd (insn
);
15484 s390_is_fxd (rtx_insn
*insn
)
15486 if (insn
== NULL_RTX
)
15489 return get_attr_z13_unit_fxd (insn
) || get_attr_z14_unit_fxd (insn
)
15490 || get_attr_z15_unit_fxd (insn
) || get_attr_z16_unit_fxd (insn
);
15493 /* Returns TRUE if INSN is a long-running instruction. */
15495 s390_is_longrunning (rtx_insn
*insn
)
15497 if (insn
== NULL_RTX
)
15500 return s390_is_fxd (insn
) || s390_is_fpd (insn
);
15504 /* Return the scheduling score for INSN. The higher the score the
15505 better. The score is calculated from the OOO scheduling attributes
15506 of INSN and the scheduling state sched_state. */
15508 s390_sched_score (rtx_insn
*insn
)
15510 unsigned int mask
= s390_get_sched_attrmask (insn
);
15513 switch (sched_state
.group_state
)
15516 /* Try to put insns into the first slot which would otherwise
15518 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
15519 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
15521 if ((mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
15525 /* Prefer not cracked insns while trying to put together a
15527 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
15528 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
15529 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
15531 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) == 0)
15533 /* If we are in a group of two already, try to schedule another
15534 group-of-two insn to avoid shortening another group. */
15535 if (sched_state
.group_of_two
15536 && (mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
15540 /* Prefer not cracked insns while trying to put together a
15542 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
15543 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
15544 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
15546 /* Prefer endgroup insns in the last slot. */
15547 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
15549 /* Try to avoid group-of-two insns in the last slot as they will
15550 shorten this group as well as the next one. */
15551 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
15552 score
= MAX (0, score
- 15);
15556 if (s390_tune
>= PROCESSOR_2964_Z13
)
15559 unsigned unit_mask
, m
= 1;
15561 unit_mask
= s390_get_unit_mask (insn
, &units
);
15562 gcc_assert (units
<= MAX_SCHED_UNITS
);
15564 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
15565 ago the last insn of this unit type got scheduled. This is
15566 supposed to help providing a proper instruction mix to the
15568 for (i
= 0; i
< units
; i
++, m
<<= 1)
15570 score
+= (last_scheduled_unit_distance
[i
][sched_state
.side
]
15571 * MAX_SCHED_MIX_SCORE
/ MAX_SCHED_MIX_DISTANCE
);
15573 int other_side
= 1 - sched_state
.side
;
15575 /* Try to delay long-running insns when side is busy. */
15576 if (s390_is_longrunning (insn
))
15578 if (s390_is_fxd (insn
))
15580 if (fxd_longrunning
[sched_state
.side
]
15581 && fxd_longrunning
[other_side
]
15582 <= fxd_longrunning
[sched_state
.side
])
15583 score
= MAX (0, score
- 10);
15585 else if (fxd_longrunning
[other_side
]
15586 >= fxd_longrunning
[sched_state
.side
])
15590 if (s390_is_fpd (insn
))
15592 if (fpd_longrunning
[sched_state
.side
]
15593 && fpd_longrunning
[other_side
]
15594 <= fpd_longrunning
[sched_state
.side
])
15595 score
= MAX (0, score
- 10);
15597 else if (fpd_longrunning
[other_side
]
15598 >= fpd_longrunning
[sched_state
.side
])
15607 /* This function is called via hook TARGET_SCHED_REORDER before
15608 issuing one insn from list READY which contains *NREADYP entries.
15609 For target z10 it reorders load instructions to avoid early load
15610 conflicts in the floating point pipeline */
15612 s390_sched_reorder (FILE *file
, int verbose
,
15613 rtx_insn
**ready
, int *nreadyp
, int clock ATTRIBUTE_UNUSED
)
15615 if (s390_tune
== PROCESSOR_2097_Z10
15616 && reload_completed
15618 s390_z10_prevent_earlyload_conflicts (ready
, nreadyp
);
15620 if (s390_tune
>= PROCESSOR_2827_ZEC12
15621 && reload_completed
15625 int last_index
= *nreadyp
- 1;
15626 int max_index
= -1;
15627 int max_score
= -1;
15630 /* Just move the insn with the highest score to the top (the
15631 end) of the list. A full sort is not needed since a conflict
15632 in the hazard recognition cannot happen. So the top insn in
15633 the ready list will always be taken. */
15634 for (i
= last_index
; i
>= 0; i
--)
15638 if (recog_memoized (ready
[i
]) < 0)
15641 score
= s390_sched_score (ready
[i
]);
15642 if (score
> max_score
)
15649 if (max_index
!= -1)
15651 if (max_index
!= last_index
)
15653 tmp
= ready
[max_index
];
15654 ready
[max_index
] = ready
[last_index
];
15655 ready
[last_index
] = tmp
;
15659 ";;\t\tBACKEND: move insn %d to the top of list\n",
15660 INSN_UID (ready
[last_index
]));
15662 else if (verbose
> 5)
15664 ";;\t\tBACKEND: best insn %d already on top\n",
15665 INSN_UID (ready
[last_index
]));
15670 fprintf (file
, "ready list ooo attributes - sched state: %d\n",
15671 sched_state
.group_state
);
15673 for (i
= last_index
; i
>= 0; i
--)
15675 unsigned int sched_mask
;
15676 rtx_insn
*insn
= ready
[i
];
15678 if (recog_memoized (insn
) < 0)
15681 sched_mask
= s390_get_sched_attrmask (insn
);
15682 fprintf (file
, ";;\t\tBACKEND: insn %d score: %d: ",
15684 s390_sched_score (insn
));
15685 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15686 ((M) & sched_mask) ? #ATTR : "");
15687 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
15688 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
15689 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
15690 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
15691 #undef PRINT_SCHED_ATTR
15692 if (s390_tune
>= PROCESSOR_2964_Z13
)
15694 unsigned int unit_mask
, m
= 1;
15697 unit_mask
= s390_get_unit_mask (insn
, &units
);
15698 fprintf (file
, "(units:");
15699 for (j
= 0; j
< units
; j
++, m
<<= 1)
15701 fprintf (file
, " u%d", j
);
15702 fprintf (file
, ")");
15704 fprintf (file
, "\n");
15709 return s390_issue_rate ();
15713 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15714 the scheduler has issued INSN. It stores the last issued insn into
15715 last_scheduled_insn in order to make it available for
15716 s390_sched_reorder. */
15718 s390_sched_variable_issue (FILE *file
, int verbose
, rtx_insn
*insn
, int more
)
15720 last_scheduled_insn
= insn
;
15722 bool ends_group
= false;
15724 if (s390_tune
>= PROCESSOR_2827_ZEC12
15725 && reload_completed
15726 && recog_memoized (insn
) >= 0)
15728 unsigned int mask
= s390_get_sched_attrmask (insn
);
15730 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
15731 sched_state
.group_of_two
= true;
15733 /* If this is a group-of-two insn, we actually ended the last group
15734 and this insn is the first one of the new group. */
15735 if (sched_state
.group_state
== 2 && sched_state
.group_of_two
)
15737 sched_state
.side
= sched_state
.side
? 0 : 1;
15738 sched_state
.group_state
= 0;
15741 /* Longrunning and side bookkeeping. */
15742 for (int i
= 0; i
< 2; i
++)
15744 fxd_longrunning
[i
] = MAX (0, fxd_longrunning
[i
] - 1);
15745 fpd_longrunning
[i
] = MAX (0, fpd_longrunning
[i
] - 1);
15748 unsigned latency
= insn_default_latency (insn
);
15749 if (s390_is_longrunning (insn
))
15751 if (s390_is_fxd (insn
))
15752 fxd_longrunning
[sched_state
.side
] = latency
;
15754 fpd_longrunning
[sched_state
.side
] = latency
;
15757 if (s390_tune
>= PROCESSOR_2964_Z13
)
15760 unsigned unit_mask
, m
= 1;
15762 unit_mask
= s390_get_unit_mask (insn
, &units
);
15763 gcc_assert (units
<= MAX_SCHED_UNITS
);
15765 for (i
= 0; i
< units
; i
++, m
<<= 1)
15767 last_scheduled_unit_distance
[i
][sched_state
.side
] = 0;
15768 else if (last_scheduled_unit_distance
[i
][sched_state
.side
]
15769 < MAX_SCHED_MIX_DISTANCE
)
15770 last_scheduled_unit_distance
[i
][sched_state
.side
]++;
15773 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
15774 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0
15775 || (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0
15776 || (mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
15778 sched_state
.group_state
= 0;
15783 switch (sched_state
.group_state
)
15786 sched_state
.group_state
++;
15789 sched_state
.group_state
++;
15790 if (sched_state
.group_of_two
)
15792 sched_state
.group_state
= 0;
15797 sched_state
.group_state
++;
15805 unsigned int sched_mask
;
15807 sched_mask
= s390_get_sched_attrmask (insn
);
15809 fprintf (file
, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn
));
15810 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15811 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
15812 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
15813 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
15814 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
15815 #undef PRINT_SCHED_ATTR
15817 if (s390_tune
>= PROCESSOR_2964_Z13
)
15819 unsigned int unit_mask
, m
= 1;
15822 unit_mask
= s390_get_unit_mask (insn
, &units
);
15823 fprintf (file
, "(units:");
15824 for (j
= 0; j
< units
; j
++, m
<<= 1)
15826 fprintf (file
, " %d", j
);
15827 fprintf (file
, ")");
15829 fprintf (file
, " sched state: %d\n", sched_state
.group_state
);
15831 if (s390_tune
>= PROCESSOR_2964_Z13
)
15835 s390_get_unit_mask (insn
, &units
);
15837 fprintf (file
, ";;\t\tBACKEND: units on this side (%d) unused for: ", sched_state
.side
);
15838 for (j
= 0; j
< units
; j
++)
15839 fprintf (file
, "%d:%d ", j
,
15840 last_scheduled_unit_distance
[j
][sched_state
.side
]);
15841 fprintf (file
, "\n");
15845 /* If this insn ended a group, the next will be on the other side. */
15848 sched_state
.group_state
= 0;
15849 sched_state
.side
= sched_state
.side
? 0 : 1;
15850 sched_state
.group_of_two
= false;
15854 if (GET_CODE (PATTERN (insn
)) != USE
15855 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
15862 s390_sched_init (FILE *file ATTRIBUTE_UNUSED
,
15863 int verbose ATTRIBUTE_UNUSED
,
15864 int max_ready ATTRIBUTE_UNUSED
)
15866 /* If the next basic block is most likely entered via a fallthru edge
15867 we keep the last sched state. Otherwise we start a new group.
15868 The scheduler traverses basic blocks in "instruction stream" ordering
15869 so if we see a fallthru edge here, sched_state will be of its
15872 current_sched_info->prev_head is the insn before the first insn of the
15873 block of insns to be scheduled.
15875 last_scheduled_insn
= NULL
;
15876 memset (last_scheduled_unit_distance
, 0,
15877 MAX_SCHED_UNITS
* NUM_SIDES
* sizeof (int));
15878 memset (fpd_longrunning
, 0, NUM_SIDES
* sizeof (int));
15879 memset (fxd_longrunning
, 0, NUM_SIDES
* sizeof (int));
15883 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15884 a new number struct loop *loop should be unrolled if tuned for cpus with
15885 a built-in stride prefetcher.
15886 The loop is analyzed for memory accesses by calling check_dpu for
15887 each rtx of the loop. Depending on the loop_depth and the amount of
15888 memory accesses a new number <=nunroll is returned to improve the
15889 behavior of the hardware prefetch unit. */
15891 s390_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
15896 unsigned mem_count
= 0;
15898 if (s390_tune
< PROCESSOR_2097_Z10
)
15901 if (unroll_only_small_loops
)
15903 /* Only unroll loops smaller than or equal to 12 insns. */
15904 const unsigned int small_threshold
= 12;
15906 if (loop
->ninsns
> small_threshold
)
15909 /* ???: Make this dependent on the type of registers in
15910 the loop. Increase the limit for vector registers. */
15911 const unsigned int max_insns
= optimize
>= 3 ? 36 : 24;
15913 nunroll
= MIN (nunroll
, max_insns
/ loop
->ninsns
);
15916 /* Count the number of memory references within the loop body. */
15917 bbs
= get_loop_body (loop
);
15918 subrtx_iterator::array_type array
;
15919 for (i
= 0; i
< loop
->num_nodes
; i
++)
15920 FOR_BB_INSNS (bbs
[i
], insn
)
15921 if (INSN_P (insn
) && INSN_CODE (insn
) != -1)
15925 /* The runtime of small loops with memory block operations
15926 will be determined by the memory operation. Doing
15927 unrolling doesn't help here. Measurements to confirm
15928 this where only done on recent CPU levels. So better do
15929 not change anything for older CPUs. */
15930 if (s390_tune
>= PROCESSOR_2964_Z13
15931 && loop
->ninsns
<= BLOCK_MEM_OPS_LOOP_INSNS
15932 && ((set
= single_set (insn
)) != NULL_RTX
)
15933 && ((GET_MODE (SET_DEST (set
)) == BLKmode
15934 && (GET_MODE (SET_SRC (set
)) == BLKmode
15935 || SET_SRC (set
) == const0_rtx
))
15936 || (GET_CODE (SET_SRC (set
)) == COMPARE
15937 && GET_MODE (XEXP (SET_SRC (set
), 0)) == BLKmode
15938 && GET_MODE (XEXP (SET_SRC (set
), 1)) == BLKmode
)))
15944 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
15950 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15951 if (mem_count
== 0)
15954 switch (loop_depth(loop
))
15957 return MIN (nunroll
, 28 / mem_count
);
15959 return MIN (nunroll
, 22 / mem_count
);
15961 return MIN (nunroll
, 16 / mem_count
);
15965 /* Restore the current options. This is a hook function and also called
15969 s390_function_specific_restore (struct gcc_options
*opts
,
15970 struct gcc_options */
* opts_set */
,
15971 struct cl_target_option
*ptr ATTRIBUTE_UNUSED
)
15973 opts
->x_s390_cost_pointer
= (long)processor_table
[opts
->x_s390_tune
].cost
;
15977 s390_default_align (struct gcc_options
*opts
)
15979 /* Set the default function alignment to 16 in order to get rid of
15980 some unwanted performance effects. */
15981 if (opts
->x_flag_align_functions
&& !opts
->x_str_align_functions
15982 && opts
->x_s390_tune
>= PROCESSOR_2964_Z13
)
15983 opts
->x_str_align_functions
= "16";
15987 s390_override_options_after_change (void)
15989 s390_default_align (&global_options
);
15991 /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
15992 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
15993 || (OPTION_SET_P (flag_unroll_all_loops
)
15994 && flag_unroll_all_loops
))
15996 if (!OPTION_SET_P (unroll_only_small_loops
))
15997 unroll_only_small_loops
= 0;
15998 if (!OPTION_SET_P (flag_cunroll_grow_size
))
15999 flag_cunroll_grow_size
= 1;
16001 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
16002 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
16006 s390_option_override_internal (struct gcc_options
*opts
,
16007 struct gcc_options
*opts_set
)
16009 /* Architecture mode defaults according to ABI. */
16010 if (!(opts_set
->x_target_flags
& MASK_ZARCH
))
16013 opts
->x_target_flags
|= MASK_ZARCH
;
16015 opts
->x_target_flags
&= ~MASK_ZARCH
;
16018 /* Set the march default in case it hasn't been specified on cmdline. */
16019 if (!opts_set
->x_s390_arch
)
16020 opts
->x_s390_arch
= PROCESSOR_2064_Z900
;
16022 opts
->x_s390_arch_flags
= processor_flags_table
[(int) opts
->x_s390_arch
];
16024 /* Determine processor to tune for. */
16025 if (!opts_set
->x_s390_tune
)
16026 opts
->x_s390_tune
= opts
->x_s390_arch
;
16028 opts
->x_s390_tune_flags
= processor_flags_table
[opts
->x_s390_tune
];
16030 /* Sanity checks. */
16031 if (opts
->x_s390_arch
== PROCESSOR_NATIVE
16032 || opts
->x_s390_tune
== PROCESSOR_NATIVE
)
16033 gcc_unreachable ();
16034 if (TARGET_64BIT
&& !TARGET_ZARCH_P (opts
->x_target_flags
))
16035 error ("64-bit ABI not supported in ESA/390 mode");
16037 if (opts
->x_s390_indirect_branch
== indirect_branch_thunk_inline
16038 || opts
->x_s390_indirect_branch_call
== indirect_branch_thunk_inline
16039 || opts
->x_s390_function_return
== indirect_branch_thunk_inline
16040 || opts
->x_s390_function_return_reg
== indirect_branch_thunk_inline
16041 || opts
->x_s390_function_return_mem
== indirect_branch_thunk_inline
)
16042 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
16044 if (opts
->x_s390_indirect_branch
!= indirect_branch_keep
)
16046 if (!opts_set
->x_s390_indirect_branch_call
)
16047 opts
->x_s390_indirect_branch_call
= opts
->x_s390_indirect_branch
;
16049 if (!opts_set
->x_s390_indirect_branch_jump
)
16050 opts
->x_s390_indirect_branch_jump
= opts
->x_s390_indirect_branch
;
16053 if (opts
->x_s390_function_return
!= indirect_branch_keep
)
16055 if (!opts_set
->x_s390_function_return_reg
)
16056 opts
->x_s390_function_return_reg
= opts
->x_s390_function_return
;
16058 if (!opts_set
->x_s390_function_return_mem
)
16059 opts
->x_s390_function_return_mem
= opts
->x_s390_function_return
;
16062 /* Enable hardware transactions if available and not explicitly
16063 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
16064 if (!TARGET_OPT_HTM_P (opts_set
->x_target_flags
))
16066 if (TARGET_CPU_HTM_P (opts
) && TARGET_ZARCH_P (opts
->x_target_flags
))
16067 opts
->x_target_flags
|= MASK_OPT_HTM
;
16069 opts
->x_target_flags
&= ~MASK_OPT_HTM
;
16072 if (TARGET_OPT_VX_P (opts_set
->x_target_flags
))
16074 if (TARGET_OPT_VX_P (opts
->x_target_flags
))
16076 if (!TARGET_CPU_VX_P (opts
))
16077 error ("hardware vector support not available on %s",
16078 processor_table
[(int)opts
->x_s390_arch
].name
);
16079 if (TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
16080 error ("hardware vector support not available with "
16081 "%<-msoft-float%>");
16086 if (TARGET_CPU_VX_P (opts
))
16087 /* Enable vector support if available and not explicitly disabled
16088 by user. E.g. with -m31 -march=z13 -mzarch */
16089 opts
->x_target_flags
|= MASK_OPT_VX
;
16091 opts
->x_target_flags
&= ~MASK_OPT_VX
;
16094 /* Use hardware DFP if available and not explicitly disabled by
16095 user. E.g. with -m31 -march=z10 -mzarch */
16096 if (!TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
16098 if (TARGET_DFP_P (opts
))
16099 opts
->x_target_flags
|= MASK_HARD_DFP
;
16101 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
16104 if (TARGET_HARD_DFP_P (opts
->x_target_flags
) && !TARGET_DFP_P (opts
))
16106 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
16108 if (!TARGET_CPU_DFP_P (opts
))
16109 error ("hardware decimal floating-point instructions"
16110 " not available on %s",
16111 processor_table
[(int)opts
->x_s390_arch
].name
);
16112 if (!TARGET_ZARCH_P (opts
->x_target_flags
))
16113 error ("hardware decimal floating-point instructions"
16114 " not available in ESA/390 mode");
16117 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
16120 if (TARGET_SOFT_FLOAT_P (opts_set
->x_target_flags
)
16121 && TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
16123 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
)
16124 && TARGET_HARD_DFP_P (opts
->x_target_flags
))
16125 error ("%<-mhard-dfp%> cannot be used in conjunction with "
16126 "%<-msoft-float%>");
16128 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
16131 if (TARGET_BACKCHAIN_P (opts
->x_target_flags
)
16132 && TARGET_PACKED_STACK_P (opts
->x_target_flags
)
16133 && TARGET_HARD_FLOAT_P (opts
->x_target_flags
))
16134 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
16135 "supported in combination");
16137 if (opts
->x_s390_stack_size
)
16139 if (opts
->x_s390_stack_guard
>= opts
->x_s390_stack_size
)
16140 error ("stack size must be greater than the stack guard value");
16141 else if (opts
->x_s390_stack_size
> 1 << 16)
16142 error ("stack size must not be greater than 64k");
16144 else if (opts
->x_s390_stack_guard
)
16145 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
16147 /* Our implementation of the stack probe requires the probe interval
16148 to be used as displacement in an address operand. The maximum
16149 probe interval currently is 64k. This would exceed short
16150 displacements. Trim that value down to 4k if that happens. This
16151 might result in too many probes being generated only on the
16152 oldest supported machine level z900. */
16153 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval
)))
16154 param_stack_clash_protection_probe_interval
= 12;
16156 #if TARGET_TPF != 0
16157 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_check
))
16158 error ("%<-mtpf-trace-hook-prologue-check%> requires integer in range 0-4095");
16160 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_target
))
16161 error ("%<-mtpf-trace-hook-prologue-target%> requires integer in range 0-4095");
16163 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_check
))
16164 error ("%<-mtpf-trace-hook-epilogue-check%> requires integer in range 0-4095");
16166 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_target
))
16167 error ("%<-mtpf-trace-hook-epilogue-target%> requires integer in range 0-4095");
16169 if (s390_tpf_trace_skip
)
16171 opts
->x_s390_tpf_trace_hook_prologue_target
= TPF_TRACE_PROLOGUE_SKIP_TARGET
;
16172 opts
->x_s390_tpf_trace_hook_epilogue_target
= TPF_TRACE_EPILOGUE_SKIP_TARGET
;
16176 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
16177 if (!TARGET_LONG_DOUBLE_128_P (opts_set
->x_target_flags
))
16178 opts
->x_target_flags
|= MASK_LONG_DOUBLE_128
;
16181 if (opts
->x_s390_tune
>= PROCESSOR_2097_Z10
)
16183 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unrolled_insns
,
16185 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unroll_times
, 32);
16186 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peeled_insns
,
16188 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peel_times
,
16192 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_pending_list_length
,
16194 /* values for loop prefetching */
16195 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_line_size
, 256);
16196 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_size
, 128);
16197 /* s390 has more than 2 levels and the size is much larger. Since
16198 we are always running virtualized assume that we only get a small
16199 part of the caches above l1. */
16200 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l2_cache_size
, 1500);
16201 SET_OPTION_IF_UNSET (opts
, opts_set
,
16202 param_prefetch_min_insn_to_mem_ratio
, 2);
16203 SET_OPTION_IF_UNSET (opts
, opts_set
, param_simultaneous_prefetches
, 6);
16205 /* Use the alternative scheduling-pressure algorithm by default. */
16206 SET_OPTION_IF_UNSET (opts
, opts_set
, param_sched_pressure_algorithm
, 2);
16208 /* Allow simple vector masking using vll/vstl for epilogues. */
16210 SET_OPTION_IF_UNSET (opts
, opts_set
, param_vect_partial_vector_usage
, 1);
16212 SET_OPTION_IF_UNSET (opts
, opts_set
, param_vect_partial_vector_usage
, 0);
16214 /* Do not vectorize loops with a low trip count for now. */
16215 SET_OPTION_IF_UNSET (opts
, opts_set
, param_min_vect_loop_bound
, 2);
16217 /* Set the default alignment. */
16218 s390_default_align (opts
);
16220 /* Set unroll options. */
16221 s390_override_options_after_change ();
16223 /* Call target specific restore function to do post-init work. At the moment,
16224 this just sets opts->x_s390_cost_pointer. */
16225 s390_function_specific_restore (opts
, opts_set
, NULL
);
16227 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
16228 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
16229 not the case when the code runs before the prolog. */
16230 if (opts
->x_flag_fentry
&& !TARGET_64BIT
)
16231 error ("%<-mfentry%> is supported only for 64-bit CPUs");
16235 s390_option_override (void)
16238 cl_deferred_option
*opt
;
16239 vec
<cl_deferred_option
> *v
=
16240 (vec
<cl_deferred_option
> *) s390_deferred_options
;
16243 FOR_EACH_VEC_ELT (*v
, i
, opt
)
16245 switch (opt
->opt_index
)
16247 case OPT_mhotpatch_
:
16251 char *s
= strtok (ASTRDUP (opt
->arg
), ",");
16252 char *t
= strtok (NULL
, "\0");
16256 val1
= integral_argument (s
);
16257 val2
= integral_argument (t
);
16264 if (val1
== -1 || val2
== -1)
16266 /* argument is not a plain number */
16267 error ("arguments to %qs should be non-negative integers",
16271 else if (val1
> s390_hotpatch_hw_max
16272 || val2
> s390_hotpatch_hw_max
)
16274 error ("argument to %qs is too large (max. %d)",
16275 "-mhotpatch=n,m", s390_hotpatch_hw_max
);
16278 s390_hotpatch_hw_before_label
= val1
;
16279 s390_hotpatch_hw_after_label
= val2
;
16283 gcc_unreachable ();
16287 /* Set up function hooks. */
16288 init_machine_status
= s390_init_machine_status
;
16290 s390_option_override_internal (&global_options
, &global_options_set
);
16292 /* Save the initial options in case the user does function specific
16294 target_option_default_node
16295 = build_target_option_node (&global_options
, &global_options_set
);
16296 target_option_current_node
= target_option_default_node
;
16298 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
16299 requires the arch flags to be evaluated already. Since prefetching
16300 is beneficial on s390, we enable it if available. */
16301 if (flag_prefetch_loop_arrays
< 0 && HAVE_prefetch
&& optimize
>= 3)
16302 flag_prefetch_loop_arrays
= 1;
16304 if (!s390_pic_data_is_text_relative
&& !flag_pic
)
16305 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
16306 "%<-fpic%>/%<-fPIC%>");
16310 /* Don't emit DWARF3/4 unless specifically selected. The TPF
16311 debuggers do not yet support DWARF 3/4. */
16312 if (!OPTION_SET_P (dwarf_strict
))
16314 if (!OPTION_SET_P (dwarf_version
))
16319 #if S390_USE_TARGET_ATTRIBUTE
16320 /* Inner function to process the attribute((target(...))), take an argument and
16321 set the current options from the argument. If we have a list, recursively go
16325 s390_valid_target_attribute_inner_p (tree args
,
16326 struct gcc_options
*opts
,
16327 struct gcc_options
*new_opts_set
,
16333 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
16334 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
16335 static const struct
16337 const char *string
;
16341 int only_as_pragma
;
16344 S390_ATTRIB ("arch=", OPT_march_
, 1),
16345 S390_ATTRIB ("tune=", OPT_mtune_
, 1),
16346 /* uinteger options */
16347 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_
, 1),
16348 S390_ATTRIB ("stack-size=", OPT_mstack_size_
, 1),
16349 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_
, 1),
16350 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_
, 1),
16352 S390_ATTRIB ("backchain", OPT_mbackchain
, 0),
16353 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp
, 0),
16354 S390_ATTRIB ("hard-float", OPT_mhard_float
, 0),
16355 S390_ATTRIB ("htm", OPT_mhtm
, 0),
16356 S390_ATTRIB ("vx", OPT_mvx
, 0),
16357 S390_ATTRIB ("packed-stack", OPT_mpacked_stack
, 0),
16358 S390_ATTRIB ("small-exec", OPT_msmall_exec
, 0),
16359 S390_ATTRIB ("soft-float", OPT_msoft_float
, 0),
16360 S390_ATTRIB ("mvcle", OPT_mmvcle
, 0),
16361 S390_PRAGMA ("zvector", OPT_mzvector
, 0),
16362 /* boolean options */
16363 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack
, 0),
16368 /* If this is a list, recurse to get the options. */
16369 if (TREE_CODE (args
) == TREE_LIST
)
16372 int num_pragma_values
;
16375 /* Note: attribs.cc:decl_attributes prepends the values from
16376 current_target_pragma to the list of target attributes. To determine
16377 whether we're looking at a value of the attribute or the pragma we
16378 assume that the first [list_length (current_target_pragma)] values in
16379 the list are the values from the pragma. */
16380 num_pragma_values
= (!force_pragma
&& current_target_pragma
!= NULL
)
16381 ? list_length (current_target_pragma
) : 0;
16382 for (i
= 0; args
; args
= TREE_CHAIN (args
), i
++)
16386 is_pragma
= (force_pragma
|| i
< num_pragma_values
);
16387 if (TREE_VALUE (args
)
16388 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args
),
16389 opts
, new_opts_set
,
16398 else if (TREE_CODE (args
) != STRING_CST
)
16400 error ("attribute %<target%> argument not a string");
16404 /* Handle multiple arguments separated by commas. */
16405 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
16407 while (next_optstr
&& *next_optstr
!= '\0')
16409 char *p
= next_optstr
;
16411 char *comma
= strchr (next_optstr
, ',');
16412 size_t len
, opt_len
;
16418 enum cl_var_type var_type
;
16424 len
= comma
- next_optstr
;
16425 next_optstr
= comma
+ 1;
16430 next_optstr
= NULL
;
16433 /* Recognize no-xxx. */
16434 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
16443 /* Find the option. */
16446 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
16448 opt_len
= attrs
[i
].len
;
16449 if (ch
== attrs
[i
].string
[0]
16450 && ((attrs
[i
].has_arg
) ? len
> opt_len
: len
== opt_len
)
16451 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
16453 opt
= attrs
[i
].opt
;
16454 if (!opt_set_p
&& cl_options
[opt
].cl_reject_negative
)
16456 mask
= cl_options
[opt
].var_value
;
16457 var_type
= cl_options
[opt
].var_type
;
16463 /* Process the option. */
16466 error ("attribute %<target%> argument %qs is unknown", orig_p
);
16469 else if (attrs
[i
].only_as_pragma
&& !force_pragma
)
16471 /* Value is not allowed for the target attribute. */
16472 error ("value %qs is not supported by attribute %<target%>",
16477 else if (var_type
== CLVC_BIT_SET
|| var_type
== CLVC_BIT_CLEAR
)
16479 if (var_type
== CLVC_BIT_CLEAR
)
16480 opt_set_p
= !opt_set_p
;
16483 opts
->x_target_flags
|= mask
;
16485 opts
->x_target_flags
&= ~mask
;
16486 new_opts_set
->x_target_flags
|= mask
;
16489 else if (cl_options
[opt
].var_type
== CLVC_INTEGER
)
16493 if (cl_options
[opt
].cl_uinteger
)
16495 /* Unsigned integer argument. Code based on the function
16496 decode_cmdline_option () in opts-common.cc. */
16497 value
= integral_argument (p
+ opt_len
);
16500 value
= (opt_set_p
) ? 1 : 0;
16504 struct cl_decoded_option decoded
;
16506 /* Value range check; only implemented for numeric and boolean
16507 options at the moment. */
16508 generate_option (opt
, NULL
, value
, CL_TARGET
, &decoded
);
16509 s390_handle_option (opts
, new_opts_set
, &decoded
, input_location
);
16510 set_option (opts
, new_opts_set
, opt
, value
,
16511 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
16516 error ("attribute %<target%> argument %qs is unknown", orig_p
);
16521 else if (cl_options
[opt
].var_type
== CLVC_ENUM
)
16526 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
16528 set_option (opts
, new_opts_set
, opt
, value
,
16529 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
16533 error ("attribute %<target%> argument %qs is unknown", orig_p
);
16539 gcc_unreachable ();
16544 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
16547 s390_valid_target_attribute_tree (tree args
,
16548 struct gcc_options
*opts
,
16549 const struct gcc_options
*opts_set
,
16552 tree t
= NULL_TREE
;
16553 struct gcc_options new_opts_set
;
16555 memset (&new_opts_set
, 0, sizeof (new_opts_set
));
16557 /* Process each of the options on the chain. */
16558 if (! s390_valid_target_attribute_inner_p (args
, opts
, &new_opts_set
,
16560 return error_mark_node
;
16562 /* If some option was set (even if it has not changed), rerun
16563 s390_option_override_internal, and then save the options away. */
16564 if (new_opts_set
.x_target_flags
16565 || new_opts_set
.x_s390_arch
16566 || new_opts_set
.x_s390_tune
16567 || new_opts_set
.x_s390_stack_guard
16568 || new_opts_set
.x_s390_stack_size
16569 || new_opts_set
.x_s390_branch_cost
16570 || new_opts_set
.x_s390_warn_framesize
16571 || new_opts_set
.x_s390_warn_dynamicstack_p
)
16573 const unsigned char *src
= (const unsigned char *)opts_set
;
16574 unsigned char *dest
= (unsigned char *)&new_opts_set
;
16577 /* Merge the original option flags into the new ones. */
16578 for (i
= 0; i
< sizeof(*opts_set
); i
++)
16581 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
16582 s390_option_override_internal (opts
, &new_opts_set
);
16583 /* Save the current options unless we are validating options for
16585 t
= build_target_option_node (opts
, &new_opts_set
);
16590 /* Hook to validate attribute((target("string"))). */
16593 s390_valid_target_attribute_p (tree fndecl
,
16594 tree
ARG_UNUSED (name
),
16596 int ARG_UNUSED (flags
))
16598 struct gcc_options func_options
, func_options_set
;
16599 tree new_target
, new_optimize
;
16602 /* attribute((target("default"))) does nothing, beyond
16603 affecting multi-versioning. */
16604 if (TREE_VALUE (args
)
16605 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
16606 && TREE_CHAIN (args
) == NULL_TREE
16607 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
16611 = build_optimization_node (&global_options
, &global_options_set
);
16613 /* Get the optimization options of the current function. */
16614 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
16616 if (!func_optimize
)
16617 func_optimize
= old_optimize
;
16619 /* Init func_options. */
16620 memset (&func_options
, 0, sizeof (func_options
));
16621 init_options_struct (&func_options
, NULL
);
16622 lang_hooks
.init_options_struct (&func_options
);
16623 memset (&func_options_set
, 0, sizeof (func_options_set
));
16625 cl_optimization_restore (&func_options
, &func_options_set
,
16626 TREE_OPTIMIZATION (func_optimize
));
16628 /* Initialize func_options to the default before its target options can
16630 cl_target_option_restore (&func_options
, &func_options_set
,
16631 TREE_TARGET_OPTION (target_option_default_node
));
16633 new_target
= s390_valid_target_attribute_tree (args
, &func_options
,
16634 &global_options_set
,
16636 current_target_pragma
));
16637 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
16638 if (new_target
== error_mark_node
)
16640 else if (fndecl
&& new_target
)
16642 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
16643 if (old_optimize
!= new_optimize
)
16644 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
16649 /* Hook to determine if one function can safely inline another. */
16652 s390_can_inline_p (tree caller
, tree callee
)
16654 /* Flags which if present in the callee are required in the caller as well. */
16655 const unsigned HOST_WIDE_INT caller_required_masks
= MASK_OPT_HTM
;
16657 /* Flags which affect the ABI and in general prevent inlining. */
16658 unsigned HOST_WIDE_INT must_match_masks
16659 = (MASK_64BIT
| MASK_ZARCH
| MASK_HARD_DFP
| MASK_SOFT_FLOAT
16660 | MASK_LONG_DOUBLE_128
| MASK_OPT_VX
);
16662 /* Flags which we in general want to prevent inlining but accept for
16664 const unsigned HOST_WIDE_INT always_inline_safe_masks
16665 = MASK_MVCLE
| MASK_BACKCHAIN
| MASK_SMALL_EXEC
;
16667 const HOST_WIDE_INT all_masks
16668 = (caller_required_masks
| must_match_masks
| always_inline_safe_masks
16669 | MASK_DEBUG_ARG
| MASK_PACKED_STACK
| MASK_ZVECTOR
);
16671 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
16672 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
16675 callee_tree
= target_option_default_node
;
16677 caller_tree
= target_option_default_node
;
16678 if (callee_tree
== caller_tree
)
16681 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
16682 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
16684 /* If one of these triggers make sure to add proper handling of your
16685 new flag to this hook. */
16686 gcc_assert (!(caller_opts
->x_target_flags
& ~all_masks
));
16687 gcc_assert (!(callee_opts
->x_target_flags
& ~all_masks
));
16690 = (DECL_DISREGARD_INLINE_LIMITS (callee
)
16691 && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee
)));
16693 if (!always_inline
)
16694 must_match_masks
|= always_inline_safe_masks
;
16696 /* Inlining a hard float function into a soft float function is only
16697 allowed if the hard float function doesn't actually make use of
16700 We are called from FEs for multi-versioning call optimization, so
16701 beware of ipa_fn_summaries not available. */
16702 if (always_inline
&& ipa_fn_summaries
16703 && !ipa_fn_summaries
->get(cgraph_node::get (callee
))->fp_expressions
)
16704 must_match_masks
&= ~(MASK_HARD_DFP
| MASK_SOFT_FLOAT
);
16706 if ((caller_opts
->x_target_flags
& must_match_masks
)
16707 != (callee_opts
->x_target_flags
& must_match_masks
))
16710 if (~(caller_opts
->x_target_flags
& caller_required_masks
)
16711 & (callee_opts
->x_target_flags
& caller_required_masks
))
16714 /* Don't inline functions to be compiled for a more recent arch into a
16715 function for an older arch. */
16716 if (caller_opts
->x_s390_arch
< callee_opts
->x_s390_arch
)
16719 if (!always_inline
&& caller_opts
->x_s390_tune
!= callee_opts
->x_s390_tune
)
16726 /* Set VAL to correct enum value according to the indirect-branch or
16727 function-return attribute in ATTR. */
16730 s390_indirect_branch_attrvalue (tree attr
, enum indirect_branch
*val
)
16732 const char *str
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
16733 if (strcmp (str
, "keep") == 0)
16734 *val
= indirect_branch_keep
;
16735 else if (strcmp (str
, "thunk") == 0)
16736 *val
= indirect_branch_thunk
;
16737 else if (strcmp (str
, "thunk-inline") == 0)
16738 *val
= indirect_branch_thunk_inline
;
16739 else if (strcmp (str
, "thunk-extern") == 0)
16740 *val
= indirect_branch_thunk_extern
;
16743 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16744 from either the cmdline or the function attributes in
16748 s390_indirect_branch_settings (tree fndecl
)
16755 /* Initialize with the cmdline options and let the attributes
16757 cfun
->machine
->indirect_branch_jump
= s390_indirect_branch_jump
;
16758 cfun
->machine
->indirect_branch_call
= s390_indirect_branch_call
;
16760 cfun
->machine
->function_return_reg
= s390_function_return_reg
;
16761 cfun
->machine
->function_return_mem
= s390_function_return_mem
;
16763 if ((attr
= lookup_attribute ("indirect_branch",
16764 DECL_ATTRIBUTES (fndecl
))))
16766 s390_indirect_branch_attrvalue (attr
,
16767 &cfun
->machine
->indirect_branch_jump
);
16768 s390_indirect_branch_attrvalue (attr
,
16769 &cfun
->machine
->indirect_branch_call
);
16772 if ((attr
= lookup_attribute ("indirect_branch_jump",
16773 DECL_ATTRIBUTES (fndecl
))))
16774 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_jump
);
16776 if ((attr
= lookup_attribute ("indirect_branch_call",
16777 DECL_ATTRIBUTES (fndecl
))))
16778 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_call
);
16780 if ((attr
= lookup_attribute ("function_return",
16781 DECL_ATTRIBUTES (fndecl
))))
16783 s390_indirect_branch_attrvalue (attr
,
16784 &cfun
->machine
->function_return_reg
);
16785 s390_indirect_branch_attrvalue (attr
,
16786 &cfun
->machine
->function_return_mem
);
16789 if ((attr
= lookup_attribute ("function_return_reg",
16790 DECL_ATTRIBUTES (fndecl
))))
16791 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_reg
);
16793 if ((attr
= lookup_attribute ("function_return_mem",
16794 DECL_ATTRIBUTES (fndecl
))))
16795 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_mem
);
16798 #if S390_USE_TARGET_ATTRIBUTE
16799 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
16803 s390_activate_target_options (tree new_tree
)
16805 cl_target_option_restore (&global_options
, &global_options_set
,
16806 TREE_TARGET_OPTION (new_tree
));
16807 if (TREE_TARGET_GLOBALS (new_tree
))
16808 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
16809 else if (new_tree
== target_option_default_node
)
16810 restore_target_globals (&default_target_globals
);
16812 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
16813 s390_previous_fndecl
= NULL_TREE
;
16817 /* Establish appropriate back-end context for processing the function
16818 FNDECL. The argument might be NULL to indicate processing at top
16819 level, outside of any function scope. */
16821 s390_set_current_function (tree fndecl
)
16823 #if S390_USE_TARGET_ATTRIBUTE
16824 /* Only change the context if the function changes. This hook is called
16825 several times in the course of compiling a function, and we don't want to
16826 slow things down too much or call target_reinit when it isn't safe. */
16827 if (fndecl
== s390_previous_fndecl
)
16829 s390_indirect_branch_settings (fndecl
);
16834 if (s390_previous_fndecl
== NULL_TREE
)
16835 old_tree
= target_option_current_node
;
16836 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
))
16837 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
);
16839 old_tree
= target_option_default_node
;
16841 if (fndecl
== NULL_TREE
)
16843 if (old_tree
!= target_option_current_node
)
16844 s390_activate_target_options (target_option_current_node
);
16848 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
16849 if (new_tree
== NULL_TREE
)
16850 new_tree
= target_option_default_node
;
16852 if (old_tree
!= new_tree
)
16853 s390_activate_target_options (new_tree
);
16854 s390_previous_fndecl
= fndecl
;
16856 s390_indirect_branch_settings (fndecl
);
16859 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16862 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
16863 unsigned int align ATTRIBUTE_UNUSED
,
16864 enum by_pieces_operation op ATTRIBUTE_UNUSED
,
16865 bool speed_p ATTRIBUTE_UNUSED
)
16867 return (size
== 1 || size
== 2
16868 || size
== 4 || (TARGET_ZARCH
&& size
== 8));
16871 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16874 s390_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
16876 tree sfpc
= s390_builtin_decls
[S390_BUILTIN_s390_sfpc
];
16877 tree efpc
= s390_builtin_decls
[S390_BUILTIN_s390_efpc
];
16878 tree call_efpc
= build_call_expr (efpc
, 0);
16879 tree fenv_var
= create_tmp_var_raw (unsigned_type_node
);
16881 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16882 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16883 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16884 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16885 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16886 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16888 /* Generates the equivalent of feholdexcept (&fenv_var)
16890 fenv_var = __builtin_s390_efpc ();
16891 __builtin_s390_sfpc (fenv_var & mask) */
16892 tree old_fpc
= build4 (TARGET_EXPR
, unsigned_type_node
, fenv_var
, call_efpc
,
16893 NULL_TREE
, NULL_TREE
);
16895 = build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
,
16896 build_int_cst (unsigned_type_node
,
16897 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
16898 | FPC_EXCEPTION_MASK
)));
16899 tree set_new_fpc
= build_call_expr (sfpc
, 1, new_fpc
);
16900 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, old_fpc
, set_new_fpc
);
16902 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16904 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16905 new_fpc
= build2 (BIT_AND_EXPR
, unsigned_type_node
, call_efpc
,
16906 build_int_cst (unsigned_type_node
,
16907 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
)));
16908 *clear
= build_call_expr (sfpc
, 1, new_fpc
);
16910 /* Generates the equivalent of feupdateenv (fenv_var)
16912 old_fpc = __builtin_s390_efpc ();
16913 __builtin_s390_sfpc (fenv_var);
16914 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16916 old_fpc
= create_tmp_var_raw (unsigned_type_node
);
16917 tree store_old_fpc
= build4 (TARGET_EXPR
, void_type_node
, old_fpc
, call_efpc
,
16918 NULL_TREE
, NULL_TREE
);
16920 set_new_fpc
= build_call_expr (sfpc
, 1, fenv_var
);
16922 tree raise_old_except
= build2 (BIT_AND_EXPR
, unsigned_type_node
, old_fpc
,
16923 build_int_cst (unsigned_type_node
,
16925 raise_old_except
= build2 (RSHIFT_EXPR
, unsigned_type_node
, raise_old_except
,
16926 build_int_cst (unsigned_type_node
,
16928 tree atomic_feraiseexcept
16929 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
16930 raise_old_except
= build_call_expr (atomic_feraiseexcept
,
16931 1, raise_old_except
);
16933 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
16934 build2 (COMPOUND_EXPR
, void_type_node
,
16935 store_old_fpc
, set_new_fpc
),
16938 #undef FPC_EXCEPTION_MASK
16939 #undef FPC_FLAGS_MASK
16940 #undef FPC_DXC_MASK
16941 #undef FPC_EXCEPTION_MASK_SHIFT
16942 #undef FPC_FLAGS_SHIFT
16943 #undef FPC_DXC_SHIFT
16946 /* Return the vector mode to be used for inner mode MODE when doing
16948 static machine_mode
16949 s390_preferred_simd_mode (scalar_mode mode
)
16977 /* Our hardware does not require vectors to be strictly aligned. */
16979 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED
,
16980 const_tree type ATTRIBUTE_UNUSED
,
16981 int misalignment ATTRIBUTE_UNUSED
,
16982 bool is_packed ATTRIBUTE_UNUSED
)
16987 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
16991 /* The vector ABI requires vector types to be aligned on an 8 byte
16992 boundary (our stack alignment). However, we allow this to be
16993 overriden by the user, while this definitely breaks the ABI. */
16994 static HOST_WIDE_INT
16995 s390_vector_alignment (const_tree type
)
16997 tree size
= TYPE_SIZE (type
);
16999 if (!TARGET_VX_ABI
)
17000 return default_vector_alignment (type
);
17002 if (TYPE_USER_ALIGN (type
))
17003 return TYPE_ALIGN (type
);
17005 if (tree_fits_uhwi_p (size
)
17006 && tree_to_uhwi (size
) < BIGGEST_ALIGNMENT
)
17007 return tree_to_uhwi (size
);
17009 return BIGGEST_ALIGNMENT
;
17012 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
17013 LARL instruction. */
17015 static HOST_WIDE_INT
17016 s390_constant_alignment (const_tree
, HOST_WIDE_INT align
)
17018 return MAX (align
, 16);
17021 #ifdef HAVE_AS_MACHINE_MACHINEMODE
17022 /* Implement TARGET_ASM_FILE_START. */
17024 s390_asm_file_start (void)
17026 default_file_start ();
17027 s390_asm_output_machine_for_arch (asm_out_file
);
17031 /* Implement TARGET_ASM_FILE_END. */
17033 s390_asm_file_end (void)
17035 #ifdef HAVE_AS_GNU_ATTRIBUTE
17036 varpool_node
*vnode
;
17037 cgraph_node
*cnode
;
17039 FOR_EACH_VARIABLE (vnode
)
17040 if (TREE_PUBLIC (vnode
->decl
))
17041 s390_check_type_for_vector_abi (TREE_TYPE (vnode
->decl
), false, false);
17043 FOR_EACH_FUNCTION (cnode
)
17044 if (TREE_PUBLIC (cnode
->decl
))
17045 s390_check_type_for_vector_abi (TREE_TYPE (cnode
->decl
), false, false);
17048 if (s390_vector_abi
!= 0)
17049 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
17052 file_end_indicate_exec_stack ();
17054 if (flag_split_stack
)
17055 file_end_indicate_split_stack ();
17058 /* Return true if TYPE is a vector bool type. */
17060 s390_vector_bool_type_p (const_tree type
)
17062 return TYPE_VECTOR_OPAQUE (type
);
17065 /* Return the diagnostic message string if the binary operation OP is
17066 not permitted on TYPE1 and TYPE2, NULL otherwise. */
17068 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
17070 bool bool1_p
, bool2_p
;
17074 machine_mode mode1
, mode2
;
17076 if (!TARGET_ZVECTOR
)
17079 if (!VECTOR_TYPE_P (type1
) || !VECTOR_TYPE_P (type2
))
17082 bool1_p
= s390_vector_bool_type_p (type1
);
17083 bool2_p
= s390_vector_bool_type_p (type2
);
17085 /* Mixing signed and unsigned types is forbidden for all
17087 if (!bool1_p
&& !bool2_p
17088 && TYPE_UNSIGNED (type1
) != TYPE_UNSIGNED (type2
))
17089 return N_("types differ in signedness");
17091 plusminus_p
= (op
== PLUS_EXPR
|| op
== MINUS_EXPR
);
17092 muldiv_p
= (op
== MULT_EXPR
|| op
== RDIV_EXPR
|| op
== TRUNC_DIV_EXPR
17093 || op
== CEIL_DIV_EXPR
|| op
== FLOOR_DIV_EXPR
17094 || op
== ROUND_DIV_EXPR
);
17095 compare_p
= (op
== LT_EXPR
|| op
== LE_EXPR
|| op
== GT_EXPR
|| op
== GE_EXPR
17096 || op
== EQ_EXPR
|| op
== NE_EXPR
);
17098 if (bool1_p
&& bool2_p
&& (plusminus_p
|| muldiv_p
))
17099 return N_("binary operator does not support two vector bool operands");
17101 if (bool1_p
!= bool2_p
&& (muldiv_p
|| compare_p
))
17102 return N_("binary operator does not support vector bool operand");
17104 mode1
= TYPE_MODE (type1
);
17105 mode2
= TYPE_MODE (type2
);
17107 if (bool1_p
!= bool2_p
&& plusminus_p
17108 && (GET_MODE_CLASS (mode1
) == MODE_VECTOR_FLOAT
17109 || GET_MODE_CLASS (mode2
) == MODE_VECTOR_FLOAT
))
17110 return N_("binary operator does not support mixing vector "
17111 "bool with floating point vector operands");
17116 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
17117 /* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with older
17120 For historical reasons, float_t and double_t had been typedef'ed to
17121 double on s390, causing operations on float_t to operate in a higher
17122 precision than is necessary. However, it is not the case that SFmode
17123 operations have implicit excess precision, and we generate more optimal
17124 code if we let the compiler know no implicit extra precision is added.
17126 With a glibc with that "historic" definition, configure will enable this hook
17127 to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as implied
17128 by -std=cXY). That means when we are compiling with -fexcess-precision=fast,
17129 the value we set for FLT_EVAL_METHOD will be out of line with the actual
17130 precision of float_t.
17132 Newer versions of glibc will be modified to derive the definition of float_t
17133 from FLT_EVAL_METHOD on s390x, as on many other architectures. There,
17134 configure will disable this hook by default, so that we defer to the default
17135 of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef of float_t to
17136 float. Note that in that scenario, float_t and FLT_EVAL_METHOD will be in
17137 line independent of -fexcess-precision. */
17139 static enum flt_eval_method
17140 s390_excess_precision (enum excess_precision_type type
)
17144 case EXCESS_PRECISION_TYPE_IMPLICIT
:
17145 case EXCESS_PRECISION_TYPE_FAST
:
17146 /* The fastest type to promote to will always be the native type,
17147 whether that occurs with implicit excess precision or
17149 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
17150 case EXCESS_PRECISION_TYPE_STANDARD
:
17151 /* Otherwise, when we are in a standards compliant mode, to
17152 ensure consistency with the implementation in glibc, report that
17153 float is evaluated to the range and precision of double. */
17154 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE
;
17155 case EXCESS_PRECISION_TYPE_FLOAT16
:
17156 error ("%<-fexcess-precision=16%> is not supported on this target");
17159 gcc_unreachable ();
17161 return FLT_EVAL_METHOD_UNPREDICTABLE
;
17166 s390_rawmemchr (machine_mode elt_mode
, rtx dst
, rtx src
, rtx pat
)
17168 machine_mode vec_mode
= mode_for_vector (as_a
<scalar_int_mode
> (elt_mode
),
17169 16 / GET_MODE_SIZE (elt_mode
)).require();
17170 rtx lens
= gen_reg_rtx (V16QImode
);
17171 rtx pattern
= gen_reg_rtx (vec_mode
);
17172 rtx loop_start
= gen_label_rtx ();
17173 rtx loop_end
= gen_label_rtx ();
17174 rtx addr
= gen_reg_rtx (Pmode
);
17175 rtx offset
= gen_reg_rtx (Pmode
);
17176 rtx loadlen
= gen_reg_rtx (SImode
);
17177 rtx matchlen
= gen_reg_rtx (SImode
);
17180 pat
= GEN_INT (trunc_int_for_mode (INTVAL (pat
), elt_mode
));
17181 emit_insn (gen_rtx_SET (pattern
, gen_rtx_VEC_DUPLICATE (vec_mode
, pat
)));
17183 emit_move_insn (addr
, XEXP (src
, 0));
17186 emit_insn (gen_vlbb (lens
, gen_rtx_MEM (BLKmode
, addr
), GEN_INT (6)));
17187 emit_insn (gen_lcbb (loadlen
, addr
, GEN_INT (6)));
17188 lens
= convert_to_mode (vec_mode
, lens
, 1);
17189 emit_insn (gen_vec_vfees (vec_mode
, lens
, lens
, pattern
, GEN_INT (0)));
17190 lens
= convert_to_mode (V4SImode
, lens
, 1);
17191 emit_insn (gen_vec_extractv4sisi (matchlen
, lens
, GEN_INT (1)));
17192 lens
= convert_to_mode (vec_mode
, lens
, 1);
17193 emit_cmp_and_jump_insns (matchlen
, loadlen
, LT
, NULL_RTX
, SImode
, 1, loop_end
);
17194 force_expand_binop (Pmode
, add_optab
, addr
, GEN_INT(16), addr
, 1, OPTAB_DIRECT
);
17195 force_expand_binop (Pmode
, and_optab
, addr
, GEN_INT(~HOST_WIDE_INT_UC(0xf)), addr
, 1, OPTAB_DIRECT
);
17196 // now, addr is 16-byte aligned
17198 mem
= gen_rtx_MEM (vec_mode
, addr
);
17199 set_mem_align (mem
, 128);
17200 emit_move_insn (lens
, mem
);
17201 emit_insn (gen_vec_vfees (vec_mode
, lens
, lens
, pattern
, GEN_INT (VSTRING_FLAG_CS
)));
17202 add_int_reg_note (s390_emit_ccraw_jump (4, EQ
, loop_end
),
17204 profile_probability::very_unlikely ().to_reg_br_prob_note ());
17206 emit_label (loop_start
);
17207 LABEL_NUSES (loop_start
) = 1;
17209 force_expand_binop (Pmode
, add_optab
, addr
, GEN_INT (16), addr
, 1, OPTAB_DIRECT
);
17210 mem
= gen_rtx_MEM (vec_mode
, addr
);
17211 set_mem_align (mem
, 128);
17212 emit_move_insn (lens
, mem
);
17213 emit_insn (gen_vec_vfees (vec_mode
, lens
, lens
, pattern
, GEN_INT (VSTRING_FLAG_CS
)));
17214 add_int_reg_note (s390_emit_ccraw_jump (4, NE
, loop_start
),
17216 profile_probability::very_likely ().to_reg_br_prob_note ());
17218 emit_label (loop_end
);
17219 LABEL_NUSES (loop_end
) = 1;
17223 lens
= convert_to_mode (V2DImode
, lens
, 1);
17224 emit_insn (gen_vec_extractv2didi (offset
, lens
, GEN_INT (0)));
17228 lens
= convert_to_mode (V4SImode
, lens
, 1);
17229 emit_insn (gen_vec_extractv4sisi (offset
, lens
, GEN_INT (1)));
17231 force_expand_binop (Pmode
, add_optab
, addr
, offset
, dst
, 1, OPTAB_DIRECT
);
17234 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
17236 static unsigned HOST_WIDE_INT
17237 s390_asan_shadow_offset (void)
17239 return TARGET_64BIT
? HOST_WIDE_INT_1U
<< 52 : HOST_WIDE_INT_UC (0x20000000);
17242 #ifdef HAVE_GAS_HIDDEN
17243 # define USE_HIDDEN_LINKONCE 1
17245 # define USE_HIDDEN_LINKONCE 0
17248 /* Output an indirect branch trampoline for target register REGNO. */
17251 s390_output_indirect_thunk_function (unsigned int regno
, bool z10_p
)
17254 char thunk_label
[32];
17258 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
, regno
);
17260 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
,
17261 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
17263 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
17264 get_identifier (thunk_label
),
17265 build_function_type_list (void_type_node
, NULL_TREE
));
17266 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
17267 NULL_TREE
, void_type_node
);
17268 TREE_PUBLIC (decl
) = 1;
17269 TREE_STATIC (decl
) = 1;
17270 DECL_IGNORED_P (decl
) = 1;
17272 if (USE_HIDDEN_LINKONCE
)
17274 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
17276 targetm
.asm_out
.unique_section (decl
, 0);
17277 switch_to_section (get_named_section (decl
, NULL
, 0));
17279 targetm
.asm_out
.globalize_label (asm_out_file
, thunk_label
);
17280 fputs ("\t.hidden\t", asm_out_file
);
17281 assemble_name (asm_out_file
, thunk_label
);
17282 putc ('\n', asm_out_file
);
17283 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, thunk_label
, decl
);
17287 switch_to_section (text_section
);
17288 ASM_OUTPUT_LABEL (asm_out_file
, thunk_label
);
17291 DECL_INITIAL (decl
) = make_node (BLOCK
);
17292 current_function_decl
= decl
;
17293 allocate_struct_function (decl
, false);
17294 init_function_start (decl
);
17295 cfun
->is_thunk
= true;
17296 first_function_block_is_cold
= false;
17297 final_start_function (emit_barrier (), asm_out_file
, 1);
17299 /* This makes CFI at least usable for indirect jumps.
17301 Stopping in the thunk: backtrace will point to the thunk target
17302 is if it was interrupted by a signal. For a call this means that
17303 the call chain will be: caller->callee->thunk */
17304 if (flag_asynchronous_unwind_tables
&& flag_dwarf2_cfi_asm
)
17306 fputs ("\t.cfi_signal_frame\n", asm_out_file
);
17307 fprintf (asm_out_file
, "\t.cfi_return_column %d\n", regno
);
17308 for (i
= 0; i
< FPR15_REGNUM
; i
++)
17309 fprintf (asm_out_file
, "\t.cfi_same_value %s\n", reg_names
[i
]);
17316 /* We generate a thunk for z10 compiled code although z10 is
17317 currently not enabled. Tell the assembler to accept the
17319 if (!TARGET_CPU_Z10
)
17321 fputs ("\t.machine push\n", asm_out_file
);
17322 fputs ("\t.machine z10\n", asm_out_file
);
17324 /* We use exrl even if -mzarch hasn't been specified on the
17325 command line so we have to tell the assembler to accept
17328 fputs ("\t.machinemode zarch\n", asm_out_file
);
17330 fputs ("\texrl\t0,1f\n", asm_out_file
);
17333 fputs ("\t.machinemode esa\n", asm_out_file
);
17335 if (!TARGET_CPU_Z10
)
17336 fputs ("\t.machine pop\n", asm_out_file
);
17341 fprintf (asm_out_file
, "\tlarl\t%%r%d,1f\n",
17342 INDIRECT_BRANCH_THUNK_REGNUM
);
17345 fprintf (asm_out_file
, "\tex\t0,0(%%r%d)\n",
17346 INDIRECT_BRANCH_THUNK_REGNUM
);
17350 fputs ("0:\tj\t0b\n", asm_out_file
);
17352 /* 1: br <regno> */
17353 fprintf (asm_out_file
, "1:\tbr\t%%r%d\n", regno
);
17355 final_end_function ();
17356 init_insn_lengths ();
17357 free_after_compilation (cfun
);
17359 current_function_decl
= NULL
;
17362 /* Implement the asm.code_end target hook. */
17365 s390_code_end (void)
17369 for (i
= 1; i
< 16; i
++)
17371 if (indirect_branch_z10thunk_mask
& (1 << i
))
17372 s390_output_indirect_thunk_function (i
, true);
17374 if (indirect_branch_prez10thunk_mask
& (1 << i
))
17375 s390_output_indirect_thunk_function (i
, false);
17378 if (TARGET_INDIRECT_BRANCH_TABLE
)
17383 for (o
= 0; o
< INDIRECT_BRANCH_NUM_OPTIONS
; o
++)
17385 if (indirect_branch_table_label_no
[o
] == 0)
17388 switch_to_section (get_section (indirect_branch_table_name
[o
],
17391 for (i
= 0; i
< indirect_branch_table_label_no
[o
]; i
++)
17393 char label_start
[32];
17395 ASM_GENERATE_INTERNAL_LABEL (label_start
,
17396 indirect_branch_table_label
[o
], i
);
17398 fputs ("\t.long\t", asm_out_file
);
17399 assemble_name_raw (asm_out_file
, label_start
);
17400 fputs ("-.\n", asm_out_file
);
17406 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
17409 s390_case_values_threshold (void)
17411 /* Disabling branch prediction for indirect jumps makes jump tables
17412 much more expensive. */
17413 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP
)
17416 return default_case_values_threshold ();
17419 /* Evaluate the insns between HEAD and TAIL and do back-end to install
17420 back-end specific dependencies.
17422 Establish an ANTI dependency between r11 and r15 restores from FPRs
17423 to prevent the instructions scheduler from reordering them since
17424 this would break CFI. No further handling in the sched_reorder
17425 hook is required since the r11 and r15 restore will never appear in
17426 the same ready list with that change. */
17428 s390_sched_dependencies_evaluation (rtx_insn
*head
, rtx_insn
*tail
)
17430 if (!frame_pointer_needed
|| !epilogue_completed
)
17433 while (head
!= tail
&& DEBUG_INSN_P (head
))
17434 head
= NEXT_INSN (head
);
17436 rtx_insn
*r15_restore
= NULL
, *r11_restore
= NULL
;
17438 for (rtx_insn
*insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
17440 rtx set
= single_set (insn
);
17442 || !RTX_FRAME_RELATED_P (insn
)
17444 || !REG_P (SET_DEST (set
))
17445 || !FP_REG_P (SET_SRC (set
)))
17448 if (REGNO (SET_DEST (set
)) == HARD_FRAME_POINTER_REGNUM
)
17449 r11_restore
= insn
;
17451 if (REGNO (SET_DEST (set
)) == STACK_POINTER_REGNUM
)
17452 r15_restore
= insn
;
17455 if (r11_restore
== NULL
|| r15_restore
== NULL
)
17457 add_dependence (r11_restore
, r15_restore
, REG_DEP_ANTI
);
17460 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
17462 static unsigned HOST_WIDE_INT
17463 s390_shift_truncation_mask (machine_mode mode
)
17465 return mode
== DImode
|| mode
== SImode
? 63 : 0;
17468 /* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
17472 f_constraint_p (const char *constraint
)
17474 bool seen_f_p
= false;
17475 bool seen_v_p
= false;
17477 for (size_t i
= 0, c_len
= strlen (constraint
); i
< c_len
;
17478 i
+= CONSTRAINT_LEN (constraint
[i
], constraint
+ i
))
17480 if (constraint
[i
] == 'f')
17482 if (constraint
[i
] == 'v')
17486 /* Treat "fv" constraints as "v", because LRA will choose the widest register
17488 return seen_f_p
&& !seen_v_p
;
17491 /* Return TRUE iff X is a hard floating-point (and not a vector) register. */
17494 s390_hard_fp_reg_p (rtx x
)
17496 if (!(REG_P (x
) && HARD_REGISTER_P (x
) && REG_ATTRS (x
)))
17499 tree decl
= REG_EXPR (x
);
17500 if (!(HAS_DECL_ASSEMBLER_NAME_P (decl
) && DECL_ASSEMBLER_NAME_SET_P (decl
)))
17503 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
17505 return name
[0] == '*' && name
[1] == 'f';
17508 /* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
17509 constraints when long doubles are stored in vector registers. */
17512 s390_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> &inputs
,
17513 vec
<machine_mode
> &input_modes
,
17514 vec
<const char *> &constraints
,
17515 vec
<rtx
> &/*uses*/, vec
<rtx
> &/*clobbers*/,
17516 HARD_REG_SET
&clobbered_regs
, location_t loc
)
17519 rtx_insn
*after_md_seq
= NULL
, *after_md_end
= NULL
;
17520 bool saw_cc
= false;
17522 unsigned ninputs
= inputs
.length ();
17523 unsigned noutputs
= outputs
.length ();
17524 for (unsigned i
= 0; i
< noutputs
; i
++)
17526 const char *constraint
= constraints
[i
];
17527 if (strncmp (constraint
, "=@cc", 4) == 0)
17529 if (constraint
[4] != 0)
17531 error_at (loc
, "invalid cc output constraint: %qs", constraint
);
17536 error_at (loc
, "multiple cc output constraints not supported");
17539 if (TEST_HARD_REG_BIT (clobbered_regs
, CC_REGNUM
))
17541 error_at (loc
, "%<asm%> specifier for cc output conflicts with %<asm%> clobber list");
17544 rtx dest
= outputs
[i
];
17545 if (GET_MODE (dest
) != SImode
)
17547 error ("invalid type for cc output constraint");
17551 constraints
[i
] = "=c";
17552 outputs
[i
] = gen_rtx_REG (CCRAWmode
, CC_REGNUM
);
17554 push_to_sequence2 (after_md_seq
, after_md_end
);
17555 emit_insn (gen_rtx_SET (dest
,
17556 gen_rtx_UNSPEC (SImode
,
17557 gen_rtvec (1, outputs
[i
]),
17558 UNSPEC_CC_TO_INT
)));
17559 after_md_seq
= get_insns ();
17560 after_md_end
= get_last_insn ();
17565 /* Long doubles are stored in FPR pairs - nothing to do. */
17567 if (GET_MODE (outputs
[i
]) != TFmode
)
17568 /* Not a long double - nothing to do. */
17570 bool allows_mem
, allows_reg
, is_inout
;
17571 bool ok
= parse_output_constraint (&constraint
, i
, ninputs
, noutputs
,
17572 &allows_mem
, &allows_reg
, &is_inout
);
17574 if (!f_constraint_p (constraint
))
17575 /* Long double with a constraint other than "=f" - nothing to do. */
17577 gcc_assert (allows_reg
);
17578 gcc_assert (!is_inout
);
17579 /* Copy output value from a FPR pair into a vector register. */
17581 push_to_sequence2 (after_md_seq
, after_md_end
);
17582 if (s390_hard_fp_reg_p (outputs
[i
]))
17584 fprx2
= gen_rtx_REG (FPRX2mode
, REGNO (outputs
[i
]));
17585 /* The first half is already at the correct location, copy only the
17586 * second one. Use the UNSPEC pattern instead of the SUBREG one,
17587 * since s390_can_change_mode_class() rejects
17588 * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */
17589 rtx v1
= gen_rtx_REG (V2DFmode
, REGNO (outputs
[i
]));
17590 rtx v3
= gen_rtx_REG (V2DFmode
, REGNO (outputs
[i
]) + 1);
17591 emit_insn (gen_vec_permiv2df (v1
, v1
, v3
, const0_rtx
));
17595 fprx2
= gen_reg_rtx (FPRX2mode
);
17596 emit_insn (gen_fprx2_to_tf (outputs
[i
], fprx2
));
17598 after_md_seq
= get_insns ();
17599 after_md_end
= get_last_insn ();
17601 outputs
[i
] = fprx2
;
17605 /* Long doubles are stored in FPR pairs - nothing left to do. */
17606 return after_md_seq
;
17608 for (unsigned i
= 0; i
< ninputs
; i
++)
17610 if (GET_MODE (inputs
[i
]) != TFmode
)
17611 /* Not a long double - nothing to do. */
17613 const char *constraint
= constraints
[noutputs
+ i
];
17614 bool allows_mem
, allows_reg
;
17615 bool ok
= parse_input_constraint (&constraint
, i
, ninputs
, noutputs
, 0,
17616 constraints
.address (), &allows_mem
,
17619 if (!f_constraint_p (constraint
))
17620 /* Long double with a constraint other than "f" (or "=f" for inout
17621 operands) - nothing to do. */
17623 gcc_assert (allows_reg
);
17624 /* Copy input value from a vector register into a FPR pair. */
17626 if (s390_hard_fp_reg_p (inputs
[i
]))
17628 fprx2
= gen_rtx_REG (FPRX2mode
, REGNO (inputs
[i
]));
17629 /* Copy only the second half. */
17630 rtx v1
= gen_rtx_REG (V2DFmode
, REGNO (inputs
[i
]) + 1);
17631 rtx v2
= gen_rtx_REG (V2DFmode
, REGNO (inputs
[i
]));
17632 emit_insn (gen_vec_permiv2df (v1
, v2
, v1
, GEN_INT (3)));
17636 fprx2
= gen_reg_rtx (FPRX2mode
);
17637 emit_insn (gen_tf_to_fprx2 (fprx2
, inputs
[i
]));
17640 input_modes
[i
] = FPRX2mode
;
17643 return after_md_seq
;
17646 #define MAX_VECT_LEN 16
17648 struct expand_vec_perm_d
17650 rtx target
, op0
, op1
;
17651 unsigned char perm
[MAX_VECT_LEN
];
17652 machine_mode vmode
;
17653 unsigned char nelt
;
17659 /* Try to expand the vector permute operation described by D using the
17660 vector merge instructions vml and vmh. Return true if vector merge
17663 expand_perm_with_merge (const struct expand_vec_perm_d
&d
)
17665 static const unsigned char hi_perm_di
[2] = {0, 2};
17666 static const unsigned char hi_perm_si
[4] = {0, 4, 1, 5};
17667 static const unsigned char hi_perm_hi
[8] = {0, 8, 1, 9, 2, 10, 3, 11};
17668 static const unsigned char hi_perm_qi
[16]
17669 = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
17671 static const unsigned char hi_perm_di_swap
[2] = {2, 0};
17672 static const unsigned char hi_perm_si_swap
[4] = {4, 0, 6, 2};
17673 static const unsigned char hi_perm_hi_swap
[8] = {8, 0, 10, 2, 12, 4, 14, 6};
17674 static const unsigned char hi_perm_qi_swap
[16]
17675 = {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14};
17677 static const unsigned char lo_perm_di
[2] = {1, 3};
17678 static const unsigned char lo_perm_si
[4] = {2, 6, 3, 7};
17679 static const unsigned char lo_perm_hi
[8] = {4, 12, 5, 13, 6, 14, 7, 15};
17680 static const unsigned char lo_perm_qi
[16]
17681 = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
17683 static const unsigned char lo_perm_di_swap
[2] = {3, 1};
17684 static const unsigned char lo_perm_si_swap
[4] = {5, 1, 7, 3};
17685 static const unsigned char lo_perm_hi_swap
[8] = {9, 1, 11, 3, 13, 5, 15, 7};
17686 static const unsigned char lo_perm_qi_swap
[16]
17687 = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
17689 bool merge_lo_p
= false;
17690 bool merge_hi_p
= false;
17691 bool swap_operands_p
= false;
17693 if ((d
.nelt
== 2 && memcmp (d
.perm
, hi_perm_di
, 2) == 0)
17694 || (d
.nelt
== 4 && memcmp (d
.perm
, hi_perm_si
, 4) == 0)
17695 || (d
.nelt
== 8 && memcmp (d
.perm
, hi_perm_hi
, 8) == 0)
17696 || (d
.nelt
== 16 && memcmp (d
.perm
, hi_perm_qi
, 16) == 0))
17700 else if ((d
.nelt
== 2 && memcmp (d
.perm
, hi_perm_di_swap
, 2) == 0)
17701 || (d
.nelt
== 4 && memcmp (d
.perm
, hi_perm_si_swap
, 4) == 0)
17702 || (d
.nelt
== 8 && memcmp (d
.perm
, hi_perm_hi_swap
, 8) == 0)
17703 || (d
.nelt
== 16 && memcmp (d
.perm
, hi_perm_qi_swap
, 16) == 0))
17706 swap_operands_p
= true;
17708 else if ((d
.nelt
== 2 && memcmp (d
.perm
, lo_perm_di
, 2) == 0)
17709 || (d
.nelt
== 4 && memcmp (d
.perm
, lo_perm_si
, 4) == 0)
17710 || (d
.nelt
== 8 && memcmp (d
.perm
, lo_perm_hi
, 8) == 0)
17711 || (d
.nelt
== 16 && memcmp (d
.perm
, lo_perm_qi
, 16) == 0))
17715 else if ((d
.nelt
== 2 && memcmp (d
.perm
, lo_perm_di_swap
, 2) == 0)
17716 || (d
.nelt
== 4 && memcmp (d
.perm
, lo_perm_si_swap
, 4) == 0)
17717 || (d
.nelt
== 8 && memcmp (d
.perm
, lo_perm_hi_swap
, 8) == 0)
17718 || (d
.nelt
== 16 && memcmp (d
.perm
, lo_perm_qi_swap
, 16) == 0))
17721 swap_operands_p
= true;
17724 if (!merge_lo_p
&& !merge_hi_p
)
17728 return merge_lo_p
|| merge_hi_p
;
17731 if (swap_operands_p
)
17742 s390_expand_merge (d
.target
, op0
, op1
, merge_hi_p
);
17747 /* Try to expand the vector permute operation described by D using the
17748 vector permute doubleword immediate instruction vpdi. Return true
17749 if vpdi could be used.
17751 VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5
17752 cases are covered by vmrhg and vmrlg already. So we only care
17753 about the 1, 4 cases here.
17754 1 - First element of src1 and second of src2
17755 4 - Second element of src1 and first of src2 */
17757 expand_perm_with_vpdi (const struct expand_vec_perm_d
&d
)
17759 bool vpdi1_p
= false;
17760 bool vpdi4_p
= false;
17761 bool swap_operands_p
= false;
17762 rtx op0_reg
, op1_reg
;
17764 // Only V2DI and V2DF are supported here.
17768 if (d
.perm
[0] == 0 && d
.perm
[1] == 3)
17770 else if (d
.perm
[0] == 2 && d
.perm
[1] == 1)
17773 swap_operands_p
= true;
17775 else if ((d
.perm
[0] == 1 && d
.perm
[1] == 2)
17776 || (d
.perm
[0] == 1 && d
.perm
[1] == 0)
17777 || (d
.perm
[0] == 3 && d
.perm
[1] == 2))
17779 else if (d
.perm
[0] == 3 && d
.perm
[1] == 0)
17782 swap_operands_p
= true;
17785 if (!vpdi1_p
&& !vpdi4_p
)
17791 op0_reg
= force_reg (GET_MODE (d
.op0
), d
.op0
);
17792 op1_reg
= force_reg (GET_MODE (d
.op1
), d
.op1
);
17794 /* If we only reference either of the operands in
17795 the permute mask, just use one of them. */
17798 else if (d
.only_op1
)
17800 else if (swap_operands_p
)
17808 emit_insn (gen_vpdi1 (d
.vmode
, d
.target
, op0_reg
, op1_reg
));
17810 emit_insn (gen_vpdi4 (d
.vmode
, d
.target
, op0_reg
, op1_reg
));
17815 /* Helper that checks if a vector permutation mask D
17816 represents a reversal of the vector's elements. */
17818 is_reverse_perm_mask (const struct expand_vec_perm_d
&d
)
17820 for (int i
= 0; i
< d
.nelt
; i
++)
17821 if (d
.perm
[i
] != d
.nelt
- i
- 1)
17827 expand_perm_reverse_elements (const struct expand_vec_perm_d
&d
)
17829 if (d
.op0
!= d
.op1
|| !is_reverse_perm_mask (d
))
17837 case V1TImode
: emit_move_insn (d
.target
, d
.op0
); break;
17838 case V2DImode
: emit_insn (gen_eltswapv2di (d
.target
, d
.op0
)); break;
17839 case V4SImode
: emit_insn (gen_eltswapv4si (d
.target
, d
.op0
)); break;
17840 case V8HImode
: emit_insn (gen_eltswapv8hi (d
.target
, d
.op0
)); break;
17841 case V16QImode
: emit_insn (gen_eltswapv16qi (d
.target
, d
.op0
)); break;
17842 case V2DFmode
: emit_insn (gen_eltswapv2df (d
.target
, d
.op0
)); break;
17843 case V4SFmode
: emit_insn (gen_eltswapv4sf (d
.target
, d
.op0
)); break;
17844 default: gcc_unreachable();
17850 /* Try to emit vlbr/vstbr. Note, this is only a candidate insn since
17851 TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only. Thus,
17852 either fwprop, combine et al. "fixes" one of the input/output operands into
17853 a memory operand or a splitter has to reverse this into a general vperm
17857 expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d
&d
)
17859 static const char perm
[4][MAX_VECT_LEN
]
17860 = { { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 },
17861 { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
17862 { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 },
17863 { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } };
17865 if (!TARGET_VXE2
|| d
.vmode
!= V16QImode
|| d
.op0
!= d
.op1
)
17868 if (memcmp (d
.perm
, perm
[0], MAX_VECT_LEN
) == 0)
17872 rtx target
= gen_rtx_SUBREG (V8HImode
, d
.target
, 0);
17873 rtx op0
= gen_rtx_SUBREG (V8HImode
, d
.op0
, 0);
17874 emit_insn (gen_bswapv8hi (target
, op0
));
17879 if (memcmp (d
.perm
, perm
[1], MAX_VECT_LEN
) == 0)
17883 rtx target
= gen_rtx_SUBREG (V4SImode
, d
.target
, 0);
17884 rtx op0
= gen_rtx_SUBREG (V4SImode
, d
.op0
, 0);
17885 emit_insn (gen_bswapv4si (target
, op0
));
17890 if (memcmp (d
.perm
, perm
[2], MAX_VECT_LEN
) == 0)
17894 rtx target
= gen_rtx_SUBREG (V2DImode
, d
.target
, 0);
17895 rtx op0
= gen_rtx_SUBREG (V2DImode
, d
.op0
, 0);
17896 emit_insn (gen_bswapv2di (target
, op0
));
17901 if (memcmp (d
.perm
, perm
[3], MAX_VECT_LEN
) == 0)
17905 rtx target
= gen_rtx_SUBREG (V1TImode
, d
.target
, 0);
17906 rtx op0
= gen_rtx_SUBREG (V1TImode
, d
.op0
, 0);
17907 emit_insn (gen_bswapv1ti (target
, op0
));
17915 /* Try to find the best sequence for the vector permute operation
17916 described by D. Return true if the operation could be
17919 vectorize_vec_perm_const_1 (const struct expand_vec_perm_d
&d
)
17921 if (expand_perm_reverse_elements (d
))
17924 if (expand_perm_with_merge (d
))
17927 if (expand_perm_with_vpdi (d
))
17930 if (expand_perm_as_a_vlbr_vstbr_candidate (d
))
17936 /* Return true if we can emit instructions for the constant
17937 permutation vector in SEL. If OUTPUT, IN0, IN1 are non-null the
17938 hook is supposed to emit the required INSNs. */
17941 s390_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
17942 rtx target
, rtx op0
, rtx op1
,
17943 const vec_perm_indices
&sel
)
17945 if (vmode
!= op_mode
)
17948 struct expand_vec_perm_d d
;
17949 unsigned int i
, nelt
;
17951 if (!s390_vector_mode_supported_p (vmode
) || GET_MODE_SIZE (vmode
) != 16)
17959 gcc_assert (VECTOR_MODE_P (d
.vmode
));
17960 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
17961 d
.testing_p
= target
== NULL_RTX
;
17962 d
.only_op0
= false;
17963 d
.only_op1
= false;
17965 gcc_assert (target
== NULL_RTX
|| REG_P (target
));
17966 gcc_assert (sel
.length () == nelt
);
17968 unsigned int highest
= 0, lowest
= 2 * nelt
- 1;
17969 for (i
= 0; i
< nelt
; i
++)
17971 unsigned char e
= sel
[i
];
17972 lowest
= MIN (lowest
, e
);
17973 highest
= MAX (highest
, e
);
17974 gcc_assert (e
< 2 * nelt
);
17978 if (lowest
< nelt
&& highest
< nelt
)
17980 else if (lowest
>= nelt
&& highest
>= nelt
)
17983 return vectorize_vec_perm_const_1 (d
);
17986 /* Initialize GCC target structure. */
17988 #undef TARGET_ASM_ALIGNED_HI_OP
17989 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
17990 #undef TARGET_ASM_ALIGNED_DI_OP
17991 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
17992 #undef TARGET_ASM_INTEGER
17993 #define TARGET_ASM_INTEGER s390_assemble_integer
17995 #undef TARGET_ASM_OPEN_PAREN
17996 #define TARGET_ASM_OPEN_PAREN ""
17998 #undef TARGET_ASM_CLOSE_PAREN
17999 #define TARGET_ASM_CLOSE_PAREN ""
18001 #undef TARGET_OPTION_OVERRIDE
18002 #define TARGET_OPTION_OVERRIDE s390_option_override
18004 #ifdef TARGET_THREAD_SSP_OFFSET
18005 #undef TARGET_STACK_PROTECT_GUARD
18006 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
18009 #undef TARGET_ENCODE_SECTION_INFO
18010 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
18012 #undef TARGET_SCALAR_MODE_SUPPORTED_P
18013 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
18016 #undef TARGET_HAVE_TLS
18017 #define TARGET_HAVE_TLS true
18019 #undef TARGET_CANNOT_FORCE_CONST_MEM
18020 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
18022 #undef TARGET_DELEGITIMIZE_ADDRESS
18023 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
18025 #undef TARGET_LEGITIMIZE_ADDRESS
18026 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
18028 #undef TARGET_RETURN_IN_MEMORY
18029 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
18031 #undef TARGET_INIT_BUILTINS
18032 #define TARGET_INIT_BUILTINS s390_init_builtins
18033 #undef TARGET_EXPAND_BUILTIN
18034 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
18035 #undef TARGET_BUILTIN_DECL
18036 #define TARGET_BUILTIN_DECL s390_builtin_decl
18038 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
18039 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
18041 #undef TARGET_ASM_OUTPUT_MI_THUNK
18042 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
18043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
18044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
18046 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
18047 /* This hook is only needed to maintain the historic behavior with glibc
18048 versions that typedef float_t to double. */
18049 #undef TARGET_C_EXCESS_PRECISION
18050 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
18053 #undef TARGET_SCHED_ADJUST_PRIORITY
18054 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
18055 #undef TARGET_SCHED_ISSUE_RATE
18056 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
18057 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
18058 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
18060 #undef TARGET_SCHED_VARIABLE_ISSUE
18061 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
18062 #undef TARGET_SCHED_REORDER
18063 #define TARGET_SCHED_REORDER s390_sched_reorder
18064 #undef TARGET_SCHED_INIT
18065 #define TARGET_SCHED_INIT s390_sched_init
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p

/* RTX, address, and register/memory move cost hooks.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

/* Varargs handling and sanitizer support.  */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change

/* Calling-convention hooks: sibcalls, argument passing, return values.  */
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

/* Condition-code register handling.  */
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
/* DWARF debug-info hooks.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
/* Only override the mangling hook when alternate long-double mangling is
   configured for this target.  */
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif
/* Mode-support and reload hooks.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

/* Modes used by libgcc support routines.  */
#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

/* Trampoline generation for nested functions.  */
#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

/* Hard-register classification hooks.  */
#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#undef TARGET_SET_UP_BY_PROLOGUE
/* NOTE(review): the "s300" spelling below presumably matches the (typo'd)
   name of the implementing function elsewhere in this file -- confirm the
   definition before renaming either side.  */
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

/* Auto-vectorizer support hooks.  */
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
/* The file-start hook depends on assembler support for .machine/.machinemode
   directives; only install it when the assembler provides them.  */
#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
/* Hooks for the "target" function attribute, available only when the
   attribute machinery is enabled for this configuration.  */
#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  s390_sched_dependencies_evaluation

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask

/* Use only short displacement, since long displacement is not available for
   the floating point instructions.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
18299 struct gcc_target targetm
= TARGET_INITIALIZER
;
18301 #include "gt-s390.h"