/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2021 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"
static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
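/* The tables below already store COSTS_N_INSNS-scaled values, so a
   consumer such as the backend's rtx_costs hook can use an entry
   directly; an illustrative sketch only, not the actual hook:

     case MULT:
       *total = s390_cost->mult_df;
       break;
*/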
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),    /* M     */
  COSTS_N_INSNS (10),   /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (4),    /* MHI   */
  COSTS_N_INSNS (5),    /* ML    */
  COSTS_N_INSNS (5),    /* MR    */
  COSTS_N_INSNS (4),    /* MS    */
  COSTS_N_INSNS (15),   /* MSG   */
  COSTS_N_INSNS (7),    /* MSGF  */
  COSTS_N_INSNS (7),    /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (7),    /* multiplication in DFmode */
  COSTS_N_INSNS (13),   /* MXBR  */
  COSTS_N_INSNS (136),  /* SQXBR */
  COSTS_N_INSNS (44),   /* SQDBR */
  COSTS_N_INSNS (35),   /* SQEBR */
  COSTS_N_INSNS (18),   /* MADBR */
  COSTS_N_INSNS (13),   /* MAEBR */
  COSTS_N_INSNS (134),  /* DXBR  */
  COSTS_N_INSNS (30),   /* DDBR  */
  COSTS_N_INSNS (27),   /* DEBR  */
  COSTS_N_INSNS (220),  /* DLGR  */
  COSTS_N_INSNS (34),   /* DLR   */
  COSTS_N_INSNS (34),   /* DR    */
  COSTS_N_INSNS (32),   /* DSGFR */
  COSTS_N_INSNS (32),   /* DSGR  */
};
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),    /* M     */
  COSTS_N_INSNS (2),    /* MGHI  */
  COSTS_N_INSNS (2),    /* MH    */
  COSTS_N_INSNS (2),    /* MHI   */
  COSTS_N_INSNS (4),    /* ML    */
  COSTS_N_INSNS (4),    /* MR    */
  COSTS_N_INSNS (5),    /* MS    */
  COSTS_N_INSNS (6),    /* MSG   */
  COSTS_N_INSNS (4),    /* MSGF  */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR  */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR  */
  COSTS_N_INSNS (40),   /* DDBR  */
  COSTS_N_INSNS (26),   /* DEBR  */
  COSTS_N_INSNS (176),  /* DLGR  */
  COSTS_N_INSNS (31),   /* DLR   */
  COSTS_N_INSNS (31),   /* DR    */
  COSTS_N_INSNS (31),   /* DSGFR */
  COSTS_N_INSNS (31),   /* DSGR  */
};
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),    /* M     */
  COSTS_N_INSNS (2),    /* MGHI  */
  COSTS_N_INSNS (2),    /* MH    */
  COSTS_N_INSNS (2),    /* MHI   */
  COSTS_N_INSNS (4),    /* ML    */
  COSTS_N_INSNS (4),    /* MR    */
  COSTS_N_INSNS (5),    /* MS    */
  COSTS_N_INSNS (6),    /* MSG   */
  COSTS_N_INSNS (4),    /* MSGF  */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR  */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR  */
  COSTS_N_INSNS (40),   /* DDBR  */
  COSTS_N_INSNS (26),   /* DEBR  */
  COSTS_N_INSNS (30),   /* DLGR  */
  COSTS_N_INSNS (23),   /* DLR   */
  COSTS_N_INSNS (23),   /* DR    */
  COSTS_N_INSNS (24),   /* DSGFR */
  COSTS_N_INSNS (24),   /* DSGR  */
};
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),   /* M     */
  COSTS_N_INSNS (10),   /* MGHI  */
  COSTS_N_INSNS (10),   /* MH    */
  COSTS_N_INSNS (10),   /* MHI   */
  COSTS_N_INSNS (10),   /* ML    */
  COSTS_N_INSNS (10),   /* MR    */
  COSTS_N_INSNS (10),   /* MS    */
  COSTS_N_INSNS (10),   /* MSG   */
  COSTS_N_INSNS (10),   /* MSGF  */
  COSTS_N_INSNS (10),   /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR  */
  COSTS_N_INSNS (10),   /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (50),   /* MXBR  */
  COSTS_N_INSNS (120),  /* SQXBR */
  COSTS_N_INSNS (52),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (111),  /* DXBR  */
  COSTS_N_INSNS (39),   /* DDBR  */
  COSTS_N_INSNS (32),   /* DEBR  */
  COSTS_N_INSNS (160),  /* DLGR  */
  COSTS_N_INSNS (71),   /* DLR   */
  COSTS_N_INSNS (71),   /* DR    */
  COSTS_N_INSNS (71),   /* DSGFR */
  COSTS_N_INSNS (71),   /* DSGR  */
};
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),    /* M     */
  COSTS_N_INSNS (5),    /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (5),    /* MHI   */
  COSTS_N_INSNS (7),    /* ML    */
  COSTS_N_INSNS (7),    /* MR    */
  COSTS_N_INSNS (6),    /* MS    */
  COSTS_N_INSNS (8),    /* MSG   */
  COSTS_N_INSNS (6),    /* MSGF  */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR  */
  COSTS_N_INSNS (6),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (101),  /* DXBR B+101 */
  COSTS_N_INSNS (29),   /* DDBR  */
  COSTS_N_INSNS (22),   /* DEBR  */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),    /* M     */
  COSTS_N_INSNS (5),    /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (5),    /* MHI   */
  COSTS_N_INSNS (7),    /* ML    */
  COSTS_N_INSNS (7),    /* MR    */
  COSTS_N_INSNS (6),    /* MS    */
  COSTS_N_INSNS (8),    /* MSG   */
  COSTS_N_INSNS (6),    /* MSGF  */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR  */
  COSTS_N_INSNS (6),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (131),  /* DXBR B+131 */
  COSTS_N_INSNS (29),   /* DDBR  */
  COSTS_N_INSNS (22),   /* DEBR  */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};
const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "arch14", "arch14", PROCESSOR_ARCH14,      &zEC12_cost,  14 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];

/* Estimate of number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70

/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
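/* For example, the assembler operand 40(%r5,%r11) uses displacement 40,
   index %r5 and base %r11, while 0(%r2) uses only a base register.  */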
/* The max number of insns of backend generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  Current max is 9 coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                           \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f  \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8
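/* Concretely, the first GP_ARG_NUM_REG integer arguments are passed in
   %r2..%r6, the FP arguments in %f0/%f2 (31 bit) resp. %f0/%f2/%f4/%f6
   (64 bit), and vector arguments in %v24..%v31.  */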
/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                              \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
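/* E.g. a mode occupying two GPRs must live in an even/odd register
   pair, so REGNO_PAIR_OK accepts GPR 4 but rejects GPR 5 for it.  */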
/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =  \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}
/* Return nonzero if it's OK to use fused multiply-add for MODE.  */
bool
s390_fma_allowed_p (machine_mode mode)
{
  if (TARGET_VXE && mode == TFmode)
    return flag_vx_long_double_fma;

  return true;
}
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vector <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
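/* Example of an affected type (hypothetical user code):

     typedef int v4si __attribute__ ((vector_size (16)));

   A v4si argument travels in a vector register (or on the stack by
   value) with the new ABI but by reference on the stack with the old
   one, so seeing it as an argument sets the marker above.  */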
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
         only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
        return;

      /* In arguments vector types > 16 are passed as before (GCC
         never enforced the bigger alignment for arguments which was
         required by the old vector ABI).  However, it might still be
         ABI relevant due to the changed alignment if it is a struct
         member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
        return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
         natural alignment there will never be ABI dependent padding
         in an array type.  That's why we do not set in_struct_p to
         true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
           arg_chain;
           arg_chain = TREE_CHAIN (arg_chain))
        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
        }
    }
}
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
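/* The tables above use the classic X-macro idiom: each one redefines
   B_DEF/OB_DEF/OB_DEF_VAR to expand to a single field and re-includes
   s390-builtins.def.  A stripped-down sketch of the same idiom with
   hypothetical names (not part of this file):

     #define DEF_ITEM(NAME, FLAGS) FLAGS,
     static const unsigned int item_flags[] = {
     #include "items.def"   // lines of the form DEF_ITEM (foo, 0x1)
       0
     };
     #undef DEF_ITEM
*/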
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
                                       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)          \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] = (!CONST_P) ?    \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)     \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] =                 \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)    \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] =                 \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)    \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] =                         \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)     \
  if (s390_builtin_types[INDEX] == NULL)                        \
    s390_builtin_types[INDEX] =                                 \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)             \
  if (s390_builtin_fn_types[INDEX] == NULL)     \
    s390_builtin_fn_types[INDEX] =              \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)    \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)          \
    s390_builtin_decls[S390_BUILTIN_##NAME] =                   \
      add_builtin_function ("__builtin_" #NAME,                 \
                            s390_builtin_fn_types[FNTYPE],      \
                            S390_BUILTIN_##NAME,                \
                            BUILT_IN_MD,                        \
                            NULL,                               \
                            ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)         \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)                                                              \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,                             \
                            s390_builtin_fn_types[FNTYPE],                  \
                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
                            BUILT_IN_MD,                                    \
                            NULL,                                           \
                            0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
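/* For instance, an O_U4 operand must be an unsigned constant in 0..15
   and an O_S8 operand a signed constant in -128..127; violations are
   diagnosed with the error calls below.  */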
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
      unsigned HOST_WIDE_INT bitmasks[]  = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
      unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
      unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];

      gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1));
      gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));

      if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
          || (bitmask && tree_to_uhwi (arg) & ~bitmask))
        {
          if (bitmask)
            {
              gcc_assert (bitmask < 16);
              char values[120] = "";

              for (unsigned HOST_WIDE_INT i = 0; i <= bitmask; i++)
                {
                  char buf[5];
                  if (i & ~bitmask)
                    continue;
                  int ret = snprintf (buf, 5, HOST_WIDE_INT_PRINT_UNSIGNED, i & bitmask);
                  gcc_assert (ret < 5);
                  strcat (values, buf);
                  if (i < bitmask)
                    strcat (values, ", ");
                }
              error ("constant argument %d for builtin %qF is invalid (%s)",
                     argnum, decl, values);
            }
          else
            error ("constant argument %d for builtin %qF is out of range (0..%wu)",
                   argnum, decl, (HOST_WIDE_INT_1U << bitwidth) - 1);

          return false;
        }
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
          || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
          || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
        {
          error ("constant argument %d for builtin %qF is out of range "
                 "(%wd..%wd)", argnum, decl,
                 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
                 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
          return false;
        }
    }

  return true;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
               bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
        {
          error ("builtin %qF is not supported without %<-mhtm%> "
                 "(default with %<-march=zEC12%> and higher)", fndecl);
          return const0_rtx;
        }
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
        {
          error ("builtin %qF requires %<-mvx%> "
                 "(default with %<-march=z13%> and higher)", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_VXE) && !TARGET_VXE)
        {
          error ("builtin %qF requires z14 or higher", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_VXE2) && !TARGET_VXE2)
        {
          error ("builtin %qF requires z15 or higher", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_NNPA) && !TARGET_NNPA)
        {
          error ("builtin %qF requires arch14 or higher", fndecl);
          return const0_rtx;
        }
    }

  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    gcc_unreachable ();
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
         saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
        cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");
  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
         arguments but an element selector.  So we have to also look
         at the vector return type when emitting the modulo
         operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
        last_vec_mode = insn_data[icode].operand[0].mode;
    }
  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
        return NULL_RTX;
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      if (O_IMM_P (op_flags)
          && TREE_CODE (arg) != INTEGER_CST)
        {
          error ("constant value required for builtin %qF argument %d",
                 fndecl, arity + 1);
          return const0_rtx;
        }

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
        return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
         is "convenient".  However, our checks below rely on this
         being done.  */
      if (CONST_INT_P (op[arity])
          && SCALAR_INT_MODE_P (insn_op->mode)
          && GET_MODE (op[arity]) != insn_op->mode)
        op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
                                                 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
         the proper mode.  This allows us to use e.g. (match_operand
         "memory_operand"..) in the insn patterns instead of (mem
         (match_operand "address_operand)).  This is helpful for
         patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
          && insn_op->predicate != address_operand)
        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
        {
          gcc_assert (last_vec_mode != VOIDmode);
          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
                                             op[arity],
                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
                                             NULL_RTX, 1, OPTAB_DIRECT);
        }

      /* Record the vector mode used for an element selector.  This assumes:
         1. There is no builtin with two different vector modes and an element selector
         2. The element selector comes after the vector type it is referring to.
         This is currently true for all the builtins but FIXME we
         should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
        last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
        {
          arity++;
          continue;
        }

      /* A memory operand is rejected by the memory_operand predicate.
         Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
          && insn_op->predicate == memory_operand
          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
        {
          op[arity] = replace_equiv_address (op[arity],
                                             copy_to_mode_reg (Pmode,
                                                               XEXP (op[arity], 0)));
        }
      /* Some of the builtins require different modes/types than the
         pattern in order to implement a specific API.  Instead of
         adding many expanders which do the mode change we do it here.
         E.g. s390_vec_add_u128 required to have vector unsigned char
         arguments is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
               && GET_MODE (op[arity]) != VOIDmode
               && GET_MODE (op[arity]) != insn_op->mode
               && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
                                                   GET_MODE (op[arity]), 0))
                   != NULL_RTX))
        {
          op[arity] = tmp_rtx;
        }

      /* The predicate rejects the operand although the mode is fine.
         Copy the operand to register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
          && (GET_MODE (op[arity]) == insn_op->mode
              || GET_MODE (op[arity]) == VOIDmode
              || (insn_op->predicate == address_operand
                  && GET_MODE (op[arity]) == Pmode)))
        {
          /* An address_operand usually has VOIDmode in the expander
             so we cannot use this.  */
          machine_mode target_mode =
            (insn_op->predicate == address_operand
             ? (machine_mode) Pmode : insn_op->mode);
          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
        }

      if (!insn_op->predicate (op[arity], insn_op->mode))
        {
          error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
          return const0_rtx;
        }
      arity++;
    }
  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */
static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
           || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
           || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
             " non-negative integer constants or too large (max. %d)", name,
             s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED,
                              bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument",
               name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
          && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
          && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is not "
                   "(keep|thunk|thunk-extern)", name);
          *no_add_attrs = true;
        }
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
               "argument to %qE attribute is not "
               "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}
static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}
static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}
/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;

      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
              && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return true;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
        return false;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode
          && req_mode != CCZ1mode)
        return false;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */
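/* Worked example: for (a & 0x90) == 0x10, OP1 = 0x90 and OP2 = 0x10;
   bit1 = log2 (0x10) = 4 and bit0 = log2 (0x90 ^ 0x10) = 7, so with
   bit0 > bit1 the function below returns CCT1mode.  */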
machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
         -INT_MAX is used as parameter, which stays negative.  So
         we have an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP((op0), 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }

      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (HONOR_NANS (op0) || HONOR_NANS (op1))
        return CCSFPSmode;
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = (HOST_WIDE_INT_1U << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int)new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case E_CCZmode:
        case E_CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand.  */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = (int)new_code;
        }
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;

      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
        new_code = reversed_comparison_code_parts (GET_CODE (*op0),
                                                   XEXP (*op0, 0),
                                                   XEXP (*op0, 1), NULL);
      else
        new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
        {
          *code = new_code;
          *op1 = XEXP (*op0, 1);
          *op0 = XEXP (*op0, 0);
        }
    }

  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && GET_CODE (*op0) == NOT)
    {
      machine_mode mode = GET_MODE (*op0);
      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
      *op0 = gen_rtx_NOT (mode, *op0);
      *op1 = const0_rtx;
    }

  /* a&b == -1 -> ~a|~b == 0   a|b == -1 -> ~a&~b == 0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && CONST_INT_P (*op1)
      && *op1 == constm1_rtx)
    {
      machine_mode mode = GET_MODE (*op0);
      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));

      if (GET_CODE (*op0) == AND)
        *op0 = gen_rtx_IOR (mode, op00, op01);
      else
        *op0 = gen_rtx_AND (mode, op00, op01);

      *op1 = const0_rtx;
    }
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  /* Force OP1 into register in order to satisfy VXE TFmode patterns.  */
  if (TARGET_VXE && GET_MODE (op1) == TFmode)
    op1 = force_reg (TFmode, op1);

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
         compare_and_swap pattern already computed the result and the
         machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
/* If MEM is not a legitimate compare-and-swap memory operand, return a new
   MEM, whose address is a pseudo containing the original MEM's address.  */

static rtx
s390_legitimize_cs_operand (rtx mem)
{
  rtx tmp;

  if (!contains_symbol_ref_p (mem))
    return mem;
  tmp = gen_reg_rtx (Pmode);
  emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
  return change_address (mem, VOIDmode, tmp);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx, machine_mode ccmode)
{
  rtx cc;

  mem = s390_legitimize_cs_operand (mem);
  cc = gen_rtx_REG (ccmode, CC_REGNUM);
  switch (GET_MODE (mem))
    {
    case E_SImode:
      emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    case E_DImode:
      emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    case E_TImode:
      emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
                                                         new_rtx, cc));
      break;
    default:
      gcc_unreachable ();
    }

  return s390_emit_compare (code, cc, const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
              || (GET_MODE (XEXP (code, 0)) == CCRAWmode
                  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case E_CCZmode:
    case E_CCZ1mode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        default:        return -1;
        }
      break;

    case E_CCT1mode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC1;
        case NE:        return CC0 | CC2 | CC3;
        default:        return -1;
        }
      break;

    case E_CCT2mode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC2;
        case NE:        return CC0 | CC1 | CC3;
        default:        return -1;
        }
      break;

    case E_CCT3mode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC3;
        case NE:        return CC0 | CC1 | CC2;
        default:        return -1;
        }
      break;

    case E_CCLmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0 | CC2;
        case NE:        return CC1 | CC3;
        default:        return -1;
        }
      break;

    case E_CCL1mode:
      switch (GET_CODE (code))
        {
        case LTU:       return CC2 | CC3;  /* carry */
        case GEU:       return CC0 | CC1;  /* no carry */
        default:        return -1;
        }
      break;

    case E_CCL2mode:
      switch (GET_CODE (code))
        {
        case GTU:       return CC0 | CC1;  /* borrow */
        case LEU:       return CC2 | CC3;  /* no borrow */
        default:        return -1;
        }
      break;

    case E_CCL3mode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0 | CC2;
        case NE:        return CC1 | CC3;
        case LTU:       return CC1;
        case GTU:       return CC3;
        case LEU:       return CC1 | CC2;
        case GEU:       return CC2 | CC3;
        default:        return -1;
        }

    case E_CCUmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LTU:       return CC1;
        case GTU:       return CC2;
        case LEU:       return CC0 | CC1;
        case GEU:       return CC0 | CC2;
        default:        return -1;
        }
      break;

    case E_CCURmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC2 | CC1 | CC3;
        case LTU:       return CC2;
        case GTU:       return CC1;
        case LEU:       return CC0 | CC2;
        case GEU:       return CC0 | CC1;
        default:        return -1;
        }
      break;

    case E_CCAPmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LT:        return CC1 | CC3;
        case GT:        return CC2;
        case LE:        return CC0 | CC1 | CC3;
        case GE:        return CC0 | CC2;
        default:        return -1;
        }
      break;

    case E_CCANmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LT:        return CC1;
        case GT:        return CC2 | CC3;
        case LE:        return CC0 | CC1;
        case GE:        return CC0 | CC2 | CC3;
        default:        return -1;
        }
      break;

    case E_CCOmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0 | CC1 | CC2;
        case NE:        return CC3;
        default:        return -1;
        }
      break;

    case E_CCSmode:
    case E_CCSFPSmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC1 | CC2 | CC3;
        case LT:        return CC1;
        case GT:        return CC2;
        case LE:        return CC0 | CC1;
        case GE:        return CC0 | CC2;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC1 | CC2;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC1 | CC3;
        case UNGT:      return CC2 | CC3;
        case UNLE:      return CC0 | CC1 | CC3;
        case UNGE:      return CC0 | CC2 | CC3;
        case LTGT:      return CC1 | CC2;
        default:        return -1;
        }
      break;

    case E_CCSRmode:
      switch (GET_CODE (code))
        {
        case EQ:        return CC0;
        case NE:        return CC2 | CC1 | CC3;
        case LT:        return CC2;
        case GT:        return CC1;
        case LE:        return CC0 | CC2;
        case GE:        return CC0 | CC1;
        case UNORDERED: return CC3;
        case ORDERED:   return CC0 | CC2 | CC1;
        case UNEQ:      return CC0 | CC3;
        case UNLT:      return CC2 | CC3;
        case UNGT:      return CC1 | CC3;
        case UNLE:      return CC0 | CC2 | CC3;
        case UNGE:      return CC0 | CC1 | CC3;
        case LTGT:      return CC2 | CC1;
        default:        return -1;
        }
      break;

      /* Vector comparison modes.  */
      /* CC2 will never be set.  It however is part of the negated
         masks.  */
    case E_CCVIALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE:        return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT:        return CC3 | CC1 | CC2;
        default:        return -1;
        }

    case E_CCVIANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GTU:
        case GT:
        case GE:        return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case LEU:
        case LE:
        case LT:        return CC3 | CC2;
        default:        return -1;
        }

    case E_CCVFALLmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE:        return CC0;
          /* The inverted modes are in fact *any* modes.  */
        case NE:
        case UNLE:
        case UNLT:      return CC3 | CC1 | CC2;
        default:        return -1;
        }

    case E_CCVFANYmode:
      switch (GET_CODE (code))
        {
        case EQ:
        case GT:
        case GE:        return CC0 | CC1;
          /* The inverted modes are in fact *all* modes.  */
        case NE:
        case UNLE:
        case UNLT:      return CC3 | CC2;
        default:        return -1;
        }

    case E_CCRAWmode:
      switch (GET_CODE (code))
        {
        case EQ:
          return INTVAL (XEXP (code, 1));
        case NE:
          return (INTVAL (XEXP (code, 1))) ^ 0xf;
        default:
          return -1;
        }

    default:
      return -1;
    }
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };
2311 if (GET_CODE (XEXP (code
, 0)) == REG
2312 && REGNO (XEXP (code
, 0)) == CC_REGNUM
2313 && (XEXP (code
, 1) == const0_rtx
2314 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
2315 && CONST_INT_P (XEXP (code
, 1)))))
2316 mask
= s390_branch_condition_mask (code
);
2318 mask
= s390_compare_and_branch_condition_mask (code
);
2320 gcc_assert (mask
>= 0);
2325 gcc_assert (mask
>= 1 && mask
<= 14);
2327 return mnemonic
[mask
];
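
/* Example (editor's note, not part of the original sources): a plain
   NE test on the CC register has mask CC1 | CC2 | CC3 = 7, and
   mnemonic[7] is "ne"; with INV set the mask becomes 7 ^ 15 = 8, and
   mnemonic[8] is "e", the inverted branch.  */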
/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
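
/* Worked example (editor's note, not part of the original sources):
   for OP = 0x0000123400000000, MODE = HImode and DEF = 0, the loop
   walks the 16-bit parts from least to most significant and returns
   0x1234, the only part that differs from DEF.  */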
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  machine_mode mode,
		  machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }

  return part == -1 ? -1 : n_parts - 1 - part;
}
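
/* Worked example (editor's note, not part of the original sources):
   OP = 0x0000ffff00000000 in DImode with PART_MODE = HImode and
   DEF = 0 has exactly one non-zero 16-bit part, found at i = 2
   counting from the least significant part.  The function returns
   n_parts - 1 - part = 4 - 1 - 2 = 1, i.e. the part number counted
   from the most significant end, which is how the lliXX/iiXX
   instruction variants number the halfwords.  */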
/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
	/* Look for the rightmost bit of a contiguous range of ones.  */
	if (bitmask & in)
	  end = start;
      }
    else
      {
	/* Look for the first zero bit after the range of ones.  */
	if (! (bitmask & in))
	  break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
      if (mask & in)
	/* There are more bits set beyond the first range of one bits.  */
	return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
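
/* Worked example (editor's note, not part of the original sources):
   IN = 0x0ff0 with SIZE = 16 is accepted: the ones occupy bits 4..11
   counting from the least significant bit, i.e. positions 52..59
   counting from the most significant of the 64 bits, so *PSTART = 52
   and *PEND = 59.  IN = 0x0f0f would be rejected because a second
   range of ones follows the first zero bit.  */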
/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
			   int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this
       as a valid bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (! wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
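
/* Worked example (editor's note, not part of the original sources):
   IN = 0xc000000000000003 with SIZE = 64 and WRAP_P = true wraps
   around: ~IN is contiguous with *START = 2 and *END = 61, and the
   swap above turns this into *START = 62 and *END = 1.  *START being
   greater than *END is the caller's signal that the mask wraps.  */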
/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/STOP give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  /* Handle floats by bitcasting them to ints.  */
  op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b)
    {
      if (start)
	{
	  *start -= (HOST_BITS_PER_WIDE_INT - size);
	  *end -= (HOST_BITS_PER_WIDE_INT - size);
	}
      return true;
    }
  else
    return false;
}
/* Return true if C consists only of byte chunks being either 0 or
   0xff.  If MASK is !=NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
	return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
	{
	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
	    return false;
	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
	  c = c >> BITS_PER_UNIT;
	}
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}
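
/* Worked example (editor's note, not part of the original sources):
   a V2DI constant { -1, 0 } consists only of 0x00 and 0xff bytes, so
   the function succeeds with *MASK = 0xff00: one mask bit per byte,
   element 0 landing in the high-order bits, which is the immediate
   format the vector generate byte mask (vgbm) instruction expects.  */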
/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;
  bool ok;

  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl >= 0)
    return (64 - end >= rotl);
  else
    {
      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
	 DImode.  */
      rotl = -rotl + (64 - bitsize);
      return (start >= rotl);
    }
}
/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
  /* Floating point and vector registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
	return false;
    }

  return true;
}
/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise.  */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block operations.
     Overlapping operations would otherwise be recognized by the S/390 hardware
     and would fall back to a slower implementation.  Allowing overlapping
     operations would lead to slow code but not to wrong code.  Therefore we are
     somewhat optimistic if we cannot prove that the memory blocks are
     overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}
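
/* Example (editor's note, not part of the original sources): two
   16-byte blocks at addresses B and B + 8 give DELTA = 8 with
   0 < 8 < 16, so the overlap is proven and the function returns
   true.  If the distance between the addresses cannot be determined
   at compile time, the function optimistically returns false, as the
   comment above explains.  */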
/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA.  Return true if
   we can prove this to be the case, false otherwise.  */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}
/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */

void
s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
			      rtx *operands)
{
  machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register.  */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory.  Otherwise perform the operation in SImode.  */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required.  */
  if (mode != wmode)
    {
      if (GET_CODE (dst) == SUBREG
	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
	dst = tem;
      else if (REG_P (dst))
	dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
	dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
	src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
	src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction.  */
  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}
/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands.  After reload, it has to be
     the first source operand.  */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}
/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions.  */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
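
/* Worked example (editor's note, not part of the original sources):
   for an AND of an SImode memory operand with 0xffffff00, DEF is -1,
   the only byte differing from 0xff is the least significant one,
   and s390_single_part returns 3.  The operation is narrowed to a
   QImode access at byte offset 3 with immediate 0x00, i.e. a single
   SI-type instruction (ni) instead of an SS-type one (nc).  */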
/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* Map for smallest class containing reg regno.  */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,	/*  0 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,		/*  4 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,		/*  8 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,		/* 12 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,			/* 16 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,			/* 20 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,			/* 24 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,			/* 28 */
  ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,		/* 32 */
  ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS,		/* 36 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,		/* 40 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,		/* 44 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,		/* 48 */
  VEC_REGS, VEC_REGS					/* 52 */
};
/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}

/* Return attribute relative_long of insn.  */

static bool
s390_safe_relative_long_p (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
  return false;
}
/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}
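
/* Example (editor's note, not part of the original sources): with the
   long displacement facility a CONST_INT displacement of 4095 still
   fits the short 12-bit unsigned base+displacement form, whereas 4096
   or any negative value needs the 20-bit signed long form, e.g. ly/sty
   instead of l/st.  */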
/* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
   If successful, also determines the
   following characteristics of `ref': `is_ptr' - whether it can be an
   LA argument, `is_base_ptr' - whether the resulting base is a well-known
   base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
   considered a literal pool pointer for purposes of avoiding two different
   literal pool pointers per insn during or after reload (`B' constraint).  */

static bool
s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
				  bool *is_base_ptr, bool *is_pool_ptr)
{
  if (!*ref)
    return true;

  if (GET_CODE (*ref) == UNSPEC)
    switch (XINT (*ref, 1))
      {
      case UNSPEC_LTREF:
	if (!*disp)
	  *disp = gen_rtx_UNSPEC (Pmode,
				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
				  UNSPEC_LTREL_OFFSET);
	else
	  return false;

	*ref = XVECEXP (*ref, 0, 1);
	break;

      default:
	return false;
      }

  if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
    return false;

  if (REGNO (*ref) == STACK_POINTER_REGNUM
      || REGNO (*ref) == FRAME_POINTER_REGNUM
      || ((reload_completed || reload_in_progress)
	  && frame_pointer_needed
	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
      || REGNO (*ref) == ARG_POINTER_REGNUM
      || (flag_pic
	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
    *is_ptr = *is_base_ptr = true;

  if ((reload_completed || reload_in_progress)
      && *ref == cfun->machine->base_reg)
    *is_ptr = *is_base_ptr = *is_pool_ptr = true;

  return true;
}
/* Decompose an RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise.  If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized.  */

static int
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below.  However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register.  This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose.  */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement.  */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
	{
	  if (code1 == REG || code1 == UNSPEC)
	    {
	      indx = op0;	/* index + base */
	      base = op1;
	    }
	  else
	    {
	      base = op0;	/* base + displacement */
	      disp = op1;
	    }
	}
      else if (code0 == PLUS)
	{
	  indx = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return false;
    }

  else
    disp = addr;		/* displacement */

  /* Extract integer part of displacement.  */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
	{
	  offset = INTVAL (disp);
	  disp = NULL_RTX;
	}
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	{
	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
	  disp = XEXP (XEXP (disp, 0), 0);
	}
    }

  /* Strip off CONST here to avoid special case tests later.  */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register.  */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      if (base || indx)
	return false;

      base = fake_pool_base, literal_pool = true;

      /* Mark up the displacement.  */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
			     UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register.  */
  if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
					 &literal_pool))
    return false;

  /* Validate index register.  */
  if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
					 &literal_pool))
    return false;

  /* Prefer to use pointer as base, not index.  */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      std::swap (base, indx);
      std::swap (base_ptr, indx_ptr);
    }

  /* Validate displacement.  */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
	 anyway as the virtual registers get eliminated.  This could make a
	 valid displacement invalid, but it is more likely to make an invalid
	 displacement valid, because we sometimes access the register save area
	 via negative offsets to one of those registers.
	 Thus we don't check the displacement for validity here.  If after
	 elimination the displacement turns out to be invalid after all,
	 this is fixed up by reload in any case.  */
      /* LRA always keeps displacements up to date, and we need to know
	 the displacement is right during all of LRA, not only at the
	 final elimination.  */
      if (lra_in_progress
	  || (base != arg_pointer_rtx
	      && indx != arg_pointer_rtx
	      && base != return_address_pointer_rtx
	      && indx != return_address_pointer_rtx
	      && base != frame_pointer_rtx
	      && indx != frame_pointer_rtx
	      && base != virtual_stack_vars_rtx
	      && indx != virtual_stack_vars_rtx))
	if (!DISP_IN_RANGE (offset))
	  return false;
    }
  else
    {
      /* All the special cases are pointers.  */
      pointer = true;

      /* In the small-PIC case, the linker converts @GOT
	 and @GOTNTPOFF offsets to possible displacements.  */
      if (GET_CODE (disp) == UNSPEC
	  && (XINT (disp, 1) == UNSPEC_GOT
	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
	  && offset == 0)
	;

      /* Accept pool label offsets.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
	;

      /* Accept literal pool references.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
	{
	  /* In case CSE pulled a non literal pool reference out of
	     the pool we have to reject the address.  This is
	     especially important when loading the GOT pointer on non
	     zarch CPUs.  In this case the literal pool contains an lt
	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
	     will most likely exceed the displacement.  */
	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
	    return false;

	  orig_disp = gen_rtx_CONST (Pmode, disp);
	  if (offset)
	    {
	      /* If we have an offset, make sure it does not
		 exceed the size of the constant pool entry.
		 Otherwise we might generate an out-of-range
		 displacement for the base register form.  */
	      rtx sym = XVECEXP (disp, 0, 0);
	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
		return false;

	      orig_disp = plus_constant (Pmode, orig_disp, offset);
	    }
	}

      else
	return false;
    }

  if (!base && !indx)
    pointer = true;

  if (out)
    {
      out->base = base;
      out->indx = indx;
      out->disp = orig_disp;
      out->pointer = pointer;
      out->literal_pool = literal_pool;
    }

  return true;
}
/* Decompose an RTL expression OP for an address style operand into its
   components, and return the base register in BASE and the offset in
   OFFSET.  While OP looks like an address it is never supposed to be
   used as such.

   Return true if OP is a valid address operand, false if not.  */

bool
s390_decompose_addrstyle_without_index (rtx op, rtx *base,
					HOST_WIDE_INT *offset)
{
  rtx off = NULL_RTX;

  /* We can have an integer constant, an address register,
     or a sum of the two.  */
  if (CONST_SCALAR_INT_P (op))
    {
      off = op;
      op = NULL_RTX;
    }
  if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
    {
      off = XEXP (op, 1);
      op = XEXP (op, 0);
    }
  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (op && GET_CODE (op) != REG)
    return false;

  if (offset)
    {
      if (off == NULL_RTX)
	*offset = 0;
      else if (CONST_INT_P (off))
	*offset = INTVAL (off);
      else if (CONST_WIDE_INT_P (off))
	/* The offset will anyway be cut down to 12 bits so take just
	   the lowest order chunk of the wide int.  */
	*offset = CONST_WIDE_INT_ELT (off, 0);
      else
	gcc_unreachable ();
    }
  if (base)
    *base = op;

  return true;
}
/* Check that OP is a valid shift count operand.
   It should be of the following structure:
     (subreg (and (plus (reg imm_op)) 2^k-1) 7)
   where the subreg, and, and plus operations are optional.

   If IMPLICIT_MASK is > 0 and OP contains an
     (AND (reg) (const_int))
   it is checked whether IMPLICIT_MASK and the immediate match.
   Otherwise, no checking is performed.  */

bool
s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
{
  /* Strip subregs.  */
  while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
    op = XEXP (op, 0);

  /* Check for an and with proper constant.  */
  if (GET_CODE (op) == AND)
    {
      rtx op1 = XEXP (op, 0);
      rtx imm = XEXP (op, 1);

      if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
	op1 = XEXP (op1, 0);

      if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
	return false;

      if (!immediate_operand (imm, GET_MODE (imm)))
	return false;

      HOST_WIDE_INT val = INTVAL (imm);
      if (implicit_mask > 0
	  && (val & implicit_mask) != implicit_mask)
	return false;

      op = op1;
    }

  /* Check the rest.  */
  return s390_decompose_addrstyle_without_index (op, NULL, NULL);
}
/* Return true if CODE is a valid address without index.  */

bool
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}
/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction.  Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in REF and/or ADDEND if you are not interested in these
   values.  */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (GET_CODE (addr) == SYMBOL_REF
      || (GET_CODE (addr) == UNSPEC
	  && (XINT (addr, 1) == UNSPEC_GOTENT
	      || XINT (addr, 1) == UNSPEC_PLT31)))
    {
      if (symref)
	*symref = addr;
      if (addend)
	*addend = tmpaddend;

      return true;
    }
  return false;
}
/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Pass true for LIT_POOL_OK if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
   letters are allowed for C.  */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  rtx symref;
  struct s390_address addr;
  bool decomposed = false;

  if (!address_operand (op, GET_MODE (op)))
    return 0;

  /* This check makes sure that no symbolic addresses (except literal
     pool references) are accepted by the R or T constraints.  */
  if (s390_loadrelative_operand_p (op, &symref, NULL)
      && (!lit_pool_ok
	  || !SYMBOL_REF_P (symref)
	  || !CONSTANT_POOL_ADDRESS_P (symref)))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
	return 0;
      if (addr.literal_pool)
	return 0;
      decomposed = true;
    }

  /* With reload, we sometimes get intermediate address forms that are
     actually invalid as-is, but we need to accept them in the most
     generic cases below ('R' or 'T'), since reload will in fact fix
     them up.  LRA behaves differently here; we never see such forms,
     but on the other hand, we need to strictly reject every invalid
     address form.  After both reload and LRA invalid address forms
     must be rejected, because nothing will fix them up later.  Perform
     this check right up front.  */
  if (lra_in_progress || reload_completed)
    {
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      if (!s390_short_displacement (addr.disp))
	return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
	{
	  if (!decomposed && !s390_decompose_address (op, &addr))
	    return 0;
	  if (!s390_short_displacement (addr.disp))
	    return 0;
	}
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    case 'S': /* no index long displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      break;

    case 'T': /* with index long displacement */
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    default:
      return 0;
    }

  return 1;
}
/* Evaluates constraint strings described by the regular expression
   ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, or 0 else.  */

int
s390_mem_constraint (const char *str, rtx op)
{
  char c = str[0];

  switch (c)
    {
    case 'A':
      /* Check for offsettable variants of memory constraints.  */
      if (!MEM_P (op) || MEM_VOLATILE_P (op))
	return 0;
      if ((reload_completed || reload_in_progress)
	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
    case 'B':
      /* Check for non-literal-pool variants of memory constraints.  */
      if (!MEM_P (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
      if (GET_CODE (op) != MEM)
	return 0;
      return s390_check_qrst_address (c, XEXP (op, 0), true);
    case 'Y':
      /* Simply check for the basic form of a shift count.  Reload will
	 take care of making sure we have a proper base register.  */
      if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
	return 0;
      break;
    case 'Z':
      return s390_check_qrst_address (str[1], op, true);
    default:
      return 0;
    }
  return 1;
}
/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the second letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
   otherwise.  */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}
/* Evaluates constraint strings starting with letter N.  Parameter STR
   contains the letters following letter "N" in the constraint string.
   Returns true if VALUE matches the constraint.  */

int
s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
{
  machine_mode mode, part_mode;
  int def;
  int part, part_goal;

  if (str[0] == 'x')
    part_goal = -1;
  else
    part_goal = str[0] - '0';

  switch (str[1])
    {
    case 'Q':
      part_mode = QImode;
      break;
    case 'H':
      part_mode = HImode;
      break;
    case 'S':
      part_mode = SImode;
      break;
    default:
      return 0;
    }

  switch (str[2])
    {
    case 'H':
      mode = HImode;
      break;
    case 'S':
      mode = SImode;
      break;
    case 'D':
      mode = DImode;
      break;
    default:
      return 0;
    }

  switch (str[3])
    {
    case '0':
      def = 0;
      break;
    case 'F':
      def = -1;
      break;
    default:
      return 0;
    }

  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
    return 0;

  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
  if (part < 0)
    return 0;
  if (part_goal != -1 && part_goal != part)
    return 0;

  return 1;
}
/* Returns true if the input parameter VALUE is a float zero.  */

int
s390_float_const_zero_p (rtx value)
{
  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
	  && value == CONST0_RTX (GET_MODE (value)));
}
/* Implement TARGET_REGISTER_MOVE_COST.  */

static int
s390_register_move_cost (machine_mode mode,
			 reg_class_t from, reg_class_t to)
{
  /* On s390, copy between fprs and gprs is expensive.  */

  /* It becomes somewhat faster having ldgr/lgdr.  */
  if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
    {
      /* ldgr is single cycle.  */
      if (reg_classes_intersect_p (from, GENERAL_REGS)
	  && reg_classes_intersect_p (to, FP_REGS))
	return 1;
      /* lgdr needs 3 cycles.  */
      if (reg_classes_intersect_p (to, GENERAL_REGS)
	  && reg_classes_intersect_p (from, FP_REGS))
	return 3;
    }

  /* Otherwise copying is done via memory.  */
  if ((reg_classes_intersect_p (from, GENERAL_REGS)
       && reg_classes_intersect_p (to, FP_REGS))
      || (reg_classes_intersect_p (from, FP_REGS)
	  && reg_classes_intersect_p (to, GENERAL_REGS)))
    return 10;

  /* We usually do not want to copy via CC.  */
  if (reg_classes_intersect_p (from, CC_REGS)
      || reg_classes_intersect_p (to, CC_REGS))
    return 5;

  return 1;
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

static int
s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
		       reg_class_t rclass ATTRIBUTE_UNUSED,
		       bool in ATTRIBUTE_UNUSED)
{
  return 2;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  The
   initial value of *TOTAL is the default value computed by
   rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
   code of the superexpression of x.  */

static bool
s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST:
    case CONST_INT:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case MEM:
      *total = 0;
      return true;

    case SET:
      {
	/* Without this a conditional move instruction would be
	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
	   comparison operator).  That's a bit pessimistic.  */

	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
	  return false;

	rtx cond = XEXP (SET_SRC (x), 0);

	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
	  return false;

	/* It is going to be a load/store on condition.  Make it
	   slightly more expensive than a normal load.  */
	*total = COSTS_N_INSNS (1) + 1;

	rtx dst = SET_DEST (x);
	rtx then = XEXP (SET_SRC (x), 1);
	rtx els = XEXP (SET_SRC (x), 2);

	/* It is a real IF-THEN-ELSE.  An additional move will be
	   needed to implement that.  */
	if (reload_completed
	    && !rtx_equal_p (dst, then)
	    && !rtx_equal_p (dst, els))
	  *total += COSTS_N_INSNS (1) / 2;

	/* A minor penalty for constants we cannot directly handle.  */
	if ((CONST_INT_P (then) || CONST_INT_P (els))
	    && (!TARGET_Z13 || MEM_P (dst)
		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
	  *total += COSTS_N_INSNS (1) / 2;

	/* A store on condition can only handle register src operands.  */
	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
	  *total += COSTS_N_INSNS (1) / 2;

	return true;
      }
    case IOR:

      /* nnrk, nngrk */
      if (TARGET_Z15
	  && (mode == SImode || mode == DImode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
	    *total += 1;
	  return true;
	}

      /* risbg */
      if (GET_CODE (XEXP (x, 0)) == AND
	  && GET_CODE (XEXP (x, 1)) == ASHIFT
	  && REG_P (XEXP (XEXP (x, 0), 0))
	  && REG_P (XEXP (XEXP (x, 1), 0))
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
	{
	  *total = COSTS_N_INSNS (2);
	  return true;
	}

      /* ~AND on a 128 bit mode.  This can be done using a vector
	 instruction.  */
      if (TARGET_VXE
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT
	  && REG_P (XEXP (XEXP (x, 0), 0))
	  && REG_P (XEXP (XEXP (x, 1), 0))
	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
	  && s390_hard_regno_mode_ok (VR0_REGNUM,
				      GET_MODE (XEXP (XEXP (x, 0), 0))))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case AND:
      /* nork, nogrk */
      if (TARGET_Z15
	  && (mode == SImode || mode == DImode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
	    *total += 1;
	  return true;
	}
      /* fallthrough */
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
    case XOR:
    case NEG:
    case NOT:
    case PLUS:
    case MINUS:
      *total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      switch (mode)
	{
	case E_SImode:
	  {
	    rtx left = XEXP (x, 0);
	    rtx right = XEXP (x, 1);
	    if (GET_CODE (right) == CONST_INT
		&& CONST_OK_FOR_K (INTVAL (right)))
	      *total = s390_cost->mhi;
	    else if (GET_CODE (left) == SIGN_EXTEND)
	      *total = s390_cost->mh;
	    else
	      *total = s390_cost->ms;  /* msr, ms, msy */
	    break;
	  }
	case E_DImode:
	  {
	    rtx left = XEXP (x, 0);
	    rtx right = XEXP (x, 1);
	    if (TARGET_ZARCH)
	      {
		if (GET_CODE (right) == CONST_INT
		    && CONST_OK_FOR_K (INTVAL (right)))
		  *total = s390_cost->mghi;
		else if (GET_CODE (left) == SIGN_EXTEND)
		  *total = s390_cost->msgf;
		else
		  *total = s390_cost->msg;  /* msgr, msg */
	      }
	    else /* TARGET_31BIT */
	      {
		if (GET_CODE (left) == SIGN_EXTEND
		    && GET_CODE (right) == SIGN_EXTEND)
		  /* mulsidi case: mr, m */
		  *total = s390_cost->m;
		else if (GET_CODE (left) == ZERO_EXTEND
			 && GET_CODE (right) == ZERO_EXTEND)
		  /* umulsidi case: ml, mlr */
		  *total = s390_cost->ml;
		else
		  /* Complex calculation is required.  */
		  *total = COSTS_N_INSNS (40);
	      }
	    break;
	  }
	case E_SFmode:
	case E_DFmode:
	  *total = s390_cost->mult_df;
	  break;
	case E_TFmode:
	  *total = s390_cost->mxbr;
	  break;
	default:
	  return false;
	}
      return false;

    case FMA:
      switch (mode)
	{
	case E_DFmode:
	  *total = s390_cost->madbr;
	  break;
	case E_SFmode:
	  *total = s390_cost->maebr;
	  break;
	default:
	  return false;
	}
      /* Negate in the third argument is free: FMSUB.  */
      if (GET_CODE (XEXP (x, 2)) == NEG)
	{
	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
	  return true;
	}
      return false;

    case UDIV:
    case UMOD:
      if (mode == TImode)	       /* 128 bit division */
	*total = s390_cost->dlgr;
      else if (mode == DImode)
	{
	  rtx right = XEXP (x, 1);
	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
	    *total = s390_cost->dlr;
	  else				       /* 64 by 64 bit division */
	    *total = s390_cost->dlgr;
	}
      else if (mode == SImode)	       /* 32 bit division */
	*total = s390_cost->dlr;
      return false;

    case DIV:
    case MOD:
      if (mode == DImode)
	{
	  rtx right = XEXP (x, 1);
	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
	    if (TARGET_ZARCH)
	      *total = s390_cost->dsgfr;
	    else
	      *total = s390_cost->dr;
	  else				       /* 64 by 64 bit division */
	    *total = s390_cost->dsgr;
	}
      else if (mode == SImode)	       /* 32 bit division */
	*total = s390_cost->dlr;
      else if (mode == SFmode)
	*total = s390_cost->debr;
      else if (mode == DFmode)
	*total = s390_cost->ddbr;
      else if (mode == TFmode)
	*total = s390_cost->dxbr;
      return false;

    case SQRT:
      if (mode == SFmode)
	*total = s390_cost->sqebr;
      else if (mode == DFmode)
	*total = s390_cost->sqdbr;
      else /* TFmode */
	*total = s390_cost->sqxbr;
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (outer_code == MULT || outer_code == DIV || outer_code == MOD
	  || outer_code == PLUS || outer_code == MINUS
	  || outer_code == COMPARE)
	*total = 0;
      return false;

    case COMPARE:
      *total = COSTS_N_INSNS (1);

      /* nxrk, nxgrk ~(a^b)==0 */
      if (TARGET_Z15
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && XEXP (x, 1) == const0_rtx
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
	  && mode == CCZmode)
	{
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	    *total += 1;
	  return true;
	}

      /* nnrk, nngrk, nork, nogrk */
      if (TARGET_Z15
	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
	  && XEXP (x, 1) == const0_rtx
	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
	  && mode == CCZmode)
	{
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
	    *total += 1;
	  return true;
	}

      if (GET_CODE (XEXP (x, 0)) == AND
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	{
	  rtx op0 = XEXP (XEXP (x, 0), 0);
	  rtx op1 = XEXP (XEXP (x, 0), 1);
	  rtx op2 = XEXP (x, 1);

	  if (memory_operand (op0, GET_MODE (op0))
	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
	    return true;
	  if (register_operand (op0, GET_MODE (op0))
	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
	    return true;
	}
      return false;

    default:
      return false;
    }
}
/* Return the cost of an address rtx ADDR.  */

static int
s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct s390_address ad;

  if (!s390_decompose_address (addr, &ad))
    return 1000;

  return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
}
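
/* Editor's note (not part of the original sources): the net effect
   of the cost above is that a base+index+displacement address is
   rated one unit more expensive than a plain base+displacement
   address, so address generation slightly prefers forms that leave
   the index slot free.  */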
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
    case scalar_stmt:
    case scalar_load:
    case scalar_store:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vector_gather_load:
    case vector_scatter_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
    case vec_perm:
    case vec_promote_demote:
    case unaligned_load:
    case unaligned_store:
      return 1;

    case cond_branch_taken:
      return 3;

    case vec_construct:
      return TYPE_VECTOR_SUBPARTS (vectype) - 1;

    default:
      gcc_unreachable ();
    }
}
/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
   otherwise return 0.  */

static tls_model
tls_symbolic_operand (rtx op)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return TLS_MODEL_NONE;
  return SYMBOL_REF_TLS_MODEL (op);
}
/* Split DImode access register reference REG (on 64-bit) into its constituent
   low and high parts, and store them into LO and HI.  Note that gen_lowpart/
   gen_highpart cannot be used as they assume all registers are word-sized,
   while our access registers have only half that size.  */

void
s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
{
  gcc_assert (TARGET_64BIT);
  gcc_assert (ACCESS_REG_P (reg));
  gcc_assert (GET_MODE (reg) == DImode);
  gcc_assert (!(REGNO (reg) & 1));

  *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
  *hi = gen_rtx_REG (SImode, REGNO (reg));
}
/* Return true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
4051 tls_symbolic_reference_mentioned_p (rtx op
)
4056 if (GET_CODE (op
) == SYMBOL_REF
)
4057 return tls_symbolic_operand (op
);
4059 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4060 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4066 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4067 if (tls_symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4071 else if (fmt
[i
] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* Return true if OP is a legitimate general operand when
   generating PIC code.  It is given that flag_pic is on
   and that OP satisfies CONSTANT_P.  */

static int
legitimate_pic_operand_p (rtx op)
{
  /* Accept all non-symbolic constants.  */
  if (!SYMBOLIC_CONST (op))
    return 1;

  /* Accept addresses that can be expressed relative to (pc).  */
  if (larl_operand (op, VOIDmode))
    return 1;

  /* Reject everything else; must be handled
     via emit_symbolic_move.  */
  return 0;
}
/* Returns true if the constant value OP is a legitimate general operand.
   It is given that OP satisfies CONSTANT_P.  */

static bool
s390_legitimate_constant_p (machine_mode mode, rtx op)
{
  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
    {
      if (GET_MODE_SIZE (mode) != 16)
	return false;

      if (!satisfies_constraint_j00 (op)
	  && !satisfies_constraint_jm1 (op)
	  && !satisfies_constraint_jKK (op)
	  && !satisfies_constraint_jxx (op)
	  && !satisfies_constraint_jyy (op))
	return false;
    }

  /* Accept all non-symbolic constants.  */
  if (!SYMBOLIC_CONST (op))
    return true;

  /* Accept immediate LARL operands.  */
  if (larl_operand (op, mode))
    return true;

  /* Thread-local symbols are never legal constants.  This is
     so that emit_call knows that computing such addresses
     might require a function call.  */
  if (TLS_SYMBOLIC_CONST (op))
    return false;

  /* In the PIC case, symbolic constants must *not* be
     forced into the literal pool.  We accept them here,
     so that they will be handled by emit_symbolic_move.  */
  if (flag_pic)
    return true;

  /* All remaining non-PIC symbolic constants are
     forced into the literal pool.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).  */

static bool
s390_cannot_force_const_mem (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case NEG:
      /* Accept an unary '-' only on scalar numeric constants.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case CONST_INT:
	case CONST_DOUBLE:
	case CONST_WIDE_INT:
	  return false;
	default:
	  return true;
	}

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
	 non-TLS symbols are OK iff we are non-PIC.  */
      if (tls_symbolic_operand (x))
	return true;
      else
	return flag_pic != 0;

    case CONST:
      return s390_cannot_force_const_mem (mode, XEXP (x, 0));
    case PLUS:
    case MINUS:
      return s390_cannot_force_const_mem (mode, XEXP (x, 0))
	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));

    case UNSPEC:
      switch (XINT (x, 1))
	{
	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
	case UNSPEC_LTREL_OFFSET:
	case UNSPEC_GOT:
	case UNSPEC_GOTNTPOFF:
	case UNSPEC_INDNTPOFF:
	  return false;

	/* If the literal pool shares the code section, execute
	   template placeholders must be put into the pool as well.  */
	default:
	  return true;
	}

    default:
      gcc_unreachable ();
    }
}
/* Returns true if the constant value OP is a legitimate general
   operand during and after reload.  The difference to
   legitimate_constant_p is that this function will not accept
   a constant that would need to be forced to the literal pool
   before it can be used as operand.
   This function accepts all constants which can be loaded directly
   into a GPR.  */

bool
legitimate_reload_constant_p (rtx op)
{
  /* Accept la(y) operands.  */
  if (GET_CODE (op) == CONST_INT
      && DISP_IN_RANGE (INTVAL (op)))
    return true;

  /* Accept l(g)hi/l(g)fi operands.  */
  if (GET_CODE (op) == CONST_INT
      && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
    return true;

  /* Accept lliXX operands.  */
  if (TARGET_ZARCH
      && GET_CODE (op) == CONST_INT
      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
      && s390_single_part (op, word_mode, HImode, 0) >= 0)
    return true;

  /* Accept llihf/llilf operands.  */
  if (TARGET_EXTIMM
      && GET_CODE (op) == CONST_INT
      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
      && s390_single_part (op, word_mode, SImode, 0) >= 0)
    return true;

  /* Accept larl operands.  */
  if (larl_operand (op, VOIDmode))
    return true;

  /* Accept floating-point zero operands that fit into a single GPR.  */
  if (GET_CODE (op) == CONST_DOUBLE
      && s390_float_const_zero_p (op)
      && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
    return true;

  /* Accept double-word operands that can be split.  */
  if (GET_CODE (op) == CONST_WIDE_INT
      || (GET_CODE (op) == CONST_INT
	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
    {
      machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
      rtx hi = operand_subword (op, 0, 0, dword_mode);
      rtx lo = operand_subword (op, 1, 0, dword_mode);
      return legitimate_reload_constant_p (hi)
	     && legitimate_reload_constant_p (lo);
    }

  /* Everything else cannot be handled without reload.  */
  return false;
}
/* Returns true if the constant value OP is a legitimate fp operand
   during and after reload.
   This function accepts all constants which can be loaded directly
   into an FPR.  */

static bool
legitimate_reload_fp_constant_p (rtx op)
{
  /* Accept floating-point zero operands if the load zero instruction
     can be used.  Prior to z196 the load fp zero instruction caused a
     performance penalty if the result is used as BFP number.  */
  if (TARGET_Z196
      && GET_CODE (op) == CONST_DOUBLE
      && s390_float_const_zero_p (op))
    return true;

  return false;
}
/* Returns true if the constant value OP is a legitimate vector operand
   during and after reload.
   This function accepts all constants which can be loaded directly
   into a VR.  */

static bool
legitimate_reload_vector_constant_p (rtx op)
{
  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
      && (satisfies_constraint_j00 (op)
	  || satisfies_constraint_jm1 (op)
	  || satisfies_constraint_jKK (op)
	  || satisfies_constraint_jxx (op)
	  || satisfies_constraint_jyy (op)))
    return true;

  return false;
}
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
   return the class of reg to actually use.  */

static reg_class_t
s390_preferred_reload_class (rtx op, reg_class_t rclass)
{
  switch (GET_CODE (op))
    {
      /* Constants we cannot reload into general registers
	 must be forced into the literal pool.  */
    case CONST_VECTOR:
    case CONST_DOUBLE:
    case CONST_INT:
    case CONST_WIDE_INT:
      if (reg_class_subset_p (GENERAL_REGS, rclass)
	  && legitimate_reload_constant_p (op))
	return GENERAL_REGS;
      else if (reg_class_subset_p (ADDR_REGS, rclass)
	       && legitimate_reload_constant_p (op))
	return ADDR_REGS;
      else if (reg_class_subset_p (FP_REGS, rclass)
	       && legitimate_reload_fp_constant_p (op))
	return FP_REGS;
      else if (reg_class_subset_p (VEC_REGS, rclass)
	       && legitimate_reload_vector_constant_p (op))
	return VEC_REGS;

      return NO_REGS;

      /* If a symbolic constant or a PLUS is reloaded,
	 it is most likely being used as an address, so
	 prefer ADDR_REGS.  If 'class' is not a superset
	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
    case CONST:
      /* Symrefs cannot be pushed into the literal pool with -fPIC
	 so we *MUST NOT* return NO_REGS for these cases
	 (s390_cannot_force_const_mem will return true).

	 On the other hand we MUST return NO_REGS for symrefs with
	 invalid addend which might have been pushed to the literal
	 pool (no -fPIC).  Usually we would expect them to be
	 handled via secondary reload but this does not happen if
	 they are used as literal pool slot replacement in reload
	 inheritance (see emit_input_reload_insns).  */
      if (GET_CODE (XEXP (op, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
	  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	{
	  if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
	    return ADDR_REGS;
	  else
	    return NO_REGS;
	}
      /* fallthrough */
    case LABEL_REF:
    case SYMBOL_REF:
      if (!legitimate_reload_constant_p (op))
	return NO_REGS;
      /* fallthrough */
    case PLUS:
      /* load address will be used.  */
      if (reg_class_subset_p (ADDR_REGS, rclass))
	return ADDR_REGS;
      else
	return NO_REGS;

    default:
      break;
    }

  return rclass;
}
/* Return true if ADDR is SYMBOL_REF + addend with addend being a
   multiple of ALIGNMENT and the SYMBOL_REF being naturally
   aligned.  */

bool
s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
{
  HOST_WIDE_INT addend;
  rtx symref;

  /* The "required alignment" might be 0 (e.g. for certain structs
     accessed via BLKmode).  Early abort in this case, as well as when
     an alignment > 8 is required.  */
  if (alignment < 2 || alignment > 8)
    return false;

  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
    return false;

  if (addend & (alignment - 1))
    return false;

  if (GET_CODE (symref) == SYMBOL_REF)
    {
      /* s390_encode_section_info is not called for anchors, since they don't
	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
      if (SYMBOL_REF_ANCHOR_P (symref))
	{
	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
					  / BITS_PER_UNIT);

	  gcc_assert (block_offset >= 0);
	  return ((block_offset & (alignment - 1)) == 0
		  && block_alignment >= alignment);
	}

      /* We have load-relative instructions for 2-byte, 4-byte, and
	 8-byte alignment so allow only these.  */
      switch (alignment)
	{
	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
	default: return false;
	}
    }

  if (GET_CODE (symref) == UNSPEC
      && alignment <= UNITS_PER_LONG)
    return true;

  return false;
}
/* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
   operand SCRATCH is used to reload the even part of the address and
   the odd part is added afterwards.  */

void
s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
{
  HOST_WIDE_INT addend;
  rtx symref;

  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
    gcc_unreachable ();

  if (!(addend & 1))
    /* Easy case.  The addend is even so larl will do fine.  */
    emit_move_insn (reg, addr);
  else
    {
      /* We can leave the scratch register untouched if the target
	 register is a valid base register.  */
      if (REGNO (reg) < FIRST_PSEUDO_REGISTER
	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
	scratch = reg;

      gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
      gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);

      if (addend != 1)
	emit_move_insn (scratch,
			gen_rtx_CONST (Pmode,
				       gen_rtx_PLUS (Pmode, symref,
						     GEN_INT (addend - 1))));
      else
	emit_move_insn (scratch, symref);

      /* Increment the address using la in order to avoid clobbering cc.  */
      s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
    }
}
/* Generate what is necessary to move between REG and MEM using
   SCRATCH.  The direction is given by TOMEM.  */

void
s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
{
  /* Reload might have pulled a constant out of the literal pool.
     Force it back in.  */
  if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
      || GET_CODE (mem) == CONST_WIDE_INT
      || GET_CODE (mem) == CONST_VECTOR
      || GET_CODE (mem) == CONST)
    mem = force_const_mem (GET_MODE (reg), mem);

  gcc_assert (MEM_P (mem));

  /* For a load from memory we can leave the scratch register
     untouched if the target register is a valid base register.  */
  if (!tomem
      && REGNO (reg) < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
      && GET_MODE (reg) == GET_MODE (scratch))
    scratch = reg;

  /* Load address into scratch register.  Since we can't have a
     secondary reload for a secondary reload we have to cover the case
     where larl would need a secondary reload here as well.  */
  s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);

  /* Now we can use a standard load/store to do the move.  */
  if (tomem)
    emit_move_insn (replace_equiv_address (mem, scratch), reg);
  else
    emit_move_insn (reg, replace_equiv_address (mem, scratch));
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		       machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Intermediate register needed.  */
  if (reg_classes_intersect_p (CC_REGS, rclass))
    return GENERAL_REGS;

  if (TARGET_VX)
    {
      /* The vst/vl vector move instructions allow only for short
	 displacements.  */
      if (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
	  && reg_class_subset_p (rclass, VEC_REGS)
	  && (!reg_class_subset_p (rclass, FP_REGS)
	      || (GET_MODE_SIZE (mode) > 8
		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
	{
	  if (in_p)
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_in :
			  CODE_FOR_reloadsi_la_in);
	  else
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_out :
			  CODE_FOR_reloadsi_la_out);
	}
    }

  if (TARGET_Z10)
    {
      HOST_WIDE_INT offset;
      rtx symref;

      /* On z10 several optimizer steps may generate larl operands with
	 an odd addend.  */
      if (in_p
	  && s390_loadrelative_operand_p (x, &symref, &offset)
	  && mode == Pmode
	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
	  && (offset & 1) == 1)
	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
		      : CODE_FOR_reloadsi_larl_odd_addend_z10);

      /* Handle all the (mem (symref)) accesses we cannot use the z10
	 instructions for.  */
      if (MEM_P (x)
	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
	  && (mode == QImode
	      || !reg_class_subset_p (rclass, GENERAL_REGS)
	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      || !s390_check_symref_alignment (XEXP (x, 0),
					       GET_MODE_SIZE (mode))))
	{
#define __SECONDARY_RELOAD_CASE(M,m)					\
	  case E_##M##mode:						\
	    if (TARGET_64BIT)						\
	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
				  CODE_FOR_reload##m##di_tomem_z10;	\
	    else							\
	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
				  CODE_FOR_reload##m##si_tomem_z10;	\
	  break;

	  switch (GET_MODE (x))
	    {
	      __SECONDARY_RELOAD_CASE (QI, qi);
	      __SECONDARY_RELOAD_CASE (HI, hi);
	      __SECONDARY_RELOAD_CASE (SI, si);
	      __SECONDARY_RELOAD_CASE (DI, di);
	      __SECONDARY_RELOAD_CASE (TI, ti);
	      __SECONDARY_RELOAD_CASE (SF, sf);
	      __SECONDARY_RELOAD_CASE (DF, df);
	      __SECONDARY_RELOAD_CASE (TF, tf);
	      __SECONDARY_RELOAD_CASE (SD, sd);
	      __SECONDARY_RELOAD_CASE (DD, dd);
	      __SECONDARY_RELOAD_CASE (TD, td);
	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
	    default:
	      gcc_unreachable ();
	    }
#undef __SECONDARY_RELOAD_CASE
	}
    }

  /* We need a scratch register when loading a PLUS expression which
     is not a legitimate operand of the LOAD ADDRESS instruction.  */
  /* LRA can deal with transformation of plus op very well -- so we
     don't need to prompt LRA in this case.  */
  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
    sri->icode = (TARGET_64BIT ?
		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);

  /* Performing a multiword move from or to memory we have to make sure the
     second chunk in memory is addressable without causing a displacement
     overflow.  If that would be the case we calculate the address in
     a scratch register.  */
  if (MEM_P (x)
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
			 + GET_MODE_SIZE (mode) - 1))
    {
      /* For GENERAL_REGS a displacement overflow is no problem if occurring
	 in a s_operand address since we may fallback to lm/stm.  So we only
	 have to care about overflows in the b+i+d case.  */
      if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
	  /* For FP_REGS no lm/stm is available so this check is triggered
	     for displacement overflows in b+i+d and b+d like addresses.  */
	  || (reg_classes_intersect_p (FP_REGS, rclass)
	      && s390_class_max_nregs (FP_REGS, mode) > 1))
	{
	  if (in_p)
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_in :
			  CODE_FOR_reloadsi_la_in);
	  else
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_out :
			  CODE_FOR_reloadsi_la_out);
	}
    }

  /* A scratch address register is needed when a symbolic constant is
     copied to r0 compiling with -fPIC.  In other cases the target
     register might be used as temporary (see legitimize_pic_address).  */
  if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
    sri->icode = (TARGET_64BIT ?
		  CODE_FOR_reloaddi_PIC_addr :
		  CODE_FOR_reloadsi_PIC_addr);

  /* Either scratch or no register needed.  */
  return NO_REGS;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   We need secondary memory to move data between GPRs and FPRs.

   - With DFP the ldgr lgdr instructions are available.  Due to the
     different alignment we cannot use them for SFmode.  For 31 bit a
     64 bit value in GPR would be a register pair so here we still
     need to go via memory.

   - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
     overlapping of FPRs and VRs we still disallow TF/TD modes to be
     in full VRs so as before also on z13 we do these moves via
     memory.

     FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */

static bool
s390_secondary_memory_needed (machine_mode mode,
			      reg_class_t class1, reg_class_t class2)
{
  return (((reg_classes_intersect_p (class1, VEC_REGS)
	    && reg_classes_intersect_p (class2, GENERAL_REGS))
	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
	       && reg_classes_intersect_p (class2, VEC_REGS)))
	  && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
	      || GET_MODE_SIZE (mode) != 8)
	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
			     && GET_MODE_SIZE (mode) > 8)));
}
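/* Illustration of the conditions above (schematic, not emitted
   literally): without the ldgr/lgdr moves (pre-z10 or without DFP),
   copying a 64-bit value between a GPR and an FPR goes through a
   stack slot, roughly

       stg  %r2,160(%r15)
       ld   %f0,160(%r15)

   while with the z10 DFP facility a direct  ldgr %f0,%r2  avoids the
   memory round trip for 8-byte values.  Register and displacement
   choices here are purely illustrative.  */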
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
   because the movsi and movsf patterns don't handle r/f moves.  */

static machine_mode
s390_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32)
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
  return mode;
}
/* Generate code to load SRC, which is PLUS that is not a
   legitimate operand for the LA instruction, into TARGET.
   SCRATCH may be used as scratch register.  */

void
s390_expand_plus_operand (rtx target, rtx src,
			  rtx scratch)
{
  rtx sum1, sum2;
  struct s390_address ad;

  /* src must be a PLUS; get its two operands.  */
  gcc_assert (GET_CODE (src) == PLUS);
  gcc_assert (GET_MODE (src) == Pmode);

  /* Check if any of the two operands is already scheduled
     for replacement by reload.  This can happen e.g. when
     float registers occur in an address.  */
  sum1 = find_replacement (&XEXP (src, 0));
  sum2 = find_replacement (&XEXP (src, 1));
  src = gen_rtx_PLUS (Pmode, sum1, sum2);

  /* If the address is already strictly valid, there's nothing to do.  */
  if (!s390_decompose_address (src, &ad)
      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
    {
      /* Otherwise, one of the operands cannot be an address register;
	 we reload its value into the scratch register.  */
      if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
	{
	  emit_move_insn (scratch, sum1);
	  sum1 = scratch;
	}
      if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
	{
	  emit_move_insn (scratch, sum2);
	  sum2 = scratch;
	}

      /* According to the way these invalid addresses are generated
	 in reload.c, it should never happen (at least on s390) that
	 *neither* of the PLUS components, after find_replacements
	 was applied, is an address register.  */
      if (sum1 == scratch && sum2 == scratch)
	{
	  debug_rtx (src);
	  gcc_unreachable ();
	}

      src = gen_rtx_PLUS (Pmode, sum1, sum2);
    }

  /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
     is only ever performed on addresses, so we can mark the
     sum as legitimate for LA in any case.  */
  s390_load_address (target, src);
}
/* Return true if ADDR is a valid memory address.
   STRICT specifies whether strict register checking applies.  */

static bool
s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  struct s390_address ad;

  if (TARGET_Z10
      && larl_operand (addr, VOIDmode)
      && (mode == VOIDmode
	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
    return true;

  if (!s390_decompose_address (addr, &ad))
    return false;

  /* The vector memory instructions only support short displacements.
     Reject invalid displacements early to prevent plenty of lay
     instructions from being generated later which then cannot be merged
     properly.  */
  if (TARGET_VX
      && VECTOR_MODE_P (mode)
      && ad.disp != NULL_RTX
      && CONST_INT_P (ad.disp)
      && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
    return false;

  if (strict)
    {
      if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
	return false;

      if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
	return false;
    }
  else
    {
      if (ad.base
	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
	return false;

      if (ad.indx
	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
	return false;
    }
  return true;
}
/* Return true if OP is a valid operand for the LA instruction.
   In 31-bit, we need to prove that the result is used as an
   address, as LA performs only a 31-bit addition.  */

bool
legitimate_la_operand_p (rtx op)
{
  struct s390_address addr;
  if (!s390_decompose_address (op, &addr))
    return false;

  return (TARGET_64BIT || addr.pointer);
}
/* Return true if it is valid *and* preferable to use LA to
   compute the sum of OP1 and OP2.  */

bool
preferred_la_operand_p (rtx op1, rtx op2)
{
  struct s390_address addr;

  if (op2 != const0_rtx)
    op1 = gen_rtx_PLUS (Pmode, op1, op2);

  if (!s390_decompose_address (op1, &addr))
    return false;
  if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
    return false;
  if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
    return false;

  /* Avoid LA instructions with index (and base) register on z196 or
     later; it is preferable to use regular add instructions when
     possible.  Starting with zEC12 the la with index register is
     "uncracked" again but still slower than a regular add.  */
  if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
    return false;

  if (!TARGET_64BIT && !addr.pointer)
    return false;

  if (addr.pointer)
    return true;

  if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
      || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
    return true;

  return false;
}
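/* As an illustration of the tuning decision above (schematic register
   names, assuming z196 or later where the three-operand adds exist):
   computing r1 = r2 + r3 as

       agrk  %r1,%r2,%r3

   is preferred over the equivalent

       la    %r1,0(%r2,%r3)

   because LA with an index register is cracked on z196 and, although
   uncracked again from zEC12 on, still slower than a plain add.  */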
/* Emit a forced load-address operation to load SRC into DST.
   This will use the LOAD ADDRESS instruction even in situations
   where legitimate_la_operand_p (SRC) returns false.  */

void
s390_load_address (rtx dst, rtx src)
{
  if (TARGET_64BIT)
    emit_move_insn (dst, src);
  else
    emit_insn (gen_force_la_31 (dst, src));
}
/* Return true if it is OK to use SYMBOL_REF in a relative address.  */

static bool
s390_rel_address_ok_p (rtx symbol_ref)
{
  tree decl;

  if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
    return true;

  decl = SYMBOL_REF_DECL (symbol_ref);

  if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
    return (s390_pic_data_is_text_relative
	    || (decl
		&& TREE_CODE (decl) == FUNCTION_DECL));

  return false;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx addend = const0_rtx;
  rtx new_rtx = orig;

  gcc_assert (!TLS_SYMBOLIC_CONST (addr));

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      addend = XEXP (addr, 1);
      addr = XEXP (addr, 0);
    }

  if ((GET_CODE (addr) == LABEL_REF
       || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
       || (GET_CODE (addr) == UNSPEC &&
	   (XINT (addr, 1) == UNSPEC_GOTENT
	    || XINT (addr, 1) == UNSPEC_PLT31)))
      && GET_CODE (addend) == CONST_INT)
    {
      /* This can be locally addressed.  */

      /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
      rtx const_addr = (GET_CODE (addr) == UNSPEC ?
			gen_rtx_CONST (Pmode, addr) : addr);

      if (larl_operand (const_addr, VOIDmode)
	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
	{
	  if (INTVAL (addend) & 1)
	    {
	      /* LARL can't handle odd offsets, so emit a pair of LARL
		 and LA.  */
	      rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	      if (!DISP_IN_RANGE (INTVAL (addend)))
		{
		  HOST_WIDE_INT even = INTVAL (addend) - 1;
		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
		  addr = gen_rtx_CONST (Pmode, addr);
		  addend = const1_rtx;
		}

	      emit_move_insn (temp, addr);
	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);

	      if (reg != 0)
		{
		  s390_load_address (reg, new_rtx);
		  new_rtx = reg;
		}
	    }
	  else
	    {
	      /* If the offset is even, we can just use LARL.  This
		 will happen automatically.  */
	    }
	}
      else
	{
	  /* No larl - Access local symbols relative to the GOT.  */

	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	  if (reload_in_progress || reload_completed)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
	  if (addend != const0_rtx)
	    addr = gen_rtx_PLUS (Pmode, addr, addend);
	  addr = gen_rtx_CONST (Pmode, addr);
	  addr = force_const_mem (Pmode, addr);
	  emit_move_insn (temp, addr);

	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
	  if (reg != 0)
	    {
	      s390_load_address (reg, new_rtx);
	      new_rtx = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
    {
      /* A non-local symbol reference without addend.

	 The symbol ref is wrapped into an UNSPEC to make sure the
	 proper operand modifier (@GOT or @GOTENT) will be emitted.
	 This will tell the linker to put the symbol into the GOT.

	 Additionally the code dereferencing the GOT slot is emitted here.

	 An addend to the symref needs to be added afterwards.
	 legitimize_pic_address calls itself recursively to handle
	 that case.  So no need to do it here.  */

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);

      if (TARGET_Z10)
	{
	  /* Use load relative if possible.
	     lgrl <target>, sym@GOTENT  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);

	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (flag_pic == 1)
	{
	  /* Assume GOT offset is a valid displacement operand (< 4k
	     or < 512k with z990).  This is handled the same way in
	     both 31- and 64-bit code (@GOT).
	     lg <target>, sym@GOT(r12)  */

	  if (reload_in_progress || reload_completed)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else
	{
	  /* If the GOT offset might be >= 4k, we determine the position
	     of the GOT entry via a PC-relative LARL (@GOTENT).
	     larl temp, sym@GOTENT
	     lg <target>, 0(temp)  */

	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);

	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  emit_move_insn (temp, new_rtx);
	  new_rtx = gen_const_mem (Pmode, temp);
	  emit_move_insn (reg, new_rtx);

	  new_rtx = reg;
	}
    }
  else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
    {
      gcc_assert (XVECLEN (addr, 0) == 1);
      switch (XINT (addr, 1))
	{
	  /* These address symbols (or PLT slots) relative to the GOT
	     (not GOT slots!).  In general this will exceed the
	     displacement range so these values belong in the literal
	     pool.  */
	case UNSPEC_GOTOFF:
	case UNSPEC_PLTOFF:
	  new_rtx = force_const_mem (Pmode, orig);
	  break;

	  /* For -fPIC the GOT size might exceed the displacement
	     range so make sure the value is in the literal pool.  */
	case UNSPEC_GOT:
	  if (flag_pic == 2)
	    new_rtx = force_const_mem (Pmode, orig);
	  break;

	  /* For @GOTENT larl is used.  This is handled like local
	     symbol refs.  */
	case UNSPEC_GOTENT:
	  gcc_unreachable ();
	  break;

	  /* For @PLT larl is used.  This is handled like local
	     symbol refs.  */
	case UNSPEC_PLT31:
	  gcc_unreachable ();
	  break;

	  /* Everything else cannot happen.  */
	default:
	  gcc_unreachable ();
	}
    }
  else if (addend != const0_rtx)
    {
      /* Otherwise, compute the sum.  */

      rtx base = legitimize_pic_address (addr, reg);
      new_rtx  = legitimize_pic_address (addend,
					 base == reg ? NULL_RTX : reg);
      if (GET_CODE (new_rtx) == CONST_INT)
	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
      else
	{
	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
	    {
	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
	      new_rtx = XEXP (new_rtx, 1);
	    }
	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
	}

      if (GET_CODE (new_rtx) == CONST)
	new_rtx = XEXP (new_rtx, 0);
      new_rtx = force_operand (new_rtx, 0);
    }

  return new_rtx;
}
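/* To illustrate the odd-addend case handled above (register names are
   schematic): since LARL can only form even PC-relative offsets, a
   local reference like sym+5 is emitted roughly as

       larl  %r1,sym
       la    %r1,5(%r1)

   and for odd addends outside the LA displacement range the even part
   is folded into the LARL target instead, leaving an addend of 1 for
   the LA.  */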
/* Load the thread pointer into a register.  */

rtx
s390_get_thread_pointer (void)
{
  rtx tp = gen_reg_rtx (Pmode);

  emit_insn (gen_get_thread_pointer (Pmode, tp));

  mark_reg_pointer (tp, BITS_PER_WORD);

  return tp;
}
/* Emit a tls call insn.  The call target is the SYMBOL_REF stored
   in s390_tls_symbol which always refers to __tls_get_offset.
   The returned offset is written to RESULT_REG and a USE rtx is
   generated for TLS_CALL.  */

static GTY(()) rtx s390_tls_symbol;

static void
s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
{
  rtx_insn *insn;

  if (!flag_pic)
    emit_insn (s390_load_got ());

  if (!s390_tls_symbol)
    {
      s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
      SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION;
    }

  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));

  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
  RTL_CONST_CALL_P (insn) = 1;
}
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  REG may be used as temporary.  */

static rtx
legitimize_tls_address (rtx addr, rtx reg)
{
  rtx new_rtx, tls_call, temp, base, r2;
  rtx_insn *insn;

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (tls_symbolic_operand (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	base = gen_reg_rtx (Pmode);
	s390_load_address (base, new_rtx);

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_INITIAL_EXEC:
	if (flag_pic == 1)
	  {
	    /* Assume GOT offset < 4k.  This is handled the same way
	       in both 31- and 64-bit code.  */

	    if (reload_in_progress || reload_completed)
	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	    new_rtx = gen_const_mem (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }
	else
	  {
	    /* If the GOT offset might be >= 4k, we determine the position
	       of the GOT entry via a PC-relative LARL.  */

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);

	    new_rtx = gen_const_mem (Pmode, temp);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_LOCAL_EXEC:
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
    {
      switch (XINT (XEXP (addr, 0), 1))
	{
	case UNSPEC_INDNTPOFF:
	  new_rtx = addr;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
    {
      new_rtx = XEXP (XEXP (addr, 0), 0);
      if (GET_CODE (new_rtx) != SYMBOL_REF)
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      new_rtx = legitimize_tls_address (new_rtx, reg);
      new_rtx = plus_constant (Pmode, new_rtx,
			       INTVAL (XEXP (XEXP (addr, 0), 1)));
      new_rtx = force_operand (new_rtx, 0);
    }

  /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
    {
      new_rtx = XEXP (XEXP (addr, 0), 0);
      if (GET_CODE (new_rtx) != SYMBOL_REF)
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      new_rtx = legitimize_tls_address (new_rtx, reg);
      new_rtx = gen_rtx_NEG (Pmode, new_rtx);
      new_rtx = force_operand (new_rtx, 0);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return new_rtx;
}
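/* A schematic sketch of the TLS_MODEL_LOCAL_EXEC case above, assuming
   64-bit code and with register and label names chosen only for
   illustration: the @NTPOFF offset is materialized from the literal
   pool and added to the thread pointer built from the access
   registers, roughly

       ear   %r1,%a0             # assemble the thread pointer
       sllg  %r1,%r1,32
       ear   %r1,%a1
       lg    %r2,.LC0-.L0(%r13)  # x@NTPOFF from the literal pool
       la    %r2,0(%r2,%r1)      # tp + ntpoff = address of x
*/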
/* Emit insns making the address in operands[1] valid for a standard
   move to operands[0].  operands[1] is replaced by an address which
   should be used instead of the former RTX to emit the move
   pattern.  */

void
emit_symbolic_move (rtx *operands)
{
  rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);

  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (Pmode, operands[1]);
  else if (TLS_SYMBOLIC_CONST (operands[1]))
    operands[1] = legitimize_tls_address (operands[1], temp);
  else if (flag_pic)
    operands[1] = legitimize_pic_address (operands[1], temp);
}
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address for details.  */

static rtx
s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx constant_term = const0_rtx;

  if (TLS_SYMBOLIC_CONST (x))
    {
      x = legitimize_tls_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }
  else if (GET_CODE (x) == PLUS
	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
    {
      return x;
    }
  else if (flag_pic)
    {
      if (SYMBOLIC_CONST (x)
	  || (GET_CODE (x) == PLUS
	      && (SYMBOLIC_CONST (XEXP (x, 0))
		  || SYMBOLIC_CONST (XEXP (x, 1)))))
	x = legitimize_pic_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }

  x = eliminate_constant_term (x, &constant_term);

  /* Optimize loading of large displacements by splitting them
     into the multiple of 4K and the rest; this allows the
     former to be CSE'd if possible.

     Don't do this if the displacement is added to a register
     pointing into the stack frame, as the offsets will
     change later anyway.  */

  if (GET_CODE (constant_term) == CONST_INT
      && !TARGET_LONG_DISPLACEMENT
      && !DISP_IN_RANGE (INTVAL (constant_term))
      && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
    {
      HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;

      rtx temp = gen_reg_rtx (Pmode);
      rtx val  = force_operand (GEN_INT (upper), temp);
      if (val != temp)
	emit_move_insn (temp, val);

      x = gen_rtx_PLUS (Pmode, x, temp);
      constant_term = GEN_INT (lower);
    }

  if (GET_CODE (x) == PLUS)
    {
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
	}
      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
	}
    }

  if (constant_term != const0_rtx)
    x = gen_rtx_PLUS (Pmode, x, constant_term);

  return x;
}
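/* Worked example for the 4K splitting above: for a displacement of
   0x12345 we get

       lower = 0x12345 & 0xfff  = 0x345
       upper = 0x12345 ^ 0x345  = 0x12000

   so the 4K-aligned part 0x12000 is loaded into a register (and can
   be CSE'd across several accesses) while 0x345 remains as a short
   displacement that fits the 12-bit field.  */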
/* Try a machine-dependent way of reloading an illegitimate address AD
   operand.  If we find one, push the reload and return the new address.

   MODE is the mode of the enclosing MEM.  OPNUM is the operand number
   and TYPE is the reload type of the current reload.  */

rtx
legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
			   int opnum, int type)
{
  if (!optimize || TARGET_LONG_DISPLACEMENT)
    return NULL_RTX;

  if (GET_CODE (ad) == PLUS)
    {
      rtx tem = simplify_binary_operation (PLUS, Pmode,
					   XEXP (ad, 0), XEXP (ad, 1));
      if (tem)
	ad = tem;
    }

  if (GET_CODE (ad) == PLUS
      && GET_CODE (XEXP (ad, 0)) == REG
      && GET_CODE (XEXP (ad, 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
    {
      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
      rtx cst, tem, new_rtx;

      cst = GEN_INT (upper);
      if (!legitimate_reload_constant_p (cst))
	cst = force_const_mem (Pmode, cst);

      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));

      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return new_rtx;
    }

  return NULL_RTX;
}
/* Emit code to move LEN bytes from SRC to DST.  */

bool
s390_expand_cpymem (rtx dst, rtx src, rtx len)
{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k will we
     generate inline code.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return false;

  /* Expand memcpy for constant length operands without a loop if it
     is shorter that way.

     With a constant length argument a
     memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
  if (GET_CODE (len) == CONST_INT
      && INTVAL (len) >= 0
      && INTVAL (len) <= 256 * 6
      && (!TARGET_MVCLE || INTVAL (len) <= 256))
    {
      HOST_WIDE_INT o, l;

      for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
	{
	  rtx newdst = adjust_address (dst, BLKmode, o);
	  rtx newsrc = adjust_address (src, BLKmode, o);
	  emit_insn (gen_cpymem_short (newdst, newsrc,
				       GEN_INT (l > 256 ? 255 : l - 1)));
	}
    }

  else if (TARGET_MVCLE)
    {
      emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
    }

  else
    {
      rtx dst_addr, src_addr, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      src_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);
      src = change_address (src, VOIDmode, src_addr);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
				   const0_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);

	  /* Issue a write prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
				   const1_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);
	}

      emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
      s390_load_address (src_addr,
			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cpymem_short (dst, src,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);
    }
  return true;
}
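/* For illustration: a constant length of 700 bytes falls into the
   unrolled case above and is emitted roughly as

       mvc  0(256,%r1),0(%r2)
       mvc  256(256,%r1),256(%r2)
       mvc  512(188,%r1),512(%r2)

   since 700 = 256 + 256 + 188; the md pattern encodes the length
   minus one, and the register choice here is schematic.  */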
/* Emit code to set LEN bytes at DST to VAL.
   Make use of clrmem if VAL is zero.  */

void
s390_expand_setmem (rtx dst, rtx len, rtx val)
{
  if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
    return;

  gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);

  /* Expand setmem/clrmem for a constant length operand without a
     loop if it will be shorter that way.
     clrmem loop (with PFD)    is 30 bytes -> 5 * xc
     clrmem loop (without PFD) is 24 bytes -> 4 * xc
     setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
     setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
  if (GET_CODE (len) == CONST_INT
      && ((val == const0_rtx
	   && (INTVAL (len) <= 256 * 4
	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
      && (!TARGET_MVCLE || INTVAL (len) <= 256))
    {
      HOST_WIDE_INT o, l;

      if (val == const0_rtx)
	/* clrmem: emit 256 byte blockwise XCs.  */
	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
	  {
	    rtx newdst = adjust_address (dst, BLKmode, o);
	    emit_insn (gen_clrmem_short (newdst,
					 GEN_INT (l > 256 ? 255 : l - 1)));
	  }
      else
	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
	   setting first byte to val and using a 256 byte mvc with one
	   byte overlap to propagate the byte.  */
	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
	  {
	    rtx newdst = adjust_address (dst, BLKmode, o);
	    emit_move_insn (adjust_address (dst, QImode, o), val);
	    if (l > 1)
	      {
		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
		emit_insn (gen_cpymem_short (newdstp1, newdst,
					     GEN_INT (l > 257 ? 255 : l - 2)));
	      }
	  }
    }

  else if (TARGET_MVCLE)
    {
      val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
      if (TARGET_64BIT)
	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
				       val));
      else
	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
				       val));
    }

  else
    {
      rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *onebyte_end_label = gen_label_rtx ();
      rtx_code_label *zerobyte_end_label = gen_label_rtx ();
      rtx_code_label *restbyte_end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
			       profile_probability::very_unlikely ());

      /* We need to make a copy of the target address since memset is
	 supposed to return it unmodified.  We have to make it here
	 already since the new reg is used at onebyte_end_label.  */
      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);

      if (val != const0_rtx)
	{
	  /* When using the overlapping mvc the original target
	     address is only accessed as single byte entity (even by
	     the mvc reading this value).  */
	  set_mem_size (dst, 1);
	  dstp1 = adjust_address (dst, VOIDmode, 1);
	  emit_cmp_and_jump_insns (count,
				   const1_rtx, EQ, NULL_RTX, mode, 1,
				   onebyte_end_label,
				   profile_probability::very_unlikely ());
	}

      /* There is one unconditional (mvi+mvc)/xc after the loop
	 dealing with the rest of the bytes, subtracting two (mvi+mvc)
	 or one (xc) here leaves this number of bytes to be handled by
	 it.  */
      temp = expand_binop (mode, add_optab, count,
			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
			   count, 1, OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, restbyte_end_label);

      emit_jump (loop_start_label);

      if (val != const0_rtx)
	{
	  /* The 1 byte != 0 special case.  Not handled efficiently
	     since we require two jumps for that.  However, this
	     should be very rare.  */
	  emit_label (onebyte_end_label);
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  emit_jump (zerobyte_end_label);
	}

      emit_label (loop_start_label);

      if (TARGET_SETMEM_PFD (val, len))
	{
	  /* Issue a write prefetch.  */
	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
				       const1_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
      else
	{
	  /* Set the first byte in the block to the value and use an
	     overlapping mvc for the block.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
	}
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       NE, NULL_RTX, mode, 1, loop_start_label);

      emit_label (restbyte_end_label);

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
      else
	{
	  /* Set the first byte in the block to the value and use an
	     overlapping mvc for the block.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  /* execute only uses the lowest 8 bits of count, which is
	     exactly what we need here.  */
	  emit_insn (gen_cpymem_short (dstp1, dst,
				       convert_to_mode (Pmode, count, 1)));
	}

      emit_label (zerobyte_end_label);
    }
}
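/* Illustration of the overlapping-mvc trick used above for a nonzero
   VAL (register names schematic): the first byte is stored explicitly
   and the MVC then copies with a one byte overlap, propagating VAL
   because MVC proceeds strictly left to right:

       mvi  0(%r1),0x55          # dst[0] = VAL
       mvc  1(255,%r1),0(%r1)    # dst[i] = dst[i-1], i = 1..255

   which fills 256 bytes with a single store and one MVC.  */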
/* Emit code to compare LEN bytes at OP0 with those at OP1,
   and return the result in TARGET.  */

bool
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
  rtx tmp;

  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k will we
     generate inline code.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return false;

  /* As the result of CMPINT is inverted compared to what we need,
     we have to swap the operands.  */
  tmp = op0; op0 = op1; op1 = tmp;

  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
    {
      if (INTVAL (len) > 0)
	{
	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
	  emit_insn (gen_cmpint (target, ccreg));
	}
      else
	emit_move_insn (target, const0_rtx);
    }
  else if (TARGET_MVCLE)
    {
      emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
      emit_insn (gen_cmpint (target, ccreg));
    }
  else
    {
      rtx addr0, addr1, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      addr0 = gen_reg_rtx (Pmode);
      addr1 = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
      emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
      op0 = change_address (op0, VOIDmode, addr0);
      op1 = change_address (op1, VOIDmode, addr1);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;

	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
				   gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
      temp = gen_rtx_SET (pc_rtx, temp);
      emit_jump_insn (temp);

      s390_load_address (addr0,
			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
      s390_load_address (addr1,
			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cmpmem_short (op0, op1,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);

      emit_insn (gen_cmpint (target, ccreg));
    }
  return true;
}
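/* Schematically, the constant-length fast path above for a 16 byte
   comparison emits a single

       clc  0(16,%r3),0(%r2)

   followed by the CMPINT pattern, which materializes the condition
   code as the usual negative/zero/positive memcmp-style result.  The
   operands are swapped beforehand precisely because CMPINT's result
   is inverted relative to what memcmp expects.  */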
/* Emit a conditional jump to LABEL for condition code mask MASK using
   comparison operator COMPARISON.  Return the emitted jump insn.  */

static rtx_insn *
s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
{
  rtx temp;

  gcc_assert (comparison == EQ || comparison == NE);
  gcc_assert (mask > 0 && mask < 15);

  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  temp = gen_rtx_SET (pc_rtx, temp);
  return emit_jump_insn (temp);
}
/* Emit the instructions to implement strlen of STRING and store the
   result in TARGET.  The string has the known ALIGNMENT.  This
   version uses vector instructions and is therefore not appropriate
   for targets prior to z13.  */

void
s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
{
  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
  rtx str_reg = gen_reg_rtx (V16QImode);
  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
  rtx str_idx_reg = gen_reg_rtx (Pmode);
  rtx result_reg = gen_reg_rtx (V16QImode);
  rtx is_aligned_label = gen_label_rtx ();
  rtx into_loop_label = NULL_RTX;
  rtx loop_start_label = gen_label_rtx ();
  rtx temp;
  rtx len = gen_reg_rtx (QImode);
  rtx cond;
  rtx mem;

  s390_load_address (str_addr_base_reg, XEXP (string, 0));
  emit_move_insn (str_idx_reg, const0_rtx);

  if (INTVAL (alignment) < 16)
    {
      /* Check whether the address happens to be aligned properly so
	 jump directly to the aligned loop.  */
      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
					    str_addr_base_reg, GEN_INT (15)),
			       const0_rtx, EQ, NULL_RTX,
			       Pmode, 1, is_aligned_label);

      temp = gen_reg_rtx (Pmode);
      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (temp));
      highest_index_to_load_reg =
	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (highest_index_to_load_reg));
      emit_insn (gen_vllv16qi (str_reg,
		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));

      into_loop_label = gen_label_rtx ();
      s390_emit_jump (into_loop_label, NULL_RTX);
      emit_barrier ();
    }

  emit_label (is_aligned_label);
  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;

  /* Reaching this point we are only performing 16 byte aligned
     loads.  */
  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));

  emit_label (loop_start_label);
  LABEL_NUSES (loop_start_label) = 1;

  /* Load 16 bytes of the string into VR.  */
  mem = gen_rtx_MEM (V16QImode,
		     gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
  set_mem_align (mem, 128);
  emit_move_insn (str_reg, mem);
  if (into_loop_label != NULL_RTX)
    {
      emit_label (into_loop_label);
      LABEL_NUSES (into_loop_label) = 1;
    }

  /* Increment string index by 16 bytes.  */
  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
		str_idx_reg, 1, OPTAB_DIRECT);

  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));

  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
		    REG_BR_PROB,
		    profile_probability::very_likely ().to_reg_br_prob_note ());
  emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));

  /* If the string pointer wasn't aligned we have loaded less than 16
     bytes and the remaining bytes got filled with zeros (by vll).
     Now we have to check whether the resulting index lies within the
     bytes actually part of the string.  */

  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
			    highest_index_to_load_reg);
  s390_load_address (highest_index_to_load_reg,
		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
				   const1_rtx));
  if (TARGET_64BIT)
    emit_insn (gen_movdicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));
  else
    emit_insn (gen_movsicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));

  add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
			profile_probability::very_unlikely ());

  expand_binop (Pmode, add_optab, str_idx_reg,
		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
  /* FIXME: len is already zero extended - so avoid the llgcr emitted
     here.  */
  temp = expand_binop (Pmode, add_optab, str_idx_reg,
		       convert_to_mode (Pmode, len, 1),
		       target, 1, OPTAB_DIRECT);
  if (temp != target)
    emit_move_insn (target, temp);
}
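/* The core of the aligned loop above corresponds to roughly

       vl       %v16,0(%r5,%r1)   # load 16 bytes of the string
       vfenezbs %v17,%v16,%v16    # search for a zero byte, setting CC
       # conditional branch back while no zero byte was found
       vlgvb    %r2,%v17,7        # byte index of the zero (or 16)

   with register names purely illustrative; the vll-based prologue
   handles the potentially unaligned first chunk so the loop itself
   never crosses a page boundary it is not allowed to touch.  */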
/* Expand a movstr-style copy of the zero terminated string at SRC to
   DST using the z13 vector string instructions.  The address of the
   copied zero character is returned in RESULT.  */

void
s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
{
  rtx temp = gen_reg_rtx (Pmode);
  rtx src_addr = XEXP (src, 0);
  rtx dst_addr = XEXP (dst, 0);
  rtx src_addr_reg = gen_reg_rtx (Pmode);
  rtx dst_addr_reg = gen_reg_rtx (Pmode);
  rtx offset = gen_reg_rtx (Pmode);
  rtx vsrc = gen_reg_rtx (V16QImode);
  rtx vpos = gen_reg_rtx (V16QImode);
  rtx loadlen = gen_reg_rtx (SImode);
  rtx gpos_qi = gen_reg_rtx (QImode);
  rtx gpos = gen_reg_rtx (SImode);
  rtx done_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  rtx exit_label = gen_label_rtx ();
  rtx full_label = gen_label_rtx ();

  /* Perform a quick check for string ending on the first up to 16
     bytes and exit early if successful.  */

  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
  /* gpos is the byte index if a zero was found and 16 otherwise.
     So if it is lower than the loaded bytes we have a hit.  */
  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
			   full_label);
  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));

  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
		      1, OPTAB_DIRECT);
  emit_jump (exit_label);

  emit_label (full_label);
  LABEL_NUSES (full_label) = 1;

  /* Calculate `offset' so that src + offset points to the last byte
     before 16 byte alignment.  */

  /* temp = src_addr & 0xf */
  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
		      1, OPTAB_DIRECT);

  /* offset = 0xf - temp */
  emit_move_insn (offset, GEN_INT (15));
  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
		      1, OPTAB_DIRECT);

  /* Store `offset' bytes in the destination string.  The quick check
     has loaded at least `offset' bytes into vsrc.  */

  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));

  /* Advance to the next byte to be loaded.  */
  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
		      1, OPTAB_DIRECT);

  /* Make sure the addresses are single regs which can be used as a
     base.  */
  emit_move_insn (src_addr_reg, src_addr);
  emit_move_insn (dst_addr_reg, dst_addr);

  emit_label (loop_label);
  LABEL_NUSES (loop_label) = 1;

  emit_move_insn (vsrc,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));

  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
		    REG_BR_PROB, profile_probability::very_unlikely ()
		    .to_reg_br_prob_note ());

  emit_move_insn (gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
		  vsrc);

  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
		      offset, 1, OPTAB_DIRECT);

  emit_jump (loop_label);

  /* We are done.  Add the offset of the zero character to the dst_addr
     pointer to get the result.  */

  emit_label (done_label);
  LABEL_NUSES (done_label) = 1;

  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
		      1, OPTAB_DIRECT);

  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));

  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));

  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
		      1, OPTAB_DIRECT);

  emit_label (exit_label);
  LABEL_NUSES (exit_label) = 1;
}
/* Expand conditional increment or decrement using alc/slb instructions.
   Should generate code setting DST to either SRC or SRC + INCREMENT,
   depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
   Returns true if successful, false otherwise.

   That makes it possible to implement some if-constructs without jumps e.g.:
   (borrow = CC0 | CC1 and carry = CC2 | CC3)
   unsigned int a, b, c;
   if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
   if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
   if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
   if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;

   Checks for EQ and NE with a nonzero value need an additional xor e.g.:
   if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
   if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
   if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
   if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */

bool
s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
		   rtx dst, rtx src, rtx increment)
{
  machine_mode cmp_mode;
  machine_mode cc_mode;
  rtx op_res;
  rtx insn;
  rtvec p;
  bool ret;

  if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
      && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = SImode;
  else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = DImode;
  else
    return false;

  /* Try ADD LOGICAL WITH CARRY.  */
  if (increment == const1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == LTU || cmp_code == LEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	  case GTU:
	    cc_mode = CCUmode;
	    break;

	  case GEU:
	    cc_mode = CCL3mode;
	    break;

	  default:
	    return false;
	}

      /* Emit comparison instruction pattern. */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required.  */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit ALC instruction pattern.  */
      op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
			       gen_rtx_REG (cc_mode, CC_REGNUM),
			       const0_rtx);

      if (src != const0_rtx)
	{
	  if (!register_operand (src, GET_MODE (dst)))
	    src = force_reg (GET_MODE (dst), src);

	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
	}

      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }

  /* Try SUBTRACT LOGICAL WITH BORROW.  */
  if (increment == constm1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == GTU || cmp_code == GEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	  case LEU:
	    cc_mode = CCUmode;
	    break;

	  case LTU:
	    cc_mode = CCL3mode;
	    break;

	  default:
	    return false;
	}

      /* Emit comparison instruction pattern. */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required.  */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit SLB instruction pattern.  */
      if (!register_operand (src, GET_MODE (dst)))
	src = force_reg (GET_MODE (dst), src);

      op_res = gen_rtx_MINUS (GET_MODE (dst),
			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
					      gen_rtx_REG (cc_mode, CC_REGNUM),
					      const0_rtx));
      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }

  return false;
}
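/* A branchless sketch of the ADD LOGICAL WITH CARRY path above
   (register names and the zero register are purely illustrative):

       unsigned int a, b, c;
       if (a < b) c++;

   becomes roughly

       clr   %r3,%r2      # CCU compare: sets CC2 (carry) iff b > a
       alcr  %r4,%r5      # c = c + 0 + carry, with %r5 holding zero

   so the condition code produced by the logical compare feeds the
   add-with-carry directly, without any branch.  */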
/* Expand code for the insv template.  Return true if successful.  */

bool
s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
{
  int bitsize = INTVAL (op1);
  int bitpos = INTVAL (op2);
  machine_mode mode = GET_MODE (dest);
  machine_mode smode;
  int smode_bsize, mode_bsize;
  rtx op, clobber;

  if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
    return false;

  /* Generate INSERT IMMEDIATE (IILL et al).  */
  /* (set (ze (reg)) (const_int)).  */
  if (TARGET_ZARCH
      && register_operand (dest, word_mode)
      && (bitpos % 16) == 0
      && (bitsize % 16) == 0
      && const_int_operand (src, VOIDmode))
    {
      HOST_WIDE_INT val = INTVAL (src);
      int regpos = bitpos + bitsize;

      while (regpos > bitpos)
	{
	  machine_mode putmode;
	  int putsize;

	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
	    putmode = SImode;
	  else
	    putmode = HImode;

	  putsize = GET_MODE_BITSIZE (putmode);
	  regpos -= putsize;
	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						GEN_INT (putsize),
						GEN_INT (regpos)),
			  gen_int_mode (val, putmode));
	  val >>= putsize;
	}
      gcc_assert (regpos == bitpos);
      return true;
    }

  smode = smallest_int_mode_for_size (bitsize);
  smode_bsize = GET_MODE_BITSIZE (smode);
  mode_bsize = GET_MODE_BITSIZE (mode);

  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
  if (bitpos == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && MEM_P (dest)
      && (register_operand (src, word_mode)
	  || const_int_operand (src, VOIDmode)))
    {
      /* Emit standard pattern if possible.  */
      if (smode_bsize == bitsize)
	{
	  emit_move_insn (adjust_address (dest, smode, 0),
			  gen_lowpart (smode, src));
	  return true;
	}

      /* (set (ze (mem)) (const_int)).  */
      else if (const_int_operand (src, VOIDmode))
	{
	  int size = bitsize / BITS_PER_UNIT;
	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
					BLKmode,
					UNITS_PER_WORD - size);

	  dest = adjust_address (dest, BLKmode, 0);
	  set_mem_size (dest, size);
	  s390_expand_cpymem (dest, src_mem, GEN_INT (size));
	  return true;
	}

      /* (set (ze (mem)) (reg)).  */
      else if (register_operand (src, word_mode))
	{
	  if (bitsize <= 32)
	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
						  const0_rtx), src);
	  else
	    {
	      /* Emit st,stcmh sequence.  */
	      int stcmh_width = bitsize - 32;
	      int size = stcmh_width / BITS_PER_UNIT;

	      emit_move_insn (adjust_address (dest, SImode, size),
			      gen_lowpart (SImode, src));
	      set_mem_size (dest, size);
	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						    GEN_INT (stcmh_width),
						    const0_rtx),
			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
	    }
	  return true;
	}

      return false;
    }

  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
  if ((bitpos % BITS_PER_UNIT) == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
      && MEM_P (src)
      && (mode == DImode || mode == SImode)
      && register_operand (dest, mode))
    {
      /* Emit a strict_low_part pattern if possible.  */
      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
	{
	  rtx low_dest = gen_lowpart (smode, dest);
	  rtx low_src = gen_lowpart (smode, src);

	  switch (smode)
	    {
	    case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
	    case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
	    case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
	    default: break;
	    }
	}

      /* ??? There are more powerful versions of ICM that are not
	 completely represented in the md file.  */
    }

  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
  if (TARGET_Z10 && (mode == DImode || mode == SImode))
    {
      machine_mode mode_s = GET_MODE (src);

      if (CONSTANT_P (src))
	{
	  /* For constant zero values the representation with AND
	     appears to be folded in more situations than the (set
	     (zero_extract) ...).
	     We only do this when the start and end of the bitfield
	     remain in the same SImode chunk.  That way nihf or nilf
	     can be used.
	     The AND patterns might still generate a risbg for this.  */
	  if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
	    return false;

	  src = force_reg (mode, src);
	}
      else if (mode_s != mode)
	{
	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
	  src = force_reg (mode_s, src);
	  src = gen_lowpart (mode, src);
	}

      op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
      op = gen_rtx_SET (op, src);

      if (!TARGET_ZEC12)
	{
	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
	}
      emit_insn (op);

      return true;
    }

  return false;
}
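/* For the z10 path above, the (set (zero_extract ...)) form maps onto
   ROTATE THEN INSERT SELECTED BITS, schematically

       risbg  %r2,%r3,<start>,<end>,<rotate>

   which replaces only bits <start>..<end> of the destination with the
   correspondingly rotated source bits and leaves the rest intact; the
   operand values are placeholders here.  On zEC12 the CC-preserving
   risbgn variant exists, which is why the CC clobber is only attached
   for !TARGET_ZEC12.  */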
/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
   register that holds VAL of mode MODE shifted by COUNT bits.  */

static inline rtx
s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
{
  val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
			     NULL_RTX, 1, OPTAB_DIRECT);
  return expand_simple_binop (SImode, ASHIFT, val, count,
			      NULL_RTX, 1, OPTAB_DIRECT);
}
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
   the result in TARGET.  */

void
s390_expand_vec_compare (rtx target, enum rtx_code cond,
			 rtx cmp_op1, rtx cmp_op2)
{
  machine_mode mode = GET_MODE (target);
  bool neg_p = false, swap_p = false;
  rtx tmp;

  if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
    {
      cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
      switch (cond)
	{
	  /* NE a != b -> !(a == b) */
	case NE:   cond = EQ; neg_p = true;                break;
	case UNGT:
	  emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
	  return;
	case UNGE:
	  emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
	  return;
	case LE:   cond = GE;               swap_p = true; break;
	  /* UNLE: (a u<= b) -> (b u>= a).  */
	case UNLE:
	  emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
	  return;
	  /* LT: a < b -> b > a */
	case LT:   cond = GT;               swap_p = true; break;
	  /* UNLT: (a u< b) -> (b u> a).  */
	case UNLT:
	  emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
	  return;
	case UNEQ:
	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
	  return;
	case LTGT:
	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
	  return;
	case ORDERED:
	  emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
	  return;
	case UNORDERED:
	  emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
	  return;
	default: break;
	}
    }
  else
    {
      /* Turn x < 0 into x >> (bits per element - 1)  */
      if (cond == LT && cmp_op2 == CONST0_RTX (mode))
	{
	  int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 0, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}
      cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);

      switch (cond)
	{
	  /* NE: a != b -> !(a == b) */
	case NE:  cond = EQ;  neg_p = true;                break;
	  /* GE: a >= b -> !(b > a) */
	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
	  /* GEU: a >= b -> !(b > a) */
	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
	  /* LE: a <= b -> !(a > b) */
	case LE:  cond = GT;  neg_p = true;                break;
	  /* LEU: a <= b -> !(a > b) */
	case LEU: cond = GTU; neg_p = true;                break;
	  /* LT: a < b -> b > a */
	case LT:  cond = GT;                swap_p = true; break;
	  /* LTU: a < b -> b > a */
	case LTU: cond = GTU;               swap_p = true; break;
	default: break;
	}
    }

  if (swap_p)
    {
      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
    }

  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
						  mode,
						  cmp_op1, cmp_op2)));
  if (neg_p)
    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
}
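/* Example of the integer rewriting above (vector register numbers are
   chosen arbitrarily): a V4SImode a >= b is turned into !(b > a),
   i.e. roughly

       vchf  %v0,%v25,%v24     # per element: (b > a) ? -1 : 0
       vno   %v0,%v0,%v0       # bitwise NOT via vector NOR

   since only the "compare high" flavors exist as hardware
   instructions.  */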
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
   elements in CMP1 and CMP2 fulfill the comparison.
   This function is only used to emit patterns for the vx builtins and
   therefore only handles comparison codes required by the
   builtins.  */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
			    rtx cmp1, rtx cmp2, bool all_p)
{
  machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
  rtx tmp_reg = gen_reg_rtx (SImode);
  bool swap_p = false;

  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	  cc_producer_mode = CCVEQmode;
	  break;
	case GE:
	case LT:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GT:
	case LE:
	  cc_producer_mode = CCVIHmode;
	  break;
	case GEU:
	case LTU:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GTU:
	case LEU:
	  cc_producer_mode = CCVIHUmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      scratch_mode = GET_MODE (cmp1);
      /* These codes represent inverted CC interpretations.  Inverting
	 an ALL CC mode results in an ANY CC mode and the other way
	 around.  Invert the all_p flag here to compensate for
	 that.  */
      if (code == NE || code == LE || code == LEU)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
    }
  else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
    {
      bool inv_p = false;

      switch (code)
	{
	case EQ:   cc_producer_mode = CCVEQmode;  break;
	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
	case GT:   cc_producer_mode = CCVFHmode;  break;
	case GE:   cc_producer_mode = CCVFHEmode; break;
	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
	default: gcc_unreachable ();
	}
      scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();

      if (inv_p)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
    }
  else
    gcc_unreachable ();

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }

  emit_insn (gen_rtx_PARALLEL (VOIDmode,
	       gen_rtvec (2, gen_rtx_SET (
			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
			  gen_rtx_CLOBBER (VOIDmode,
					   gen_rtx_SCRATCH (scratch_mode)))));
  emit_move_insn (target, const0_rtx);
  emit_move_insn (tmp_reg, const1_rtx);

  emit_move_insn (target,
		  gen_rtx_IF_THEN_ELSE (SImode,
		    gen_rtx_fmt_ee (code, VOIDmode,
				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
				    const0_rtx),
		    tmp_reg, target));
}
/* Invert the comparison CODE applied to a CC mode.  This is only safe
   if we know whether the result was created by a floating point
   compare or not.  For the CCV modes this is encoded as part of the
   mode.  */

enum rtx_code
s390_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
    return reverse_condition_maybe_unordered (code);
  else if (mode == CCVIALLmode || mode == CCVIANYmode)
    return reverse_condition (code);
  else
    gcc_unreachable ();
}
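/* Example: reversing GT in CCVFALLmode yields UNLE, since the unordered
   case must change sides, while reversing GT in CCVIALLmode simply
   yields LE.  */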
/* Generate a vector comparison expression loading either elements of
   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
   and CMP_OP2.  */

void
s390_expand_vcond (rtx target, rtx then, rtx els,
		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
{
  rtx tmp;
  machine_mode result_mode;
  rtx result_target;

  machine_mode target_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (cmp_op1);
  rtx op = (cond == LT) ? els : then;

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
     for short and byte (x >> 15 and x >> 7 respectively).  */
  if ((cond == LT || cond == GE)
      && target_mode == cmp_mode
      && cmp_op2 == CONST0_RTX (cmp_mode)
      && op == CONST0_RTX (target_mode)
      && s390_vector_mode_supported_p (target_mode)
      && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
    {
      rtx negop = (cond == LT) ? then : els;

      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;

      /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
      if (negop == CONST1_RTX (target_mode))
	{
	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 1, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}

      /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
      else if (all_ones_operand (negop, target_mode))
	{
	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 0, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}
    }

  /* We always use an integral type vector to hold the comparison
     result.  */
  result_mode = related_int_vector_mode (cmp_mode).require ();
  result_target = gen_reg_rtx (result_mode);

  /* We allow vector immediates as comparison operands that
     can be handled by the optimization above but not by the
     following code.  Hence, force them into registers here.  */
  if (!REG_P (cmp_op1))
    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);

  s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);

  /* If the results are supposed to be either -1 or 0 we are done
     since this is what our compare instructions generate anyway.  */
  if (all_ones_operand (then, GET_MODE (then))
      && const0_operand (els, GET_MODE (els)))
    {
      emit_move_insn (target, gen_rtx_SUBREG (target_mode,
					      result_target, 0));
      return;
    }

  /* Otherwise we will do a vsel afterwards.  */
  /* This gets triggered e.g.
     with gcc.c-torture/compile/pr53410-1.c */
  if (!REG_P (then))
    then = force_reg (target_mode, then);

  if (!REG_P (els))
    els = force_reg (target_mode, els);

  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
			result_target,
			CONST0_RTX (result_mode));

  /* We compared the result against zero above so we have to swap then
     and els here.  */
  tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);

  gcc_assert (target_mode == GET_MODE (then));
  emit_insn (gen_rtx_SET (target, tmp));
}
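/* Illustration (not emitted verbatim): for V4SImode the shortcut above
   rewrites

       x < 0 ? -1 : 0   as   (signed) x >> 31
       x < 0 ?  1 : 0   as   (unsigned) x >> 31

   so no vector compare is emitted at all; only requests that do not
   match this pattern fall through to the compare followed by vsel.  */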
/* Emit the RTX necessary to initialize the vector TARGET with values
   in VALS.  */

void
s390_expand_vec_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true, all_regs = true, all_const_int = true;
  rtx x;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);

      if (!CONST_INT_P (x))
	all_const_int = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;

      if (!REG_P (x))
	all_regs = false;
    }

  /* Use vector gen mask or vector gen byte mask if possible.  */
  if (all_same && all_const_int)
    {
      rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      if (XVECEXP (vals, 0, 0) == const0_rtx
	  || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
	  || s390_bytemask_vector_p (vec, NULL))
	{
	  emit_insn (gen_rtx_SET (target, vec));
	  return;
	}
    }

  /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
  if (all_same)
    {
      rtx elem = XVECEXP (vals, 0, 0);

      /* vec_splats accepts general_operand as source.  */
      if (!general_operand (elem, GET_MODE (elem)))
	elem = force_reg (inner_mode, elem);

      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
      return;
    }

  if (all_regs
      && REG_P (target)
      && n_elts == 2
      && GET_MODE_SIZE (inner_mode) == 8)
    {
      /* Use vector load pair.  */
      emit_insn (gen_rtx_SET (target,
			      gen_rtx_VEC_CONCAT (mode,
						  XVECEXP (vals, 0, 0),
						  XVECEXP (vals, 0, 1))));
      return;
    }

  /* Use vector load logical element and zero.  */
  if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
    {
      bool found = true;

      x = XVECEXP (vals, 0, 0);
      if (memory_operand (x, inner_mode))
	{
	  for (i = 1; i < n_elts; ++i)
	    found = found && XVECEXP (vals, 0, i) == const0_rtx;

	  if (found)
	    {
	      machine_mode half_mode = (inner_mode == SFmode
					? V2SFmode : V2SImode);
	      emit_insn (gen_rtx_SET (target,
			      gen_rtx_VEC_CONCAT (mode,
						  gen_rtx_VEC_CONCAT (half_mode,
								      x,
								      const0_rtx),
						  gen_rtx_VEC_CONCAT (half_mode,
								      const0_rtx,
								      const0_rtx))));
	      return;
	    }
	}
    }

  /* We are about to set the vector elements one by one.  Zero out the
     full register first in order to help the data flow framework to
     detect it as full VR set.  */
  emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));

  /* Unfortunately the vec_init expander is not allowed to fail.  So
     we have to implement the fallback ourselves.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx elem = XVECEXP (vals, 0, i);
      if (!general_operand (elem, GET_MODE (elem)))
	elem = force_reg (inner_mode, elem);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_UNSPEC (mode,
					      gen_rtvec (3, elem,
							 GEN_INT (i), target),
					      UNSPEC_VEC_SET)));
    }
}
/* Emit a vector constant that contains 1s in each element's sign bit position
   and 0s in other positions.  MODE is the desired constant's mode.  */

rtx
s390_build_signbit_mask (machine_mode mode)
{
  if (mode == TFmode && TARGET_VXE)
    {
      wide_int mask_val = wi::set_bit_in_zero (127, 128);
      rtx mask = immed_wide_int_const (mask_val, TImode);
      return gen_lowpart (TFmode, mask);
    }

  /* Generate the integral element mask value.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);

  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the integral
     value to the desired mode.  */
  machine_mode int_mode = related_int_vector_mode (mode).require ();
  rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
  mask = gen_lowpart (inner_mode, mask);

  /* Emit the vector mask rtx by replicating the element mask rtx.  */
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  for (int i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = mask;
  return gen_rtx_CONST_VECTOR (mode, v);
}
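/* Example: for V2DFmode the function returns the V2DF reinterpretation
   of the integral vector { 0x8000000000000000, 0x8000000000000000 },
   i.e. only the sign bit of each double is set.  */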
/* Structure to hold the initial parameters for a compare_and_swap operation
   in HImode and QImode.  */

struct alignment_context
{
  rtx memsi;	  /* SI aligned memory location.  */
  rtx shift;	  /* Bit offset with regard to lsb.  */
  rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
  rtx modemaski;  /* ~modemask */
  bool aligned;	  /* True if memory is aligned, false else.  */
};

/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
   structure AC for transparent simplifying, if the memory alignment is known
   to be at least 32bit.  MEM is the memory location for the actual operation
   and MODE its mode.  */

static void
init_alignment_context (struct alignment_context *ac, rtx mem,
			machine_mode mode)
{
  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));

  if (ac->aligned)
    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
  else
    {
      /* Alignment is unknown.  */
      rtx byteoffset, addr, align;

      /* Force the address into a register.  */
      addr = force_reg (Pmode, XEXP (mem, 0));

      /* Align it to SImode.  */
      align = expand_simple_binop (Pmode, AND, addr,
				   GEN_INT (-GET_MODE_SIZE (SImode)),
				   NULL_RTX, 1, OPTAB_DIRECT);
      /* Generate MEM.  */
      ac->memsi = gen_rtx_MEM (SImode, align);
      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));

      /* Calculate shiftcount.  */
      byteoffset = expand_simple_binop (Pmode, AND, addr,
					GEN_INT (GET_MODE_SIZE (SImode) - 1),
					NULL_RTX, 1, OPTAB_DIRECT);
      /* As we already have some offset, evaluate the remaining distance.  */
      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
				       NULL_RTX, 1, OPTAB_DIRECT);
    }

  /* Shift is the byte count, but we need the bitcount.  */
  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
				   NULL_RTX, 1, OPTAB_DIRECT);

  /* Calculate masks.  */
  ac->modemask = expand_simple_binop (SImode, ASHIFT,
				      GEN_INT (GET_MODE_MASK (mode)),
				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
				      NULL_RTX, 1);
}
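/* Worked example (assuming an unaligned QImode MEM at address A):

       memsi    = *(SImode *) (A & -4)
       shift    = ((4 - 1) - (A & 3)) * 8
       modemask = 0xff << shift

   so for A % 4 == 1 the byte occupies bits 16..23 of the containing
   word, counted from the least significant bit.  */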
/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
   perform the merge in SEQ2.  */

static rtx
s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
		    machine_mode mode, rtx val, rtx ins)
{
  rtx tmp;

  /* First try a single insv into a copy of VAL.  */
  start_sequence ();
  tmp = copy_to_mode_reg (SImode, val);
  if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
			ac->shift, ins))
    {
      *seq1 = NULL;
      *seq2 = get_insns ();
      end_sequence ();
      return tmp;
    }
  end_sequence ();

  /* Failed to use insv.  Generate a two part shift and mask.  */
  start_sequence ();
  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
  *seq1 = get_insns ();
  end_sequence ();

  start_sequence ();
  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
  *seq2 = get_insns ();
  end_sequence ();

  return tmp;
}
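/* Sketch of the two outcomes above: if s390_expand_insv succeeds (e.g.
   via a risbg-style rotate-and-insert on z10 and later), SEQ2 holds a
   single insert of INS into a copy of VAL; otherwise SEQ1 shifts and
   masks INS into position once, outside any loop, and SEQ2 merely ORs
   the two parts together.  */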
/* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
   the memory location, CMP the old value to compare MEM with and NEW_RTX the
   value to set if CMP == MEM.  */

static void
s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
		    rtx cmp, rtx new_rtx, bool is_weak)
{
  struct alignment_context ac;
  rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
  rtx res = gen_reg_rtx (SImode);
  rtx_code_label *csloop = NULL, *csend = NULL;

  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Load full word.  Subsequent loads are performed by CS.  */
  val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
			     NULL_RTX, 1, OPTAB_DIRECT);

  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
     possible, we try to use insv to make this happen efficiently.  If
     that fails we'll generate code both inside and outside the loop.  */
  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);

  if (seq0)
    emit_insn (seq0);
  if (seq1)
    emit_insn (seq1);

  /* Start CS loop.  */
  if (!is_weak)
    {
      /* Begin assuming success.  */
      emit_move_insn (btarget, const1_rtx);

      csloop = gen_label_rtx ();
      csend = gen_label_rtx ();
      emit_label (csloop);
    }

  /* val = "<mem>00..0<mem>"
   * cmp = "00..0<cmp>00..0"
   * new = "00..0<new>00..0"
   */

  emit_insn (seq2);
  emit_insn (seq3);

  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
  if (is_weak)
    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
  else
    {
      rtx tmp;

      /* Jump to end if we're done (likely?).  */
      s390_emit_jump (csend, cc);

      /* Check for changes outside mode, and loop internal if so.
	 Arrange the moves so that the compare is adjacent to the
	 branch so that we can generate CRJ.  */
      tmp = copy_to_reg (val);
      force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
			  1, OPTAB_DIRECT);
      cc = s390_emit_compare (NE, val, tmp);
      s390_emit_jump (csloop, cc);

      /* Failed.  */
      emit_move_insn (btarget, const0_rtx);
      emit_label (csend);
    }

  /* Return the correct part of the bitfield.  */
  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
					      NULL_RTX, 1, OPTAB_DIRECT), 1);
}
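/* Rough shape of the strong-CS loop emitted above (schematic only, not
   the literal output):

       <load + mask the containing word into VAL>
   .Lloop:
       <insert the cmp/new bits into copies of VAL>
       cs   <res>,<newv>,<word>      ; full-word compare-and-swap
       je   .Lend                    ; swapped -> success
       <branch back if only bits outside MODE changed>
   .Lend:
       <shift RES right to extract the HImode/QImode result>  */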
/* Variant of s390_expand_cs for SI, DI and TI modes.  */

static void
s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
		     rtx cmp, rtx new_rtx, bool is_weak)
{
  rtx output = vtarget;
  rtx_code_label *skip_cs_label = NULL;
  bool do_const_opt = false;

  if (!register_operand (output, mode))
    output = gen_reg_rtx (mode);

  /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
     with the constant first and skip the compare_and_swap because it's very
     expensive and likely to fail anyway.
     Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
     cause spurious failures in that case.
     Note 2: It may be useful to do this also for non-constant INPUT.
     Note 3: Currently only targets with "load on condition" are supported
     (z196 and newer).  */

  if (TARGET_Z196
      && (mode == SImode || mode == DImode))
    do_const_opt = (is_weak && CONST_INT_P (cmp));

  if (do_const_opt)
    {
      rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);

      skip_cs_label = gen_label_rtx ();
      emit_move_insn (btarget, const0_rtx);
      if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
	{
	  rtvec lt = rtvec_alloc (2);

	  /* Load-and-test + conditional jump.  */
	  RTVEC_ELT (lt, 0)
	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
	}
      else
	{
	  emit_move_insn (output, mem);
	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
	}
      s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
      add_reg_br_prob_note (get_last_insn (),
			    profile_probability::very_unlikely ());
      /* If the jump is not taken, OUTPUT is the expected value.  */
      cmp = output;
      /* Reload newval to a register manually, *after* the compare and jump
	 above.  Otherwise Reload might place it before the jump.  */
    }
  else
    cmp = force_reg (mode, cmp);
  new_rtx = force_reg (mode, new_rtx);
  s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
			      (do_const_opt) ? CCZmode : CCZ1mode);
  if (skip_cs_label != NULL)
    emit_label (skip_cs_label);

  /* We deliberately accept non-register operands in the predicate
     to ensure the write back to the output operand happens *before*
     the store-flags code below.  This makes it easier for combine
     to merge the store-flags code with a potential test-and-branch
     pattern following (immediately!) afterwards.  */
  if (output != vtarget)
    emit_move_insn (vtarget, output);

  if (do_const_opt)
    {
      rtx cc, cond, ite;

      /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
	 btarget has already been initialized with 0 above.  */
      cc = gen_rtx_REG (CCZmode, CC_REGNUM);
      cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
      ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
      emit_insn (gen_rtx_SET (btarget, ite));
    }
  else
    {
      rtx cc, cond;

      cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
      cond = gen_rtx_EQ (SImode, cc, const0_rtx);
      emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
    }
}
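/* Sketch of the DO_CONST_OPT fast path above for a weak SImode CS
   against the constant 0 on z196 or later (schematic):

       lt    %r1,<mem>        ; load-and-test, sets CC
       jne   .Lskip           ; memory differs -> fail without a CS
       cs    %r1,%rnew,<mem>  ; pay for the CS only when it can succeed
   .Lskip:
       <btarget set from CC via a conditional move>  */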
/* Expand an atomic compare and swap operation.  MEM is the memory location,
   CMP the old value to compare MEM with and NEW_RTX the value to set if
   CMP == MEM.  */

void
s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
		rtx cmp, rtx new_rtx, bool is_weak)
{
  switch (mode)
    {
    case E_TImode:
    case E_DImode:
    case E_SImode:
      s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
      break;
    case E_HImode:
    case E_QImode:
      s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
   The memory location MEM is set to INPUT.  OUTPUT is set to the previous
   value of MEM.  */

void
s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *csloop;

  if (TARGET_Z196
      && (mode == DImode || mode == SImode)
      && CONST_INT_P (input) && INTVAL (input) == 0)
    {
      emit_move_insn (output, const0_rtx);
      if (mode == DImode)
	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
      else
	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
      return;
    }

  input = force_reg (mode, input);
  emit_move_insn (output, mem);
  csloop = gen_label_rtx ();
  emit_label (csloop);
  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
						      input, CCZ1mode));
}
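/* Note on the special case above: exchanging the constant 0 into MEM is
   expressed as an atomic fetch-and-AND with 0, which can be implemented
   with a single interlocked-access instruction on z196 and later instead
   of a compare-and-swap loop.  */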
/* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
   and VAL the value to play with.  If AFTER is true then store the value
   MEM holds after the operation, if AFTER is false then store the value MEM
   holds before the operation.  If TARGET is zero then discard that value, else
   store it to TARGET.  */

void
s390_expand_atomic (machine_mode mode, enum rtx_code code,
		    rtx target, rtx mem, rtx val, bool after)
{
  struct alignment_context ac;
  rtx cmp;
  rtx new_rtx = gen_reg_rtx (SImode);
  rtx orig = gen_reg_rtx (SImode);
  rtx_code_label *csloop = gen_label_rtx ();

  gcc_assert (!target || register_operand (target, VOIDmode));
  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Shift val to the correct bit positions.
     Preserve "icm", but prevent "ex icm".  */
  if (!(ac.aligned && code == SET && MEM_P (val)))
    val = s390_expand_mask_and_shift (val, mode, ac.shift);

  /* Further preparation insns.  */
  if (code == PLUS || code == MINUS)
    emit_move_insn (orig, val);
  else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
    val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
			       NULL_RTX, 1, OPTAB_DIRECT);

  /* Load full word.  Subsequent loads are performed by CS.  */
  cmp = force_reg (SImode, ac.memsi);

  /* Start CS loop.  */
  emit_label (csloop);
  emit_move_insn (new_rtx, cmp);

  /* Patch new with val at correct position.  */
  switch (code)
    {
    case PLUS:
    case MINUS:
      val = expand_simple_binop (SImode, code, new_rtx, orig,
				 NULL_RTX, 1, OPTAB_DIRECT);
      val = expand_simple_binop (SImode, AND, val, ac.modemask,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* FALLTHRU */
    case SET:
      if (ac.aligned && MEM_P (val))
	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
			 0, 0, SImode, val, false);
      else
	{
	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
					 NULL_RTX, 1, OPTAB_DIRECT);
	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
					 NULL_RTX, 1, OPTAB_DIRECT);
	}
      break;
    case AND:
    case IOR:
    case XOR:
      new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
				     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    case MULT: /* NAND */
      new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
				     NULL_RTX, 1, OPTAB_DIRECT);
      new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
				     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    default:
      gcc_unreachable ();
    }

  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
						      ac.memsi, cmp, new_rtx,
						      CCZ1mode));

  /* Return the correct part of the bitfield.  */
  if (target)
    convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
					       after ? new_rtx : cmp, ac.shift,
					       NULL_RTX, 1, OPTAB_DIRECT), 1);
}
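/* Illustration of the MULT (NAND) case above: NAND is not available on
   the containing word directly, so each iteration computes

       new = (new & val) ^ modemask;

   and because VAL was pre-XORed with MODEMASKI, the bits outside MODE
   are ANDed with 1 and XORed with 0, i.e. they pass through unchanged
   while the bits inside MODE get the NAND result.  */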
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

static void
s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs ("\t.quad\t", file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
}
/* Return the proper mode for REGNO being represented in the dwarf
   unwind table.  */

static machine_mode
s390_dwarf_frame_reg_mode (int regno)
{
  machine_mode save_mode = default_dwarf_frame_reg_mode (regno);

  /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
  if (GENERAL_REGNO_P (regno))
    save_mode = Pmode;

  /* The rightmost 64 bits of vector registers are call-clobbered.  */
  if (GET_MODE_SIZE (save_mode) > 8)
    save_mode = DImode;

  return save_mode;
}
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
s390_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
  if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
  if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
  if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";

  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
s390_delegitimize_address (rtx orig_x)
{
  rtx x, y;

  orig_x = delegitimize_mem_from_attrs (orig_x);
  x = orig_x;

  /* Extract the symbol ref from:
     (plus:SI (reg:SI 12 %r12)
	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
				    UNSPEC_GOTOFF/PLTOFF)))
     and
     (plus:SI (reg:SI 12 %r12)
	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
					     UNSPEC_GOTOFF/PLTOFF)
				 (const_int 4 [0x4]))))  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
      && GET_CODE (XEXP (x, 1)) == CONST)
    {
      HOST_WIDE_INT offset = 0;

      /* The const operand.  */
      y = XEXP (XEXP (x, 1), 0);

      if (GET_CODE (y) == PLUS
	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
	{
	  offset = INTVAL (XEXP (y, 1));
	  y = XEXP (y, 0);
	}

      if (GET_CODE (y) == UNSPEC
	  && (XINT (y, 1) == UNSPEC_GOTOFF
	      || XINT (y, 1) == UNSPEC_PLTOFF))
	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
    }

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    {
      y = XEXP (XEXP (x, 1), 0);
      if (GET_CODE (y) == UNSPEC
	  && XINT (y, 1) == UNSPEC_GOT)
	y = XVECEXP (y, 0, 0);
      else
	return orig_x;
    }
  else if (GET_CODE (x) == CONST)
    {
      /* Extract the symbol ref from:
	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
				       UNSPEC_PLT/GOTENT)))  */

      y = XEXP (x, 0);
      if (GET_CODE (y) == UNSPEC
	  && (XINT (y, 1) == UNSPEC_GOTENT
	      || XINT (y, 1) == UNSPEC_PLT31))
	y = XVECEXP (y, 0, 0);
      else
	return orig_x;
    }
  else
    return orig_x;

  if (GET_MODE (orig_x) != Pmode)
    {
      if (GET_MODE (orig_x) == BLKmode)
	return orig_x;
      y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
      if (y == NULL_RTX)
	return orig_x;
    }
  return y;
}
/* Output operand OP to stdio stream FILE.
   OP is an address (register + offset) which is not used to address data;
   instead the rightmost bits are interpreted as the value.  */

static void
print_addrstyle_operand (FILE *file, rtx op)
{
  HOST_WIDE_INT offset;
  rtx base;

  /* Extract base register and offset.  */
  if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
    gcc_unreachable ();

  /* Sanity check.  */
  if (base)
    {
      gcc_assert (GET_CODE (base) == REG);
      gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
      gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
    }

  /* Offsets are constricted to twelve bits.  */
  fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
  if (base)
    fprintf (file, "(%s)", reg_names[REGNO (base)]);
}
/* Print the shift count operand OP to FILE.
   OP is an address-style operand in a form which
   s390_valid_shift_count permits.  Subregs and no-op
   and-masking of the operand are stripped.  */

static void
print_shift_count_operand (FILE *file, rtx op)
{
  /* No checking of the and mask required here.  */
  if (!s390_valid_shift_count (op, 0))
    gcc_unreachable ();

  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) == AND)
    op = XEXP (op, 0);

  print_addrstyle_operand (file, op);
}
/* Assigns the number of NOP halfwords to be emitted before and after the
   function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
   If hotpatching is disabled for the function, the values are set to zero.  */

static void
s390_function_num_hotpatch_hw (tree decl,
			       int *hw_before,
			       int *hw_after)
{
  tree attr;

  attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));

  /* Handle the arguments of the hotpatch attribute.  The values
     specified via attribute might override the cmdline argument
     values.  */
  if (attr)
    {
      tree args = TREE_VALUE (attr);

      *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
      *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
    }
  else
    {
      /* Use the values specified by the cmdline arguments.  */
      *hw_before = s390_hotpatch_hw_before_label;
      *hw_after = s390_hotpatch_hw_after_label;
    }
}
/* Write the current .machine and .machinemode specification to the assembler
   file.  */

#ifdef HAVE_AS_MACHINE_MACHINEMODE
static void
s390_asm_output_machine_for_arch (FILE *asm_out_file)
{
  fprintf (asm_out_file, "\t.machinemode %s\n",
	   (TARGET_ZARCH) ? "zarch" : "esa");
  fprintf (asm_out_file, "\t.machine \"%s",
	   processor_table[s390_arch].binutils_name);
  if (S390_USE_ARCHITECTURE_MODIFIERS)
    {
      int cpu_flags;

      cpu_flags = processor_flags_table[(int) s390_arch];
      if (TARGET_HTM && !(cpu_flags & PF_TX))
	fprintf (asm_out_file, "+htm");
      else if (!TARGET_HTM && (cpu_flags & PF_TX))
	fprintf (asm_out_file, "+nohtm");
      if (TARGET_VX && !(cpu_flags & PF_VX))
	fprintf (asm_out_file, "+vx");
      else if (!TARGET_VX && (cpu_flags & PF_VX))
	fprintf (asm_out_file, "+novx");
    }
  fprintf (asm_out_file, "\"\n");
}
/* Write an extra function header before the very start of the function.  */

void
s390_asm_output_function_prefix (FILE *asm_out_file,
				 const char *fnname ATTRIBUTE_UNUSED)
{
  if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
    return;
  /* Since only the function specific options are saved but not the indications
     which options are set, it's too much work here to figure out which options
     have actually changed.  Thus, generate .machine and .machinemode whenever a
     function has the target attribute or pragma.  */
  fprintf (asm_out_file, "\t.machinemode push\n");
  fprintf (asm_out_file, "\t.machine push\n");
  s390_asm_output_machine_for_arch (asm_out_file);
}
/* Write an extra function footer after the very end of the function.  */

void
s390_asm_declare_function_size (FILE *asm_out_file,
				const char *fnname, tree decl)
{
  if (!flag_inhibit_size_directive)
    ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
  if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
    return;
  fprintf (asm_out_file, "\t.machine pop\n");
  fprintf (asm_out_file, "\t.machinemode pop\n");
}
#endif
/* Write the extra assembler code needed to declare a function properly.  */

void
s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  int hw_before, hw_after;

  s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
  if (hw_before > 0)
    {
      unsigned int function_alignment;
      int i;

      /* Add a trampoline code area before the function label and initialize it
	 with two-byte nop instructions.  This area can be overwritten with code
	 that jumps to a patched version of the function.  */
      asm_fprintf (asm_out_file, "\tnopr\t%%r0"
		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
		   hw_before);
      for (i = 1; i < hw_before; i++)
	fputs ("\tnopr\t%r0\n", asm_out_file);

      /* Note:  The function label must be aligned so that (a) the bytes of the
	 following nop do not cross a cacheline boundary, and (b) a jump address
	 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
	 stored directly before the label without crossing a cacheline
	 boundary.  All this is necessary to make sure the trampoline code can
	 be changed atomically.
	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
	 if there are NOPs before the function label, the alignment is placed
	 before them.  So it is necessary to duplicate the alignment after the
	 NOPs.  */
      function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
      if (! DECL_USER_ALIGN (decl))
	function_alignment
	  = MAX (function_alignment,
		 (unsigned int) align_functions.levels[0].get_value ());
      fputs ("\t# alignment for hotpatch\n", asm_out_file);
      ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
    }

  if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
    {
      asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
      asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
      asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
      asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
      asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
      asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
		   s390_warn_framesize);
      asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
      asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
      asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
      asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
      asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
      asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
		   TARGET_PACKED_STACK);
      asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
      asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
      asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
      asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
		   s390_warn_dynamicstack_p);
    }

  ASM_OUTPUT_LABEL (asm_out_file, fname);
  if (hw_after > 0)
    asm_fprintf (asm_out_file,
		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
		 hw_after);
}
/* Output machine-dependent UNSPECs occurring in address constant X
   in assembler syntax to stdio stream FILE.  Returns true if the
   constant X could be recognized, false otherwise.  */

static bool
s390_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
    switch (XINT (x, 1))
      {
      case UNSPEC_GOTENT:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@GOTENT");
	return true;
      case UNSPEC_GOT:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@GOT");
	return true;
      case UNSPEC_GOTOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@GOTOFF");
	return true;
      case UNSPEC_PLT31:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@PLT");
	return true;
      case UNSPEC_PLTOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@PLTOFF");
	return true;
      case UNSPEC_TLSGD:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@TLSGD");
	return true;
      case UNSPEC_TLSLDM:
	assemble_name (file, get_some_local_dynamic_name ());
	fprintf (file, "@TLSLDM");
	return true;
      case UNSPEC_DTPOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@DTPOFF");
	return true;
      case UNSPEC_NTPOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@NTPOFF");
	return true;
      case UNSPEC_GOTNTPOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@GOTNTPOFF");
	return true;
      case UNSPEC_INDNTPOFF:
	output_addr_const (file, XVECEXP (x, 0, 0));
	fprintf (file, "@INDNTPOFF");
	return true;
      }

  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
    switch (XINT (x, 1))
      {
      case UNSPEC_POOL_OFFSET:
	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
	output_addr_const (file, x);
	return true;
      }

  return false;
}
/* Output address operand ADDR in assembler syntax to
   stdio stream FILE.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct s390_address ad;
  memset (&ad, 0, sizeof (s390_address));

  if (s390_loadrelative_operand_p (addr, NULL, NULL))
    {
      if (!TARGET_Z10)
	{
	  output_operand_lossage ("symbolic memory references are "
				  "only supported on z10 or later");
	  return;
	}
      output_addr_const (file, addr);
      return;
    }

  if (!s390_decompose_address (addr, &ad)
      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
    output_operand_lossage ("cannot decompose address");

  if (ad.disp)
    output_addr_const (file, ad.disp);
  else
    fprintf (file, "0");

  if (ad.base && ad.indx)
    fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
	     reg_names[REGNO (ad.base)]);
  else if (ad.base)
    fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
}
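/* Examples of the forms printed above: "0(%r1)" for a base register
   without displacement, "8(%r2,%r3)" for index %r2 plus base %r3 with
   displacement 8, and a bare symbolic address on z10 or later for
   load-relative operands.  */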
/* Output operand X in assembler syntax to stdio stream FILE.
   CODE specifies the format flag.  The following format flags
   are recognized:

   'A': On z14 or higher: If operand is a mem print the alignment
	hint usable with vl/vst prefixed by a comma.
   'C': print opcode suffix for branch condition.
   'D': print opcode suffix for inverse branch condition.
   'E': print opcode suffix for branch on index instruction.
   'G': print the size of the operand in bytes.
   'J': print tls_load/tls_gdcall/tls_ldcall suffix
   'K': print @PLT suffix for call targets and load address values.
   'M': print the second word of a TImode operand.
   'N': print the second word of a DImode operand.
   'O': print only the displacement of a memory reference or address.
   'R': print only the base register of a memory reference or address.
   'S': print S-type memory reference (base+displacement).
   'Y': print address style operand without index (e.g. shift count or setmem
	operand).

   'b': print integer X as if it's an unsigned byte.
   'c': print integer X as if it's a signed byte.
   'e': "end" contiguous bitmask X in either DImode or vector inner mode.
   'f': "end" contiguous bitmask X in SImode.
   'h': print integer X as if it's a signed halfword.
   'i': print the first nonzero HImode part of X.
   'j': print the first HImode part unequal to -1 of X.
   'k': print the first nonzero SImode part of X.
   'm': print the first SImode part unequal to -1 of X.
   'o': print integer X as if it's an unsigned 32bit word.
   's': "start" of contiguous bitmask X in either DImode or vector inner mode.
   't': CONST_INT: "start" of contiguous bitmask X in SImode.
	CONST_VECTOR: Generate a bitmask for vgbm instruction.
   'x': print integer X as if it's an unsigned halfword.
   'v': print register number as vector register (v1 instead of f1).
   'V': print the second word of a TFmode operand as vector register.  */

void
print_operand (FILE *file, rtx x, int code)
{
  HOST_WIDE_INT ival;

  switch (code)
    {
    case 'A':
      if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
	{
	  if (MEM_ALIGN (x) >= 128)
	    fprintf (file, ",4");
	  else if (MEM_ALIGN (x) == 64)
	    fprintf (file, ",3");
	}
      return;

    case 'C':
      fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
      return;

    case 'D':
      fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
      return;

    case 'E':
      if (GET_CODE (x) == LE)
	fprintf (file, "l");
      else if (GET_CODE (x) == GT)
	fprintf (file, "h");
      else
	output_operand_lossage ("invalid comparison operator "
				"for 'E' output modifier");
      return;

    case 'J':
      if (GET_CODE (x) == SYMBOL_REF)
	{
	  fprintf (file, "%s", ":tls_load:");
	  output_addr_const (file, x);
	}
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
	{
	  fprintf (file, "%s", ":tls_gdcall:");
	  output_addr_const (file, XVECEXP (x, 0, 0));
	}
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
	{
	  fprintf (file, "%s", ":tls_ldcall:");
	  const char *name = get_some_local_dynamic_name ();
	  assemble_name (file, name);
	}
      else
	output_operand_lossage ("invalid reference for 'J' output modifier");
      return;

    case 'G':
      fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
      return;

    case 'O':
      {
	struct s390_address ad;
	int ret;

	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);

	if (!ret
	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
	    || ad.indx)
	  {
	    output_operand_lossage ("invalid address for 'O' output modifier");
	    return;
	  }

	if (ad.disp)
	  output_addr_const (file, ad.disp);
	else
	  fprintf (file, "0");
      }
      return;

    case 'R':
      {
	struct s390_address ad;
	int ret;

	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);

	if (!ret
	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
	    || ad.indx)
	  {
	    output_operand_lossage ("invalid address for 'R' output modifier");
	    return;
	  }

	if (ad.base)
	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
	else
	  fprintf (file, "0");
      }
      return;

    case 'S':
      {
	struct s390_address ad;
	int ret;

	if (!MEM_P (x))
	  {
	    output_operand_lossage ("memory reference expected for "
				    "'S' output modifier");
	    return;
	  }
	ret = s390_decompose_address (XEXP (x, 0), &ad);

	if (!ret
	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
	    || ad.indx)
	  {
	    output_operand_lossage ("invalid address for 'S' output modifier");
	    return;
	  }

	if (ad.disp)
	  output_addr_const (file, ad.disp);
	else
	  fprintf (file, "0");

	if (ad.base)
	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
      }
      return;

    case 'N':
      if (GET_CODE (x) == REG)
	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
      else if (GET_CODE (x) == MEM)
	x = change_address (x, VOIDmode,
			    plus_constant (Pmode, XEXP (x, 0), 4));
      else
	output_operand_lossage ("register or memory expression expected "
				"for 'N' output modifier");
      break;

    case 'M':
      if (GET_CODE (x) == REG)
	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
      else if (GET_CODE (x) == MEM)
	x = change_address (x, VOIDmode,
			    plus_constant (Pmode, XEXP (x, 0), 8));
      else
	output_operand_lossage ("register or memory expression expected "
				"for 'M' output modifier");
      break;

    case 'Y':
      print_shift_count_operand (file, x);
      return;

    case 'K':
      /* Append @PLT to both local and non-local symbols in order to support
	 Linux Kernel livepatching: patches contain individual functions and
	 are loaded further than 2G away from vmlinux, and therefore they must
	 call even static functions via PLT.  ld will optimize @PLT away for
	 normal code, and keep it for patches.

	 Do not indiscriminately add @PLT in 31-bit mode due to the %r12
	 restriction, use UNSPEC_PLT31 instead.

	 @PLT only makes sense for functions, data is taken care of by
	 -mno-pic-data-is-text-relative.

	 Adding @PLT interferes with handling of weak symbols in non-PIC code,
	 since their addresses are loaded with larl, which then always produces
	 a non-NULL result, so skip them here as well.  */
      if (TARGET_64BIT
	  && GET_CODE (x) == SYMBOL_REF
	  && SYMBOL_REF_FUNCTION_P (x)
	  && !(SYMBOL_REF_WEAK (x) && !flag_pic))
	fprintf (file, "@PLT");
      return;
    }

  switch (GET_CODE (x))
    {
    case REG:
      /* Print FP regs as fx instead of vx when they are accessed
	 through non-vector mode.  */
      if ((code == 'v' || code == 'V')
	  || VECTOR_NOFP_REG_P (x)
	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
	  || (VECTOR_REG_P (x)
	      && (GET_MODE_SIZE (GET_MODE (x)) /
		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
	fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
      else
	fprintf (file, "%s", reg_names[REGNO (x)]);
      break;

    case MEM:
      output_address (GET_MODE (x), XEXP (x, 0));
      break;

    case CONST:
    case CODE_LABEL:
    case LABEL_REF:
    case SYMBOL_REF:
      output_addr_const (file, x);
      break;

    case CONST_INT:
      ival = INTVAL (x);
      switch (code)
	{
	case 0:
	  break;
	case 'b':
	  ival &= 0xff;
	  break;
	case 'c':
	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
	  break;
	case 'x':
	  ival &= 0xffff;
	  break;
	case 'h':
	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
	  break;
	case 'i':
	  ival = s390_extract_part (x, HImode, 0);
	  break;
	case 'j':
	  ival = s390_extract_part (x, HImode, -1);
	  break;
	case 'k':
	  ival = s390_extract_part (x, SImode, 0);
	  break;
	case 'm':
	  ival = s390_extract_part (x, SImode, -1);
	  break;
	case 'o':
	  ival &= 0xffffffff;
	  break;
	case 'e': case 'f':
	case 's': case 't':
	  {
	    int start, end;
	    int len;
	    bool ok;

	    len = (code == 's' || code == 'e' ? 64 : 32);
	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
	    gcc_assert (ok);
	    if (code == 's' || code == 't')
	      ival = start;
	    else
	      ival = end;
	  }
	  break;
	default:
	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
	}
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
      break;

    case CONST_WIDE_INT:
      if (code == 'b')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
      else if (code == 'x')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
      else if (code == 'h')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
      else
	{
	  if (code == 0)
	    output_operand_lossage ("invalid constant - try using "
				    "an output modifier");
	  else
	    output_operand_lossage ("invalid constant for output modifier '%c'",
				    code);
	}
      break;

    case CONST_VECTOR:
      switch (code)
	{
	case 'h':
	  gcc_assert (const_vec_duplicate_p (x));
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
	  break;
	case 'e':
	case 's':
	  {
	    int start, end;
	    bool ok;

	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
	    gcc_assert (ok);
	    ival = (code == 's') ? start : end;
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
	  }
	  break;
	case 't':
	  {
	    unsigned mask;
	    bool ok = s390_bytemask_vector_p (x, &mask);
	    gcc_assert (ok);
	    fprintf (file, "%u", mask);
	  }
	  break;
	default:
	  output_operand_lossage ("invalid constant vector for output "
				  "modifier '%c'", code);
	}
      break;

    default:
      if (code == 0)
	output_operand_lossage ("invalid expression - try using "
				"an output modifier");
      else
	output_operand_lossage ("invalid expression for output "
				"modifier '%c'", code);
      break;
    }
}
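/* Usage example (hypothetical operands): for a multiword value held in
   the register pair starting at %r4, "%N0" prints "%r5", i.e. the
   second word, and for a CONST_INT the modifier "%x0" prints the value
   masked to its low 16 bits.  */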
/* Target hook for assembling integer objects.  We need to define it
   here to work around a bug in some versions of GAS, which couldn't
   handle values smaller than INT_MIN when printed in decimal.  */

static bool
s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == 8 && aligned_p
      && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
    {
      fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
	       INTVAL (x));
      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
/* Returns true if register REGNO is used for forming
   a memory address in expression X.  */

static bool
reg_used_in_mem_p (int regno, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == MEM)
    {
      if (refers_to_regno_p (regno, XEXP (x, 0)))
	return true;
    }
  else if (code == SET
	   && GET_CODE (SET_DEST (x)) == PC)
    {
      if (refers_to_regno_p (regno, SET_SRC (x)))
	return true;
    }

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e'
	  && reg_used_in_mem_p (regno, XEXP (x, i)))
	return true;

      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
	    return true;
    }
  return false;
}
/* Returns true if expression DEP_RTX sets an address register
   used by instruction INSN to address memory.  */

static bool
addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
{
  rtx target, pat;

  if (NONJUMP_INSN_P (dep_rtx))
    dep_rtx = PATTERN (dep_rtx);

  if (GET_CODE (dep_rtx) == SET)
    {
      target = SET_DEST (dep_rtx);
      if (GET_CODE (target) == STRICT_LOW_PART)
	target = XEXP (target, 0);
      while (GET_CODE (target) == SUBREG)
	target = SUBREG_REG (target);

      if (GET_CODE (target) == REG)
	{
	  int regno = REGNO (target);

	  if (s390_safe_attr_type (insn) == TYPE_LA)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		{
		  gcc_assert (XVECLEN (pat, 0) == 2);
		  pat = XVECEXP (pat, 0, 0);
		}
	      gcc_assert (GET_CODE (pat) == SET);
	      return refers_to_regno_p (regno, SET_SRC (pat));
	    }
	  else if (get_attr_atype (insn) == ATYPE_AGEN)
	    return reg_used_in_mem_p (regno, PATTERN (insn));
	}
    }
  return false;
}
/* Return 1, if dep_insn sets register used in insn in the agen unit.  */

int
s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
{
  rtx dep_rtx = PATTERN (dep_insn);
  int i;

  if (GET_CODE (dep_rtx) == SET
      && addr_generation_dependency_p (dep_rtx, insn))
    return 1;
  else if (GET_CODE (dep_rtx) == PARALLEL)
    {
      for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
	{
	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
	    return 1;
	}
    }
  return 0;
}
/* A C statement (sans semicolon) to update the integer scheduling priority
   INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
   reduce the priority to execute INSN later.  Do not define this macro if
   you do not need to adjust the scheduling priorities of insns.

   A STD instruction should be scheduled earlier,
   in order to use the bypass.  */
static int
s390_adjust_priority (rtx_insn *insn, int priority)
{
  if (! INSN_P (insn))
    return priority;

  if (s390_tune <= PROCESSOR_2064_Z900)
    return priority;

  switch (s390_safe_attr_type (insn))
    {
      case TYPE_FSTOREDF:
      case TYPE_FSTORESF:
	priority = priority << 3;
	break;
      case TYPE_STORE:
      case TYPE_STM:
	priority = priority << 1;
	break;
      default:
	break;
    }
  return priority;
}
/* The number of instructions that can be issued per cycle.  */

static int
s390_issue_rate (void)
{
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
    case PROCESSOR_2094_Z9_109:
    case PROCESSOR_2094_Z9_EC:
    case PROCESSOR_2817_Z196:
      return 3;
    case PROCESSOR_2097_Z10:
      return 2;
    case PROCESSOR_2064_Z900:
      /* Starting with EC12 we use the sched_reorder hook to take care
	 of instruction dispatch constraints.  The algorithm only
	 picks the best instruction and assumes only a single
	 instruction gets issued per cycle.  */
    case PROCESSOR_2827_ZEC12:
    case PROCESSOR_2964_Z13:
    case PROCESSOR_3906_Z14:
    case PROCESSOR_8561_Z15:
    case PROCESSOR_ARCH14:
    default:
      return 1;
    }
}

static int
s390_first_cycle_multipass_dfa_lookahead (void)
{
  return 4;
}

static void
annotate_constant_pool_refs_1 (rtx *x)
{
  int i, j;
  const char *fmt;

  gcc_assert (GET_CODE (*x) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (*x));

  /* Literal pool references can only occur inside a MEM ...  */
  if (GET_CODE (*x) == MEM)
    {
      rtx memref = XEXP (*x, 0);

      if (GET_CODE (memref) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (memref))
	{
	  rtx base = cfun->machine->base_reg;
	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
				     UNSPEC_LTREF);

	  *x = replace_equiv_address (*x, addr);
	  return;
	}

      if (GET_CODE (memref) == CONST
	  && GET_CODE (XEXP (memref, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
	{
	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
	  rtx sym = XEXP (XEXP (memref, 0), 0);
	  rtx base = cfun->machine->base_reg;
	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
				     UNSPEC_LTREF);

	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
	  return;
	}
    }

  /* ... or a load-address type pattern.  */
  if (GET_CODE (*x) == SET)
    {
      rtx addrref = SET_SRC (*x);

      if (GET_CODE (addrref) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (addrref))
	{
	  rtx base = cfun->machine->base_reg;
	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
				     UNSPEC_LTREF);

	  SET_SRC (*x) = addr;
	  return;
	}

      if (GET_CODE (addrref) == CONST
	  && GET_CODE (XEXP (addrref, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
	{
	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
	  rtx sym = XEXP (XEXP (addrref, 0), 0);
	  rtx base = cfun->machine->base_reg;
	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
				     UNSPEC_LTREF);

	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
	  return;
	}
    }

  fmt = GET_RTX_FORMAT (GET_CODE (*x));
  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  annotate_constant_pool_refs_1 (&XEXP (*x, i));
	}
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (*x, i); j++)
	    annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
	}
    }
}

/* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
   Fix up MEMs as required.
   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
annotate_constant_pool_refs (rtx_insn *insn)
{
  if (s390_safe_relative_long_p (insn))
    return;
  annotate_constant_pool_refs_1 (&PATTERN (insn));
}
static void
find_constant_pool_ref_1 (rtx x, rtx *ref)
{
  int i, j;
  const char *fmt;

  /* Likewise POOL_ENTRY insns.  */
  if (GET_CODE (x) == UNSPEC_VOLATILE
      && XINT (x, 1) == UNSPECV_POOL_ENTRY)
    return;

  gcc_assert (GET_CODE (x) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (x));

  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
    {
      rtx sym = XVECEXP (x, 0, 0);
      gcc_assert (GET_CODE (sym) == SYMBOL_REF
		  && CONSTANT_POOL_ADDRESS_P (sym));

      if (*ref == NULL_RTX)
	*ref = sym;
      else
	gcc_assert (*ref == sym);

      return;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  find_constant_pool_ref_1 (XEXP (x, i), ref);
	}
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
	}
    }
}

/* Find an annotated literal pool symbol referenced in INSN,
   and store it at REF.  Will abort if INSN contains references to
   more than one such pool symbol; multiple references to the same
   symbol are allowed, however.

   The rtx pointed to by REF must be initialized to NULL_RTX
   by the caller before calling this routine.

   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
find_constant_pool_ref (rtx_insn *insn, rtx *ref)
{
  if (s390_safe_relative_long_p (insn))
    return;
  find_constant_pool_ref_1 (PATTERN (insn), ref);
}
static void
replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
{
  int i, j;
  const char *fmt;

  gcc_assert (*x != ref);

  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_LTREF
      && XVECEXP (*x, 0, 0) == ref)
    {
      *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
      return;
    }

  if (GET_CODE (*x) == PLUS
      && GET_CODE (XEXP (*x, 1)) == CONST_INT
      && GET_CODE (XEXP (*x, 0)) == UNSPEC
      && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
      && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
    {
      rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
      *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
      return;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (*x));
  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
	}
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (*x, i); j++)
	    replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
	}
    }
}

/* Replace every reference to the annotated literal pool
   symbol REF in INSN by its base plus OFFSET.
   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
{
  if (s390_safe_relative_long_p (insn))
    return;
  replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
}
/* We keep a list of constants which we have to add to internal
   constant tables in the middle of large functions.  */

static machine_mode constant_modes[] =
{
  TFmode, FPRX2mode, TImode, TDmode,
  V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
  V4SFmode, V2DFmode, V1TFmode,
  DFmode, DImode, DDmode,
  V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
  SFmode, SImode, SDmode,
  V4QImode, V2HImode, V1SImode, V1SFmode,
  HImode,
  V2QImode, V1HImode,
  QImode,
  V1QImode
};
#define NR_C_MODES (sizeof (constant_modes) / sizeof (constant_modes[0]))

struct constant
{
  struct constant *next;
  rtx value;
  rtx_code_label *label;
};

struct constant_pool
{
  struct constant_pool *next;
  rtx_insn *first_insn;
  rtx_insn *pool_insn;
  bitmap insns;
  rtx_insn *emit_pool_after;

  struct constant *constants[NR_C_MODES];
  struct constant *execute;
  rtx_code_label *label;
  int size;
};
/* Allocate new constant_pool structure.  */

static struct constant_pool *
s390_alloc_pool (void)
{
  struct constant_pool *pool;
  size_t i;

  pool = (struct constant_pool *) xmalloc (sizeof *pool);
  for (i = 0; i < NR_C_MODES; i++)
    pool->constants[i] = NULL;

  pool->execute = NULL;
  pool->label = gen_label_rtx ();
  pool->first_insn = NULL;
  pool->pool_insn = NULL;
  pool->insns = BITMAP_ALLOC (NULL);
  pool->size = 0;
  pool->emit_pool_after = NULL;

  return pool;
}

/* Create new constant pool covering instructions starting at INSN
   and chain it to the end of POOL_LIST.  */

static struct constant_pool *
s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
{
  struct constant_pool *pool, **prev;

  pool = s390_alloc_pool ();
  pool->first_insn = insn;

  for (prev = pool_list; *prev; prev = &(*prev)->next)
    ;
  *prev = pool;

  return pool;
}
/* End range of instructions covered by POOL at INSN and emit
   placeholder insn representing the pool.  */

static void
s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
{
  rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);

  if (!insn)
    insn = get_last_insn ();

  pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
}

/* Add INSN to the list of insns covered by POOL.  */

static void
s390_add_pool_insn (struct constant_pool *pool, rtx insn)
{
  bitmap_set_bit (pool->insns, INSN_UID (insn));
}
/* Return pool out of POOL_LIST that covers INSN.  */

static struct constant_pool *
s390_find_pool (struct constant_pool *pool_list, rtx insn)
{
  struct constant_pool *pool;

  for (pool = pool_list; pool; pool = pool->next)
    if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
      break;

  return pool;
}
/* Add constant VAL of mode MODE to the constant pool POOL.  */

static void
s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
{
  struct constant *c;
  size_t i;

  for (i = 0; i < NR_C_MODES; i++)
    if (constant_modes[i] == mode)
      break;
  gcc_assert (i != NR_C_MODES);

  for (c = pool->constants[i]; c != NULL; c = c->next)
    if (rtx_equal_p (val, c->value))
      break;

  if (c == NULL)
    {
      c = (struct constant *) xmalloc (sizeof *c);
      c->value = val;
      c->label = gen_label_rtx ();
      c->next = pool->constants[i];
      pool->constants[i] = c;
      pool->size += GET_MODE_SIZE (mode);
    }
}
/* Return an rtx that represents the offset of X from the start of
   pool POOL.  */

static rtx
s390_pool_offset (struct constant_pool *pool, rtx x)
{
  rtx label;

  label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
  x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
		      UNSPEC_POOL_OFFSET);
  return gen_rtx_CONST (GET_MODE (x), x);
}
/* Find constant VAL of mode MODE in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the new constant.  */

static rtx
s390_find_constant (struct constant_pool *pool, rtx val,
		    machine_mode mode)
{
  struct constant *c;
  size_t i;

  for (i = 0; i < NR_C_MODES; i++)
    if (constant_modes[i] == mode)
      break;
  gcc_assert (i != NR_C_MODES);

  for (c = pool->constants[i]; c != NULL; c = c->next)
    if (rtx_equal_p (val, c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* Check whether INSN is an execute.  Return the label_ref to its
   execute target template if so, NULL_RTX otherwise.  */

static rtx
s390_execute_label (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL
      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
      && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
    {
      if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
      else
	{
	  gcc_assert (JUMP_P (insn));
	  /* For jump insns as execute target:
	     - There is one operand less in the parallel (the
	       modification register of the execute is always 0).
	     - The execute target label is wrapped into an
	       if_then_else in order to hide it from jump analysis.  */
	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
	}
    }

  return NULL_RTX;
}
/* Find execute target for INSN in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the execute target.  */

static rtx
s390_find_execute (struct constant_pool *pool, rtx insn)
{
  struct constant *c;

  for (c = pool->execute; c != NULL; c = c->next)
    if (INSN_UID (insn) == INSN_UID (c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* For an execute INSN, extract the execute target template.  */

static rtx
s390_execute_target (rtx insn)
{
  rtx pattern = PATTERN (insn);
  gcc_assert (s390_execute_label (insn));

  if (XVECLEN (pattern, 0) == 2)
    {
      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
    }
  else
    {
      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
      int i;

      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));

      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
    }

  return pattern;
}
/* Indicate that INSN cannot be duplicated.  This is the case for
   execute insns that carry a unique label.  */

static bool
s390_cannot_copy_insn_p (rtx_insn *insn)
{
  rtx label = s390_execute_label (insn);
  return label && label != const0_rtx;
}
/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
   do not emit the pool base label.  */

static void
s390_dump_pool (struct constant_pool *pool, bool remote_label)
{
  struct constant *c;
  rtx_insn *insn = pool->pool_insn;
  int i;

  /* Switch to rodata section.  */
  insn = emit_insn_after (gen_pool_section_start (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Ensure minimum pool alignment.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Emit pool base label.  */
  if (!remote_label)
    {
      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Dump constants in descending alignment requirement order,
     ensuring proper alignment for every constant.  */
  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = c->next)
      {
	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
	rtx value = copy_rtx (c->value);
	if (GET_CODE (value) == CONST
	    && GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
	    && XVECLEN (XEXP (value, 0), 0) == 1)
	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));

	insn = emit_label_after (c->label, insn);
	INSN_ADDRESSES_NEW (insn, -1);

	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
					 gen_rtvec (1, value),
					 UNSPECV_POOL_ENTRY);
	insn = emit_insn_after (value, insn);
	INSN_ADDRESSES_NEW (insn, -1);
      }

  /* Ensure minimum alignment for instructions.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Output in-pool execute template insns.  */
  for (c = pool->execute; c; c = c->next)
    {
      insn = emit_label_after (c->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      insn = emit_insn_after (s390_execute_target (c->value), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Switch back to previous section.  */
  insn = emit_insn_after (gen_pool_section_end (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  insn = emit_barrier_after (insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Remove placeholder insn.  */
  remove_insn (pool->pool_insn);
}
/* Free all memory used by POOL.  */

static void
s390_free_pool (struct constant_pool *pool)
{
  struct constant *c, *next;
  int i;

  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = next)
      {
	next = c->next;
	free (c);
      }

  for (c = pool->execute; c; c = next)
    {
      next = c->next;
      free (c);
    }

  BITMAP_FREE (pool->insns);
  free (pool);
}
/* Collect main literal pool.  Return NULL on overflow.  */

static struct constant_pool *
s390_mainpool_start (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;

  pool = s390_alloc_pool ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
	{
	  /* There might be two main_pool instructions if base_reg
	     is call-clobbered; one for shrink-wrapped code and one
	     for the rest.  We want to keep the first.  */
	  if (pool->pool_insn)
	    {
	      insn = PREV_INSN (insn);
	      delete_insn (NEXT_INSN (insn));
	      continue;
	    }
	  pool->pool_insn = insn;
	}

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);
	      s390_add_constant (pool, constant, mode);
	    }
	}

      /* If hot/cold partitioning is enabled we have to make sure that
	 the literal pool is emitted in the same section where the
	 initialization of the literal pool base pointer takes place.
	 emit_pool_after is only used in the non-overflow case on non
	 Z cpus where we can emit the literal pool at the end of the
	 function body within the text section.  */
      if (NOTE_P (insn)
	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
	  && !pool->emit_pool_after)
	pool->emit_pool_after = PREV_INSN (insn);
    }

  gcc_assert (pool->pool_insn || pool->size == 0);

  if (pool->size >= 4096)
    {
      /* We're going to chunkify the pool, so remove the main
	 pool placeholder insn.  */
      remove_insn (pool->pool_insn);

      s390_free_pool (pool);
      pool = NULL;
    }

  /* If the function ends with the section where the literal pool
     should be emitted, set the marker to its end.  */
  if (pool && !pool->emit_pool_after)
    pool->emit_pool_after = get_last_insn ();

  return pool;
}
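
/* Note: the 4096 limit above corresponds to the unsigned 12-bit
   displacement of base + displacement addressing; a pool of that size
   could not be fully addressed from a single base register using
   short displacements, hence the switch to pool chunks.  */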
/* POOL holds the main literal pool as collected by s390_mainpool_start.
   Modify the current function to output the pool constants as well as
   the pool register setup instruction.  */

static void
s390_mainpool_finish (struct constant_pool *pool)
{
  rtx base_reg = cfun->machine->base_reg;
  rtx set;
  rtx_insn *insn;

  /* If the pool is empty, we're done.  */
  if (pool->size == 0)
    {
      /* We don't actually need a base register after all.  */
      cfun->machine->base_reg = NULL_RTX;

      if (pool->pool_insn)
	remove_insn (pool->pool_insn);
      s390_free_pool (pool);
      return;
    }

  /* We need correct insn addresses.  */
  shorten_branches (get_insns ());

  /* Use a LARL to load the pool register.  The pool is
     located in the .rodata section, so we emit it after the function.  */
  set = gen_main_base_64 (base_reg, pool->label);
  insn = emit_insn_after (set, pool->pool_insn);
  INSN_ADDRESSES_NEW (insn, -1);
  remove_insn (pool->pool_insn);

  insn = get_last_insn ();
  pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
  INSN_ADDRESSES_NEW (pool->pool_insn, -1);

  s390_dump_pool (pool, 0);

  /* Replace all literal pool references.  */

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (pool, insn);
	      else
		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Free the pool.  */
  s390_free_pool (pool);
}
/* Chunkify the literal pool.  */

#define S390_POOL_CHUNK_MIN	0xc00
#define S390_POOL_CHUNK_MAX	0xe00
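
/* Both chunk limits deliberately stay well below the 4k displacement
   range; the slack leaves room for alignment padding, execute
   templates, and constants that are still added after the size
   check.  */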
static struct constant_pool *
s390_chunkify_start (void)
{
  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
  bitmap far_labels;
  rtx_insn *insn;

  /* We need correct insn addresses.  */

  shorten_branches (get_insns ());

  /* Scan all insns and move literals to pool chunks.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);

	      if (!curr_pool)
		curr_pool = s390_start_pool (&pool_list, insn);

	      s390_add_constant (curr_pool, constant, mode);
	      s390_add_pool_insn (curr_pool, insn);
	    }
	}

      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
	{
	  if (curr_pool)
	    s390_add_pool_insn (curr_pool, insn);
	}

      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
	continue;

      if (!curr_pool
	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
	continue;

      if (curr_pool->size < S390_POOL_CHUNK_MAX)
	continue;

      s390_end_pool (curr_pool, NULL);
      curr_pool = NULL;
    }

  if (curr_pool)
    s390_end_pool (curr_pool, NULL);

  /* Find all labels that are branched into
     from an insn belonging to a different chunk.  */

  far_labels = BITMAP_ALLOC (NULL);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx_jump_table_data *table;

      /* Labels marked with LABEL_PRESERVE_P can be target
	 of non-local jumps, so we have to mark them.
	 The same holds for named labels.

	 Don't do that, however, if it is the label before
	 a jump table.  */

      if (LABEL_P (insn)
	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
	{
	  rtx_insn *vec_insn = NEXT_INSN (insn);
	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
	}
      /* Check potential targets in a table jump (casesi_jump).  */
      else if (tablejump_p (insn, NULL, &table))
	{
	  rtx vec_pat = PATTERN (table);
	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;

	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
	    {
	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);

	      if (s390_find_pool (pool_list, label)
		  != s390_find_pool (pool_list, insn))
		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
	    }
	}
      /* If we have a direct jump (conditional or unconditional),
	 check all potential targets.  */
      else if (JUMP_P (insn))
	{
	  rtx pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) == SET)
	    {
	      rtx label = JUMP_LABEL (insn);
	      if (label && !ANY_RETURN_P (label))
		{
		  if (s390_find_pool (pool_list, label)
		      != s390_find_pool (pool_list, insn))
		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
		}
	    }
	}
    }

  /* Insert base register reload insns before every pool.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    {
      rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					 curr_pool->label);
      rtx_insn *insn = curr_pool->first_insn;
      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
    }

  /* Insert base register reload insns at every far label.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (LABEL_P (insn)
	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
      {
	struct constant_pool *pool = s390_find_pool (pool_list, insn);
	if (pool)
	  {
	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					       pool->label);
	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
	  }
      }

  BITMAP_FREE (far_labels);

  /* Recompute insn addresses.  */

  init_insn_lengths ();
  shorten_branches (get_insns ());

  return pool_list;
}
/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
   After we have decided to use this list, finish implementing
   all changes to the current function as required.  */

static void
s390_chunkify_finish (struct constant_pool *pool_list)
{
  struct constant_pool *curr_pool = NULL;
  rtx_insn *insn;

  /* Replace all literal pool references.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      curr_pool = s390_find_pool (pool_list, insn);
      if (!curr_pool)
	continue;

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (curr_pool, insn);
	      else
		addr = s390_find_constant (curr_pool,
					   get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Dump out all literal pools.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    s390_dump_pool (curr_pool, 0);

  /* Free pool list.  */

  while (pool_list)
    {
      struct constant_pool *next = pool_list->next;
      s390_free_pool (pool_list);
      pool_list = next;
    }
}
/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */

void
s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
{
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);

      assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
		     as_a <scalar_float_mode> (mode), align);
      break;

    case MODE_INT:
      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
      mark_symbol_refs_as_used (exp);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      {
	int i;
	machine_mode inner_mode;
	gcc_assert (GET_CODE (exp) == CONST_VECTOR);

	inner_mode = GET_MODE_INNER (GET_MODE (exp));
	for (i = 0; i < XVECLEN (exp, 0); i++)
	  s390_output_pool_entry (XVECEXP (exp, 0, i),
				  inner_mode,
				  i == 0
				  ? align
				  : GET_MODE_BITSIZE (inner_mode));
      }
      break;

    default:
      gcc_unreachable ();
    }
}
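
/* For illustration: a V4SImode constant is emitted element by element;
   the first element uses the requested ALIGN, the remaining three use
   GET_MODE_BITSIZE (SImode) == 32, keeping the elements packed back to
   back.  */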
/* Return true if MEM refers to an integer constant in the literal pool.  If
   VAL is not nullptr, then also fill it with the constant's value.  */

bool
s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
{
  /* Try to match the following:
     - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
     - (mem (symbol_ref)).  */

  if (!MEM_P (mem))
    return false;

  rtx addr = XEXP (mem, 0);
  rtx sym;
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
    sym = XVECEXP (addr, 0, 0);
  else
    sym = addr;

  if (!SYMBOL_REF_P (sym) || !CONSTANT_POOL_ADDRESS_P (sym))
    return false;

  rtx val_rtx = get_pool_constant (sym);
  if (!CONST_INT_P (val_rtx))
    return false;

  if (val != nullptr)
    *val = INTVAL (val_rtx);
  return true;
}
/* Return an RTL expression representing the value of the return address
   for the frame COUNT steps up from the current frame.  FRAME is the
   frame pointer of that frame.  */

rtx
s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
{
  int offset;
  rtx addr;

  /* Without backchain, we fail for all but the current frame.  */

  if (!TARGET_BACKCHAIN && count > 0)
    return NULL_RTX;

  /* For the current frame, we need to make sure the initial
     value of RETURN_REGNUM is actually saved.  */

  if (count == 0)
    return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);

  if (TARGET_PACKED_STACK)
    offset = -2 * UNITS_PER_LONG;
  else
    offset = RETURN_REGNUM * UNITS_PER_LONG;

  addr = plus_constant (Pmode, frame, offset);
  addr = memory_address (Pmode, addr);
  return gen_rtx_MEM (Pmode, addr);
}
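
/* For illustration: with the default (non-packed) 64-bit stack layout
   this computes offset = 14 * 8 = 112, i.e. the r14 slot within the
   register save area of the frame in question.  */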
/* Return an RTL expression representing the back chain stored in
   the current stack frame.  */

rtx
s390_back_chain_rtx (void)
{
  rtx chain;

  gcc_assert (TARGET_BACKCHAIN);

  if (TARGET_PACKED_STACK)
    chain = plus_constant (Pmode, stack_pointer_rtx,
			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
  else
    chain = stack_pointer_rtx;

  chain = gen_rtx_MEM (Pmode, chain);
  return chain;
}
/* Find first call clobbered register unused in a function.
   This could be used as base register in a leaf function
   or for holding the return address before epilogue.  */

static int
find_unused_clobbered_reg (void)
{
  int i;
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return 0;
}
/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for
   all clobbered hard regs in SETREG.  */

static void
s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
{
  char *regs_ever_clobbered = (char *)data;
  unsigned int i, regno;
  machine_mode mode = GET_MODE (setreg);

  if (GET_CODE (setreg) == SUBREG)
    {
      rtx inner = SUBREG_REG (setreg);
      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
	return;
      regno = subreg_regno (setreg);
    }
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  for (i = regno;
       i < end_hard_regno (mode, regno);
       i++)
    regs_ever_clobbered[i] = 1;
}
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed integer array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */

static void
s390_regs_ever_clobbered (char regs_ever_clobbered[])
{
  basic_block cur_bb;
  rtx_insn *cur_insn;
  unsigned int i;

  memset (regs_ever_clobbered, 0, 32);

  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    {
      for (i = 0; i < 32; i++)
	regs_ever_clobbered[i] = call_used_regs[i];
    }

  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
     this work is done by liveness analysis (mark_regs_live_at_end).
     Special care is needed for functions containing landing pads.  Landing pads
     may use the eh registers, but the code which sets these registers is not
     contained in that function.  Hence s390_regs_ever_clobbered is not able to
     deal with this automatically.  */
  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
      if (crtl->calls_eh_return
	  || (cfun->machine->has_landing_pad_p
	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;

  /* For nonlocal gotos all call-saved registers have to be saved.
     This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
  if (crtl->saves_all_registers)
    for (i = 0; i < 32; i++)
      if (!call_used_regs[i])
	regs_ever_clobbered[i] = 1;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      FOR_BB_INSNS (cur_bb, cur_insn)
	{
	  rtx pat;

	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;

		  /* l / lg  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}

	      /* lm / lmg  */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (cur_insn,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
	}
    }
}
/* Determine the frame area which actually has to be accessed
   in the function epilogue.  The values are stored at the
   given pointers AREA_BOTTOM (address of the lowest used stack
   address) and AREA_TOP (address of the first item which does
   not belong to the stack frame).  */

static void
s390_frame_area (int *area_bottom, int *area_top)
{
  int b, t;

  b = INT_MAX;
  t = INT_MIN;

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      b = (cfun_frame_layout.gprs_offset
	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
      t = b + (cfun_frame_layout.last_restore_gpr
	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
    }

  if (TARGET_64BIT && cfun_save_high_fprs_p)
    {
      b = MIN (b, cfun_frame_layout.f8_offset);
      t = MAX (t, (cfun_frame_layout.f8_offset
		   + cfun_frame_layout.high_fprs * 8));
    }

  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }
  *area_bottom = b;
  *area_top = t;
}
/* Update gpr_save_slots in the frame layout trying to make use of
   FPRs as GPR save slots.
   This is a helper routine of s390_register_info.  */

static void
s390_register_info_gprtofpr ()
{
  int save_reg_slot = FPR0_REGNUM;
  int i, j;

  if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* builtin_eh_return needs to be able to modify the return address
     on the stack.  It could also adjust the FPR save slot instead but
     is it worth the trouble?!  */
  if (crtl->calls_eh_return)
    return;

  for (i = 15; i >= 6; i--)
    {
      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
	  break;
	}
      cfun_gpr_save_slot (i) = save_reg_slot++;
    }
}
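
/* For illustration: a leaf function on z10 that only clobbers r11 and
   r12 can typically keep both GPRs in free call-clobbered FPRs via
   ldgr, so no stack save slots are needed at all.  */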
/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_fpr ()
{
  int i;
  int min_fpr;
  int max_fpr;

  /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
     f0-f4 for 64 bit.  */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;

  min_fpr = crtl->args.info.fprs;
  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
  if (max_fpr >= FP_ARG_NUM_REG)
    max_fpr = FP_ARG_NUM_REG - 1;

  /* FPR argument regs start at f0.  */
  min_fpr += FPR0_REGNUM;
  max_fpr += FPR0_REGNUM;

  for (i = min_fpr; i <= max_fpr; i++)
    cfun_set_fpr_save (i);
}
/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}
/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as save to stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}
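
/* For illustration: if only r11 and r13 ended up with stack save
   slots, the range becomes [11, 13] and r12 is saved as well, since
   stm/lm only operate on consecutive register ranges.  */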
/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_used_regs[i])
      {
	cfun_set_fpr_save (i);
	if (i >= FPR8_REGNUM)
	  cfun_frame_layout.high_fprs++;
      }

  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	|| (flag_split_stack && cfun->stdarg
	    && (crtl->is_leaf || TARGET_TPF_PROFILING
		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}
/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored anyway.  */

static inline bool
global_not_special_regno_p (int regno)
{
  return (global_regs[regno]
	  /* These registers are special and need to be
	     restored in any case.  */
	  && !(regno == STACK_POINTER_REGNUM
	       || regno == RETURN_REGNUM
	       || regno == BASE_REGNUM
	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
}
/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);

  s390_regs_ever_clobbered (clobbered_regs);

  /* Global registers do not need to be saved and restored unless it
     is one of our special regs (r12, r13, r14, or r15).  */
  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
	/* When expanding builtin_return_addr in ESA mode we do not
	   know whether r14 will later be needed as scratch reg when
	   doing branch splitting.  So the builtin always accesses the
	   r14 save slot and we need to stick to the save/restore
	   decision for r14 even if it turns out that it didn't get
	   clobbered.  */
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}
/* Fill cfun->machine with info about frame of current function.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
		 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float.  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
	 save slot must not change.  So we always need a slot for the
	 stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
	 keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
	cfun_frame_layout.f4_offset =
	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR4_REGNUM) +
		     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR0_REGNUM)
		     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
				     - cfun_frame_layout.high_fprs * 8);
    }

  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
/* Generate frame layout.  Fills in register and frame data for the current
   function in cfun->machine.  This routine can be called multiple times;
   it will re-do the complete frame layout every time.  */

static void
s390_init_frame_layout (void)
{
  HOST_WIDE_INT frame_size;
  int base_used;

  /* After LRA the frame layout is supposed to be read-only and should
     not be re-computed.  */
  if (reload_completed)
    return;

  do
    {
      frame_size = cfun_frame_layout.frame_size;

      /* Try to predict whether we'll need the base register.  */
      base_used = crtl->uses_const_pool
		  || (!DISP_IN_RANGE (frame_size)
		      && !CONST_OK_FOR_K (frame_size));

      /* Decide which register to use as literal pool base.  In small
	 leaf functions, try to use an unused call-clobbered register
	 as base register to avoid save/restore overhead.  */
      if (!base_used)
	cfun->machine->base_reg = NULL_RTX;
      else
	{
	  int br = 0;

	  if (crtl->is_leaf)
	    /* Prefer r5 (most likely to be free).  */
	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
	      ;
	  cfun->machine->base_reg =
	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
	}

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
}
/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - when CC is copied to a GPR and the CC0 check is done with the GPR
     this is not supported.  */

static void
s390_optimize_nonescaping_tx (void)
{
  const unsigned int CC0 = 1 << 3;
  basic_block tbegin_bb = NULL;
  basic_block tend_bb = NULL;
  basic_block bb;
  rtx_insn *insn;
  bool result = true;
  int bb_index;
  rtx_insn *tbegin_insn = NULL;

  if (!cfun->machine->tbegin_p)
    return;

  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

      if (!bb)
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx ite, cc, pat, target;
	  unsigned HOST_WIDE_INT mask;

	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) != SET
	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
	    continue;

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
	    {
	      rtx_insn *tmp;

	      tbegin_insn = insn;

	      /* Just return if the tbegin doesn't have clobbers.  */
	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
		return;

	      if (tbegin_bb != NULL)
		return;

	      /* Find the next conditional jump.  */
	      for (tmp = NEXT_INSN (insn);
		   tmp != NULL_RTX;
		   tmp = NEXT_INSN (tmp))
		{
		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
		    return;
		  if (!JUMP_P (tmp))
		    continue;

		  ite = SET_SRC (PATTERN (tmp));
		  if (GET_CODE (ite) != IF_THEN_ELSE)
		    return;

		  cc = XEXP (XEXP (ite, 0), 0);
		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
		      || GET_MODE (cc) != CCRAWmode
		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
		    return;

		  if (bb->succs->length () != 2)
		    return;

		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
		  if (GET_CODE (XEXP (ite, 0)) == NE)
		    mask ^= 0xf;

		  if (mask == CC0)
		    target = XEXP (ite, 1);
		  else if (mask == (CC0 ^ 0xf))
		    target = XEXP (ite, 2);
		  else
		    return;

		  {
		    edge_iterator ei;
		    edge e1, e2;

		    ei = ei_start (bb->succs);
		    e1 = ei_safe_edge (ei);
		    ei_next (&ei);
		    e2 = ei_safe_edge (ei);

		    if (e2->flags & EDGE_FALLTHRU)
		      {
			e2 = e1;
			e1 = ei_safe_edge (ei);
		      }

		    if (!(e1->flags & EDGE_FALLTHRU))
		      return;

		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
		  }
		  if (tmp == BB_END (bb))
		    break;
		}
	    }

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
	    {
	      if (tend_bb != NULL)
		return;
	      tend_bb = bb;
	    }
	}
    }

  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;

  if (tbegin_bb == NULL || tend_bb == NULL)
    return;

  calculate_dominance_info (CDI_POST_DOMINATORS);
  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (!result)
    return;

  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (2,
				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
  INSN_CODE (tbegin_insn) = -1;
  df_insn_rescan (tbegin_insn);
}
/* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
   have the same size, this is equivalent to CLASS_MAX_NREGS.  */

static unsigned int
s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}
/* Implement TARGET_HARD_REGNO_MODE_OK.

   Integer modes <= word size fit into any GPR.
   Integer modes > word size fit into successive GPRs, starting with
   an even-numbered register.
   SImode and DImode fit into FPRs as well.

   Floating point modes <= word size fit into any FPR or GPR.
   Floating point modes > word size (i.e. DFmode on 32-bit) fit
   into any FPR, or an even-odd GPR pair.
   TFmode fits only into an even-odd FPR pair.

   Complex floating point modes fit either into two FPRs, or into
   successive GPRs (again starting with an even number).
   TCmode fits only into two successive even-odd FPR pairs.

   Condition code modes fit only into the CC register.  */

static bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
    return false;

  switch (REGNO_REG_CLASS (regno))
    {
    case VEC_REGS:
      return ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
	      || mode == DFmode
	      || (TARGET_VXE && mode == SFmode)
	      || s390_vector_mode_supported_p (mode));
      break;
    case FP_REGS:
      if (TARGET_VX
	  && ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (FP_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode)))
	return true;

      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == DImode)
	    return true;

	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	    return true;
	}
      break;
    case ADDR_REGS:
      if (FRAME_REGNO_P (regno) && mode == Pmode)
	return true;

      /* fallthrough */
    case GENERAL_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (TARGET_ZARCH
	      || (mode != TFmode && mode != TCmode && mode != TDmode))
	    return true;
	}
      break;
    case CC_REGS:
      if (GET_MODE_CLASS (mode) == MODE_CC)
	return true;
      break;
    case ACCESS_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == Pmode)
	    return true;
	}
      break;
    default:
      return false;
    }

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return ((mode1 == SFmode || mode1 == DFmode)
	  == (mode2 == SFmode || mode2 == DFmode));
}
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

static bool
s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
{
  /* Once we've decided upon a register to use as base register, it must
     no longer be used for any other purpose.  */
  if (cfun->machine->base_reg)
    if (REGNO (cfun->machine->base_reg) == old_reg
	|| REGNO (cfun->machine->base_reg) == new_reg)
      return false;

  /* Prevent regrename from using call-saved regs which haven't
     actually been saved.  This is necessary since regrename assumes
     the backend save/restore decisions are based on
     df_regs_ever_live.  Since we have our own routine we have to tell
     regrename manually about it.  */
  if (GENERAL_REGNO_P (new_reg)
      && !call_used_regs[new_reg]
      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
    return false;

  return true;
}
/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
s390_hard_regno_scratch_ok (unsigned int regno)
{
  /* See s390_hard_regno_rename_ok.  */
  if (GENERAL_REGNO_P (regno)
      && !call_used_regs[regno]
      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
    return false;

  return true;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
   code that runs in z/Architecture mode, but conforms to the 31-bit
   ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
   bytes are saved across calls, however.  */

static bool
s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				     machine_mode mode)
{
  /* For r12 we know that the only bits we actually care about are
     preserved across function calls.  Since r12 is a fixed reg all
     accesses to r12 are generated by the backend.

     This workaround is necessary until gcse implements proper
     tracking of partially clobbered registers.  */
  if (!TARGET_64BIT
      && TARGET_ZARCH
      && GET_MODE_SIZE (mode) > 4
      && (!flag_pic || regno != PIC_OFFSET_TABLE_REGNUM)
      && ((regno >= 6 && regno <= 15) || regno == 32))
    return true;

  if (TARGET_VX
      && GET_MODE_SIZE (mode) > 8
      && (((TARGET_64BIT && regno >= 24 && regno <= 31))
	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
    return true;

  return false;
}
/* Maximum number of registers to represent a value of mode MODE
   in a register of class RCLASS.  */

int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
  int reg_size;
  bool reg_pair_required_p = false;

  switch (rclass)
    {
    case FP_REGS:
    case VEC_REGS:
      reg_size = TARGET_VX ? 16 : 8;

      /* TF and TD modes would fit into a VR but we put them into a
	 register pair since we do not have 128bit FP instructions on
	 full VRs.  */
      if (TARGET_VX
	  && SCALAR_FLOAT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) >= 16
	  && !(TARGET_VXE && mode == TFmode))
	reg_pair_required_p = true;

      /* Even if complex types would fit into a single FPR/VR we force
	 them into a register pair to deal with the parts more easily.
	 (FIXME: What about complex ints?)  */
      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	reg_pair_required_p = true;
      break;
    case ACCESS_REGS:
      reg_size = 4;
      break;
    default:
      reg_size = UNITS_PER_WORD;
      break;
    }

  if (reg_pair_required_p)
    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
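
/* For illustration: TFmode (16 bytes) in FP_REGS without the vector
   facility yields (16 + 8 - 1) / 8 = 2 registers; with the vector
   facility but no VXE the register pair path yields
   2 * ((16 / 2 + 16 - 1) / 16) = 2 as well, but explicitly as an
   even/odd pair.  */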
/* Return nonzero if mode M describes a 128-bit float in a floating point
   register pair.  */

static bool
s390_is_fpr128 (machine_mode m)
{
  return m == FPRX2mode || (!TARGET_VXE && m == TFmode);
}

/* Return nonzero if mode M describes a 128-bit float in a vector
   register.  */

static bool
s390_is_vr128 (machine_mode m)
{
  return m == V1TFmode || (TARGET_VXE && m == TFmode);
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
s390_can_change_mode_class (machine_mode from_mode,
			    machine_mode to_mode,
			    reg_class_t rclass)
{
  machine_mode small_mode;
  machine_mode big_mode;

  /* 128-bit values have different representations in floating point and
     vector registers.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && ((s390_is_fpr128 (from_mode) && s390_is_vr128 (to_mode))
	  || (s390_is_vr128 (from_mode) && s390_is_fpr128 (to_mode))))
    return false;

  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
    return true;

  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
    {
      small_mode = from_mode;
      big_mode = to_mode;
    }
  else
    {
      small_mode = to_mode;
      big_mode = from_mode;
    }

  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in a VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also if the vector
     facility is available we still place TFmode values in VR register
     pairs, since the only instructions we have operating on TFmodes
     only deal with register pairs.  Therefore we have to allow DFmode
     subregs of TFmodes to enable the TFmode splitters.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && (GET_MODE_SIZE (small_mode) < 8
	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
    return false;

  /* Likewise for access registers, since they have only half the
     word size on 64-bit.  */
  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
    return false;

  return true;
}
/* Return true if we use LRA instead of reload pass.  */

static bool
s390_lra_p (void)
{
  return s390_lra_flag;
}
/* Return true if register FROM can be eliminated via register TO.  */

static bool
s390_can_eliminate (const int from, const int to)
{
  /* We have not marked the base register as fixed.
     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
     If a function requires the base register, we say here that this
     elimination cannot be performed.  This will cause reload to free
     up the base register (as if it were fixed).  On the other hand,
     if the current function does *not* require the base register, we
     say here the elimination succeeds, which in turn allows reload
     to allocate the base register for any other purpose.  */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      s390_init_frame_layout ();
      return cfun->machine->base_reg == NULL_RTX;
    }

  /* Everything else must point into the stack frame.  */
  gcc_assert (to == STACK_POINTER_REGNUM
	      || to == HARD_FRAME_POINTER_REGNUM);

  gcc_assert (from == FRAME_POINTER_REGNUM
	      || from == ARG_POINTER_REGNUM
	      || from == RETURN_ADDRESS_POINTER_REGNUM);

  /* Make sure we actually saved the return address.  */
  if (from == RETURN_ADDRESS_POINTER_REGNUM)
    if (!crtl->calls_eh_return
	&& !cfun->stdarg
	&& !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
}
/* Return offset between register FROM and TO initially after prolog.  */

HOST_WIDE_INT
s390_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  /* ??? Why are we called for non-eliminable pairs?  */
  if (!s390_can_eliminate (from, to))
    return 0;

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = (get_frame_size ()
		+ STACK_POINTER_OFFSET
		+ crtl->outgoing_args_size);
      break;

    case ARG_POINTER_REGNUM:
      s390_init_frame_layout ();
      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      s390_init_frame_layout ();

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	{
	  /* If it turns out that for stdarg nothing went into the reg
	     save area we also do not need the return address
	     pointer.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    return 0;

	  gcc_unreachable ();
	}

      /* In order to make the following work it is not necessary for
	 r14 to have a save slot.  It is sufficient if one other GPR
	 got one.  Since the GPRs are always stored without gaps we
	 are able to calculate where the r14 save slot would
	 reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
		UNITS_PER_LONG);
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
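
/* For illustration: with the 64-bit fixed stack layout and
   first_save_gpr_slot == 6, gprs_offset is 48, so the r14 slot is
   found at frame_size + 48 + (14 - 6) * 8 = frame_size + 112.  */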
/* Emit insn to save fpr REGNUM at offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
save_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));

  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
    set_mem_alias_set (addr, get_varargs_alias_set ());
  else
    set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
}
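
/* The move emitted above typically assembles to std %fN,OFFSET(BASE),
   storing the full 8-byte FPR contents.  */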
/* Emit insn to restore fpr REGNUM from offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
restore_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
  set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}
/* Generate insn to save registers FIRST to LAST into
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
save_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn, note;
  int i;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);

  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
      else
	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));

      if (!global_not_special_regno_p (first))
	RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_store_multiple (addr,
			     gen_rtx_REG (Pmode, first),
			     GEN_INT (last - first + 1));

  if (first <= 6 && cfun->stdarg)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

	if (first + i <= 6)
	  set_mem_alias_set (mem, get_varargs_alias_set ());
      }

  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not set the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
	if (!global_not_special_regno_p (start))
	  break;

      if (start > last)
	return insn;

      addr = plus_constant (Pmode, base,
			    offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
	{
	  if (TARGET_64BIT)
	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  else
	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  note = PATTERN (note);

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
				     gen_rtx_REG (Pmode, start),
				     GEN_INT (last - start + 1));
	  note = PATTERN (note);

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

	  for (i = 0; i < XVECLEN (note, 0); i++)
	    if (GET_CODE (XVECEXP (note, 0, i)) == SET
		&& !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
									 0, i)))))
	      RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  return insn;
}
/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
			    addr,
			    GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}
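
/* The single-register case typically assembles to l/lg, the
   multi-register case to lm/lmg covering the consecutive range
   FIRST..LAST.  */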
/* Return insn sequence to load the GOT register.  */

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  start_sequence ();

  emit_move_insn (got_rtx, s390_got_symbol ());

  insns = get_insns ();
  end_sequence ();
  return insns;
}
/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
  emit_insn (gen_stack_tie (mem));
}
/* Copy GPRS into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
	{
	  rtx_insn *insn =
	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
			    gen_rtx_REG (DImode, i));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  Doing
	     so it might emit def_cfa_register infos setting an FPR as
	     new CFA.  */
	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
	}
    }
}
/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* Restore the GPRs starting with the stack pointer.  That way the
     stack pointer already has its original value when it comes to
     restoring the hard frame pointer.  So we can set the cfa reg back
     to the stack pointer.  */
  for (i = STACK_POINTER_REGNUM; i >= 6; i--)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
	continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));

      /* If either the stack pointer or the frame pointer get restored
	 set the CFA value to its value at function start.  Doing this
	 for the frame pointer results in .cfi_def_cfa_register 15
	 which is ok since if the stack pointer got modified it has
	 been restored already.  */
      if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
	add_reg_note (insn, REG_CFA_DEF_CFA,
		      plus_constant (Pmode, stack_pointer_rtx,
				     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* A pass run immediately before shrink-wrapping and prologue and epilogue
   generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	annotate_constant_pool_refs (insn);
	df_insn_rescan (insn);
      }
  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_s390_early_mach (gcc::context *ctxt)
{
  return new pass_s390_early_mach (ctxt);
}
/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
   - push too big immediates to the literal pool and annotate the refs
   - emit frame related notes for stack pointer changes.  */

static rtx_insn *
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset,
                           bool frame_related_p)
{
  rtx_insn *insn;
  rtx orig_offset = offset;

  gcc_assert (REG_P (target));
  gcc_assert (REG_P (reg));
  gcc_assert (CONST_INT_P (offset));

  if (offset == const0_rtx)                               /* lr/lgr */
    {
      insn = emit_move_insn (target, reg);
    }
  else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
    {
      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
                                                   offset));
    }
  else
    {
      if (!satisfies_constraint_K (offset)                /* ahi/aghi */
          && (!TARGET_EXTIMM
              || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
                  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
        offset = force_const_mem (Pmode, offset);

      if (target != reg)
        {
          insn = emit_move_insn (target, reg);
          RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
        }

      insn = emit_insn (gen_add2_insn (target, offset));

      if (!CONST_INT_P (offset))
        {
          annotate_constant_pool_refs (insn);

          if (frame_related_p)
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (target,
                                       gen_rtx_PLUS (Pmode, target,
                                                     orig_offset)));
        }
    }

  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;

  /* If this is a stack adjustment and we are generating a stack clash
     prologue, then add a REG_STACK_CHECK note to signal that this insn
     should be left alone.  */
  if (flag_stack_clash_protection && target == stack_pointer_rtx)
    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

  return insn;
}
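
/* Worked example for the strategy above (editor's sketch; the exact
   instruction choice depends on DISP_IN_RANGE and the active
   facilities): an offset of 0 becomes a plain lr/lgr register copy, a
   small offset such as -160 fits the displacement of la, while a large
   32 bit offset matching neither the K constraint (ahi/aghi) nor the
   Op/On constraints (alfi/algfi, slfi/slgfi) is spilled to the literal
   pool and applied through gen_add2_insn, with the literal pool
   reference annotated afterwards.  */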
/* Emit a compare instruction with a volatile memory access as stack
   probe.  It does not waste store tags and does not clobber any
   registers apart from the condition code.  */

static void
s390_emit_stack_probe (rtx addr)
{
  rtx mem = gen_rtx_MEM (word_mode, addr);
  MEM_VOLATILE_P (mem) = 1;
  emit_insn (gen_probe_stack (mem));
}
/* Use a runtime loop if we have to emit more probes than this.  */
#define MIN_UNROLL_PROBES 3

/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
   probe relative to the stack pointer.

   Note that SIZE is negative.

   The return value is true if TEMP_REG has been clobbered.  */

static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
                      rtx temp_reg)
{
  bool temp_reg_clobbered_p = false;
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT guard_size
    = 1 << param_stack_clash_protection_guard_size;

  if (flag_stack_clash_protection)
    {
      if (last_probe_offset + -INTVAL (size) < guard_size)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        {
          rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
          HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
          HOST_WIDE_INT num_probes = rounded_size / probe_interval;
          HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;

          if (num_probes < MIN_UNROLL_PROBES)
            {
              /* Emit unrolled probe statements.  */

              for (unsigned int i = 0; i < num_probes; i++)
                {
                  s390_prologue_plus_offset (stack_pointer_rtx,
                                             stack_pointer_rtx,
                                             GEN_INT (-probe_interval), true);
                  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       offset));
                }
              if (num_probes > 0)
                last_probe_offset = INTVAL (offset);
              dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
            }
          else
            {
              /* Emit a loop probing the pages.  */

              rtx_code_label *loop_start_label = gen_label_rtx ();

              /* From now on temp_reg will be the CFA register.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (-rounded_size), true);
              emit_label (loop_start_label);

              s390_prologue_plus_offset (stack_pointer_rtx,
                                         stack_pointer_rtx,
                                         GEN_INT (-probe_interval), false);
              s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));
              emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
                                       GT, NULL_RTX,
                                       Pmode, 1, loop_start_label);

              /* Without this make_edges ICEes.  */
              JUMP_LABEL (get_last_insn ()) = loop_start_label;
              LABEL_NUSES (loop_start_label) = 1;

              /* That's going to be a NOP since stack pointer and
                 temp_reg are supposed to be the same here.  We just
                 emit it to set the CFA reg back to r15.  */
              s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
                                         const0_rtx, true);
              temp_reg_clobbered_p = true;
              last_probe_offset = INTVAL (offset);
              dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
            }

          /* Handle any residual allocation request.  */
          s390_prologue_plus_offset (stack_pointer_rtx,
                                     stack_pointer_rtx,
                                     GEN_INT (-residual), true);
          last_probe_offset += residual;
          if (last_probe_offset >= probe_interval)
            s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (residual
                                                          - UNITS_PER_LONG)));

          return temp_reg_clobbered_p;
        }
    }

  /* Subtract frame size from stack pointer.  */
  s390_prologue_plus_offset (stack_pointer_rtx,
                             stack_pointer_rtx,
                             size, true);

  return temp_reg_clobbered_p;
}
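
/* Worked example for the probe bookkeeping above (editor's sketch,
   assuming the default 4 KiB probe interval, i.e.
   param_stack_clash_protection_probe_interval == 12): for a 10 KiB
   allocation, -INTVAL (size) == 10240, so rounded_size == 8192,
   num_probes == 2 and residual == 2048.  Since 2 < MIN_UNROLL_PROBES
   the two probes are emitted unrolled; for a 32 KiB allocation
   num_probes would be 8 and the probing loop is used instead.  */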
/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     TPF with profiling must avoid the register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else if (flag_split_stack && cfun->stdarg)
    temp_reg = gen_rtx_REG (Pmode, 12);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  /* When probing for stack-clash mitigation, we have to track the distance
     between the stack pointer and closest known reference.

     Most of the time we have to make a worst case assumption.  The
     only exception is when TARGET_BACKCHAIN is active, in which case
     we know *sp (offset 0) was written.  */
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT last_probe_offset
    = (TARGET_BACKCHAIN
       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));

  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
                        cfun_frame_layout.gprs_offset +
                        UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                                          - cfun_frame_layout.first_save_gpr_slot),
                        cfun_frame_layout.first_save_gpr,
                        cfun_frame_layout.last_save_gpr);

      /* This is not 100% correct.  If we have more than one register saved,
         then LAST_PROBE_OFFSET can move even closer to sp.  */
      last_probe_offset
        = (cfun_frame_layout.gprs_offset +
           UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                             - cfun_frame_layout.first_save_gpr_slot));

      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;
        }
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
        offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          insn = save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;

          /* If f4 and f6 are call clobbered they are saved due to
             stdargs and therefore are not frame related.  */
          if (!call_used_regs[i])
            RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (!TARGET_PACKED_STACK || call_used_regs[i])
        offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
                + (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
        if (cfun_fpr_save_p (i))
          {
            insn = save_fpr (stack_pointer_rtx, offset, i);
            if (offset < last_probe_offset)
              last_probe_offset = offset;

            RTX_FRAME_RELATED_P (insn) = 1;
            offset -= 8;
          }
      if (offset >= cfun_frame_layout.f8_offset)
        next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx_insn *stack_pointer_backup_loc;
      bool temp_reg_clobbered_p;

      if (s390_stack_size)
        {
          HOST_WIDE_INT stack_guard;

          if (s390_stack_guard)
            stack_guard = s390_stack_guard;
          else
            {
              /* If no value for stack guard is provided the smallest power of 2
                 larger than the current frame size is chosen.  */
              stack_guard = 1;
              while (stack_guard < cfun_frame_layout.frame_size)
                stack_guard <<= 1;
            }

          if (cfun_frame_layout.frame_size >= s390_stack_size)
            {
              warning (0, "frame size of function %qs is %wd"
                       " bytes exceeding user provided stack limit of "
                       "%d bytes.  "
                       "An unconditional trap is added.",
                       current_function_name(), cfun_frame_layout.frame_size,
                       s390_stack_size);
              emit_insn (gen_trap ());
            }
          else
            {
              /* stack_guard has to be smaller than s390_stack_size.
                 Otherwise we would emit an AND with zero which would
                 not match the test under mask pattern.  */
              if (stack_guard >= s390_stack_size)
                {
                  warning (0, "frame size of function %qs is %wd"
                           " bytes which is more than half the stack size. "
                           "The dynamic check would not be reliable. "
                           "No check emitted for this function.",
                           current_function_name(),
                           cfun_frame_layout.frame_size);
                }
              else
                {
                  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
                                                    & ~(stack_guard - 1));

                  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
                                       GEN_INT (stack_check_mask));
                  if (TARGET_64BIT)
                    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                  else
                    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                }
            }
        }

      if (s390_warn_framesize > 0
          && cfun_frame_layout.frame_size >= s390_warn_framesize)
        warning (0, "frame size of %qs is %wd bytes",
                 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
        warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save the location where we could backup the incoming stack
         pointer.  */
      stack_pointer_backup_loc = get_last_insn ();

      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
                                                   temp_reg);

      if (TARGET_BACKCHAIN || next_fpr)
        {
          if (temp_reg_clobbered_p)
            {
              /* allocate_stack_space had to make use of temp_reg and
                 we need it to hold a backup of the incoming stack
                 pointer.  Calculate back that value from the current
                 stack pointer.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (cfun_frame_layout.frame_size),
                                         false);
            }
          else
            {
              /* allocate_stack_space didn't actually require
                 temp_reg.  Insert the stack pointer backup insn
                 before the stack pointer decrement code - knowing now
                 that the value will survive.  */
              emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
                               stack_pointer_backup_loc);
            }
        }

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
        {
          if (cfun_frame_layout.backchain_offset)
            addr = gen_rtx_MEM (Pmode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               cfun_frame_layout.backchain_offset));
          else
            addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_move_insn (addr, temp_reg));
        }

      /* If we support non-call exceptions (e.g. for Java),
         we need to make sure the backchain pointer is set up
         before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
        {
          addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
          emit_clobber (addr);
        }
    }
  else if (flag_stack_clash_protection)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);

  /* Save fprs 8 - 15 (64 bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
         we have to make sure that the stack pointer decrement is not
         moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
                                       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
        if (cfun_fpr_save_p (i))
          {
            rtx addr = plus_constant (Pmode, stack_pointer_rtx,
                                      cfun_frame_layout.frame_size
                                      + cfun_frame_layout.f8_offset
                                      + offset);

            insn = save_fpr (temp_reg, offset, i);
            offset += 8;
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
                                       gen_rtx_REG (DFmode, i)));
          }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up got pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
        annotate_constant_pool_refs (insn);

      emit_insn (insns);
    }

#if TARGET_TPF != 0
  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function entry
         intercept to facilitate the use of tracing algorithms located
         at the branch target.  */
      emit_insn (gen_prologue_tpf (
                   GEN_INT (s390_tpf_trace_hook_prologue_check),
                   GEN_INT (s390_tpf_trace_hook_prologue_target)));

      /* Emit a blockage here so that all code lies between the
         profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
#endif
}
/* Expand the epilogue into a bunch of separate insns.  */

void
s390_emit_epilogue (bool sibcall)
{
  rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
  int area_bottom, area_top, offset = 0;
  int next_offset;
  int i;

#if TARGET_TPF != 0
  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function entry
         intercept to facilitate the use of tracing algorithms located
         at the branch target.  */

      /* Emit a blockage here so that all code lies between the
         profiling mechanisms.  */
      emit_insn (gen_blockage ());

      emit_insn (gen_epilogue_tpf (
                   GEN_INT (s390_tpf_trace_hook_epilogue_check),
                   GEN_INT (s390_tpf_trace_hook_epilogue_target)));
    }
#endif

  /* Check whether to use frame or stack pointer for restore.  */

  frame_pointer = (frame_pointer_needed
                   ? hard_frame_pointer_rtx : stack_pointer_rtx);

  s390_frame_area (&area_bottom, &area_top);

  /* Check whether we can access the register save area.
     If not, increment the frame pointer as required.  */

  if (area_top <= area_bottom)
    {
      /* Nothing to restore.  */
    }
  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
           && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
    {
      /* Area is in range.  */
      offset = cfun_frame_layout.frame_size;
    }
  else
    {
      rtx_insn *insn;
      rtx frame_off, cfa;

      offset = area_bottom < 0 ? -area_bottom : 0;
      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);

      cfa = gen_rtx_SET (frame_pointer,
                         gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
      if (DISP_IN_RANGE (INTVAL (frame_off)))
        {
          rtx set;

          set = gen_rtx_SET (frame_pointer,
                             gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
          insn = emit_insn (set);
        }
      else
        {
          if (!CONST_OK_FOR_K (INTVAL (frame_off)))
            frame_off = force_const_mem (Pmode, frame_off);

          insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
          annotate_constant_pool_refs (insn);
        }
      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Restore call saved fprs.  */

  if (TARGET_64BIT)
    {
      if (cfun_save_high_fprs_p)
        {
          next_offset = cfun_frame_layout.f8_offset;
          for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
            {
              if (cfun_fpr_save_p (i))
                {
                  restore_fpr (frame_pointer,
                               offset + next_offset, i);
                  cfa_restores
                    = alloc_reg_note (REG_CFA_RESTORE,
                                      gen_rtx_REG (DFmode, i), cfa_restores);
                  next_offset += 8;
                }
            }
        }
    }
  else
    {
      next_offset = cfun_frame_layout.f4_offset;
      /* f4, f6 */
      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
        {
          if (cfun_fpr_save_p (i))
            {
              restore_fpr (frame_pointer,
                           offset + next_offset, i);
              cfa_restores
                = alloc_reg_note (REG_CFA_RESTORE,
                                  gen_rtx_REG (DFmode, i), cfa_restores);
              next_offset += 8;
            }
          else if (!TARGET_PACKED_STACK)
            next_offset += 8;
        }
    }

  /* Restore call saved gprs.  */

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      rtx insn, addr;

      /* Check for global register and save them
         to stack location from where they get restored.  */

      for (i = cfun_frame_layout.first_restore_gpr;
           i <= cfun_frame_layout.last_restore_gpr;
           i++)
        {
          if (global_not_special_regno_p (i))
            {
              addr = plus_constant (Pmode, frame_pointer,
                                    offset + cfun_frame_layout.gprs_offset
                                    + (i - cfun_frame_layout.first_save_gpr_slot)
                                    * UNITS_PER_LONG);
              addr = gen_rtx_MEM (Pmode, addr);
              set_mem_alias_set (addr, get_frame_alias_set ());
              emit_move_insn (addr, gen_rtx_REG (Pmode, i));
            }
          else
            cfa_restores
              = alloc_reg_note (REG_CFA_RESTORE,
                                gen_rtx_REG (Pmode, i), cfa_restores);
        }

      /* Fetch return address from stack before load multiple,
         this will do good for scheduling.

         Only do this if we already decided that r14 needs to be
         saved to a stack slot.  (And not just because r14 happens to
         be in between two GPRs which need saving.)  Otherwise it
         would be difficult to take that decision back in
         s390_optimize_prologue.

         This optimization is only helpful on in-order machines.  */
      if (! sibcall
          && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
          && s390_tune <= PROCESSOR_2097_Z10)
        {
          int return_regnum = find_unused_clobbered_reg();
          if (!return_regnum
              || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
                  && !TARGET_CPU_Z10
                  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
            {
              gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
              return_regnum = 4;
            }
          return_reg = gen_rtx_REG (Pmode, return_regnum);

          addr = plus_constant (Pmode, frame_pointer,
                                offset + cfun_frame_layout.gprs_offset
                                + (RETURN_REGNUM
                                   - cfun_frame_layout.first_save_gpr_slot)
                                * UNITS_PER_LONG);
          addr = gen_rtx_MEM (Pmode, addr);
          set_mem_alias_set (addr, get_frame_alias_set ());
          emit_move_insn (return_reg, addr);

          /* Once we did that optimization we have to make sure
             s390_optimize_prologue does not try to remove the store
             of r14 since we will not be able to find the load issued
             here.  */
          cfun_frame_layout.save_return_addr_p = true;
        }

      insn = restore_gprs (frame_pointer,
                           offset + cfun_frame_layout.gprs_offset
                           + (cfun_frame_layout.first_restore_gpr
                              - cfun_frame_layout.first_save_gpr_slot)
                           * UNITS_PER_LONG,
                           cfun_frame_layout.first_restore_gpr,
                           cfun_frame_layout.last_restore_gpr);
      insn = emit_insn (insn);
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  s390_restore_gprs_from_fprs ();

  if (! sibcall)
    {
      if (!return_reg && !s390_can_use_return_insn ())
        /* We planned to emit (return), but we are not allowed to.  */
        return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);

      if (return_reg)
        /* Emit (return) and (use).  */
        emit_jump_insn (gen_return_use (return_reg));
      else
        /* The fact that RETURN_REGNUM is used is already reflected by
           EPILOGUE_USES.  Emit plain (return).  */
        emit_jump_insn (gen_return ());
    }
}
/* Implement TARGET_SET_UP_BY_PROLOGUE.  */

static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
{
  if (cfun->machine->base_reg
      && !call_used_regs[REGNO (cfun->machine->base_reg)])
    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}
/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  */
static GTY(()) rtx morestack_ref;

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 1024

/* Emit the parmblock for __morestack into .rodata section.  It
   consists of 3 pointer size entries:
   - frame size
   - size of stack arguments
   - offset between parm block and __morestack return label  */

void
s390_output_split_stack_data (rtx parm_block, rtx call_done,
                              rtx frame_size, rtx args_size)
{
  rtx ops[] = { parm_block, call_done };

  switch_to_section (targetm.asm_out.function_rodata_section
                     (current_function_decl, false));

  if (TARGET_64BIT)
    output_asm_insn (".align\t8", NULL);
  else
    output_asm_insn (".align\t4", NULL);

  (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                     CODE_LABEL_NUMBER (parm_block));
  if (TARGET_64BIT)
    {
      output_asm_insn (".quad\t%0", &frame_size);
      output_asm_insn (".quad\t%0", &args_size);
      output_asm_insn (".quad\t%1-%0", ops);
    }
  else
    {
      output_asm_insn (".long\t%0", &frame_size);
      output_asm_insn (".long\t%0", &args_size);
      output_asm_insn (".long\t%1-%0", ops);
    }

  switch_to_section (current_function_section ());
}
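
/* For illustration (editor's sketch, label numbers made up), the
   64 bit parmblock emitted above looks roughly like:

       .align  8
   .L5:
       .quad   <frame size>
       .quad   <stack argument size>
       .quad   .L6-.L5          # offset to the __morestack return label

   where .L6 is the call_done label emitted by
   s390_expand_split_stack_prologue below.  */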
/* Emit -fsplit-stack prologue, which goes before the regular function
   prologue.  */

void
s390_expand_split_stack_prologue (void)
{
  rtx r1, guard, cc = NULL;
  rtx_insn *insn;
  /* Offset from thread pointer to __private_ss.  */
  int psso = TARGET_64BIT ? 0x38 : 0x20;
  /* Pointer size in bytes.  */
  /* Frame size and argument size - the two parameters to __morestack.  */
  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
  /* Align argument size to 8 bytes - simplifies __morestack code.  */
  HOST_WIDE_INT args_size = crtl->args.size >= 0
                            ? ((crtl->args.size + 7) & ~7)
                            : 0;
  /* Label to be called by __morestack.  */
  rtx_code_label *call_done = NULL;
  rtx_code_label *parm_base = NULL;
  rtx tmp;

  gcc_assert (flag_split_stack && reload_completed);

  r1 = gen_rtx_REG (Pmode, 1);

  /* If no stack frame will be allocated, don't do anything.  */
  if (!frame_size)
    {
      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, just use r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));
        }
      return;
    }

  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
                                           | SYMBOL_FLAG_FUNCTION);
    }

  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
    {
      /* If frame_size will fit in an add instruction, do a stack space
         check, and only call __morestack if there's not enough space.  */

      /* Get thread pointer.  r1 is the only register we can always destroy - r0
         could contain a static chain (and cannot be used to address memory
         anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
      emit_insn (gen_get_thread_pointer (Pmode, r1));
      /* Aim at __private_ss.  */
      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));

      /* If less than 1kiB used, skip addition and compare directly with
         __private_ss.  */
      if (frame_size > SPLIT_STACK_AVAILABLE)
        {
          emit_move_insn (r1, guard);
          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
          else
            emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
          guard = r1;
        }

      /* Compare the (maybe adjusted) guard with the stack pointer.  */
      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
    }

  call_done = gen_label_rtx ();
  parm_base = gen_label_rtx ();
  LABEL_NUSES (parm_base)++;
  LABEL_NUSES (call_done)++;

  /* %r1 = litbase.  */
  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* Now, we need to call __morestack.  It has very special calling
     conventions: it preserves param/return/static chain registers for
     calling main function body, and looks for its own parameters at %r1.  */
  if (cc != NULL)
    tmp = gen_split_stack_cond_call (Pmode,
                                     morestack_ref,
                                     parm_base,
                                     call_done,
                                     GEN_INT (frame_size),
                                     GEN_INT (args_size),
                                     cc);
  else
    tmp = gen_split_stack_call (Pmode,
                                morestack_ref,
                                parm_base,
                                call_done,
                                GEN_INT (frame_size),
                                GEN_INT (args_size));

  insn = emit_jump_insn (tmp);
  JUMP_LABEL (insn) = call_done;
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  add_reg_note (insn, REG_LABEL_OPERAND, call_done);

  if (cc != NULL)
    {
      /* Mark the jump as very unlikely to be taken.  */
      add_reg_br_prob_note (insn,
                            profile_probability::very_unlikely ());

      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, and __morestack was not called, just use
             r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));
        }
    }
  else
    {
      emit_barrier ();
    }

  /* __morestack will call us here.  */

  emit_label (call_done);
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a register.  */

static void
s390_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, 1);
    }
}
/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}
/* Return true if the epilogue is guaranteed to contain only a return
   instruction and if a direct return can therefore be used instead.
   One of the main advantages of using direct return instructions
   is that we can then use conditional returns.  */

bool
s390_can_use_return_insn (void)
{
  int i;

  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;

  /* For 31 bit this is not covered by the frame_size check below
     since f4, f6 are saved in the register save area without needing
     additional stack space.  */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
}
/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */
static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return ((TARGET_VX_ABI
           && typelist == 0
           && VECTOR_TYPE_P (TREE_TYPE (val))
           && (funcdecl == NULL_TREE
               || (TREE_CODE (funcdecl) == FUNCTION_DECL
                   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
          ? N_("vector argument passed to unprototyped function")
          : NULL);
}
/* Return the size in bytes of a function argument of
   type TYPE and/or mode MODE.  At least one of TYPE or
   MODE must be specified.  */

static int
s390_function_arg_size (machine_mode mode, const_tree type)
{
  if (type)
    return int_size_in_bytes (type);

  /* No type info available for some library calls ...  */
  if (mode != BLKmode)
    return GET_MODE_SIZE (mode);

  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
}
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a vector register, if available.  */

static bool
s390_function_arg_vector (machine_mode mode, const_tree type)
{
  if (!TARGET_VX_ABI)
    return false;

  if (s390_function_arg_size (mode, type) > 16)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return VECTOR_MODE_P (mode);

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  int empty_base_seen = 0;
  const_tree orig_type = type;
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (DECL_FIELD_ABI_IGNORED (field))
            {
              if (lookup_attribute ("no_unique_address",
                                    DECL_ATTRIBUTES (field)))
                empty_base_seen |= 2;
              else
                empty_base_seen |= 1;
              continue;
            }

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;

      /* If the field declaration adds extra bytes due to e.g. padding
         this is not accepted as a vector type.  */
      if (int_size_in_bytes (single) <= 0
          || int_size_in_bytes (single) != int_size_in_bytes (type))
        return false;
      type = single;
    }

  if (!VECTOR_TYPE_P (type))
    return false;

  if (warn_psabi && empty_base_seen)
    {
      static unsigned last_reported_type_uid;
      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
      if (uid != last_reported_type_uid)
        {
          const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
          last_reported_type_uid = uid;
          if (empty_base_seen & 1)
            inform (input_location,
                    "parameter passing for argument of type %qT when C++17 "
                    "is enabled changed to match C++14 %{in GCC 10.1%}",
                    orig_type, url);
          else
            inform (input_location,
                    "parameter passing for argument of type %qT with "
                    "%<[[no_unique_address]]%> members changed "
                    "%{in GCC 10.1%}", orig_type, url);
        }
    }

  return true;
}
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a floating-point register, if available.  */

static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
  if (s390_function_arg_size (mode, type) > 8)
    return false;

  /* Soft-float changes the ABI: no floating-point registers are used.  */
  if (TARGET_SOFT_FLOAT)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  int empty_base_seen = 0;
  const_tree orig_type = type;
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (DECL_FIELD_ABI_IGNORED (field))
            {
              if (lookup_attribute ("no_unique_address",
                                    DECL_ATTRIBUTES (field)))
                empty_base_seen |= 2;
              else
                empty_base_seen |= 1;
              continue;
            }

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;

      type = single;
    }

  if (TREE_CODE (type) != REAL_TYPE)
    return false;

  if (warn_psabi && empty_base_seen)
    {
      static unsigned last_reported_type_uid;
      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
      if (uid != last_reported_type_uid)
        {
          const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
          last_reported_type_uid = uid;
          if (empty_base_seen & 1)
            inform (input_location,
                    "parameter passing for argument of type %qT when C++17 "
                    "is enabled changed to match C++14 %{in GCC 10.1%}",
                    orig_type, url);
          else
            inform (input_location,
                    "parameter passing for argument of type %qT with "
                    "%<[[no_unique_address]]%> members changed "
                    "%{in GCC 10.1%}", orig_type, url);
        }
    }

  return true;
}
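
/* Example of the single-member-record rule implemented above (editor's
   note): a type like

       struct wrapper { double d; };

   is unwrapped to its only field and passed in an FPR just like a
   plain double.  A record with two members, or one whose overall size
   exceeds that of its single member because of padding, keeps its
   aggregate treatment instead.  */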
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in an integer register, or a pair of integer
   registers, if available.  */

static bool
s390_function_arg_integer (machine_mode mode, const_tree type)
{
  int size = s390_function_arg_size (mode, type);
  if (size > 8)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return GET_MODE_CLASS (mode) == MODE_INT
           || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));

  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == NULLPTR_TYPE
      || TREE_CODE (type) == OFFSET_TYPE
      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
    return true;

  /* We also accept structs of size 1, 2, 4, 8 that are not
     passed in floating-point registers.  */
  if (AGGREGATE_TYPE_P (type)
      && exact_log2 (size) >= 0
      && !s390_function_arg_float (mode, type))
    return true;

  return false;
}
/* Return 1 if a function argument ARG is to be passed by reference.
   The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
   are passed by value, all other structures (and complex numbers) are
   passed by reference.  */

static bool
s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  int size = s390_function_arg_size (arg.mode, arg.type);

  if (s390_function_arg_vector (arg.mode, arg.type))
    return false;

  if (size > 8)
    return true;

  if (tree type = arg.type)
    {
      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
        return true;

      if (TREE_CODE (type) == COMPLEX_TYPE
          || TREE_CODE (type) == VECTOR_TYPE)
        return true;
    }

  return false;
}
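
/* Examples for the rules above (editor's note): structs of size 1, 2,
   4 or 8 bytes (exact_log2 (size) >= 0) are passed by value; a 3 byte
   or 12 byte struct, any _Complex value and any vector type not
   already handled by s390_function_arg_vector are passed by
   reference.  */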
/* Update the data in CUM to advance over argument ARG.  */

static void
s390_function_arg_advance (cumulative_args_t cum_v,
                           const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (s390_function_arg_vector (arg.mode, arg.type))
    {
      /* We are called for unnamed vector stdarg arguments which are
         passed on the stack.  In this case this hook does not have to
         do anything since stack arguments are tracked by common
         code.  */
      if (!arg.named)
        return;
      cum->vrs += 1;
    }
  else if (s390_function_arg_float (arg.mode, arg.type))
    {
      cum->fprs += 1;
    }
  else if (s390_function_arg_integer (arg.mode, arg.type))
    {
      int size = s390_function_arg_size (arg.mode, arg.type);
      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
    }
  else
    gcc_unreachable ();
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On S/390, we use general purpose registers 2 through 6 to
   pass integer, pointer, and certain structure arguments, and
   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
   to pass floating point arguments.  All remaining arguments
   are pushed to the stack.  */

static rtx
s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!arg.named)
    s390_check_type_for_vector_abi (arg.type, true, false);

  if (s390_function_arg_vector (arg.mode, arg.type))
    {
      /* Vector arguments being part of the ellipsis are passed on the
         stack.  */
      if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
        return NULL_RTX;

      return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
    }
  else if (s390_function_arg_float (arg.mode, arg.type))
    {
      if (cum->fprs + 1 > FP_ARG_NUM_REG)
        return NULL_RTX;
      else
        return gen_rtx_REG (arg.mode, cum->fprs + 16);
    }
  else if (s390_function_arg_integer (arg.mode, arg.type))
    {
      int size = s390_function_arg_size (arg.mode, arg.type);
      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
        return NULL_RTX;
      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
        return gen_rtx_REG (arg.mode, cum->gprs + 2);
      else if (n_gprs == 2)
        {
          rtvec p = rtvec_alloc (2);

          RTVEC_ELT (p, 0)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
                                 const0_rtx);
          RTVEC_ELT (p, 1)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
                                 GEN_INT (4));

          return gen_rtx_PARALLEL (arg.mode, p);
        }
    }

  /* After the real arguments, expand_call calls us once again with an
     end marker.  Whatever we return here is passed as operand 2 to the
     call expanders.

     We don't need this feature ...  */
  else if (arg.end_marker_p ())
    return const0_rtx;

  gcc_unreachable ();
}
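
/* Mapping example (editor's sketch) for a 64 bit call such as

       void f (int i, double d, long l, struct { char c[8]; } s);

   i lands in %r2, d in %f0, l in %r3 and the 8 byte struct in %r4;
   two more integer arguments would still fit in %r5 and %r6, anything
   beyond that goes to the stack (NULL_RTX is returned above).  */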
/* Implement TARGET_FUNCTION_ARG_PADDING.  Vector arguments are
   left-justified when placed on the stack during parameter passing.  */

static pad_direction
s390_function_arg_padding (machine_mode mode, const_tree type)
{
  if (s390_function_arg_vector (mode, type))
    return PAD_UPWARD;

  return default_function_arg_padding (mode, type);
}
/* Return true if return values of type TYPE should be returned
   in a memory buffer whose address is passed by the caller as
   hidden first argument.  */

static bool
s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
{
  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == OFFSET_TYPE
      || TREE_CODE (type) == REAL_TYPE)
    return int_size_in_bytes (type) > 8;

  /* vector types which fit into a VR.  */
  if (TARGET_VX_ABI
      && VECTOR_TYPE_P (type)
      && int_size_in_bytes (type) <= 16)
    return false;

  /* Aggregates and similar constructs are always returned
     in memory.  */
  if (AGGREGATE_TYPE_P (type)
      || TREE_CODE (type) == COMPLEX_TYPE
      || VECTOR_TYPE_P (type))
    return true;

  /* ??? We get called on all sorts of random stuff from
     aggregate_value_p.  We can't abort, but it's not clear
     what's safe to return.  Pretend it's a struct I guess.  */
  return true;
}
/* Function arguments and return values are promoted to word size.  */

static machine_mode
s390_promote_function_mode (const_tree type, machine_mode mode,
                            int *punsignedp,
                            const_tree fntype ATTRIBUTE_UNUSED,
                            int for_return ATTRIBUTE_UNUSED)
{
  if (INTEGRAL_MODE_P (mode)
      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
    {
      if (type != NULL_TREE && POINTER_TYPE_P (type))
        *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }

  return mode;
}
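
/* Example (editor's note): with UNITS_PER_LONG == 8 a char, short or
   int argument is widened to Pmode, so callers pass and callees expect
   a full register holding a valid extension of the value; pointer
   types have their signedness taken from POINTERS_EXTEND_UNSIGNED.  */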
/* Define where to return a (scalar) value of type RET_TYPE.
   If RET_TYPE is null, define where to return a (scalar)
   value of mode MODE from a libcall.  */

static rtx
s390_function_and_libcall_value (machine_mode mode,
                                 const_tree ret_type,
                                 const_tree fntype_or_decl,
                                 bool outgoing ATTRIBUTE_UNUSED)
{
  /* For vector return types it is important to use the RET_TYPE
     argument whenever available since the middle-end might have
     changed the mode to a scalar mode.  */
  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
                            || (!ret_type && VECTOR_MODE_P (mode)));

  /* For normal functions perform the promotion as
     promote_function_mode would do.  */
  if (ret_type)
    {
      int unsignedp = TYPE_UNSIGNED (ret_type);
      mode = promote_function_mode (ret_type, mode, &unsignedp,
                                    fntype_or_decl, 1);
    }

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || SCALAR_FLOAT_MODE_P (mode)
              || (TARGET_VX_ABI && vector_ret_type_p));
  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));

  if (TARGET_VX_ABI && vector_ret_type_p)
    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
    return gen_rtx_REG (mode, 16);
  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
           || UNITS_PER_LONG == UNITS_PER_WORD)
    return gen_rtx_REG (mode, 2);
  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
    {
      /* This case is triggered when returning a 64 bit value with
         -m31 -mzarch.  Although the value would fit into a single
         register it has to be forced into a 32 bit register pair in
         order to match the ABI.  */
      rtvec p = rtvec_alloc (2);

      RTVEC_ELT (p, 0)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
      RTVEC_ELT (p, 1)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));

      return gen_rtx_PARALLEL (mode, p);
    }

  gcc_unreachable ();
}
/* Define where to return a scalar return value of type RET_TYPE.  */

static rtx
s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
                     bool outgoing)
{
  return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
                                          fn_decl_or_type, outgoing);
}

/* Define where to return a scalar libcall return value of mode
   MODE.  */

static rtx
s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return s390_function_and_libcall_value (mode, NULL_TREE,
                                          NULL_TREE, true);
}
/* Create and return the va_list datatype.

   On S/390, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            long __gpr;
            long __fpr;
            void *__overflow_arg_area;
            void *__reg_save_area;
        } va_list[1];

   where __gpr and __fpr hold the number of general purpose
   or floating point arguments used up to now, respectively,
   __overflow_arg_area points to the stack location of the
   next argument passed on the stack, and __reg_save_area
   always points to the start of the register area in the
   call frame of the current function.  The function prologue
   saves all registers used for argument passing into this
   area if the function uses variable arguments.  */

static tree
s390_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);

  type_decl =
    build_decl (BUILTINS_LOCATION,
                TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__gpr"),
                      long_integer_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__fpr"),
                      long_integer_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   STDARG_P is always true, and ignored.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info:
       holds number of gprs and fprs used for named arguments.
     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

static void
s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT n_gpr, n_fpr;
  int off;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */

  n_gpr = crtl->args.info.gprs;
  n_fpr = crtl->args.info.fprs;

  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
                  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
                  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (flag_split_stack
     && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
         == NULL)
     && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      rtx reg;
      rtx_insn *seq;

      reg = gen_reg_rtx (Pmode);
      cfun->machine->split_stack_varargs_pointer = reg;

      start_sequence ();
      emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
      seq = get_insns ();
      end_sequence ();

      push_topmost_sequence ();
      emit_insn_after (seq, entry_of_function ());
      pop_topmost_sequence ();
    }

  /* Find the overflow area.
     FIXME: This currently is too pessimistic when the vector ABI is
     enabled.  In that case we *always* set up the overflow area
     pointer.  */
  if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
      || TARGET_VX_ABI)
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
      else
        t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);

      off = INTVAL (crtl->args.arg_offset_rtx);
      off = off < 0 ? 0 : off;
      if (TARGET_DEBUG_ARG)
        fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
                 (int)n_gpr, (int)n_fpr, off);

      t = fold_build_pointer_plus_hwi (t, off);

      t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the register save area.  */
  if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
      || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
    {
      t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
      t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);

      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
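
/* Example (editor's sketch): for a variadic function like

       void log_it (const char *fmt, double scale, ...);

   one GPR and one FPR are consumed by the named arguments, so
   va_start stores __gpr = 1 and __fpr = 1, points __overflow_arg_area
   at the first stack-passed vararg and __reg_save_area at the
   register save area of the current frame.  */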
/* Implement va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   Generates code equivalent to:

   if (integral value) {
     if (size  <= 4 && args.gpr < 5 ||
         size  > 4 && args.gpr < 4 )
       ret = args.reg_save_area[args.gpr+8]
     else
       ret = *args.overflow_arg_area++;
   } else if (vector value) {
       ret = *args.overflow_arg_area;
       args.overflow_arg_area += size / 8;
   } else if (float value) {
     if (args.fpr < 2)
       ret = args.reg_save_area[args.fpr+64]
     else
       ret = *args.overflow_arg_area++;
   } else if (aggregate value) {
     if (args.gpr < 5)
       ret = *args.reg_save_area[args.gpr]
     else
       ret = **args.overflow_arg_area++;
   } */

static tree
s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
  tree lab_false, lab_over = NULL_TREE;
  tree addr = create_tmp_var (ptr_type_node, "addr");
  bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
                        a stack slot.  */

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* The tree for args* cannot be shared between gpr/fpr and ovf since
     both appear on a lhs.  */
  valist = unshare_expr (valist);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);

  size = int_size_in_bytes (type);

  s390_check_type_for_vector_abi (type, true, false);

  if (pass_va_arg_by_reference (type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: aggregate type");
          debug_tree (type);
        }

      /* Aggregates are passed by reference.  */
      indirect_p = 1;
      reg = gpr;
      n_reg = 1;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args always an even
         number of gprs has to be saved r15-r2 = 14 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;
      sav_scale = UNITS_PER_LONG;
      size = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else if (s390_function_arg_vector (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: vector type");
          debug_tree (type);
        }

      indirect_p = 0;
      reg = NULL_TREE;
      n_reg = 0;
      sav_ofs = 0;
      sav_scale = 8;
      max_reg = 0;
      left_align_p = true;
    }
  else if (s390_function_arg_float (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: float type");
          debug_tree (type);
        }

      /* FP args go in FP registers, if present.  */
      indirect_p = 0;
      reg = fpr;
      n_reg = 1;
      sav_ofs = 16 * UNITS_PER_LONG;
      sav_scale = 8;
      max_reg = FP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: other type");
          debug_tree (type);
        }

      /* Otherwise into GP registers.  */
      indirect_p = 0;
      reg = gpr;
      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args always an even
         number of gprs has to be saved r15-r2 = 14 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;

      if (size < UNITS_PER_LONG)
        sav_ofs += UNITS_PER_LONG - size;

      sav_scale = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }

  /* Pull the value out of the saved registers ...  */

  if (reg != NULL_TREE)
    {
      /*
        if (reg > ((typeof (reg))max_reg))
          goto lab_false;

        addr = sav + sav_ofs + reg * save_scale;

        goto lab_over;

        lab_false:
      */

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
      t = build2 (GT_EXPR, boolean_type_node, reg, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
      t = fold_build_pointer_plus (t, u);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... Otherwise out of the overflow area.  */

  t = ovf;
  if (size < UNITS_PER_LONG && !left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (addr, t, pre_p);

  if (size < UNITS_PER_LONG && left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
  else
    t = fold_build_pointer_plus_hwi (t, size);

  gimplify_assign (ovf, t, pre_p);

  if (reg != NULL_TREE)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));


  /* Increment register save count.  */

  if (n_reg > 0)
    {
      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
      gimplify_and_add (u, pre_p);
    }

  if (indirect_p)
    {
      t = build_pointer_type_for_mode (build_pointer_type (type),
                                       ptr_mode, true);
      addr = fold_convert (t, addr);
      addr = build_va_arg_indirect_ref (addr);
    }
  else
    {
      t = build_pointer_type_for_mode (type, ptr_mode, true);
      addr = fold_convert (t, addr);
    }

  return build_va_arg_indirect_ref (addr);
}
/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
   expanders.
   DEST  - Register location where CC will be stored.
   TDB   - Pointer to a 256 byte area where to store the transaction
           diagnostic block.  NULL if TDB is not needed.
   RETRY - Retry count value.  If non-NULL a retry loop for CC2
           is emitted.
   CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
           of the tbegin instruction pattern.  */

void
s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
{
  rtx retry_plus_two = gen_reg_rtx (SImode);
  rtx retry_reg = gen_reg_rtx (SImode);
  rtx_code_label *retry_label = NULL;

  if (retry != NULL_RTX)
    {
      emit_move_insn (retry_reg, retry);
      emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
      emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
      retry_label = gen_label_rtx ();
      emit_label (retry_label);
    }

  if (clobber_fprs_p)
    {
      if (TARGET_VX)
        emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));
      else
        emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                 tdb));
    }
  else
    emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));

  emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
                                        gen_rtvec (1, gen_rtx_REG (CCRAWmode,
                                                                   CC_REGNUM)),
                                        UNSPEC_CC_TO_INT));
  if (retry != NULL_RTX)
    {
      const int CC0 = 1 << 3;
      const int CC1 = 1 << 2;
      const int CC3 = 1 << 0;
      rtx_insn *jump;
      rtx count = gen_reg_rtx (SImode);
      rtx_code_label *leave_label = gen_label_rtx ();

      /* Exit for success and permanent failures.  */
      jump = s390_emit_jump (leave_label,
                             gen_rtx_EQ (VOIDmode,
                                         gen_rtx_REG (CCRAWmode, CC_REGNUM),
                                         gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
      LABEL_NUSES (leave_label) = 1;

      /* CC2 - transient failure.  Perform retry with ppa.  */
      emit_move_insn (count, retry_plus_two);
      emit_insn (gen_subsi3 (count, count, retry_reg));
      emit_insn (gen_tx_assist (count));
      jump = emit_jump_insn (gen_doloop_si64 (retry_label,
                                              retry_reg,
                                              retry_reg));
      JUMP_LABEL (jump) = retry_label;
      LABEL_NUSES (retry_label) = 1;
      emit_label (leave_label);
    }
}
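
/* Note on the CC masks above (editor's explanation, CC semantics per
   the transactional-execution definition of tbegin): in CCRAWmode a
   condition code value cc is represented as the bit 1 << (3 - cc), so
   CC0 | CC1 | CC3 covers successful start, indeterminate and
   persistent failure, and only CC2 (transient failure) falls through
   into the ppa-assisted retry loop.  */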
/* Return the decl for the target specific builtin with the function
   code FCODE.  */

static tree
s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
{
  if (fcode >= S390_BUILTIN_MAX)
    return error_mark_node;

  return s390_builtin_decls[fcode];
}
/* We call mcount before the function prologue.  So a profiled leaf
   function should stay a leaf function.  */

static bool
s390_keep_leaf_when_profiled ()
{
  return true;
}
/* Output assembly code for the trampoline template to
   stdio stream FILE.

   On S/390, we use gpr 1 internally in the trampoline code;
   gpr 0 is used to hold the static chain.  */

static void
s390_asm_trampoline_template (FILE *file)
{
  rtx op[2];
  op[0] = gen_rtx_REG (Pmode, 0);
  op[1] = gen_rtx_REG (Pmode, 1);

  if (TARGET_64BIT)
    {
      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
      output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
      output_asm_insn ("br\t%1", op);             /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
    }
  else
    {
      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
      output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
      output_asm_insn ("br\t%1", op);             /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
    }
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx mem;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
  emit_move_insn (mem, cxt);
  mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
  emit_move_insn (mem, fnaddr);
}
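
/* Resulting 64 bit trampoline layout (editor's sketch, offsets in
   bytes):

        0:  basr %r1,0             # leaves tramp+2 in %r1
        2:  lmg  %r0,%r1,14(%r1)   # loads the two slots below
        8:  br   %r1
       10:  <pad to 16>
       16:  static chain value     # stored by s390_trampoline_init
       24:  function address

   The displacement 14(%r1) works out because basr leaves the address
   of the lmg itself (offset 2) in %r1, and 2 + 14 == 16.  */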
/* Emit HW halfwords worth of NOPs, annotated with USER, the option
   that asked for them.  */

static void
output_asm_nops (const char *user, int hw)
{
  asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
  while (hw > 0)
    {
      if (hw >= 3)
        {
          output_asm_insn ("brcl\t0,0", NULL);
          hw -= 3;
        }
      else if (hw >= 2)
        {
          output_asm_insn ("bc\t0,0", NULL);
          hw -= 2;
        }
      else
        {
          output_asm_insn ("bcr\t0,0", NULL);
          hw -= 1;
        }
    }
}
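
/* Halfword accounting for the sequence above (editor's note): brcl is
   6 bytes (3 halfwords), bc is 4 bytes (2 halfwords) and bcr is
   2 bytes (1 halfword), so any requested hw count can be tiled
   greedily; e.g. hw == 4 becomes one brcl plus one bcr.  */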
/* Output assembler code to FILE to call a profiler hook.  */

void
s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  rtx op[4];

  fprintf (file, "# function profiler \n");

  op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
  op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
  op[3] = GEN_INT (UNITS_PER_LONG);

  op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
  SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION;
  if (flag_pic && !TARGET_64BIT)
    {
      op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31);
      op[2] = gen_rtx_CONST (Pmode, op[2]);
    }

  if (flag_record_mcount)
    fprintf (file, "1:\n");

  if (flag_fentry)
    {
      if (flag_nop_mcount)
	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
      else if (cfun->static_chain_decl)
	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
		 "with %<-mfentry%> on s390");
      else
	output_asm_insn ("brasl\t0,%2%K2", op);
    }
  else if (TARGET_64BIT)
    {
      if (flag_nop_mcount)
	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3
			 + /* lg */ 3);
      else
	{
	  output_asm_insn ("stg\t%0,%1", op);
	  if (flag_dwarf2_cfi_asm)
	    output_asm_insn (".cfi_rel_offset\t%0,%3", op);
	  output_asm_insn ("brasl\t%0,%2%K2", op);
	  output_asm_insn ("lg\t%0,%1", op);
	  if (flag_dwarf2_cfi_asm)
	    output_asm_insn (".cfi_restore\t%0", op);
	}
    }
  else
    {
      if (flag_nop_mcount)
	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3
			 + /* l */ 2);
      else
	{
	  output_asm_insn ("st\t%0,%1", op);
	  if (flag_dwarf2_cfi_asm)
	    output_asm_insn (".cfi_rel_offset\t%0,%3", op);
	  output_asm_insn ("brasl\t%0,%2%K2", op);
	  output_asm_insn ("l\t%0,%1", op);
	  if (flag_dwarf2_cfi_asm)
	    output_asm_insn (".cfi_restore\t%0", op);
	}
    }

  if (flag_record_mcount)
    {
      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */

static void
s390_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL)
    {
      /* Store the alignment to be able to check if we can use
	 a larl/load-relative instruction.  We only handle the cases
	 that can go wrong (i.e. no FUNC_DECLs).  */
      if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 32)
	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 64)
	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }

  /* Literal pool references don't have a decl so they are handled
     differently here.  We rely on the information in the MEM_ALIGN
     entry to decide upon the alignment.  */
  if (MEM_P (rtl)
      && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
      && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
    {
      if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 32)
	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 64)
	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }
}
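/* Example: a variable with DECL_ALIGN == 8, i.e. a single byte, gets
   SYMBOL_FLAG_SET_NOTALIGN2, which keeps the 2-byte-aligned larl
   instruction from being used to address it; an object aligned to
   8 bytes (DECL_ALIGN % 64 == 0) keeps all load-relative variants
   available.  */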
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
  rtx op[10];
  int nonlocal = 0;

  assemble_start_function (thunk, fnname);
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);
  if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
    {
      nonlocal = 1;
      if (!TARGET_64BIT)
	{
	  op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT);
	  op[0] = gen_rtx_CONST (Pmode, op[0]);
	}
    }

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, 3);
  else
    op[1] = gen_rtx_REG (Pmode, 2);

  /* Operand 2 is the delta.  */
  op[2] = GEN_INT (delta);

  /* Operand 3 is the vcall_offset.  */
  op[3] = GEN_INT (vcall_offset);

  /* Operand 4 is the temporary register.  */
  op[4] = gen_rtx_REG (Pmode, 1);

  /* Operands 5 to 8 can be used as labels.  */
  op[5] = NULL_RTX;
  op[6] = NULL_RTX;
  op[7] = NULL_RTX;
  op[8] = NULL_RTX;

  /* Operand 9 can be used for temporary register.  */
  op[9] = NULL_RTX;

  /* Generate code.  */
  if (TARGET_64BIT)
    {
      /* Setup literal pool pointer if required.  */
      if ((!DISP_IN_RANGE (delta)
	   && !CONST_OK_FOR_K (delta)
	   && !CONST_OK_FOR_Os (delta))
	  || (!DISP_IN_RANGE (vcall_offset)
	      && !CONST_OK_FOR_K (vcall_offset)
	      && !CONST_OK_FOR_Os (vcall_offset)))
	{
	  op[5] = gen_label_rtx ();
	  output_asm_insn ("larl\t%4,%5", op);
	}

      /* Add DELTA to this pointer.  */
      if (delta)
	{
	  if (CONST_OK_FOR_J (delta))
	    output_asm_insn ("la\t%1,%2(%1)", op);
	  else if (DISP_IN_RANGE (delta))
	    output_asm_insn ("lay\t%1,%2(%1)", op);
	  else if (CONST_OK_FOR_K (delta))
	    output_asm_insn ("aghi\t%1,%2", op);
	  else if (CONST_OK_FOR_Os (delta))
	    output_asm_insn ("agfi\t%1,%2", op);
	  else
	    {
	      op[6] = gen_label_rtx ();
	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
	    }
	}

      /* Perform vcall adjustment.  */
      if (vcall_offset)
	{
	  if (DISP_IN_RANGE (vcall_offset))
	    {
	      output_asm_insn ("lg\t%4,0(%1)", op);
	      output_asm_insn ("ag\t%1,%3(%4)", op);
	    }
	  else if (CONST_OK_FOR_K (vcall_offset))
	    {
	      output_asm_insn ("lghi\t%4,%3", op);
	      output_asm_insn ("ag\t%4,0(%1)", op);
	      output_asm_insn ("ag\t%1,0(%4)", op);
	    }
	  else if (CONST_OK_FOR_Os (vcall_offset))
	    {
	      output_asm_insn ("lgfi\t%4,%3", op);
	      output_asm_insn ("ag\t%4,0(%1)", op);
	      output_asm_insn ("ag\t%1,0(%4)", op);
	    }
	  else
	    {
	      op[7] = gen_label_rtx ();
	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
	      output_asm_insn ("ag\t%4,0(%1)", op);
	      output_asm_insn ("ag\t%1,0(%4)", op);
	    }
	}

      /* Jump to target.  */
      output_asm_insn ("jg\t%0%K0", op);

      /* Output literal pool if required.  */
      if (op[5])
	{
	  output_asm_insn (".align\t4", op);
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[5]));
	}
      if (op[6])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[6]));
	  output_asm_insn (".long\t%2", op);
	}
      if (op[7])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[7]));
	  output_asm_insn (".long\t%3", op);
	}
    }
  else
    {
      /* Setup base pointer if required.  */
      if (!vcall_offset
	  || (!DISP_IN_RANGE (delta)
	      && !CONST_OK_FOR_K (delta)
	      && !CONST_OK_FOR_Os (delta))
	  || (!DISP_IN_RANGE (delta)
	      && !CONST_OK_FOR_K (vcall_offset)
	      && !CONST_OK_FOR_Os (vcall_offset)))
	{
	  op[5] = gen_label_rtx ();
	  output_asm_insn ("basr\t%4,0", op);
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[5]));
	}

      /* Add DELTA to this pointer.  */
      if (delta)
	{
	  if (CONST_OK_FOR_J (delta))
	    output_asm_insn ("la\t%1,%2(%1)", op);
	  else if (DISP_IN_RANGE (delta))
	    output_asm_insn ("lay\t%1,%2(%1)", op);
	  else if (CONST_OK_FOR_K (delta))
	    output_asm_insn ("ahi\t%1,%2", op);
	  else if (CONST_OK_FOR_Os (delta))
	    output_asm_insn ("afi\t%1,%2", op);
	  else
	    {
	      op[6] = gen_label_rtx ();
	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
	    }
	}

      /* Perform vcall adjustment.  */
      if (vcall_offset)
	{
	  if (CONST_OK_FOR_J (vcall_offset))
	    {
	      output_asm_insn ("l\t%4,0(%1)", op);
	      output_asm_insn ("a\t%1,%3(%4)", op);
	    }
	  else if (DISP_IN_RANGE (vcall_offset))
	    {
	      output_asm_insn ("l\t%4,0(%1)", op);
	      output_asm_insn ("ay\t%1,%3(%4)", op);
	    }
	  else if (CONST_OK_FOR_K (vcall_offset))
	    {
	      output_asm_insn ("lhi\t%4,%3", op);
	      output_asm_insn ("a\t%4,0(%1)", op);
	      output_asm_insn ("a\t%1,0(%4)", op);
	    }
	  else if (CONST_OK_FOR_Os (vcall_offset))
	    {
	      output_asm_insn ("iilf\t%4,%3", op);
	      output_asm_insn ("a\t%4,0(%1)", op);
	      output_asm_insn ("a\t%1,0(%4)", op);
	    }
	  else
	    {
	      op[7] = gen_label_rtx ();
	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
	      output_asm_insn ("a\t%4,0(%1)", op);
	      output_asm_insn ("a\t%1,0(%4)", op);
	    }

	  /* We had to clobber the base pointer register.
	     Re-setup the base pointer (with a different base).  */
	  op[5] = gen_label_rtx ();
	  output_asm_insn ("basr\t%4,0", op);
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[5]));
	}

      /* Jump to target.  */
      op[8] = gen_label_rtx ();

      if (!flag_pic)
	output_asm_insn ("l\t%4,%8-%5(%4)", op);
      else if (!nonlocal)
	output_asm_insn ("a\t%4,%8-%5(%4)", op);
      /* We cannot call through .plt, since .plt requires %r12 loaded.  */
      else if (flag_pic == 1)
	{
	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
	  output_asm_insn ("l\t%4,%0(%4)", op);
	}
      else if (flag_pic == 2)
	{
	  op[9] = gen_rtx_REG (Pmode, 0);
	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
	  output_asm_insn ("ar\t%4,%9", op);
	  output_asm_insn ("l\t%4,0(%4)", op);
	}

      output_asm_insn ("br\t%4", op);

      /* Output literal pool.  */
      output_asm_insn (".align\t4", op);

      if (nonlocal && flag_pic == 2)
	output_asm_insn (".long\t%0", op);
      if (nonlocal)
	{
	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
	}

      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
      if (!flag_pic)
	output_asm_insn (".long\t%0", op);
      else
	output_asm_insn (".long\t%0-%5", op);

      if (op[6])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[6]));
	  output_asm_insn (".long\t%2", op);
	}
      if (op[7])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[7]));
	  output_asm_insn (".long\t%3", op);
	}
    }
  final_end_function ();
  assemble_end_function (thunk, fnname);
}
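/* For illustration, a 64-bit thunk with a small positive DELTA and a
   zero VCALL_OFFSET reduces to just

	la  %r2,<delta>(%r2)	# adjust the this pointer
	jg  <target>		# tail-call the real method

   with %r3 instead of %r2 when the result is returned through a
   hidden reference.  */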
/* Output either an indirect jump or an indirect call
   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
   using a branch trampoline disabling branch target prediction.  */

void
s390_indirect_branch_via_thunk (unsigned int regno,
				unsigned int return_addr_regno,
				rtx comparison_operator,
				enum s390_indirect_branch_type type)
{
  enum s390_indirect_branch_option option;

  if (type == s390_indirect_branch_type_return)
    {
      if (s390_return_addr_from_memory ())
	option = s390_opt_function_return_mem;
      else
	option = s390_opt_function_return_reg;
    }
  else if (type == s390_indirect_branch_type_jump)
    option = s390_opt_indirect_branch_jump;
  else if (type == s390_indirect_branch_type_call)
    option = s390_opt_indirect_branch_call;
  else
    gcc_unreachable ();

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
				   indirect_branch_table_label[option],
				   indirect_branch_table_label_no[option]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (return_addr_regno != INVALID_REGNUM)
    {
      gcc_assert (comparison_operator == NULL_RTX);
      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
    }
  else
    {
      fputs (" \tjg", asm_out_file);
      if (comparison_operator != NULL_RTX)
	print_operand (asm_out_file, comparison_operator, 'C');

      fputs ("\t", asm_out_file);
    }

  if (TARGET_CPU_Z10)
    fprintf (asm_out_file,
	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
	     regno);
  else
    fprintf (asm_out_file,
	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  if ((option == s390_opt_indirect_branch_jump
       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
      || (option == s390_opt_indirect_branch_call
	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
      || (option == s390_opt_function_return_reg
	  && cfun->machine->function_return_reg == indirect_branch_thunk)
      || (option == s390_opt_function_return_mem
	  && cfun->machine->function_return_mem == indirect_branch_thunk))
    {
      if (TARGET_CPU_Z10)
	indirect_branch_z10thunk_mask |= (1 << regno);
      else
	indirect_branch_prez10thunk_mask |= (1 << regno);
    }
}
/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
   either be an address register or a label pointing to the location
   of the jump instruction.  */

void
s390_indirect_branch_via_inline_thunk (rtx execute_target)
{
  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode zarch\n", asm_out_file);

  if (REG_P (execute_target))
    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
  else
    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode esa\n", asm_out_file);

  fputs ("0:\tj\t0b\n", asm_out_file);
}
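/* For illustration, with EXECUTE_TARGET in register %r1 the inline
   thunk emitted above looks like

	ex  %r0,0(%r1)	# execute the branch instruction out of line
	0:  j 0b	# tight loop stops speculative execution

   while with a label operand an exrl relative to that label replaces
   the ex, avoiding the extra address register.  */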
static bool
s390_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || (TARGET_64BIT && mode == DImode));
}
/* Checks whether the given CALL_EXPR would use a caller
   saved register.  This is used to decide whether sibling call
   optimization could be performed on the respective function
   call.  */

static bool
s390_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  tree parameter;
  rtx parm_rtx;
  int reg, i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
	 an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
	return true;

      /* We assume that in the target function all parameters are
	 named.  This only has an impact on vector argument register
	 usage none of which is call-saved.  */
      function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
      apply_pass_by_reference_rules (&cum_v, arg);

      parm_rtx = s390_function_arg (cum, arg);

      s390_function_arg_advance (cum, arg);

      if (!parm_rtx)
	continue;

      if (REG_P (parm_rtx))
	{
	  for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
	    if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
	      return true;
	}

      if (GET_CODE (parm_rtx) == PARALLEL)
	{
	  int i;

	  for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
	    {
	      rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);

	      gcc_assert (REG_P (r));

	      for (reg = 0; reg < REG_NREGS (r); reg++)
		if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
		  return true;
	    }
	}
    }
  return false;
}
/* Return true if the given call expression can be
   turned into a sibling call.
   DECL holds the declaration of the function to be called whereas
   EXP is the call expression itself.  */

static bool
s390_function_ok_for_sibcall (tree decl, tree exp)
{
  /* The TPF epilogue uses register 1.  */
  if (TARGET_TPF_PROFILING)
    return false;

  /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
     which would have to be restored before the sibcall.  */
  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* The thunks for indirect branches require r1 if no exrl is
     available.  r1 might not be available when doing a sibling
     call.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && !TARGET_CPU_Z10
      && !decl)
    return false;

  /* Register 6 on s390 is available as an argument register but unfortunately
     "caller saved".  This makes functions needing this register for arguments
     not suitable for sibcalls.  */
  return !s390_call_saved_register_used (exp);
}
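/* Example: a function whose fifth integer argument is passed in %r6
   fails the check above, since %r6 must be preserved across the call,
   so such a call is never turned into a sibling call.  */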
/* Return the fixed registers used for condition codes.  */

static bool
s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;

  return true;
}
/* This function is used by the call expanders of the machine description.
   It emits the call insn itself together with the necessary operations
   to adjust the target address and returns the emitted insn.
   ADDR_LOCATION is the target address rtx
   TLS_CALL the location of the thread-local symbol
   RESULT_REG the register where the result of the call should be stored
   RETADDR_REG the register where the return address should be stored
	       If this parameter is NULL_RTX the call is considered
	       to be a sibling call.  */

rtx_insn *
s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
		rtx retaddr_reg)
{
  bool plt31_call_p = false;
  rtx_insn *insn;
  rtx vec[4] = { NULL_RTX };
  int elts = 0;
  rtx *call = &vec[0];
  rtx *clobber_ret_reg = &vec[1];
  rtx *use = &vec[2];
  rtx *clobber_thunk_reg = &vec[3];
  int i;

  /* Direct function calls need special treatment.  */
  if (GET_CODE (addr_location) == SYMBOL_REF)
    {
      /* When calling a global routine in PIC mode, we must
	 replace the symbol itself with the PLT stub.  */
      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT)
	{
	  if (retaddr_reg != NULL_RTX)
	    {
	      addr_location = gen_rtx_UNSPEC (Pmode,
					      gen_rtvec (1, addr_location),
					      UNSPEC_PLT31);
	      addr_location = gen_rtx_CONST (Pmode, addr_location);
	      plt31_call_p = true;
	    }
	  else
	    /* For -fpic code the PLT entries might use r12 which is
	       call-saved.  Therefore we cannot do a sibcall when
	       calling directly using a symbol ref.  When reaching
	       this point we decided (in s390_function_ok_for_sibcall)
	       to do a sibcall for a function pointer but one of the
	       optimizers was able to get rid of the function pointer
	       by propagating the symbol ref into the call.  This
	       optimization is illegal for S/390 so we turn the direct
	       call into a indirect call again.  */
	    addr_location = force_reg (Pmode, addr_location);
	}
    }

  /* If it is already an indirect call or the code above moved the
     SYMBOL_REF to somewhere else make sure the address can be found in
     register 1.  */
  if (retaddr_reg == NULL_RTX
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt31_call_p)
    {
      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
    }

  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt31_call_p)
    {
      /* Indirect branch thunks require the target to be a single GPR.  */
      addr_location = force_reg (Pmode, addr_location);

      /* Without exrl the indirect branch thunks need an additional
	 register for larl;ex */
      if (!TARGET_CPU_Z10)
	{
	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
	}
    }

  addr_location = gen_rtx_MEM (QImode, addr_location);
  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);

  if (result_reg != NULL_RTX)
    *call = gen_rtx_SET (result_reg, *call);

  if (retaddr_reg != NULL_RTX)
    *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);

  if (tls_call != NULL_RTX)
    *use = gen_rtx_USE (VOIDmode, tls_call);

  /* Count how many of the call pattern elements are actually used.  */
  for (i = 0; i < 4; i++)
    if (vec[i] != NULL_RTX)
      elts++;

  if (elts > 1)
    {
      rtvec v;
      int e = 0;

      v = rtvec_alloc (elts);
      for (i = 0; i < 4; i++)
	if (vec[i] != NULL_RTX)
	  {
	    RTVEC_ELT (v, e) = vec[i];
	    e++;
	  }

      *call = gen_rtx_PARALLEL (VOIDmode, v);
    }

  insn = emit_call_insn (*call);

  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
  if (plt31_call_p || tls_call != NULL_RTX)
    {
      /* s390_function_ok_for_sibcall should
	 have denied sibcalls in this case.  */
      gcc_assert (retaddr_reg != NULL_RTX);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
    }
  return insn;
}
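/* For illustration, a normal (non-sibling) call with a result register
   ends up as a PARALLEL of roughly this shape:

	(parallel [(set (reg:DI 2)
			(call (mem:QI <addr>) (const_int 0)))
		   (clobber (reg:DI 14))])

   i.e. the call itself plus the clobber of the return address
   register; a tls USE or the thunk register CLOBBER is appended to
   the same vector when present.  */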
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */

static void
s390_conditional_register_usage (void)
{
  int i;

  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  fixed_regs[BASE_REGNUM] = 0;
  fixed_regs[RETURN_REGNUM] = 0;
  if (TARGET_64BIT)
    {
      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
	call_used_regs[i] = 0;
    }
  else
    {
      call_used_regs[FPR4_REGNUM] = 0;
      call_used_regs[FPR6_REGNUM] = 0;
    }

  if (TARGET_SOFT_FLOAT)
    {
      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
	call_used_regs[i] = fixed_regs[i] = 1;
    }

  /* Disable v16 - v31 for non-vector target.  */
  if (!TARGET_VX)
    {
      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Corresponding function to eh_return expander.  */

static GTY(()) rtx s390_tpf_eh_return_symbol;
void
s390_emit_tpf_eh_return (rtx target)
{
  rtx_insn *insn;
  rtx reg, orig_ra;

  if (!s390_tpf_eh_return_symbol)
    {
      s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
      SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION;
    }

  reg = gen_rtx_REG (Pmode, 2);
  orig_ra = gen_rtx_REG (Pmode, 3);

  emit_move_insn (reg, target);
  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);

  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
}
/* Rework the prologue/epilogue to avoid saving/restoring
   registers unnecessarily.  */

static void
s390_optimize_prologue (void)
{
  rtx_insn *insn, *new_insn, *next_insn;

  /* Do a final recompute of the frame-related data.  */
  s390_optimize_register_info ();

  /* If all special registers are in fact used, there's nothing we
     can do, so no point in walking the insn list.  */

  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
    return;

  /* Search for prologue/epilogue insns and replace them.  */
  for (insn = get_insns (); insn; insn = next_insn)
    {
      int first, last, off;
      rtx set, base, offset;
      rtx pat;

      next_insn = NEXT_INSN (insn);

      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
	continue;

      pat = PATTERN (insn);

      /* Remove ldgr/lgdr instructions used for saving and restore
	 GPRs if possible.  */
      if (TARGET_Z10)
	{
	  rtx tmp_pat = pat;

	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
	    tmp_pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (tmp_pat) == SET
	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
	      && REG_P (SET_SRC (tmp_pat))
	      && REG_P (SET_DEST (tmp_pat)))
	    {
	      int src_regno = REGNO (SET_SRC (tmp_pat));
	      int dest_regno = REGNO (SET_DEST (tmp_pat));
	      int gpr_regno;
	      int fpr_regno;

	      if (!((GENERAL_REGNO_P (src_regno)
		     && FP_REGNO_P (dest_regno))
		    || (FP_REGNO_P (src_regno)
			&& GENERAL_REGNO_P (dest_regno))))
		continue;

	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;

	      /* GPR must be call-saved, FPR must be call-clobbered.  */
	      if (!call_used_regs[fpr_regno]
		  || call_used_regs[gpr_regno])
		continue;

	      /* It must not happen that what we once saved in an FPR now
		 needs a stack slot.  */
	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);

	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
		{
		  remove_insn (insn);
		  continue;
		}
	    }
	}

      if (GET_CODE (pat) == PARALLEL
	  && store_multiple_operation (pat, VOIDmode))
	{
	  set = XVECEXP (pat, 0, 0);
	  first = REGNO (SET_SRC (set));
	  last = first + XVECLEN (pat, 0) - 1;
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;
	  if (cfun_frame_layout.first_save_gpr != -1
	      && (cfun_frame_layout.first_save_gpr < first
		  || cfun_frame_layout.last_save_gpr > last))
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;
	  if (first > BASE_REGNUM || last < BASE_REGNUM)
	    continue;

	  if (cfun_frame_layout.first_save_gpr != -1)
	    {
	      rtx s_pat = save_gprs (base,
				     off + (cfun_frame_layout.first_save_gpr
					    - first) * UNITS_PER_LONG,
				     cfun_frame_layout.first_save_gpr,
				     cfun_frame_layout.last_save_gpr);
	      new_insn = emit_insn_before (s_pat, insn);
	      INSN_ADDRESSES_NEW (new_insn, -1);
	    }

	  remove_insn (insn);
	  continue;
	}

      if (cfun_frame_layout.first_save_gpr == -1
	  && GET_CODE (pat) == SET
	  && GENERAL_REG_P (SET_SRC (pat))
	  && GET_CODE (SET_DEST (pat)) == MEM)
	{
	  set = pat;
	  first = REGNO (SET_SRC (set));
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;

	  remove_insn (insn);
	  continue;
	}

      if (GET_CODE (pat) == PARALLEL
	  && load_multiple_operation (pat, VOIDmode))
	{
	  set = XVECEXP (pat, 0, 0);
	  first = REGNO (SET_DEST (set));
	  last = first + XVECLEN (pat, 0) - 1;
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;

	  if (cfun_frame_layout.first_restore_gpr != -1
	      && (cfun_frame_layout.first_restore_gpr < first
		  || cfun_frame_layout.last_restore_gpr > last))
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;
	  if (first > BASE_REGNUM || last < BASE_REGNUM)
	    continue;

	  if (cfun_frame_layout.first_restore_gpr != -1)
	    {
	      rtx rpat = restore_gprs (base,
				       off + (cfun_frame_layout.first_restore_gpr
					      - first) * UNITS_PER_LONG,
				       cfun_frame_layout.first_restore_gpr,
				       cfun_frame_layout.last_restore_gpr);

	      /* Remove REG_CFA_RESTOREs for registers that we no
		 longer need to save.  */
	      REG_NOTES (rpat) = REG_NOTES (insn);
	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
		    && ((int) REGNO (XEXP (*ptr, 0))
			< cfun_frame_layout.first_restore_gpr))
		  *ptr = XEXP (*ptr, 1);
		else
		  ptr = &XEXP (*ptr, 1);
	      new_insn = emit_insn_before (rpat, insn);
	      RTX_FRAME_RELATED_P (new_insn) = 1;
	      INSN_ADDRESSES_NEW (new_insn, -1);
	    }

	  remove_insn (insn);
	  continue;
	}

      if (cfun_frame_layout.first_restore_gpr == -1
	  && GET_CODE (pat) == SET
	  && GENERAL_REG_P (SET_DEST (pat))
	  && GET_CODE (SET_SRC (pat)) == MEM)
	{
	  set = pat;
	  first = REGNO (SET_DEST (set));
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;

	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;

	  remove_insn (insn);
	  continue;
	}
    }
}
/* On z10 and later the dynamic branch prediction must see the
   backward jump within a certain windows.  If not it falls back to
   the static prediction.  This function rearranges the loop backward
   branch in a way which makes the static prediction always correct.
   The function returns true if it added an instruction.  */

static bool
s390_fix_long_loop_prediction (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref;
  rtx_insn *uncond_jump;
  rtx_insn *cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.

     The additional check for a PARALLEL is required here since
     single_set might be != NULL for PARALLELs where the set of the
     iteration variable is dead.  */
  if (GET_CODE (PATTERN (insn)) == PARALLEL
      || !set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    return false;

  /* Skip conditional returns.  */
  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
      && XEXP (SET_SRC (set), 2) == pc_rtx)
    return false;

  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
    return false;

  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  rtx_code_label *new_label = gen_label_rtx ();
  uncond_jump = emit_jump_insn_after (
		  gen_rtx_SET (pc_rtx,
			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
		  insn);
  emit_label_after (new_label, uncond_jump);

  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;
  INSN_CODE (insn) = -1;

  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}
/* Returns 1 if INSN reads the value of REG for purposes not related
   to addressing of memory, and 0 otherwise.  */

static int
s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
{
  return reg_referenced_p (reg, PATTERN (insn))
	 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
}
/* Starting from INSN find_cond_jump looks downwards in the insn
   stream for a single jump insn which is the last user of the
   condition code set in INSN.  */

static rtx_insn *
find_cond_jump (rtx_insn *insn)
{
  for (; insn; insn = NEXT_INSN (insn))
    {
      rtx ite, cc;

      if (LABEL_P (insn))
	break;

      if (!JUMP_P (insn))
	{
	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
	    break;
	  continue;
	}

      /* This will be triggered by a return.  */
      if (GET_CODE (PATTERN (insn)) != SET)
	break;

      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
      ite = SET_SRC (PATTERN (insn));

      if (GET_CODE (ite) != IF_THEN_ELSE)
	break;

      cc = XEXP (XEXP (ite, 0), 0);
      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
	break;

      if (find_reg_note (insn, REG_DEAD, cc))
	return insn;
      break;
    }

  return NULL;
}
/* Swap the condition in COND and the operands in OP0 and OP1 so that
   the semantics does not change.  If NULL_RTX is passed as COND the
   function tries to find the conditional jump starting with INSN.  */

static void
s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
{
  rtx tmp;

  if (cond == NULL_RTX)
    {
      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
      rtx set = jump ? single_set (jump) : NULL_RTX;

      if (set == NULL_RTX)
	return;

      cond = XEXP (SET_SRC (set), 0);
    }

  tmp = *op0;
  *op0 = *op1;
  *op1 = tmp;
  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
}
/* On z10, instructions of the compare-and-branch family have the
   property to access the register occurring as second operand with
   its bits complemented.  If such a compare is grouped with a second
   instruction that accesses the same register non-complemented, and
   if that register's value is delivered via a bypass, then the
   pipeline recycles, thereby causing significant performance decline.
   This function locates such situations and exchanges the two
   operands of the compare.  The function return true whenever it
   added an insn.  */

static bool
s390_z10_optimize_cmp (rtx_insn *insn)
{
  rtx_insn *prev_insn, *next_insn;
  bool insn_added_p = false;
  rtx cond, *op0, *op1;

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      /* Handle compare and branch and branch on count
	 instructions.  */
      rtx pattern = single_set (insn);

      if (!pattern
	  || SET_DEST (pattern) != pc_rtx
	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
	return false;

      cond = XEXP (SET_SRC (pattern), 0);
      op0 = &XEXP (cond, 0);
      op1 = &XEXP (cond, 1);
    }
  else if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx src, dest;

      /* Handle normal compare instructions.  */
      src = SET_SRC (PATTERN (insn));
      dest = SET_DEST (PATTERN (insn));

      if (!REG_P (dest)
	  || !CC_REGNO_P (REGNO (dest))
	  || GET_CODE (src) != COMPARE)
	return false;

      /* s390_swap_cmp will try to find the conditional
	 jump when passing NULL_RTX as condition.  */
      cond = NULL_RTX;
      op0 = &XEXP (src, 0);
      op1 = &XEXP (src, 1);
    }
  else
    return false;

  if (!REG_P (*op0) || !REG_P (*op1))
    return false;

  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
    return false;

  /* Swap the COMPARE arguments and its mask if there is a
     conflicting access in the previous insn.  */
  prev_insn = prev_active_insn (insn);
  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
      && reg_referenced_p (*op1, PATTERN (prev_insn)))
    s390_swap_cmp (cond, op0, op1, insn);

  /* Check if there is a conflict with the next insn. If there
     was no conflict with the previous insn, then swap the
     COMPARE arguments and its mask.  If we already swapped
     the operands, or if swapping them would cause a conflict
     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
  next_insn = next_active_insn (insn);
  if (next_insn != NULL_RTX && INSN_P (next_insn)
      && s390_non_addr_reg_read_p (*op1, next_insn))
    {
      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
	  && s390_non_addr_reg_read_p (*op0, prev_insn))
	{
	  if (REGNO (*op1) == 0)
	    emit_insn_after (gen_nop_lr1 (), insn);
	  else
	    emit_insn_after (gen_nop_lr0 (), insn);
	  insn_added_p = true;
	}
      else
	s390_swap_cmp (cond, op0, op1, insn);
    }
  return insn_added_p;
}
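/* Example: if the previous insn has just set %r2 and the compare is
   "cr %r1,%r2", the operands are swapped to "cr %r2,%r1" (inverting
   the condition via swap_condition) so that the complemented-access
   operand no longer sees the bypassed value.  If swapping would merely
   move the conflict to the other neighbor, a register-to-register NOP
   is emitted after the compare instead to split the group.  */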
/* Number of INSNs to be scanned backward in the last BB of the loop
   and forward in the first BB of the loop.  This usually should be a
   bit more than the number of INSNs which could go into one
   group.  */
#define S390_OSC_SCAN_INSN_NUM 5

/* Scan LOOP for static OSC collisions and return true if a osc_break
   should be issued for this loop.  */
static bool
s390_adjust_loop_scan_osc (struct loop* loop)
{
  HARD_REG_SET modregs, newregs;
  rtx_insn *insn, *store_insn = NULL;
  rtx set;
  struct s390_address addr_store, addr_load;
  subrtx_iterator::array_type array;
  int insn_count;

  CLEAR_HARD_REG_SET (modregs);

  insn_count = 0;
  FOR_BB_INSNS_REVERSE (loop->latch, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	continue;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
	return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
	continue;

      if (MEM_P (SET_DEST (set))
	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
	{
	  store_insn = insn;
	  break;
	}
    }

  if (store_insn == NULL_RTX)
    return false;

  insn_count = 0;
  FOR_BB_INSNS (loop->header, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	continue;

      if (insn == store_insn)
	return false;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
	return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
	continue;

      /* An intermediate store disrupts static OSC checking
	 anyway.  */
      if (MEM_P (SET_DEST (set))
	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
	return false;

      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
	if (MEM_P (*iter)
	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
	    && rtx_equal_p (addr_load.base, addr_store.base)
	    && rtx_equal_p (addr_load.indx, addr_store.indx)
	    && rtx_equal_p (addr_load.disp, addr_store.disp))
	  {
	    if ((addr_load.base != NULL_RTX
		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
		|| (addr_load.indx != NULL_RTX
		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
	      return true;
	  }
    }
  return false;
}
/* Look for adjustments which can be done on simple innermost
   loops.  */
static void
s390_adjust_loops ()
{
  struct loop *loop = NULL;

  df_analyze ();
  compute_bb_for_insn ();

  /* Find the loops.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      if (dump_file)
	{
	  flow_loop_dump (loop, dump_file, NULL, 0);
	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
	}
      if (loop->latch == NULL
	  || pc_set (BB_END (loop->latch)) == NULL_RTX
	  || !s390_adjust_loop_scan_osc (loop))
	{
	  if (dump_file)
	    {
	      if (loop->latch == NULL)
		fprintf (dump_file, " multiple backward jumps\n");
	      else
		{
		  fprintf (dump_file, " header insn: %d latch insn: %d ",
			   INSN_UID (BB_HEAD (loop->header)),
			   INSN_UID (BB_END (loop->latch)));
		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
		    fprintf (dump_file, " loop does not end with jump\n");
		  else
		    fprintf (dump_file, " not instrumented\n");
		}
	    }
	}
      else
	{
	  rtx_insn *new_insn;

	  if (dump_file)
	    fprintf (dump_file, " adding OSC break insn: ");
	  new_insn = emit_insn_before (gen_osc_break (),
				       BB_END (loop->latch));
	  INSN_ADDRESSES_NEW (new_insn, -1);
	}
    }

  loop_optimizer_finalize ();

  df_finish_pass (false);
}
/* Perform machine-dependent processing.  */

static void
s390_reorg (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;
  int hw_before, hw_after;

  if (s390_tune == PROCESSOR_2964_Z13)
    s390_adjust_loops ();

  /* Make sure all splits have been performed; splits after
     machine_dependent_reorg might confuse insn length counts.  */
  split_all_insns_noflow ();

  /* Install the main literal pool and the associated base
     register load insns.  The literal pool might be > 4096 bytes in
     size, so that some of its elements cannot be directly accessed.

     To fix this, we split the single literal pool into multiple
     pool chunks, reloading the pool base register at various
     points throughout the function to ensure it always points to
     the pool chunk the following code expects.  */

  /* Collect the literal pool.  */
  pool = s390_mainpool_start ();
  if (pool)
    {
      /* Finish up literal pool related changes.  */
      s390_mainpool_finish (pool);
    }
  else
    {
      /* If literal pool overflowed, chunkify it.  */
      pool = s390_chunkify_start ();
      s390_chunkify_finish (pool);
    }

  /* Generate out-of-pool execute target insns.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx label;
      rtx_insn *target;

      label = s390_execute_label (insn);
      if (!label)
	continue;

      gcc_assert (label != const0_rtx);

      target = emit_label (XEXP (label, 0));
      INSN_ADDRESSES_NEW (target, -1);

      if (JUMP_P (insn))
	{
	  target = emit_jump_insn (s390_execute_target (insn));
	  /* This is important in order to keep a table jump
	     pointing at the jump table label.  Only this makes it
	     being recognized as table jump.  */
	  JUMP_LABEL (target) = JUMP_LABEL (insn);
	}
      else
	target = emit_insn (s390_execute_target (insn));
      INSN_ADDRESSES_NEW (target, -1);
    }

  /* Try to optimize prologue and epilogue further.  */
  s390_optimize_prologue ();

  /* Walk over the insns and do some >=z10 specific changes.  */
  if (s390_tune >= PROCESSOR_2097_Z10)
    {
      rtx_insn *insn;
      bool insn_added_p = false;

      /* The insn lengths and addresses have to be up to date for the
	 following manipulations.  */
      shorten_branches (get_insns ());

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  if (JUMP_P (insn))
	    insn_added_p |= s390_fix_long_loop_prediction (insn);

	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
	       || GET_CODE (PATTERN (insn)) == SET)
	      && s390_tune == PROCESSOR_2097_Z10)
	    insn_added_p |= s390_z10_optimize_cmp (insn);
	}

      /* Adjust branches if we added new instructions.  */
      if (insn_added_p)
	shorten_branches (get_insns ());
    }

  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
  if (hw_after > 0)
    {
      rtx_insn *insn;

      /* Insert NOPs for hotpatching.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	/* Emit NOPs
	    1. inside the area covered by debug information to allow setting
	       breakpoints at the NOPs,
	    2. before any insn which results in an asm instruction,
	    3. before in-function labels to avoid jumping to the NOPs, for
	       example as part of a loop,
	    4. before any barrier in case the function is completely empty
	       (__builtin_unreachable ()) and has neither internal labels nor
	       active insns.  */
	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
	  break;
      /* Output a series of NOPs before the first active insn.  */
      while (insn && hw_after > 0)
	{
	  if (hw_after >= 3)
	    {
	      emit_insn_before (gen_nop_6_byte (), insn);
	      hw_after -= 3;
	    }
	  else if (hw_after >= 2)
	    {
	      emit_insn_before (gen_nop_4_byte (), insn);
	      hw_after -= 2;
	    }
	  else
	    {
	      emit_insn_before (gen_nop_2_byte (), insn);
	      hw_after -= 1;
	    }
	}
    }
}
/* Return true if INSN is a fp load insn writing register REGNO.  */
static inline bool
s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
{
  rtx set;
  enum attr_type flag = s390_safe_attr_type (insn);

  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
    return false;

  set = single_set (insn);

  if (set == NULL_RTX)
    return false;

  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
    return false;

  if (REGNO (SET_DEST (set)) != regno)
    return false;

  return true;
}
/* This value describes the distance to be avoided between an
   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
   fine but the exact value has to be avoided. Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
#define Z10_EARLYLOAD_DISTANCE 7

/* Rearrange the ready list in order to avoid the situation described
   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
   moved to the very end of the ready list.  */
static void
s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
{
  unsigned int regno;
  int nready = *nready_p;
  rtx_insn *tmp;
  int i;
  rtx_insn *insn;
  rtx set;
  enum attr_type flag;
  int distance;

  /* Skip DISTANCE - 1 active insns.  */
  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
       distance > 0 && insn != NULL_RTX;
       distance--, insn = prev_active_insn (insn))
    if (CALL_P (insn) || JUMP_P (insn))
      return;

  if (insn == NULL_RTX)
    return;

  set = single_set (insn);

  if (set == NULL_RTX || !REG_P (SET_DEST (set))
      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
    return;

  flag = s390_safe_attr_type (insn);

  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
    return;

  regno = REGNO (SET_DEST (set));
  i = nready - 1;

  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
    i--;

  if (!i)
    return;

  tmp = ready[i];
  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
  ready[0] = tmp;
}
/* Returns TRUE if BB is entered via a fallthru edge and all other
   incoming edges are less than likely.  */
static bool
s390_bb_fallthru_entry_likely (basic_block bb)
{
  edge e, fallthru_edge;
  edge_iterator ei;

  if (!bb)
    return false;

  fallthru_edge = find_fallthru_edge (bb->preds);
  if (!fallthru_edge)
    return false;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e != fallthru_edge
	&& e->probability >= profile_probability::likely ())
      return false;

  return true;
}
14780 /* Number of insns in the group. */
14782 /* Execution side of the group. */
14784 /* Group can only hold two insns. */
14786 } s390_sched_state
;
14788 static struct s390_sched_state sched_state
= {0, 1, false};
14790 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14791 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14792 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14793 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14794 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
static unsigned int
s390_get_sched_attrmask (rtx_insn *insn)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2827_ZEC12:
      if (get_attr_zEC12_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_zEC12_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_zEC12_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_zEC12_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      break;
    case PROCESSOR_2964_Z13:
      if (get_attr_z13_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z13_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z13_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z13_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z13_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_3906_Z14:
      if (get_attr_z14_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z14_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z14_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z14_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z14_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_8561_Z15:
    case PROCESSOR_ARCH14:
      if (get_attr_z15_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z15_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z15_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z15_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z15_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}
static unsigned int
s390_get_unit_mask (rtx_insn *insn, int *units)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2964_Z13:
      *units = 4;
      if (get_attr_z13_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z13_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z13_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z13_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    case PROCESSOR_3906_Z14:
      *units = 4;
      if (get_attr_z14_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z14_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z14_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z14_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    case PROCESSOR_8561_Z15:
    case PROCESSOR_ARCH14:
      *units = 4;
      if (get_attr_z15_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z15_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z15_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z15_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}
static bool
s390_is_fpd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
    || get_attr_z15_unit_fpd (insn);
}

static bool
s390_is_fxd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
    || get_attr_z15_unit_fxd (insn);
}

/* Returns TRUE if INSN is a long-running instruction.  */
static bool
s390_is_longrunning (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return s390_is_fxd (insn) || s390_is_fpd (insn);
}
/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state sched_state.  */
static int
s390_sched_score (rtx_insn *insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (sched_state.group_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
	 break a group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
	score += 5;
      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
	score += 10;
      break;
    case 1:
      /* Prefer not cracked insns while trying to put together a
	 group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
	score += 10;
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
	score += 5;
      /* If we are in a group of two already, try to schedule another
	 group-of-two insn to avoid shortening another group.  */
      if (sched_state.group_of_two
	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	score += 15;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
	 group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
	score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
	score += 10;
      /* Try to avoid group-of-two insns in the last slot as they will
	 shorten this group as well as the next one.  */
      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	score = MAX (0, score - 15);
      break;
    }

  if (s390_tune >= PROCESSOR_2964_Z13)
    {
      int units, i;
      unsigned unit_mask, m = 1;

      unit_mask = s390_get_unit_mask (insn, &units);
      gcc_assert (units <= MAX_SCHED_UNITS);

      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
	 ago the last insn of this unit type got scheduled.  This is
	 supposed to help providing a proper instruction mix to the
	 CPU.  */
      for (i = 0; i < units; i++, m <<= 1)
	if (m & unit_mask)
	  score += (last_scheduled_unit_distance[i][sched_state.side]
		    * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);

      int other_side = 1 - sched_state.side;

      /* Try to delay long-running insns when side is busy.  */
      if (s390_is_longrunning (insn))
	{
	  if (s390_is_fxd (insn))
	    {
	      if (fxd_longrunning[sched_state.side]
		  && fxd_longrunning[other_side]
		  <= fxd_longrunning[sched_state.side])
		score = MAX (0, score - 10);

	      else if (fxd_longrunning[other_side]
		       >= fxd_longrunning[sched_state.side])
		score += 10;
	    }

	  if (s390_is_fpd (insn))
	    {
	      if (fpd_longrunning[sched_state.side]
		  && fpd_longrunning[other_side]
		  <= fpd_longrunning[sched_state.side])
		score = MAX (0, score - 10);

	      else if (fpd_longrunning[other_side]
		       >= fpd_longrunning[sched_state.side])
		score += 10;
	    }
	}
    }

  return score;
}
/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline  */
static int
s390_sched_reorder (FILE *file, int verbose,
		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10
      && reload_completed
      && *nreadyp > 1)
    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
	 end) of the list.  A full sort is not needed since a conflict
	 in the hazard recognition cannot happen.  So the top insn in
	 the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
	{
	  int score;

	  if (recog_memoized (ready[i]) < 0)
	    continue;

	  score = s390_sched_score (ready[i]);
	  if (score > max_score)
	    {
	      max_score = score;
	      max_index = i;
	    }
	}

      if (max_index != -1)
	{
	  if (max_index != last_index)
	    {
	      tmp = ready[max_index];
	      ready[max_index] = ready[last_index];
	      ready[last_index] = tmp;

	      if (verbose > 5)
		fprintf (file,
			 ";;\t\tBACKEND: move insn %d to the top of list\n",
			 INSN_UID (ready[last_index]));
	    }
	  else if (verbose > 5)
	    fprintf (file,
		     ";;\t\tBACKEND: best insn %d already on top\n",
		     INSN_UID (ready[last_index]));
	}

      if (verbose > 5)
	{
	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
		   sched_state.group_state);

	  for (i = last_index; i >= 0; i--)
	    {
	      unsigned int sched_mask;
	      rtx_insn *insn = ready[i];

	      if (recog_memoized (insn) < 0)
		continue;

	      sched_mask = s390_get_sched_attrmask (insn);
	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
		       INSN_UID (insn),
		       s390_sched_score (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
					   ((M) & sched_mask) ? #ATTR : "");
	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR
	      if (s390_tune >= PROCESSOR_2964_Z13)
		{
		  unsigned int unit_mask, m = 1;
		  int units, j;

		  unit_mask = s390_get_unit_mask (insn, &units);
		  fprintf (file, "(units:");
		  for (j = 0; j < units; j++, m <<= 1)
		    if (m & unit_mask)
		      fprintf (file, " u%d", j);
		  fprintf (file, ")");
		}
	      fprintf (file, "\n");
	    }
	}
    }

  return s390_issue_rate ();
}
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  bool ends_group = false;

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	sched_state.group_of_two = true;

      /* If this is a group-of-two insn, we actually ended the last group
	 and this insn is the first one of the new group.  */
      if (sched_state.group_state == 2 && sched_state.group_of_two)
	{
	  sched_state.side = sched_state.side ? 0 : 1;
	  sched_state.group_state = 0;
	}

      /* Longrunning and side bookkeeping.  */
      for (int i = 0; i < 2; i++)
	{
	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
	}

      unsigned latency = insn_default_latency (insn);
      if (s390_is_longrunning (insn))
	{
	  if (s390_is_fxd (insn))
	    fxd_longrunning[sched_state.side] = latency;
	  else
	    fpd_longrunning[sched_state.side] = latency;
	}

      if (s390_tune >= PROCESSOR_2964_Z13)
	{
	  int units, i;
	  unsigned unit_mask, m = 1;

	  unit_mask = s390_get_unit_mask (insn, &units);
	  gcc_assert (units <= MAX_SCHED_UNITS);

	  for (i = 0; i < units; i++, m <<= 1)
	    if (m & unit_mask)
	      last_scheduled_unit_distance[i][sched_state.side] = 0;
	    else if (last_scheduled_unit_distance[i][sched_state.side]
		     < MAX_SCHED_MIX_DISTANCE)
	      last_scheduled_unit_distance[i][sched_state.side]++;
	}

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
	{
	  sched_state.group_state = 0;
	  ends_group = true;
	}
      else
	{
	  switch (sched_state.group_state)
	    {
	    case 0:
	      sched_state.group_state++;
	      break;
	    case 1:
	      sched_state.group_state++;
	      if (sched_state.group_of_two)
		{
		  sched_state.group_state = 0;
		  ends_group = true;
		}
	      break;
	    case 2:
	      sched_state.group_state++;
	      ends_group = true;
	      break;
	    }
	}

      if (verbose > 5)
	{
	  unsigned int sched_mask;

	  sched_mask = s390_get_sched_attrmask (insn);

	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR

	  if (s390_tune >= PROCESSOR_2964_Z13)
	    {
	      unsigned int unit_mask, m = 1;
	      int units, j;

	      unit_mask = s390_get_unit_mask (insn, &units);
	      fprintf (file, "(units:");
	      for (j = 0; j < units; j++, m <<= 1)
		if (m & unit_mask)
		  fprintf (file, " %d", j);
	      fprintf (file, ")");
	    }
	  fprintf (file, " sched state: %d\n", sched_state.group_state);

	  if (s390_tune >= PROCESSOR_2964_Z13)
	    {
	      int units, j;

	      s390_get_unit_mask (insn, &units);

	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
	      for (j = 0; j < units; j++)
		fprintf (file, "%d:%d ", j,
			 last_scheduled_unit_distance[j][sched_state.side]);
	      fprintf (file, "\n");
	    }
	}

      /* If this insn ended a group, the next will be on the other side.  */
      if (ends_group)
	{
	  sched_state.group_state = 0;
	  sched_state.side = sched_state.side ? 0 : 1;
	  sched_state.group_of_two = false;
	}
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}
static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
		 int verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
  /* If the next basic block is most likely entered via a fallthru edge
     we keep the last sched state.  Otherwise we start a new group.
     The scheduler traverses basic blocks in "instruction stream" ordering
     so if we see a fallthru edge here, sched_state will be of its
     source block.

     current_sched_info->prev_head is the insn before the first insn of the
     block of insns to be scheduled.
     */
  rtx_insn *insn = current_sched_info->prev_head
    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
    {
      last_scheduled_insn = NULL;
      memset (last_scheduled_unit_distance, 0,
	      MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
      sched_state.group_state = 0;
      sched_state.group_of_two = false;
    }
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
   a new number by which struct loop *loop should be unrolled if tuned for
   cpus with a built-in stride prefetcher.
   The loop is analyzed for memory accesses by calling check_dpu for
   each rtx of the loop.  Depending on the loop_depth and the amount of
   memory accesses a new number <= nunroll is returned to improve the
   behavior of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune < PROCESSOR_2097_Z10)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
	{
	  rtx set;

	  /* The runtime of small loops with memory block operations
	     will be determined by the memory operation.  Doing
	     unrolling doesn't help here.  Measurements to confirm
	     this were only done on recent CPU levels.  So better do
	     not change anything for older CPUs.  */
	  if (s390_tune >= PROCESSOR_2964_Z13
	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
	      && ((set = single_set (insn)) != NULL_RTX)
	      && ((GET_MODE (SET_DEST (set)) == BLKmode
		   && (GET_MODE (SET_SRC (set)) == BLKmode
		       || SET_SRC (set) == const0_rtx))
		  || (GET_CODE (SET_SRC (set)) == COMPARE
		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
	    return 1;

	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	    if (MEM_P (*iter))
	      mem_count += 1;
	}
  free (bbs);

  /* Prevent division by zero; we do not need to adjust nunroll in this
     case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
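
/* As an illustration of the formula above: a depth-1 loop containing
   four memory references and nunroll == 8 yields MIN (8, 28 / 4) == 7,
   while fourteen memory references cap the unroll factor at
   MIN (8, 28 / 14) == 2, keeping the unrolled body within reach of the
   stride prefetcher.  */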
/* Restore the current options.  This is a hook function and also called
   internally.  */

static void
s390_function_specific_restore (struct gcc_options *opts,
				struct gcc_options */* opts_set */,
				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
{
  opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
}
static void
s390_default_align (struct gcc_options *opts)
{
  /* Set the default function alignment to 16 in order to get rid of
     some unwanted performance effects.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions
      && opts->x_s390_tune >= PROCESSOR_2964_Z13)
    opts->x_str_align_functions = "16";
}

static void
s390_override_options_after_change (void)
{
  s390_default_align (&global_options);
}
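
/* E.g. plain -O2 -march=z13 enables x_flag_align_functions with no
   explicit string value, so the code above behaves like an explicit
   -falign-functions=16; a user-provided -falign-functions=32 leaves
   x_str_align_functions untouched.  */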
static void
s390_option_override_internal (struct gcc_options *opts,
			       struct gcc_options *opts_set)
{
  /* Architecture mode defaults according to ABI.  */
  if (!(opts_set->x_target_flags & MASK_ZARCH))
    {
      if (TARGET_64BIT)
	opts->x_target_flags |= MASK_ZARCH;
      else
	opts->x_target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on cmdline.  */
  if (!opts_set->x_s390_arch)
    opts->x_s390_arch = PROCESSOR_2064_Z900;

  opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];

  /* Determine processor to tune for.  */
  if (!opts_set->x_s390_tune)
    opts->x_s390_tune = opts->x_s390_arch;

  opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];

  /* Sanity checks.  */
  if (opts->x_s390_arch == PROCESSOR_NATIVE
      || opts->x_s390_tune == PROCESSOR_NATIVE)
    gcc_unreachable ();
  if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
    error ("64-bit ABI not supported in ESA/390 mode");

  if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
      || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
      || opts->x_s390_function_return == indirect_branch_thunk_inline
      || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
      || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
    error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");

  if (opts->x_s390_indirect_branch != indirect_branch_keep)
    {
      if (!opts_set->x_s390_indirect_branch_call)
	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;

      if (!opts_set->x_s390_indirect_branch_jump)
	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
    }

  if (opts->x_s390_function_return != indirect_branch_keep)
    {
      if (!opts_set->x_s390_function_return_reg)
	opts->x_s390_function_return_reg = opts->x_s390_function_return;

      if (!opts_set->x_s390_function_return_mem)
	opts->x_s390_function_return_mem = opts->x_s390_function_return;
    }
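
  /* E.g. a lone -mindirect-branch=thunk propagates to both
     -mindirect-branch-call=thunk and -mindirect-branch-jump=thunk,
     while -mindirect-branch=thunk -mindirect-branch-jump=keep keeps
     the explicitly chosen jump setting, since opts_set records it.  */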
  /* Enable hardware transactions if available and not explicitly
     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch.  */
  if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
    {
      if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
	opts->x_target_flags |= MASK_OPT_HTM;
      else
	opts->x_target_flags &= ~MASK_OPT_HTM;
    }

  if (TARGET_OPT_VX_P (opts_set->x_target_flags))
    {
      if (TARGET_OPT_VX_P (opts->x_target_flags))
	{
	  if (!TARGET_CPU_VX_P (opts))
	    error ("hardware vector support not available on %s",
		   processor_table[(int)opts->x_s390_arch].name);
	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
	    error ("hardware vector support not available with "
		   "%<-msoft-float%>");
	}
    }
  else
    {
      if (TARGET_CPU_VX_P (opts))
	/* Enable vector support if available and not explicitly disabled
	   by user.  E.g. with -m31 -march=z13 -mzarch.  */
	opts->x_target_flags |= MASK_OPT_VX;
      else
	opts->x_target_flags &= ~MASK_OPT_VX;
    }

  /* Use hardware DFP if available and not explicitly disabled by
     user.  E.g. with -m31 -march=z10 -mzarch.  */
  if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
    {
      if (TARGET_DFP_P (opts))
	opts->x_target_flags |= MASK_HARD_DFP;
      else
	opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
	{
	  if (!TARGET_CPU_DFP_P (opts))
	    error ("hardware decimal floating point instructions"
		   " not available on %s",
		   processor_table[(int)opts->x_s390_arch].name);
	  if (!TARGET_ZARCH_P (opts->x_target_flags))
	    error ("hardware decimal floating point instructions"
		   " not available in ESA/390 mode");
	}
      else
	opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
      && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
	  && TARGET_HARD_DFP_P (opts->x_target_flags))
	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
	       "%<-msoft-float%>");

      opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_BACKCHAIN_P (opts->x_target_flags)
      && TARGET_PACKED_STACK_P (opts->x_target_flags)
      && TARGET_HARD_FLOAT_P (opts->x_target_flags))
    error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
	   "supported in combination");

  if (opts->x_s390_stack_size)
    {
      if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
	error ("stack size must be greater than the stack guard value");
      else if (opts->x_s390_stack_size > 1 << 16)
	error ("stack size must not be greater than 64k");
    }
  else if (opts->x_s390_stack_guard)
    error ("%<-mstack-guard%> implies use of %<-mstack-size%>");

  /* Our implementation of the stack probe requires the probe interval
     to be used as displacement in an address operand.  The maximum
     probe interval currently is 64k.  This would exceed short
     displacements.  Trim that value down to 4k if that happens.  This
     might result in too many probes being generated only on the
     oldest supported machine level z900.  */
  if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
    param_stack_clash_protection_probe_interval = 12;
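
  /* Note that the parameter holds the log2 of the probe interval: a
     requested 2^16 = 64k interval does not fit the 12-bit unsigned
     short displacement, hence the fallback above to 12, i.e. 4k
     probes.  */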
#if TARGET_TPF != 0
  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
    error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
    error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
    error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
    error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");

  if (s390_tpf_trace_skip)
    {
      opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
      opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
    }
#endif

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
    opts->x_target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
    {
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
			   100);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
			   2000);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
			   64);
    }

  SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
		       256);
  /* values for loop prefetching */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized assume that we only get a small
     part of the caches above l1.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
  SET_OPTION_IF_UNSET (opts, opts_set,
		       param_prefetch_min_insn_to_mem_ratio, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);

  /* Set the default alignment.  */
  s390_default_align (opts);

  /* Call target specific restore function to do post-init work.  At the
     moment, this just sets opts->x_s390_cost_pointer.  */
  s390_function_specific_restore (opts, opts_set, NULL);

  /* Check whether -mfentry is supported.  It cannot be used in 31-bit
     mode, because 31-bit PLT stubs assume that %r12 contains the GOT
     address, which is not the case when the code runs before the
     prolog.  */
  if (opts->x_flag_fentry && !TARGET_64BIT)
    error ("%<-mfentry%> is supported only for 64-bit CPUs");
}
static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mhotpatch_:
	    {
	      int val1;
	      int val2;
	      char *s = strtok (ASTRDUP (opt->arg), ",");
	      char *t = strtok (NULL, "\0");

	      if (t != NULL)
		{
		  val1 = integral_argument (s);
		  val2 = integral_argument (t);
		}
	      else
		{
		  val1 = -1;
		  val2 = -1;
		}
	      if (val1 == -1 || val2 == -1)
		{
		  /* argument is not a plain number */
		  error ("arguments to %qs should be non-negative integers",
			 "-mhotpatch=n,m");
		  break;
		}
	      else if (val1 > s390_hotpatch_hw_max
		       || val2 > s390_hotpatch_hw_max)
		{
		  error ("argument to %qs is too large (max. %d)",
			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
		  break;
		}
	      s390_hotpatch_hw_before_label = val1;
	      s390_hotpatch_hw_after_label = val2;
	      break;
	    }
	  default:
	    gcc_unreachable ();
	  }
      }
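
  /* E.g. -mhotpatch=1,2 ends up with s390_hotpatch_hw_before_label == 1
     and s390_hotpatch_hw_after_label == 2, i.e. one halfword of NOP
     padding before and two after each function label, while
     -mhotpatch=1 or -mhotpatch=a,b is rejected as malformed.  */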
  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  s390_option_override_internal (&global_options, &global_options_set);

  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node
    = build_target_option_node (&global_options, &global_options_set);
  target_option_current_node = target_option_default_node;

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  if (!s390_pic_data_is_text_relative && !flag_pic)
    error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
	   "%<-fpic%>/%<-fPIC%>");

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
	 debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
	dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
	dwarf_version = 2;
    }
}
#if S390_USE_TARGET_ATTRIBUTE
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
s390_valid_target_attribute_inner_p (tree args,
				     struct gcc_options *opts,
				     struct gcc_options *new_opts_set,
				     bool force_pragma)
{
  char *next_optstr;
  bool ret = true;

#define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
#define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
  static const struct
  {
    const char *string;
    size_t len;
    int opt;
    int has_arg;
    int only_as_pragma;
  } attrs[] = {
    /* enum options */
    S390_ATTRIB ("arch=", OPT_march_, 1),
    S390_ATTRIB ("tune=", OPT_mtune_, 1),
    /* uinteger options */
    S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
    S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
    S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
    S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
    /* flag options */
    S390_ATTRIB ("backchain", OPT_mbackchain, 0),
    S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
    S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
    S390_ATTRIB ("htm", OPT_mhtm, 0),
    S390_ATTRIB ("vx", OPT_mvx, 0),
    S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
    S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
    S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
    S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
    S390_PRAGMA ("zvector", OPT_mzvector, 0),
    /* boolean options */
    S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
  };
#undef S390_ATTRIB
#undef S390_PRAGMA

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;
      int num_pragma_values;
      int i;

      /* Note: attribs.c:decl_attributes prepends the values from
	 current_target_pragma to the list of target attributes.  To determine
	 whether we're looking at a value of the attribute or the pragma we
	 assume that the first [list_length (current_target_pragma)] values in
	 the list are the values from the pragma.  */
      num_pragma_values = (!force_pragma && current_target_pragma != NULL)
	? list_length (current_target_pragma) : 0;
      for (i = 0; args; args = TREE_CHAIN (args), i++)
	{
	  bool is_pragma;

	  is_pragma = (force_pragma || i < num_pragma_values);
	  if (TREE_VALUE (args)
	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
						       opts, new_opts_set,
						       is_pragma))
	    ret = false;
	}
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      int mask = 0;
      enum cl_var_type var_type;
      bool found;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      found = false;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
		continue;
	      mask = cl_options[opt].var_value;
	      var_type = cl_options[opt].var_type;
	      found = true;
	      break;
	    }
	}

      /* Process the option.  */
      if (!found)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  return false;
	}
      else if (attrs[i].only_as_pragma && !force_pragma)
	{
	  /* Value is not allowed for the target attribute.  */
	  error ("value %qs is not supported by attribute %<target%>",
		 attrs[i].string);
	  return false;
	}

      else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
	{
	  if (var_type == CLVC_BIT_CLEAR)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    opts->x_target_flags |= mask;
	  else
	    opts->x_target_flags &= ~mask;
	  new_opts_set->x_target_flags |= mask;
	}

      else if (cl_options[opt].var_type == CLVC_BOOLEAN)
	{
	  int value;

	  if (cl_options[opt].cl_uinteger)
	    {
	      /* Unsigned integer argument.  Code based on the function
		 decode_cmdline_option () in opts-common.c.  */
	      value = integral_argument (p + opt_len);
	    }
	  else
	    value = (opt_set_p) ? 1 : 0;

	  if (value != -1)
	    {
	      struct cl_decoded_option decoded;

	      /* Value range check; only implemented for numeric and boolean
		 options at the moment.  */
	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
	      set_option (opts, new_opts_set, opt, value,
			  p + opt_len, DK_UNSPECIFIED, input_location,
			  global_dc);
	    }
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else if (cl_options[opt].var_type == CLVC_ENUM)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (opts, new_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }
  return ret;
}
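
/* For illustration, attribute strings the parser above accepts
   (hypothetical user code, not part of this file):

     __attribute__ ((target ("arch=z13,no-htm")))
     int with_z13_no_htm (int a);

   "arch=z13" matches the S390_ATTRIB ("arch=", OPT_march_, 1) entry and
   carries an argument; "no-htm" hits the no-xxx recognition and clears
   the HTM flag.  "zvector", being declared via S390_PRAGMA, is accepted
   only from #pragma GCC target and is diagnosed in an attribute.  */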
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

static tree
s390_valid_target_attribute_tree (tree args,
				  struct gcc_options *opts,
				  const struct gcc_options *opts_set,
				  bool force_pragma)
{
  tree t = NULL_TREE;
  struct gcc_options new_opts_set;

  memset (&new_opts_set, 0, sizeof (new_opts_set));

  /* Process each of the options on the chain.  */
  if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
					     force_pragma))
    return error_mark_node;

  /* If some option was set (even if it has not changed), rerun
     s390_option_override_internal, and then save the options away.  */
  if (new_opts_set.x_target_flags
      || new_opts_set.x_s390_arch
      || new_opts_set.x_s390_tune
      || new_opts_set.x_s390_stack_guard
      || new_opts_set.x_s390_stack_size
      || new_opts_set.x_s390_branch_cost
      || new_opts_set.x_s390_warn_framesize
      || new_opts_set.x_s390_warn_dynamicstack_p)
    {
      const unsigned char *src = (const unsigned char *)opts_set;
      unsigned char *dest = (unsigned char *)&new_opts_set;
      unsigned int i;

      /* Merge the original option flags into the new ones.  */
      for (i = 0; i < sizeof (*opts_set); i++)
	dest[i] |= src[i];

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      s390_option_override_internal (opts, &new_opts_set);
      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node (opts, &new_opts_set);
    }
  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
s390_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct gcc_options func_options, func_options_set;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize
    = build_optimization_node (&global_options, &global_options_set);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  cl_optimization_restore (&func_options, &func_options_set,
			   TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options, &func_options_set,
			    TREE_TARGET_OPTION (target_option_default_node));

  new_target = s390_valid_target_attribute_tree (args, &func_options,
						 &global_options_set,
						 (args ==
						  current_target_pragma));
  new_optimize = build_optimization_node (&func_options, &func_options_set);
  if (new_target == error_mark_node)
    ret = false;
  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }
  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
s390_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = true;

  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
      != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
    ret = false;

  /* Don't inline functions to be compiled for a more recent arch into a
     function for an older arch.  */
  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
    ret = false;

  /* Inlining a hard float function into a soft float function is only
     allowed if the hard float function doesn't actually make use of
     floating point.

     We are called from FEs for multi-versioning call optimization, so
     beware of ipa_fn_summaries not available.  */
  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get
	       (cgraph_node::get (callee))->fp_expressions))
    ret = false;

  return ret;
}
/* Set VAL to correct enum value according to the indirect-branch or
   function-return attribute in ATTR.  */

static inline void
s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
{
  const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));

  if (strcmp (str, "keep") == 0)
    *val = indirect_branch_keep;
  else if (strcmp (str, "thunk") == 0)
    *val = indirect_branch_thunk;
  else if (strcmp (str, "thunk-inline") == 0)
    *val = indirect_branch_thunk_inline;
  else if (strcmp (str, "thunk-extern") == 0)
    *val = indirect_branch_thunk_extern;
}
/* Memorize the setting for -mindirect-branch* and -mfunction-return*
   from either the cmdline or the function attributes in
   cfun->machine.  */

static void
s390_indirect_branch_settings (tree fndecl)
{
  tree attr;

  if (!fndecl)
    return;

  /* Initialize with the cmdline options and let the attributes
     override it.  */
  cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
  cfun->machine->indirect_branch_call = s390_indirect_branch_call;

  cfun->machine->function_return_reg = s390_function_return_reg;
  cfun->machine->function_return_mem = s390_function_return_mem;

  if ((attr = lookup_attribute ("indirect_branch",
				DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->indirect_branch_jump);
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->indirect_branch_call);
    }

  if ((attr = lookup_attribute ("indirect_branch_jump",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr,
				    &cfun->machine->indirect_branch_jump);

  if ((attr = lookup_attribute ("indirect_branch_call",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr,
				    &cfun->machine->indirect_branch_call);

  if ((attr = lookup_attribute ("function_return",
				DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->function_return_reg);
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->function_return_mem);
    }

  if ((attr = lookup_attribute ("function_return_reg",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr,
				    &cfun->machine->function_return_reg);

  if ((attr = lookup_attribute ("function_return_mem",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr,
				    &cfun->machine->function_return_mem);
}
#if S390_USE_TARGET_ATTRIBUTE
/* Restore targets globals from NEW_TREE and invalidate the
   s390_previous_fndecl cache.  */

void
s390_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  s390_previous_fndecl = NULL_TREE;
}
#endif
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
s390_set_current_function (tree fndecl)
{
#if S390_USE_TARGET_ATTRIBUTE
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == s390_previous_fndecl)
    {
      s390_indirect_branch_settings (fndecl);
      return;
    }

  tree old_tree;
  if (s390_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
  else
    old_tree = target_option_default_node;

  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	s390_activate_target_options (target_option_current_node);
      return;
    }

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  if (new_tree == NULL_TREE)
    new_tree = target_option_default_node;

  if (old_tree != new_tree)
    s390_activate_target_options (new_tree);
  s390_previous_fndecl = fndecl;
#endif
  s390_indirect_branch_settings (fndecl);
}
/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
				     unsigned int align ATTRIBUTE_UNUSED,
				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
				     bool speed_p ATTRIBUTE_UNUSED)
{
  return (size == 1 || size == 2
	  || size == 4 || (TARGET_ZARCH && size == 8));
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
  tree call_efpc = build_call_expr (efpc, 0);
  tree fenv_var = create_tmp_var_raw (unsigned_type_node);

#define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
#define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
#define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
#define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
#define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)

  /* Generates the equivalent of feholdexcept (&fenv_var)

     fenv_var = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var & mask)  */
  tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
			 NULL_TREE, NULL_TREE);
  tree new_fpc
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(FPC_DXC_MASK | FPC_FLAGS_MASK
			       | FPC_EXCEPTION_MASK)));
  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)

     __builtin_s390_sfpc (__builtin_s390_efpc () & mask)  */
  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
		    build_int_cst (unsigned_type_node,
				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
  *clear = build_call_expr (sfpc, 1, new_fpc);

  /* Generates the equivalent of feupdateenv (fenv_var)

     old_fpc = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
  old_fpc = create_tmp_var_raw (unsigned_type_node);
  tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
			       NULL_TREE, NULL_TREE);

  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);

  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
				  build_int_cst (unsigned_type_node,
						 FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
			     build_int_cst (unsigned_type_node,
					    FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
				      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    store_old_fpc, set_new_fpc),
		    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}
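
/* The three sequences built above surround C11 atomic floating point
   updates, e.g. for (hypothetical user code)

     _Atomic double d;
     void f (double x) { d += x; }

   *hold saves the FPC and masks exceptions before the compare-exchange
   loop, *clear drops flags raised by a failed iteration, and *update
   restores the saved environment and re-raises the flags produced by
   the successful iteration.  */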
/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE)
    switch (mode)
      {
      case E_SFmode:
	return V4SFmode;
      default:;
      }

  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode:
	return V2DFmode;
      case E_DImode:
	return V2DImode;
      case E_SImode:
	return V4SImode;
      case E_HImode:
	return V8HImode;
      case E_QImode:
	return V16QImode;
      default:;
      }
  return word_mode;
}
/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
				  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, while this definitely breaks the ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  tree size = TYPE_SIZE (type);

  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  if (tree_fits_uhwi_p (size)
      && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
    return tree_to_uhwi (size);

  return BIGGEST_ALIGNMENT;
}
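
/* E.g. under the vector ABI a 16-byte vector type is capped at
   BIGGEST_ALIGNMENT, while a hypothetical narrow vector such as

     typedef int v1si __attribute__ ((vector_size (4)));

   keeps its natural 32-bit alignment because its size stays below the
   cap, and a user-specified aligned attribute always wins.  */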
/* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
   the LARL instruction.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}
#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif
/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);

  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
	     s390_vector_abi);
#endif
  file_end_indicate_exec_stack ();

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
	      || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
	       || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
	      "bool with floating point vector operands");

  return NULL;
}
#if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
/* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with
   older glibc versions.

   For historical reasons, float_t and double_t had been typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   With a glibc with that "historic" definition, configure will enable this
   hook to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as
   implied by -std=cXY).  That means when we are compiling with
   -fexcess-precision=fast, the value we set for FLT_EVAL_METHOD will be out
   of line with the actual precision of float_t.

   Newer versions of glibc will be modified to derive the definition of
   float_t from FLT_EVAL_METHOD on s390x, as on many other architectures.
   There, configure will disable this hook by default, so that we defer to
   the default of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef
   of float_t to float.  Note that in that scenario, float_t and
   FLT_EVAL_METHOD will be in line independent of -fexcess-precision.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
#endif
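
/* E.g. with the historic glibc typedefs and -std=c11 (which implies
   -fexcess-precision=standard) this hook makes FLT_EVAL_METHOD expand
   to 1, so given float a, b, c; the expression a * b + c is evaluated
   in double, matching the double-based float_t typedef.  */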
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
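
/* ASan derives shadow addresses as (addr >> 3) + offset, so a 64-bit
   address A is shadowed at (A >> 3) + (1UL << 52) and a 31-bit address
   at (A >> 3) + 0x20000000; the >> 3 scaling is ASan's generic
   convention, only the offset is target specific.  */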
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif
/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: backtrace will point to the thunk target
     as if it was interrupted by a signal.  For a call this means that
     the call chain will be: caller->callee->thunk.  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl  0,1f  */

      /* We generate a thunk for z10 compiled code although z10 is
	 currently not enabled.  Tell the assembler to accept the
	 instruction.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else
    {
      /* larl %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex 0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }

  /* 0:    j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:    br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}
/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

static unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}
/* Evaluate the insns between HEAD and TAIL and install back-end
   specific dependencies.

   Establish an ANTI dependency between r11 and r15 restores from FPRs
   to prevent the instruction scheduler from reordering them since
   this would break CFI.  No further handling in the sched_reorder
   hook is required since the r11 and r15 restore will never appear in
   the same ready list with that change.  */
static void
s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
{
  if (!frame_pointer_needed || !epilogue_completed)
    return;

  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);

  rtx_insn *r15_restore = NULL, *r11_restore = NULL;

  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      rtx set = single_set (insn);
      if (!INSN_P (insn)
	  || !RTX_FRAME_RELATED_P (insn)
	  || set == NULL_RTX
	  || !REG_P (SET_DEST (set))
	  || !FP_REG_P (SET_SRC (set)))
	continue;

      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
	r11_restore = insn;

      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
	r15_restore = insn;
    }

  if (r11_restore == NULL || r15_restore == NULL)
    return;
  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */

static unsigned HOST_WIDE_INT
s390_shift_truncation_mask (machine_mode mode)
{
  return mode == DImode || mode == SImode ? 63 : 0;
}
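
/* E.g. for a DImode shift the mask of 63 tells the middle end that the
   shift count is interpreted modulo 64, so an explicit (n & 63) applied
   to the count can be optimized away; returning 0 for other modes
   promises no such truncation.  */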
/* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with
   additional constraints.  */

static bool
f_constraint_p (const char *constraint)
{
  bool seen_f_p = false;
  bool seen_v_p = false;

  for (size_t i = 0, c_len = strlen (constraint); i < c_len;
       i += CONSTRAINT_LEN (constraint[i], constraint + i))
    {
      if (constraint[i] == 'f')
	seen_f_p = true;
      if (constraint[i] == 'v')
	seen_v_p = true;
    }

  /* Treat "fv" constraints as "v", because LRA will choose the widest
     register class.  */
  return seen_f_p && !seen_v_p;
}
/* Return TRUE iff X is a hard floating-point (and not a vector) register.  */

static bool
s390_hard_fp_reg_p (rtx x)
{
  if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
    return false;

  tree decl = REG_EXPR (x);
  if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  return name[0] == '*' && name[1] == 'f';
}
/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
   constraints when long doubles are stored in vector registers.  */

static rtx_insn *
s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
		    vec<machine_mode> &input_modes,
		    vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
		    HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  if (!TARGET_VXE)
    /* Long doubles are stored in FPR pairs - nothing to do.  */
    return NULL;

  rtx_insn *after_md_seq = NULL, *after_md_end = NULL;

  unsigned ninputs = inputs.length ();
  unsigned noutputs = outputs.length ();
  for (unsigned i = 0; i < noutputs; i++)
    {
      if (GET_MODE (outputs[i]) != TFmode)
	/* Not a long double - nothing to do.  */
	continue;
      const char *constraint = constraints[i];
      bool allows_mem, allows_reg, is_inout;
      bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
					 &allows_mem, &allows_reg, &is_inout);
      gcc_assert (ok);
      if (!f_constraint_p (constraint))
	/* Long double with a constraint other than "=f" - nothing to do.  */
	continue;
      gcc_assert (allows_reg);
      gcc_assert (!is_inout);
      /* Copy output value from a FPR pair into a vector register.  */
      rtx fprx2;
      push_to_sequence2 (after_md_seq, after_md_end);
      if (s390_hard_fp_reg_p (outputs[i]))
	{
	  fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
	  /* The first half is already at the correct location, copy only the
	   * second one.  Use the UNSPEC pattern instead of the SUBREG one,
	   * since s390_can_change_mode_class() rejects
	   * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails.  */
	  rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
	  rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
	  emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
	}
      else
	{
	  fprx2 = gen_reg_rtx (FPRX2mode);
	  emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
	}
      after_md_seq = get_insns ();
      after_md_end = get_last_insn ();
      end_sequence ();
      outputs[i] = fprx2;
    }

  for (unsigned i = 0; i < ninputs; i++)
    {
      if (GET_MODE (inputs[i]) != TFmode)
	/* Not a long double - nothing to do.  */
	continue;
      const char *constraint = constraints[noutputs + i];
      bool allows_mem, allows_reg;
      bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
					constraints.address (), &allows_mem,
					&allows_reg);
      gcc_assert (ok);
      if (!f_constraint_p (constraint))
	/* Long double with a constraint other than "f" (or "=f" for inout
	   operands) - nothing to do.  */
	continue;
      gcc_assert (allows_reg);
      /* Copy input value from a vector register into a FPR pair.  */
      rtx fprx2;
      if (s390_hard_fp_reg_p (inputs[i]))
	{
	  fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
	  /* Copy only the second half.  */
	  rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
	  rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
	  emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
	}
      else
	{
	  fprx2 = gen_reg_rtx (FPRX2mode);
	  emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
	}
      input_modes[i] = FPRX2mode;
      inputs[i] = fprx2;
    }

  return after_md_seq;
}
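
/* Example of an asm statement the hook rewrites (hypothetical user
   code, assuming a CPU where TARGET_VXE holds):

     long double out, in;
     asm ("sqxbr\t%0,%1" : "=f" (out) : "f" (in));

   The input loop above copies "in" from a vector register into an FPR
   pair before the asm, and the output loop copies the resulting FPR
   pair back into the vector register holding "out".  */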
/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
/* This hook is only needed to maintain the historic behavior with glibc
   versions that typedef float_t to double.  */
#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision
#endif

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  s390_sched_dependencies_evaluation

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask

/* Use only short displacement, since long displacement is not available for
   the floating point instructions.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"