1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2020 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
27 #include "coretypes.h"
30 #include "target-globals.h"
39 #include "stringpool.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
55 #include "conditions.h"
57 #include "insn-attr.h"
69 #include "cfgcleanup.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
77 #include "tree-pass.h"
82 #include "tm-constrs.h"
84 #include "symbol-summary.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
89 /* This file should be included last. */
90 #include "target-def.h"
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode
);
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl
;
97 /* Define the specific costs for a given cpu. */
/* Per-CPU instruction cost table.  One entry per relevant instruction
   (mnemonic given in the trailing comment); all values are expressed
   via COSTS_N_INSNS in the per-processor tables below.  */
struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of a DXBR instruction.  */
  const int ddbr;     /* cost of a DDBR instruction.  */
  const int debr;     /* cost of a DEBR instruction.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
137 struct processor_costs z900_cost
=
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
169 struct processor_costs z990_cost
=
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
201 struct processor_costs z9_109_cost
=
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
233 struct processor_costs z10_cost
=
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1) , /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
265 struct processor_costs z196_cost
=
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1) , /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
297 struct processor_costs zEC12_cost
=
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1) , /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
328 const struct s390_processor processor_table
[] =
330 { "z900", "z900", PROCESSOR_2064_Z900
, &z900_cost
, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990
, &z990_cost
, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109
, &z9_109_cost
, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC
, &z9_109_cost
, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10
, &z10_cost
, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196
, &z196_cost
, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12
, &zEC12_cost
, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13
, &zEC12_cost
, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14
, &zEC12_cost
, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15
, &zEC12_cost
, 13 },
340 { "native", "", PROCESSOR_NATIVE
, NULL
, 0 }
343 extern int reload_completed
;
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
346 static rtx_insn
*last_scheduled_insn
;
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance
[MAX_SCHED_UNITS
][NUM_SIDES
];
352 /* Estimate of number of cycles a long-running insn occupies an
354 static int fxd_longrunning
[NUM_SIDES
];
355 static int fpd_longrunning
[NUM_SIDES
];
357 /* The maximum score added for an instruction whose unit hasn't been
358 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
359 give instruction mix scheduling more priority over instruction
361 #define MAX_SCHED_MIX_SCORE 2
363 /* The maximum distance up to which individual scores will be
364 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
365 Increase this with the OOO windows size of the machine. */
366 #define MAX_SCHED_MIX_DISTANCE 70
368 /* Structure used to hold the components of a S/390 memory
369 address. A legitimate address on S/390 is of the general
371 base + index + displacement
372 where any of the components is optional.
374 base and index are registers of the class ADDR_REGS,
375 displacement is an unsigned 12-bit immediate constant. */
377 /* The max number of insns of backend generated memset/memcpy/memcmp
378 loops. This value is used in the unroll adjust hook to detect such
379 loops. Current max is 9 coming from the memcmp loop. */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
391 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
396 ? cfun_frame_layout.fpr_bitmap & 0x0f \
397 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
401 (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
403 (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405 cfun->machine->frame_layout.gpr_save_slots[REGNO]
407 /* Number of GPRs and FPRs used for argument passing. */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
412 /* A couple of shortcuts. */
413 #define CONST_OK_FOR_J(x) \
414 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
424 #define REGNO_PAIR_OK(REGNO, MODE) \
425 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
427 /* That's the read ahead of the dynamic branch prediction unit in
428 bytes on a z10 (or higher) CPU. */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431 /* Masks per jump target register indicating which thunk need to be
433 static GTY(()) int indirect_branch_prez10thunk_mask
= 0;
434 static GTY(()) int indirect_branch_z10thunk_mask
= 0;
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
/* Index values for the per-feature indirect-branch option tables
   below (jump / call / return-via-register / return-via-memory).  */
enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };
446 static GTY(()) int indirect_branch_table_label_no
[INDIRECT_BRANCH_NUM_OPTIONS
] = { 0 };
447 const char *indirect_branch_table_label
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
448 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
450 { ".s390_indirect_jump", ".s390_indirect_call",
451 ".s390_return_reg", ".s390_return_mem" };
454 s390_return_addr_from_memory ()
456 return cfun_gpr_save_slot(RETURN_REGNUM
) == SAVE_SLOT_STACK
;
459 /* Indicate which ABI has been used for passing vector args.
460 0 - no vector type arguments have been passed where the ABI is relevant
461 1 - the old ABI has been used
462 2 - a vector type argument has been passed either in a vector register
463 or on the stack by value */
464 static int s390_vector_abi
= 0;
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467 switch. The vector ABI affects only vector data types. There are
468 two aspects of the vector ABI relevant here:
470 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471 ABI and natural alignment with the old.
473 2. vector <= 16 bytes are passed in VRs or by value on the stack
474 with the new ABI but by reference on the stack with the old.
476 If ARG_P is true TYPE is used for a function argument or return
477 value. The ABI marker then is set for all vector data types. If
478 ARG_P is false only type 1 vectors are being checked. */
481 s390_check_type_for_vector_abi (const_tree type
, bool arg_p
, bool in_struct_p
)
483 static hash_set
<const_tree
> visited_types_hash
;
488 if (type
== NULL_TREE
|| TREE_CODE (type
) == ERROR_MARK
)
491 if (visited_types_hash
.contains (type
))
494 visited_types_hash
.add (type
);
496 if (VECTOR_TYPE_P (type
))
498 int type_size
= int_size_in_bytes (type
);
500 /* Outside arguments only the alignment is changing and this
501 only happens for vector types >= 16 bytes. */
502 if (!arg_p
&& type_size
< 16)
505 /* In arguments vector types > 16 are passed as before (GCC
506 never enforced the bigger alignment for arguments which was
507 required by the old vector ABI). However, it might still be
508 ABI relevant due to the changed alignment if it is a struct
510 if (arg_p
&& type_size
> 16 && !in_struct_p
)
513 s390_vector_abi
= TARGET_VX_ABI
? 2 : 1;
515 else if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == ARRAY_TYPE
)
517 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
518 natural alignment there will never be ABI dependent padding
519 in an array type. That's why we do not set in_struct_p to
521 s390_check_type_for_vector_abi (TREE_TYPE (type
), arg_p
, in_struct_p
);
523 else if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
527 /* Check the return type. */
528 s390_check_type_for_vector_abi (TREE_TYPE (type
), true, false);
530 for (arg_chain
= TYPE_ARG_TYPES (type
);
532 arg_chain
= TREE_CHAIN (arg_chain
))
533 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain
), true, false);
535 else if (RECORD_OR_UNION_TYPE_P (type
))
539 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
541 if (TREE_CODE (field
) != FIELD_DECL
)
544 s390_check_type_for_vector_abi (TREE_TYPE (field
), arg_p
, true);
550 /* System z builtins. */
552 #include "s390-builtins.h"
554 const unsigned int bflags_builtin
[S390_BUILTIN_MAX
+ 1] =
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
566 const unsigned int opflags_builtin
[S390_BUILTIN_MAX
+ 1] =
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
578 const unsigned int bflags_overloaded_builtin
[S390_OVERLOADED_BUILTIN_MAX
+ 1] =
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
591 bflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
604 opflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
616 tree s390_builtin_types
[BT_MAX
];
617 tree s390_builtin_fn_types
[BT_FN_MAX
];
618 tree s390_builtin_decls
[S390_BUILTIN_MAX
+
619 S390_OVERLOADED_BUILTIN_MAX
+
620 S390_OVERLOADED_BUILTIN_VAR_MAX
];
622 static enum insn_code
const code_for_builtin
[S390_BUILTIN_MAX
+ 1] = {
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
628 #define OB_DEF_VAR(...)
630 #include "s390-builtins.def"
635 s390_init_builtins (void)
637 /* These definitions are being used in s390-builtins.def. */
638 tree returns_twice_attr
= tree_cons (get_identifier ("returns_twice"),
640 tree noreturn_attr
= tree_cons (get_identifier ("noreturn"), NULL
, NULL
);
641 tree c_uint64_type_node
;
643 /* The uint64_type_node from tree.c is not compatible to the C99
644 uint64_t data type. What we want is c_uint64_type_node from
645 c-common.c. But since backend code is not supposed to interface
646 with the frontend we recreate it here. */
648 c_uint64_type_node
= long_unsigned_type_node
;
650 c_uint64_type_node
= long_long_unsigned_type_node
;
653 #define DEF_TYPE(INDEX, NODE, CONST_P) \
654 if (s390_builtin_types[INDEX] == NULL) \
655 s390_builtin_types[INDEX] = (!CONST_P) ? \
656 (NODE) : build_type_variant ((NODE), 1, 0);
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
660 if (s390_builtin_types[INDEX] == NULL) \
661 s390_builtin_types[INDEX] = \
662 build_pointer_type (s390_builtin_types[INDEX_BASE]);
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
666 if (s390_builtin_types[INDEX] == NULL) \
667 s390_builtin_types[INDEX] = \
668 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
672 if (s390_builtin_types[INDEX] == NULL) \
673 s390_builtin_types[INDEX] = \
674 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
678 if (s390_builtin_types[INDEX] == NULL) \
679 s390_builtin_types[INDEX] = \
680 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
683 #define DEF_FN_TYPE(INDEX, args...) \
684 if (s390_builtin_fn_types[INDEX] == NULL) \
685 s390_builtin_fn_types[INDEX] = \
686 build_function_type_list (args, NULL_TREE);
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
693 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
694 s390_builtin_decls[S390_BUILTIN_##NAME] = \
695 add_builtin_function ("__builtin_" #NAME, \
696 s390_builtin_fn_types[FNTYPE], \
697 S390_BUILTIN_##NAME, \
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
703 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
705 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719 builtin DECL. The operand flags from s390-builtins.def have to
720 passed as OP_FLAGS. */
722 s390_const_operand_ok (tree arg
, int argnum
, int op_flags
, tree decl
)
724 if (O_UIMM_P (op_flags
))
726 int bitwidths
[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727 int bitwidth
= bitwidths
[op_flags
- O_U1
];
729 if (!tree_fits_uhwi_p (arg
)
730 || tree_to_uhwi (arg
) > (HOST_WIDE_INT_1U
<< bitwidth
) - 1)
732 error ("constant argument %d for builtin %qF is out of range "
733 "(0..%wu)", argnum
, decl
,
734 (HOST_WIDE_INT_1U
<< bitwidth
) - 1);
739 if (O_SIMM_P (op_flags
))
741 int bitwidths
[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742 int bitwidth
= bitwidths
[op_flags
- O_S2
];
744 if (!tree_fits_shwi_p (arg
)
745 || tree_to_shwi (arg
) < -(HOST_WIDE_INT_1
<< (bitwidth
- 1))
746 || tree_to_shwi (arg
) > ((HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1))
748 error ("constant argument %d for builtin %qF is out of range "
749 "(%wd..%wd)", argnum
, decl
,
750 -(HOST_WIDE_INT_1
<< (bitwidth
- 1)),
751 (HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1);
758 /* Expand an expression EXP that calls a built-in function,
759 with result going to TARGET if that's convenient
760 (and in mode MODE if that's convenient).
761 SUBTARGET may be used as the target for computing one of EXP's operands.
762 IGNORE is nonzero if the value is to be ignored. */
765 s390_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
766 machine_mode mode ATTRIBUTE_UNUSED
,
767 int ignore ATTRIBUTE_UNUSED
)
771 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
772 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
773 enum insn_code icode
;
774 rtx op
[MAX_ARGS
], pat
;
778 call_expr_arg_iterator iter
;
779 unsigned int all_op_flags
= opflags_for_builtin (fcode
);
780 machine_mode last_vec_mode
= VOIDmode
;
782 if (TARGET_DEBUG_ARG
)
785 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 (int)fcode
, IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
787 bflags_for_builtin (fcode
));
790 if (S390_USE_TARGET_ATTRIBUTE
)
794 bflags
= bflags_for_builtin (fcode
);
795 if ((bflags
& B_HTM
) && !TARGET_HTM
)
797 error ("builtin %qF is not supported without %<-mhtm%> "
798 "(default with %<-march=zEC12%> and higher).", fndecl
);
801 if (((bflags
& B_VX
) || (bflags
& B_VXE
)) && !TARGET_VX
)
803 error ("builtin %qF requires %<-mvx%> "
804 "(default with %<-march=z13%> and higher).", fndecl
);
808 if ((bflags
& B_VXE
) && !TARGET_VXE
)
810 error ("Builtin %qF requires z14 or higher.", fndecl
);
814 if ((bflags
& B_VXE2
) && !TARGET_VXE2
)
816 error ("Builtin %qF requires z15 or higher.", fndecl
);
820 if (fcode
>= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821 && fcode
< S390_ALL_BUILTIN_MAX
)
825 else if (fcode
< S390_OVERLOADED_BUILTIN_OFFSET
)
827 icode
= code_for_builtin
[fcode
];
828 /* Set a flag in the machine specific cfun part in order to support
829 saving/restoring of FPRs. */
830 if (fcode
== S390_BUILTIN_tbegin
|| fcode
== S390_BUILTIN_tbegin_retry
)
831 cfun
->machine
->tbegin_p
= true;
833 else if (fcode
< S390_OVERLOADED_BUILTIN_VAR_OFFSET
)
835 error ("unresolved overloaded builtin");
839 internal_error ("bad builtin fcode");
842 internal_error ("bad builtin icode");
844 nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
848 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
850 || GET_MODE (target
) != tmode
851 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
852 target
= gen_reg_rtx (tmode
);
854 /* There are builtins (e.g. vec_promote) with no vector
855 arguments but an element selector. So we have to also look
856 at the vector return type when emitting the modulo
858 if (VECTOR_MODE_P (insn_data
[icode
].operand
[0].mode
))
859 last_vec_mode
= insn_data
[icode
].operand
[0].mode
;
863 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
866 const struct insn_operand_data
*insn_op
;
867 unsigned int op_flags
= all_op_flags
& ((1 << O_SHIFT
) - 1);
869 all_op_flags
= all_op_flags
>> O_SHIFT
;
871 if (arg
== error_mark_node
)
873 if (arity
>= MAX_ARGS
)
876 if (O_IMM_P (op_flags
)
877 && TREE_CODE (arg
) != INTEGER_CST
)
879 error ("constant value required for builtin %qF argument %d",
884 if (!s390_const_operand_ok (arg
, arity
+ 1, op_flags
, fndecl
))
887 insn_op
= &insn_data
[icode
].operand
[arity
+ nonvoid
];
888 op
[arity
] = expand_expr (arg
, NULL_RTX
, insn_op
->mode
, EXPAND_NORMAL
);
890 /* expand_expr truncates constants to the target mode only if it
891 is "convenient". However, our checks below rely on this
893 if (CONST_INT_P (op
[arity
])
894 && SCALAR_INT_MODE_P (insn_op
->mode
)
895 && GET_MODE (op
[arity
]) != insn_op
->mode
)
896 op
[arity
] = GEN_INT (trunc_int_for_mode (INTVAL (op
[arity
]),
899 /* Wrap the expanded RTX for pointer types into a MEM expr with
900 the proper mode. This allows us to use e.g. (match_operand
901 "memory_operand"..) in the insn patterns instead of (mem
902 (match_operand "address_operand)). This is helpful for
903 patterns not just accepting MEMs. */
904 if (POINTER_TYPE_P (TREE_TYPE (arg
))
905 && insn_op
->predicate
!= address_operand
)
906 op
[arity
] = gen_rtx_MEM (insn_op
->mode
, op
[arity
]);
908 /* Expand the module operation required on element selectors. */
909 if (op_flags
== O_ELEM
)
911 gcc_assert (last_vec_mode
!= VOIDmode
);
912 op
[arity
] = simplify_expand_binop (SImode
, code_to_optab (AND
),
914 GEN_INT (GET_MODE_NUNITS (last_vec_mode
) - 1),
915 NULL_RTX
, 1, OPTAB_DIRECT
);
918 /* Record the vector mode used for an element selector. This assumes:
919 1. There is no builtin with two different vector modes and an element selector
920 2. The element selector comes after the vector type it is referring to.
921 This currently the true for all the builtins but FIXME we
922 should better check for that. */
923 if (VECTOR_MODE_P (insn_op
->mode
))
924 last_vec_mode
= insn_op
->mode
;
926 if (insn_op
->predicate (op
[arity
], insn_op
->mode
))
932 /* A memory operand is rejected by the memory_operand predicate.
933 Try making the address legal by copying it into a register. */
934 if (MEM_P (op
[arity
])
935 && insn_op
->predicate
== memory_operand
936 && (GET_MODE (XEXP (op
[arity
], 0)) == Pmode
937 || GET_MODE (XEXP (op
[arity
], 0)) == VOIDmode
))
939 op
[arity
] = replace_equiv_address (op
[arity
],
940 copy_to_mode_reg (Pmode
,
941 XEXP (op
[arity
], 0)));
943 /* Some of the builtins require different modes/types than the
944 pattern in order to implement a specific API. Instead of
945 adding many expanders which do the mode change we do it here.
946 E.g. s390_vec_add_u128 required to have vector unsigned char
947 arguments is mapped to addti3. */
948 else if (insn_op
->mode
!= VOIDmode
949 && GET_MODE (op
[arity
]) != VOIDmode
950 && GET_MODE (op
[arity
]) != insn_op
->mode
951 && ((tmp_rtx
= simplify_gen_subreg (insn_op
->mode
, op
[arity
],
952 GET_MODE (op
[arity
]), 0))
958 /* The predicate rejects the operand although the mode is fine.
959 Copy the operand to register. */
960 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
)
961 && (GET_MODE (op
[arity
]) == insn_op
->mode
962 || GET_MODE (op
[arity
]) == VOIDmode
963 || (insn_op
->predicate
== address_operand
964 && GET_MODE (op
[arity
]) == Pmode
)))
966 /* An address_operand usually has VOIDmode in the expander
967 so we cannot use this. */
968 machine_mode target_mode
=
969 (insn_op
->predicate
== address_operand
970 ? (machine_mode
) Pmode
: insn_op
->mode
);
971 op
[arity
] = copy_to_mode_reg (target_mode
, op
[arity
]);
974 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
))
976 error ("invalid argument %d for builtin %qF", arity
+ 1, fndecl
);
985 pat
= GEN_FCN (icode
) (target
);
989 pat
= GEN_FCN (icode
) (target
, op
[0]);
991 pat
= GEN_FCN (icode
) (op
[0]);
995 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
997 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
1001 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
1003 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
1007 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
1009 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
1013 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
1015 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
1019 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
1021 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
1037 static const int s390_hotpatch_hw_max
= 1000000;
1038 static int s390_hotpatch_hw_before_label
= 0;
1039 static int s390_hotpatch_hw_after_label
= 0;
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042 an argument, the argument is valid. */
1045 s390_handle_hotpatch_attribute (tree
*node
, tree name
, tree args
,
1046 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1052 if (TREE_CODE (*node
) != FUNCTION_DECL
)
1054 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
1056 *no_add_attrs
= true;
1058 if (args
!= NULL
&& TREE_CHAIN (args
) != NULL
)
1060 expr
= TREE_VALUE (args
);
1061 expr2
= TREE_VALUE (TREE_CHAIN (args
));
1063 if (args
== NULL
|| TREE_CHAIN (args
) == NULL
)
1065 else if (TREE_CODE (expr
) != INTEGER_CST
1066 || !INTEGRAL_TYPE_P (TREE_TYPE (expr
))
1067 || wi::gtu_p (wi::to_wide (expr
), s390_hotpatch_hw_max
))
1069 else if (TREE_CODE (expr2
) != INTEGER_CST
1070 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2
))
1071 || wi::gtu_p (wi::to_wide (expr2
), s390_hotpatch_hw_max
))
1077 error ("requested %qE attribute is not a comma separated pair of"
1078 " non-negative integer constants or too large (max. %d)", name
,
1079 s390_hotpatch_hw_max
);
1080 *no_add_attrs
= true;
1086 /* Expand the s390_vector_bool type attribute. */
1089 s390_handle_vectorbool_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
1090 tree args ATTRIBUTE_UNUSED
,
1091 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1093 tree type
= *node
, result
= NULL_TREE
;
1096 while (POINTER_TYPE_P (type
)
1097 || TREE_CODE (type
) == FUNCTION_TYPE
1098 || TREE_CODE (type
) == METHOD_TYPE
1099 || TREE_CODE (type
) == ARRAY_TYPE
)
1100 type
= TREE_TYPE (type
);
1102 mode
= TYPE_MODE (type
);
1105 case E_DImode
: case E_V2DImode
:
1106 result
= s390_builtin_types
[BT_BV2DI
];
1108 case E_SImode
: case E_V4SImode
:
1109 result
= s390_builtin_types
[BT_BV4SI
];
1111 case E_HImode
: case E_V8HImode
:
1112 result
= s390_builtin_types
[BT_BV8HI
];
1114 case E_QImode
: case E_V16QImode
:
1115 result
= s390_builtin_types
[BT_BV16QI
];
1121 *no_add_attrs
= true; /* No need to hang on to the attribute. */
1124 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
1129 /* Check syntax of function decl attributes having a string type value. */
1132 s390_handle_string_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
1133 tree args ATTRIBUTE_UNUSED
,
1134 int flags ATTRIBUTE_UNUSED
,
1139 if (TREE_CODE (*node
) != FUNCTION_DECL
)
1141 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
1143 *no_add_attrs
= true;
1146 cst
= TREE_VALUE (args
);
1148 if (TREE_CODE (cst
) != STRING_CST
)
1150 warning (OPT_Wattributes
,
1151 "%qE attribute requires a string constant argument",
1153 *no_add_attrs
= true;
1156 if (is_attribute_p ("indirect_branch", name
)
1157 || is_attribute_p ("indirect_branch_call", name
)
1158 || is_attribute_p ("function_return", name
)
1159 || is_attribute_p ("function_return_reg", name
)
1160 || is_attribute_p ("function_return_mem", name
))
1162 if (strcmp (TREE_STRING_POINTER (cst
), "keep") != 0
1163 && strcmp (TREE_STRING_POINTER (cst
), "thunk") != 0
1164 && strcmp (TREE_STRING_POINTER (cst
), "thunk-extern") != 0)
1166 warning (OPT_Wattributes
,
1167 "argument to %qE attribute is not "
1168 "(keep|thunk|thunk-extern)", name
);
1169 *no_add_attrs
= true;
1173 if (is_attribute_p ("indirect_branch_jump", name
)
1174 && strcmp (TREE_STRING_POINTER (cst
), "keep") != 0
1175 && strcmp (TREE_STRING_POINTER (cst
), "thunk") != 0
1176 && strcmp (TREE_STRING_POINTER (cst
), "thunk-inline") != 0
1177 && strcmp (TREE_STRING_POINTER (cst
), "thunk-extern") != 0)
1179 warning (OPT_Wattributes
,
1180 "argument to %qE attribute is not "
1181 "(keep|thunk|thunk-inline|thunk-extern)", name
);
1182 *no_add_attrs
= true;
1188 static const struct attribute_spec s390_attribute_table
[] = {
1189 { "hotpatch", 2, 2, true, false, false, false,
1190 s390_handle_hotpatch_attribute
, NULL
},
1191 { "s390_vector_bool", 0, 0, false, true, false, true,
1192 s390_handle_vectorbool_attribute
, NULL
},
1193 { "indirect_branch", 1, 1, true, false, false, false,
1194 s390_handle_string_attribute
, NULL
},
1195 { "indirect_branch_jump", 1, 1, true, false, false, false,
1196 s390_handle_string_attribute
, NULL
},
1197 { "indirect_branch_call", 1, 1, true, false, false, false,
1198 s390_handle_string_attribute
, NULL
},
1199 { "function_return", 1, 1, true, false, false, false,
1200 s390_handle_string_attribute
, NULL
},
1201 { "function_return_reg", 1, 1, true, false, false, false,
1202 s390_handle_string_attribute
, NULL
},
1203 { "function_return_mem", 1, 1, true, false, false, false,
1204 s390_handle_string_attribute
, NULL
},
1207 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1210 /* Return the alignment for LABEL. We default to the -falign-labels
1211 value except for the literal pool base label. */
1213 s390_label_align (rtx_insn
*label
)
1215 rtx_insn
*prev_insn
= prev_active_insn (label
);
1218 if (prev_insn
== NULL_RTX
)
1221 set
= single_set (prev_insn
);
1223 if (set
== NULL_RTX
)
1226 src
= SET_SRC (set
);
1228 /* Don't align literal pool base labels. */
1229 if (GET_CODE (src
) == UNSPEC
1230 && XINT (src
, 1) == UNSPEC_MAIN_BASE
)
1234 return align_labels
.levels
[0].log
;
1237 static GTY(()) rtx got_symbol
;
1239 /* Return the GOT table symbol. The symbol will be created when the
1240 function is invoked for the first time. */
1243 s390_got_symbol (void)
1247 got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1248 SYMBOL_REF_FLAGS (got_symbol
) = SYMBOL_FLAG_LOCAL
;
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1257 return TARGET_64BIT
? DImode
: SImode
;
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1263 return TARGET_64BIT
? DImode
: SImode
;
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1269 return TARGET_64BIT
? DImode
: SImode
;
1272 /* Return true if the back end supports mode MODE. */
1274 s390_scalar_mode_supported_p (scalar_mode mode
)
1276 /* In contrast to the default implementation reject TImode constants on 31bit
1277 TARGET_ZARCH for ABI compliance. */
1278 if (!TARGET_64BIT
&& TARGET_ZARCH
&& mode
== TImode
)
1281 if (DECIMAL_FLOAT_MODE_P (mode
))
1282 return default_decimal_float_supported_p ();
1284 return default_scalar_mode_supported_p (mode
);
1287 /* Return true if the back end supports vector mode MODE. */
1289 s390_vector_mode_supported_p (machine_mode mode
)
1293 if (!VECTOR_MODE_P (mode
)
1295 || GET_MODE_SIZE (mode
) > 16)
1298 inner
= GET_MODE_INNER (mode
);
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1319 s390_set_has_landing_pad_p (bool value
)
1321 cfun
->machine
->has_landing_pad_p
= value
;
1324 /* If two condition code modes are compatible, return a condition code
1325 mode which is compatible with both. Otherwise, return
1329 s390_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
1337 if (m2
== CCUmode
|| m2
== CCTmode
|| m2
== CCZ1mode
1338 || m2
== CCSmode
|| m2
== CCSRmode
|| m2
== CCURmode
)
1359 /* Return true if SET either doesn't set the CC register, or else
1360 the source and destination have matching CC modes and that
1361 CC mode is at least as constrained as REQ_MODE. */
1364 s390_match_ccmode_set (rtx set
, machine_mode req_mode
)
1366 machine_mode set_mode
;
1368 gcc_assert (GET_CODE (set
) == SET
);
1370 /* These modes are supposed to be used only in CC consumer
1372 gcc_assert (req_mode
!= CCVIALLmode
&& req_mode
!= CCVIANYmode
1373 && req_mode
!= CCVFALLmode
&& req_mode
!= CCVFANYmode
);
1375 if (GET_CODE (SET_DEST (set
)) != REG
|| !CC_REGNO_P (REGNO (SET_DEST (set
))))
1378 set_mode
= GET_MODE (SET_DEST (set
));
1400 if (req_mode
!= set_mode
)
1405 if (req_mode
!= CCSmode
&& req_mode
!= CCUmode
&& req_mode
!= CCTmode
1406 && req_mode
!= CCSRmode
&& req_mode
!= CCURmode
1407 && req_mode
!= CCZ1mode
)
1413 if (req_mode
!= CCAmode
)
1421 return (GET_MODE (SET_SRC (set
)) == set_mode
);
1424 /* Return true if every SET in INSN that sets the CC register
1425 has source and destination with matching CC modes and that
1426 CC mode is at least as constrained as REQ_MODE.
1427 If REQ_MODE is VOIDmode, always return false. */
1430 s390_match_ccmode (rtx_insn
*insn
, machine_mode req_mode
)
1434 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1435 if (req_mode
== VOIDmode
)
1438 if (GET_CODE (PATTERN (insn
)) == SET
)
1439 return s390_match_ccmode_set (PATTERN (insn
), req_mode
);
1441 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
1442 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
1444 rtx set
= XVECEXP (PATTERN (insn
), 0, i
);
1445 if (GET_CODE (set
) == SET
)
1446 if (!s390_match_ccmode_set (set
, req_mode
))
1453 /* If a test-under-mask instruction can be used to implement
1454 (compare (and ... OP1) OP2), return the CC mode required
1455 to do that. Otherwise, return VOIDmode.
1456 MIXED is true if the instruction can distinguish between
1457 CC1 and CC2 for mixed selected bits (TMxx), it is false
1458 if the instruction cannot (TM). */
1461 s390_tm_ccmode (rtx op1
, rtx op2
, bool mixed
)
1465 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1466 if (GET_CODE (op1
) != CONST_INT
|| GET_CODE (op2
) != CONST_INT
)
1469 /* Selected bits all zero: CC0.
1470 e.g.: int a; if ((a & (16 + 128)) == 0) */
1471 if (INTVAL (op2
) == 0)
1474 /* Selected bits all one: CC3.
1475 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476 if (INTVAL (op2
) == INTVAL (op1
))
1479 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1481 if ((a & (16 + 128)) == 16) -> CCT1
1482 if ((a & (16 + 128)) == 128) -> CCT2 */
1485 bit1
= exact_log2 (INTVAL (op2
));
1486 bit0
= exact_log2 (INTVAL (op1
) ^ INTVAL (op2
));
1487 if (bit0
!= -1 && bit1
!= -1)
1488 return bit0
> bit1
? CCT1mode
: CCT2mode
;
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495 OP0 and OP1 of a COMPARE, return the mode to be used for the
1499 s390_select_ccmode (enum rtx_code code
, rtx op0
, rtx op1
)
1505 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1506 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1508 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1509 && CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1))))
1511 if ((GET_CODE (op0
) == PLUS
|| GET_CODE (op0
) == MINUS
1512 || GET_CODE (op1
) == NEG
)
1513 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1516 if (GET_CODE (op0
) == AND
)
1518 /* Check whether we can potentially do it via TM. */
1519 machine_mode ccmode
;
1520 ccmode
= s390_tm_ccmode (XEXP (op0
, 1), op1
, 1);
1521 if (ccmode
!= VOIDmode
)
1523 /* Relax CCTmode to CCZmode to allow fall-back to AND
1524 if that turns out to be beneficial. */
1525 return ccmode
== CCTmode
? CCZmode
: ccmode
;
1529 if (register_operand (op0
, HImode
)
1530 && GET_CODE (op1
) == CONST_INT
1531 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 65535))
1533 if (register_operand (op0
, QImode
)
1534 && GET_CODE (op1
) == CONST_INT
1535 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 255))
1544 /* The only overflow condition of NEG and ABS happens when
1545 -INT_MAX is used as parameter, which stays negative. So
1546 we have an overflow from a positive value to a negative.
1547 Using CCAP mode the resulting cc can be used for comparisons. */
1548 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1549 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1552 /* If constants are involved in an add instruction it is possible to use
1553 the resulting cc for comparisons with zero. Knowing the sign of the
1554 constant the overflow behavior gets predictable. e.g.:
1555 int a, b; if ((b = a + c) > 0)
1556 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1557 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1558 && (CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1)))
1559 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0
, 1)), 'O', "Os")
1560 /* Avoid INT32_MIN on 32 bit. */
1561 && (!TARGET_ZARCH
|| INTVAL (XEXP (op0
, 1)) != -0x7fffffff - 1))))
1563 if (INTVAL (XEXP((op0
), 1)) < 0)
1571 if (HONOR_NANS (op0
) || HONOR_NANS (op1
))
1582 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1583 && GET_CODE (op1
) != CONST_INT
)
1589 if (GET_CODE (op0
) == PLUS
1590 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1593 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1594 && GET_CODE (op1
) != CONST_INT
)
1600 if (GET_CODE (op0
) == MINUS
1601 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1604 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1605 && GET_CODE (op1
) != CONST_INT
)
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615 that we can implement more efficiently. */
1618 s390_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
1619 bool op0_preserve_value
)
1621 if (op0_preserve_value
)
1624 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1625 if ((*code
== EQ
|| *code
== NE
)
1626 && *op1
== const0_rtx
1627 && GET_CODE (*op0
) == ZERO_EXTRACT
1628 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1629 && GET_CODE (XEXP (*op0
, 2)) == CONST_INT
1630 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1632 rtx inner
= XEXP (*op0
, 0);
1633 HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (GET_MODE (inner
));
1634 HOST_WIDE_INT len
= INTVAL (XEXP (*op0
, 1));
1635 HOST_WIDE_INT pos
= INTVAL (XEXP (*op0
, 2));
1637 if (len
> 0 && len
< modesize
1638 && pos
>= 0 && pos
+ len
<= modesize
1639 && modesize
<= HOST_BITS_PER_WIDE_INT
)
1641 unsigned HOST_WIDE_INT block
;
1642 block
= (HOST_WIDE_INT_1U
<< len
) - 1;
1643 block
<<= modesize
- pos
- len
;
1645 *op0
= gen_rtx_AND (GET_MODE (inner
), inner
,
1646 gen_int_mode (block
, GET_MODE (inner
)));
1650 /* Narrow AND of memory against immediate to enable TM. */
1651 if ((*code
== EQ
|| *code
== NE
)
1652 && *op1
== const0_rtx
1653 && GET_CODE (*op0
) == AND
1654 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1655 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1657 rtx inner
= XEXP (*op0
, 0);
1658 rtx mask
= XEXP (*op0
, 1);
1660 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1661 if (GET_CODE (inner
) == SUBREG
1662 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner
)))
1663 && (GET_MODE_SIZE (GET_MODE (inner
))
1664 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
1666 & GET_MODE_MASK (GET_MODE (inner
))
1667 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner
))))
1669 inner
= SUBREG_REG (inner
);
1671 /* Do not change volatile MEMs. */
1672 if (MEM_P (inner
) && !MEM_VOLATILE_P (inner
))
1674 int part
= s390_single_part (XEXP (*op0
, 1),
1675 GET_MODE (inner
), QImode
, 0);
1678 mask
= gen_int_mode (s390_extract_part (mask
, QImode
, 0), QImode
);
1679 inner
= adjust_address_nv (inner
, QImode
, part
);
1680 *op0
= gen_rtx_AND (QImode
, inner
, mask
);
1685 /* Narrow comparisons against 0xffff to HImode if possible. */
1686 if ((*code
== EQ
|| *code
== NE
)
1687 && GET_CODE (*op1
) == CONST_INT
1688 && INTVAL (*op1
) == 0xffff
1689 && SCALAR_INT_MODE_P (GET_MODE (*op0
))
1690 && (nonzero_bits (*op0
, GET_MODE (*op0
))
1691 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693 *op0
= gen_lowpart (HImode
, *op0
);
1697 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1698 if (GET_CODE (*op0
) == UNSPEC
1699 && XINT (*op0
, 1) == UNSPEC_STRCMPCC_TO_INT
1700 && XVECLEN (*op0
, 0) == 1
1701 && GET_MODE (XVECEXP (*op0
, 0, 0)) == CCUmode
1702 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1703 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1704 && *op1
== const0_rtx
)
1706 enum rtx_code new_code
= UNKNOWN
;
1709 case EQ
: new_code
= EQ
; break;
1710 case NE
: new_code
= NE
; break;
1711 case LT
: new_code
= GTU
; break;
1712 case GT
: new_code
= LTU
; break;
1713 case LE
: new_code
= GEU
; break;
1714 case GE
: new_code
= LEU
; break;
1718 if (new_code
!= UNKNOWN
)
1720 *op0
= XVECEXP (*op0
, 0, 0);
1725 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1726 if (GET_CODE (*op0
) == UNSPEC
1727 && XINT (*op0
, 1) == UNSPEC_CC_TO_INT
1728 && XVECLEN (*op0
, 0) == 1
1729 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1730 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1731 && CONST_INT_P (*op1
))
1733 enum rtx_code new_code
= UNKNOWN
;
1734 switch (GET_MODE (XVECEXP (*op0
, 0, 0)))
1740 case EQ
: new_code
= EQ
; break;
1741 case NE
: new_code
= NE
; break;
1748 if (new_code
!= UNKNOWN
)
1750 /* For CCRAWmode put the required cc mask into the second
1752 if (GET_MODE (XVECEXP (*op0
, 0, 0)) == CCRAWmode
1753 && INTVAL (*op1
) >= 0 && INTVAL (*op1
) <= 3)
1754 *op1
= gen_rtx_CONST_INT (VOIDmode
, 1 << (3 - INTVAL (*op1
)));
1755 *op0
= XVECEXP (*op0
, 0, 0);
1760 /* Simplify cascaded EQ, NE with const0_rtx. */
1761 if ((*code
== NE
|| *code
== EQ
)
1762 && (GET_CODE (*op0
) == EQ
|| GET_CODE (*op0
) == NE
)
1763 && GET_MODE (*op0
) == SImode
1764 && GET_MODE (XEXP (*op0
, 0)) == CCZ1mode
1765 && REG_P (XEXP (*op0
, 0))
1766 && XEXP (*op0
, 1) == const0_rtx
1767 && *op1
== const0_rtx
)
1769 if ((*code
== EQ
&& GET_CODE (*op0
) == NE
)
1770 || (*code
== NE
&& GET_CODE (*op0
) == EQ
))
1774 *op0
= XEXP (*op0
, 0);
1777 /* Prefer register over memory as first operand. */
1778 if (MEM_P (*op0
) && REG_P (*op1
))
1780 rtx tem
= *op0
; *op0
= *op1
; *op1
= tem
;
1781 *code
= (int)swap_condition ((enum rtx_code
)*code
);
1784 /* A comparison result is compared against zero. Replace it with
1785 the (perhaps inverted) original comparison.
1786 This probably should be done by simplify_relational_operation. */
1787 if ((*code
== EQ
|| *code
== NE
)
1788 && *op1
== const0_rtx
1789 && COMPARISON_P (*op0
)
1790 && CC_REG_P (XEXP (*op0
, 0)))
1792 enum rtx_code new_code
;
1795 new_code
= reversed_comparison_code_parts (GET_CODE (*op0
),
1797 XEXP (*op0
, 1), NULL
);
1799 new_code
= GET_CODE (*op0
);
1801 if (new_code
!= UNKNOWN
)
1804 *op1
= XEXP (*op0
, 1);
1805 *op0
= XEXP (*op0
, 0);
1809 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1811 && (*code
== EQ
|| *code
== NE
)
1812 && (GET_MODE (*op0
) == DImode
|| GET_MODE (*op0
) == SImode
)
1813 && GET_CODE (*op0
) == NOT
)
1815 machine_mode mode
= GET_MODE (*op0
);
1816 *op0
= gen_rtx_XOR (mode
, XEXP (*op0
, 0), *op1
);
1817 *op0
= gen_rtx_NOT (mode
, *op0
);
1821 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1823 && (*code
== EQ
|| *code
== NE
)
1824 && (GET_CODE (*op0
) == AND
|| GET_CODE (*op0
) == IOR
)
1825 && (GET_MODE (*op0
) == DImode
|| GET_MODE (*op0
) == SImode
)
1826 && CONST_INT_P (*op1
)
1827 && *op1
== constm1_rtx
)
1829 machine_mode mode
= GET_MODE (*op0
);
1830 rtx op00
= gen_rtx_NOT (mode
, XEXP (*op0
, 0));
1831 rtx op01
= gen_rtx_NOT (mode
, XEXP (*op0
, 1));
1833 if (GET_CODE (*op0
) == AND
)
1834 *op0
= gen_rtx_IOR (mode
, op00
, op01
);
1836 *op0
= gen_rtx_AND (mode
, op00
, op01
);
1843 /* Emit a compare instruction suitable to implement the comparison
1844 OP0 CODE OP1. Return the correct condition RTL to be placed in
1845 the IF_THEN_ELSE of the conditional branch testing the result. */
1848 s390_emit_compare (enum rtx_code code
, rtx op0
, rtx op1
)
1850 machine_mode mode
= s390_select_ccmode (code
, op0
, op1
);
1853 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
1855 /* Do not output a redundant compare instruction if a
1856 compare_and_swap pattern already computed the result and the
1857 machine modes are compatible. */
1858 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0
), mode
)
1864 cc
= gen_rtx_REG (mode
, CC_REGNUM
);
1865 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (mode
, op0
, op1
)));
1868 return gen_rtx_fmt_ee (code
, VOIDmode
, cc
, const0_rtx
);
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872 MEM, whose address is a pseudo containing the original MEM's address. */
1875 s390_legitimize_cs_operand (rtx mem
)
1879 if (!contains_symbol_ref_p (mem
))
1881 tmp
= gen_reg_rtx (Pmode
);
1882 emit_move_insn (tmp
, copy_rtx (XEXP (mem
, 0)));
1883 return change_address (mem
, VOIDmode
, tmp
);
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1888 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889 conditional branch testing the result. */
1892 s390_emit_compare_and_swap (enum rtx_code code
, rtx old
, rtx mem
,
1893 rtx cmp
, rtx new_rtx
, machine_mode ccmode
)
1897 mem
= s390_legitimize_cs_operand (mem
);
1898 cc
= gen_rtx_REG (ccmode
, CC_REGNUM
);
1899 switch (GET_MODE (mem
))
1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old
, mem
, cmp
,
1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old
, mem
, cmp
,
1910 emit_insn (gen_atomic_compare_and_swapti_internal (old
, mem
, cmp
,
1918 return s390_emit_compare (code
, cc
, const0_rtx
);
1921 /* Emit a jump instruction to TARGET and return it. If COND is
1922 NULL_RTX, emit an unconditional jump, else a conditional jump under
1926 s390_emit_jump (rtx target
, rtx cond
)
1930 target
= gen_rtx_LABEL_REF (VOIDmode
, target
);
1932 target
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, target
, pc_rtx
);
1934 insn
= gen_rtx_SET (pc_rtx
, target
);
1935 return emit_jump_insn (insn
);
1938 /* Return branch condition mask to implement a branch
1939 specified by CODE. Return -1 for invalid comparisons. */
1942 s390_branch_condition_mask (rtx code
)
1944 const int CC0
= 1 << 3;
1945 const int CC1
= 1 << 2;
1946 const int CC2
= 1 << 1;
1947 const int CC3
= 1 << 0;
1949 gcc_assert (GET_CODE (XEXP (code
, 0)) == REG
);
1950 gcc_assert (REGNO (XEXP (code
, 0)) == CC_REGNUM
);
1951 gcc_assert (XEXP (code
, 1) == const0_rtx
1952 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
1953 && CONST_INT_P (XEXP (code
, 1))));
1956 switch (GET_MODE (XEXP (code
, 0)))
1960 switch (GET_CODE (code
))
1962 case EQ
: return CC0
;
1963 case NE
: return CC1
| CC2
| CC3
;
1969 switch (GET_CODE (code
))
1971 case EQ
: return CC1
;
1972 case NE
: return CC0
| CC2
| CC3
;
1978 switch (GET_CODE (code
))
1980 case EQ
: return CC2
;
1981 case NE
: return CC0
| CC1
| CC3
;
1987 switch (GET_CODE (code
))
1989 case EQ
: return CC3
;
1990 case NE
: return CC0
| CC1
| CC2
;
1996 switch (GET_CODE (code
))
1998 case EQ
: return CC0
| CC2
;
1999 case NE
: return CC1
| CC3
;
2005 switch (GET_CODE (code
))
2007 case LTU
: return CC2
| CC3
; /* carry */
2008 case GEU
: return CC0
| CC1
; /* no carry */
2014 switch (GET_CODE (code
))
2016 case GTU
: return CC0
| CC1
; /* borrow */
2017 case LEU
: return CC2
| CC3
; /* no borrow */
2023 switch (GET_CODE (code
))
2025 case EQ
: return CC0
| CC2
;
2026 case NE
: return CC1
| CC3
;
2027 case LTU
: return CC1
;
2028 case GTU
: return CC3
;
2029 case LEU
: return CC1
| CC2
;
2030 case GEU
: return CC2
| CC3
;
2035 switch (GET_CODE (code
))
2037 case EQ
: return CC0
;
2038 case NE
: return CC1
| CC2
| CC3
;
2039 case LTU
: return CC1
;
2040 case GTU
: return CC2
;
2041 case LEU
: return CC0
| CC1
;
2042 case GEU
: return CC0
| CC2
;
2048 switch (GET_CODE (code
))
2050 case EQ
: return CC0
;
2051 case NE
: return CC2
| CC1
| CC3
;
2052 case LTU
: return CC2
;
2053 case GTU
: return CC1
;
2054 case LEU
: return CC0
| CC2
;
2055 case GEU
: return CC0
| CC1
;
2061 switch (GET_CODE (code
))
2063 case EQ
: return CC0
;
2064 case NE
: return CC1
| CC2
| CC3
;
2065 case LT
: return CC1
| CC3
;
2066 case GT
: return CC2
;
2067 case LE
: return CC0
| CC1
| CC3
;
2068 case GE
: return CC0
| CC2
;
2074 switch (GET_CODE (code
))
2076 case EQ
: return CC0
;
2077 case NE
: return CC1
| CC2
| CC3
;
2078 case LT
: return CC1
;
2079 case GT
: return CC2
| CC3
;
2080 case LE
: return CC0
| CC1
;
2081 case GE
: return CC0
| CC2
| CC3
;
2087 switch (GET_CODE (code
))
2089 case EQ
: return CC0
| CC1
| CC2
;
2090 case NE
: return CC3
;
2097 switch (GET_CODE (code
))
2099 case EQ
: return CC0
;
2100 case NE
: return CC1
| CC2
| CC3
;
2101 case LT
: return CC1
;
2102 case GT
: return CC2
;
2103 case LE
: return CC0
| CC1
;
2104 case GE
: return CC0
| CC2
;
2105 case UNORDERED
: return CC3
;
2106 case ORDERED
: return CC0
| CC1
| CC2
;
2107 case UNEQ
: return CC0
| CC3
;
2108 case UNLT
: return CC1
| CC3
;
2109 case UNGT
: return CC2
| CC3
;
2110 case UNLE
: return CC0
| CC1
| CC3
;
2111 case UNGE
: return CC0
| CC2
| CC3
;
2112 case LTGT
: return CC1
| CC2
;
2118 switch (GET_CODE (code
))
2120 case EQ
: return CC0
;
2121 case NE
: return CC2
| CC1
| CC3
;
2122 case LT
: return CC2
;
2123 case GT
: return CC1
;
2124 case LE
: return CC0
| CC2
;
2125 case GE
: return CC0
| CC1
;
2126 case UNORDERED
: return CC3
;
2127 case ORDERED
: return CC0
| CC2
| CC1
;
2128 case UNEQ
: return CC0
| CC3
;
2129 case UNLT
: return CC2
| CC3
;
2130 case UNGT
: return CC1
| CC3
;
2131 case UNLE
: return CC0
| CC2
| CC3
;
2132 case UNGE
: return CC0
| CC1
| CC3
;
2133 case LTGT
: return CC2
| CC1
;
2138 /* Vector comparison modes. */
2139 /* CC2 will never be set. It however is part of the negated
2142 switch (GET_CODE (code
))
2147 case GE
: return CC0
;
2148 /* The inverted modes are in fact *any* modes. */
2152 case LT
: return CC3
| CC1
| CC2
;
2157 switch (GET_CODE (code
))
2162 case GE
: return CC0
| CC1
;
2163 /* The inverted modes are in fact *all* modes. */
2167 case LT
: return CC3
| CC2
;
2171 switch (GET_CODE (code
))
2175 case GE
: return CC0
;
2176 /* The inverted modes are in fact *any* modes. */
2179 case UNLT
: return CC3
| CC1
| CC2
;
2184 switch (GET_CODE (code
))
2188 case GE
: return CC0
| CC1
;
2189 /* The inverted modes are in fact *all* modes. */
2192 case UNLT
: return CC3
| CC2
;
2197 switch (GET_CODE (code
))
2200 return INTVAL (XEXP (code
, 1));
2202 return (INTVAL (XEXP (code
, 1))) ^ 0xf;
2213 /* Return branch condition mask to implement a compare and branch
2214 specified by CODE. Return -1 for invalid comparisons. */
2217 s390_compare_and_branch_condition_mask (rtx code
)
2219 const int CC0
= 1 << 3;
2220 const int CC1
= 1 << 2;
2221 const int CC2
= 1 << 1;
2223 switch (GET_CODE (code
))
2247 /* If INV is false, return assembler mnemonic string to implement
2248 a branch specified by CODE. If INV is true, return mnemonic
2249 for the corresponding inverted branch. */
2252 s390_branch_condition_mnemonic (rtx code
, int inv
)
2256 static const char *const mnemonic
[16] =
2258 NULL
, "o", "h", "nle",
2259 "l", "nhe", "lh", "ne",
2260 "e", "nlh", "he", "nl",
2261 "le", "nh", "no", NULL
2264 if (GET_CODE (XEXP (code
, 0)) == REG
2265 && REGNO (XEXP (code
, 0)) == CC_REGNUM
2266 && (XEXP (code
, 1) == const0_rtx
2267 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
2268 && CONST_INT_P (XEXP (code
, 1)))))
2269 mask
= s390_branch_condition_mask (code
);
2271 mask
= s390_compare_and_branch_condition_mask (code
);
2273 gcc_assert (mask
>= 0);
2278 gcc_assert (mask
>= 1 && mask
<= 14);
2280 return mnemonic
[mask
];
2283 /* Return the part of op which has a value different from def.
2284 The size of the part is determined by mode.
2285 Use this function only if you already know that op really
2286 contains such a part. */
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op
, machine_mode mode
, int def
)
2291 unsigned HOST_WIDE_INT value
= 0;
2292 int max_parts
= HOST_BITS_PER_WIDE_INT
/ GET_MODE_BITSIZE (mode
);
2293 int part_bits
= GET_MODE_BITSIZE (mode
);
2294 unsigned HOST_WIDE_INT part_mask
= (HOST_WIDE_INT_1U
<< part_bits
) - 1;
2297 for (i
= 0; i
< max_parts
; i
++)
2300 value
= UINTVAL (op
);
2302 value
>>= part_bits
;
2304 if ((value
& part_mask
) != (def
& part_mask
))
2305 return value
& part_mask
;
2311 /* If OP is an integer constant of mode MODE with exactly one
2312 part of mode PART_MODE unequal to DEF, return the number of that
2313 part. Otherwise, return -1. */
2316 s390_single_part (rtx op
,
2318 machine_mode part_mode
,
2321 unsigned HOST_WIDE_INT value
= 0;
2322 int n_parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (part_mode
);
2323 unsigned HOST_WIDE_INT part_mask
2324 = (HOST_WIDE_INT_1U
<< GET_MODE_BITSIZE (part_mode
)) - 1;
2327 if (GET_CODE (op
) != CONST_INT
)
2330 for (i
= 0; i
< n_parts
; i
++)
2333 value
= UINTVAL (op
);
2335 value
>>= GET_MODE_BITSIZE (part_mode
);
2337 if ((value
& part_mask
) != (def
& part_mask
))
2345 return part
== -1 ? -1 : n_parts
- 1 - part
;
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349 bits and no other bits are set in (the lower SIZE bits of) IN.
2351 PSTART and PEND can be used to obtain the start and end
2352 position (inclusive) of the bitfield relative to 64
2353 bits. *PSTART / *PEND gives the position of the first/last bit
2354 of the bitfield counting from the highest order bit starting
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in
, int size
,
2359 int *pstart
, int *pend
)
2363 int lowbit
= HOST_BITS_PER_WIDE_INT
- 1;
2364 int highbit
= HOST_BITS_PER_WIDE_INT
- size
;
2365 unsigned HOST_WIDE_INT bitmask
= HOST_WIDE_INT_1U
;
2367 gcc_assert (!!pstart
== !!pend
);
2368 for (start
= lowbit
; start
>= highbit
; bitmask
<<= 1, start
--)
2371 /* Look for the rightmost bit of a contiguous range of ones. */
2378 /* Look for the firt zero bit after the range of ones. */
2379 if (! (bitmask
& in
))
2383 /* We're one past the last one-bit. */
2387 /* No one bits found. */
2390 if (start
> highbit
)
2392 unsigned HOST_WIDE_INT mask
;
2394 /* Calculate a mask for all bits beyond the contiguous bits. */
2395 mask
= ((~HOST_WIDE_INT_0U
>> highbit
)
2396 & (~HOST_WIDE_INT_0U
<< (lowbit
- start
+ 1)));
2398 /* There are more bits set beyond the first range of one bits. */
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412 if ~IN contains a contiguous bitfield. In that case, *END is <
2415 If WRAP_P is true, a bitmask that wraps around is also tested.
2416 When a wraparoud occurs *START is greater than *END (in
2417 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418 part of the range. If WRAP_P is false, no wraparound is
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in
, bool wrap_p
,
2423 int size
, int *start
, int *end
)
2425 int bs
= HOST_BITS_PER_WIDE_INT
;
2428 gcc_assert (!!start
== !!end
);
2429 if ((in
& ((~HOST_WIDE_INT_0U
) >> (bs
- size
))) == 0)
2430 /* This cannot be expressed as a contiguous bitmask. Exit early because
2431 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2434 b
= s390_contiguous_bitmask_nowrap_p (in
, size
, start
, end
);
2439 b
= s390_contiguous_bitmask_nowrap_p (~in
, size
, start
, end
);
2445 gcc_assert (s
>= 1);
2446 *start
= ((e
+ 1) & (bs
- 1));
2447 *end
= ((s
- 1 + bs
) & (bs
- 1));
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454 its elements. START and END can be used to obtain the start and
2455 end position of the bitfield.
2457 START/STOP give the position of the first/last bit of the bitfield
2458 counting from the lowest order bit starting with zero. In order to
2459 use these values for S/390 instructions this has to be converted to
2460 "bits big endian" style. */
2463 s390_contiguous_bitmask_vector_p (rtx op
, int *start
, int *end
)
2465 unsigned HOST_WIDE_INT mask
;
2470 /* Handle floats by bitcasting them to ints. */
2471 op
= gen_lowpart (related_int_vector_mode (GET_MODE (op
)).require (), op
);
2473 gcc_assert (!!start
== !!end
);
2474 if (!const_vec_duplicate_p (op
, &elt
)
2475 || !CONST_INT_P (elt
))
2478 size
= GET_MODE_UNIT_BITSIZE (GET_MODE (op
));
2480 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2484 mask
= UINTVAL (elt
);
2486 b
= s390_contiguous_bitmask_p (mask
, true, size
, start
, end
);
2491 *start
-= (HOST_BITS_PER_WIDE_INT
- size
);
2492 *end
-= (HOST_BITS_PER_WIDE_INT
- size
);
2500 /* Return true if C consists only of byte chunks being either 0 or
2501 0xff. If MASK is !=NULL a byte mask is generated which is
2502 appropriate for the vector generate byte mask instruction. */
2505 s390_bytemask_vector_p (rtx op
, unsigned *mask
)
2508 unsigned tmp_mask
= 0;
2509 int nunit
, unit_size
;
2511 if (!VECTOR_MODE_P (GET_MODE (op
))
2512 || GET_CODE (op
) != CONST_VECTOR
2513 || !CONST_INT_P (XVECEXP (op
, 0, 0)))
2516 nunit
= GET_MODE_NUNITS (GET_MODE (op
));
2517 unit_size
= GET_MODE_UNIT_SIZE (GET_MODE (op
));
2519 for (i
= 0; i
< nunit
; i
++)
2521 unsigned HOST_WIDE_INT c
;
2524 if (!CONST_INT_P (XVECEXP (op
, 0, i
)))
2527 c
= UINTVAL (XVECEXP (op
, 0, i
));
2528 for (j
= 0; j
< unit_size
; j
++)
2530 if ((c
& 0xff) != 0 && (c
& 0xff) != 0xff)
2532 tmp_mask
|= (c
& 1) << ((nunit
- 1 - i
) * unit_size
+ j
);
2533 c
= c
>> BITS_PER_UNIT
;
2543 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2544 equivalent to a shift followed by the AND. In particular, CONTIG
2545 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2546 for ROTL indicate a rotate to the right. */
2549 s390_extzv_shift_ok (int bitsize
, int rotl
, unsigned HOST_WIDE_INT contig
)
2554 ok
= s390_contiguous_bitmask_nowrap_p (contig
, bitsize
, &start
, &end
);
2558 return (64 - end
>= rotl
);
2561 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2563 rotl
= -rotl
+ (64 - bitsize
);
2564 return (start
>= rotl
);
2568 /* Check whether we can (and want to) split a double-word
2569 move in mode MODE from SRC to DST into two single-word
2570 moves, moving the subword FIRST_SUBWORD first. */
2573 s390_split_ok_p (rtx dst
, rtx src
, machine_mode mode
, int first_subword
)
2575 /* Floating point and vector registers cannot be split. */
2576 if (FP_REG_P (src
) || FP_REG_P (dst
) || VECTOR_REG_P (src
) || VECTOR_REG_P (dst
))
2579 /* Non-offsettable memory references cannot be split. */
2580 if ((GET_CODE (src
) == MEM
&& !offsettable_memref_p (src
))
2581 || (GET_CODE (dst
) == MEM
&& !offsettable_memref_p (dst
)))
2584 /* Moving the first subword must not clobber a register
2585 needed to move the second subword. */
2586 if (register_operand (dst
, mode
))
2588 rtx subreg
= operand_subword (dst
, first_subword
, 0, mode
);
2589 if (reg_overlap_mentioned_p (subreg
, src
))
2596 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2597 and [MEM2, MEM2 + SIZE] do overlap and false
2601 s390_overlap_p (rtx mem1
, rtx mem2
, HOST_WIDE_INT size
)
2603 rtx addr1
, addr2
, addr_delta
;
2604 HOST_WIDE_INT delta
;
2606 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2612 addr1
= XEXP (mem1
, 0);
2613 addr2
= XEXP (mem2
, 0);
2615 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2617 /* This overlapping check is used by peepholes merging memory block operations.
2618 Overlapping operations would otherwise be recognized by the S/390 hardware
2619 and would fall back to a slower implementation. Allowing overlapping
2620 operations would lead to slow code but not to wrong code. Therefore we are
2621 somewhat optimistic if we cannot prove that the memory blocks are
2623 That's why we return false here although this may accept operations on
2624 overlapping memory areas. */
2625 if (!addr_delta
|| GET_CODE (addr_delta
) != CONST_INT
)
2628 delta
= INTVAL (addr_delta
);
2631 || (delta
> 0 && delta
< size
)
2632 || (delta
< 0 && -delta
< size
))
2638 /* Check whether the address of memory reference MEM2 equals exactly
2639 the address of memory reference MEM1 plus DELTA. Return true if
2640 we can prove this to be the case, false otherwise. */
2643 s390_offset_p (rtx mem1
, rtx mem2
, rtx delta
)
2645 rtx addr1
, addr2
, addr_delta
;
2647 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2650 addr1
= XEXP (mem1
, 0);
2651 addr2
= XEXP (mem2
, 0);
2653 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2654 if (!addr_delta
|| !rtx_equal_p (addr_delta
, delta
))
2660 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2663 s390_expand_logical_operator (enum rtx_code code
, machine_mode mode
,
2666 machine_mode wmode
= mode
;
2667 rtx dst
= operands
[0];
2668 rtx src1
= operands
[1];
2669 rtx src2
= operands
[2];
2672 /* If we cannot handle the operation directly, use a temp register. */
2673 if (!s390_logical_operator_ok_p (operands
))
2674 dst
= gen_reg_rtx (mode
);
2676 /* QImode and HImode patterns make sense only if we have a destination
2677 in memory. Otherwise perform the operation in SImode. */
2678 if ((mode
== QImode
|| mode
== HImode
) && GET_CODE (dst
) != MEM
)
2681 /* Widen operands if required. */
2684 if (GET_CODE (dst
) == SUBREG
2685 && (tem
= simplify_subreg (wmode
, dst
, mode
, 0)) != 0)
2687 else if (REG_P (dst
))
2688 dst
= gen_rtx_SUBREG (wmode
, dst
, 0);
2690 dst
= gen_reg_rtx (wmode
);
2692 if (GET_CODE (src1
) == SUBREG
2693 && (tem
= simplify_subreg (wmode
, src1
, mode
, 0)) != 0)
2695 else if (GET_MODE (src1
) != VOIDmode
)
2696 src1
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src1
), 0);
2698 if (GET_CODE (src2
) == SUBREG
2699 && (tem
= simplify_subreg (wmode
, src2
, mode
, 0)) != 0)
2701 else if (GET_MODE (src2
) != VOIDmode
)
2702 src2
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src2
), 0);
2705 /* Emit the instruction. */
2706 op
= gen_rtx_SET (dst
, gen_rtx_fmt_ee (code
, wmode
, src1
, src2
));
2707 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
2708 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
2710 /* Fix up the destination if needed. */
2711 if (dst
!= operands
[0])
2712 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
2715 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2718 s390_logical_operator_ok_p (rtx
*operands
)
2720 /* If the destination operand is in memory, it needs to coincide
2721 with one of the source operands. After reload, it has to be
2722 the first source operand. */
2723 if (GET_CODE (operands
[0]) == MEM
)
2724 return rtx_equal_p (operands
[0], operands
[1])
2725 || (!reload_completed
&& rtx_equal_p (operands
[0], operands
[2]));
2730 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2731 operand IMMOP to switch from SS to SI type instructions. */
2734 s390_narrow_logical_operator (enum rtx_code code
, rtx
*memop
, rtx
*immop
)
2736 int def
= code
== AND
? -1 : 0;
2740 gcc_assert (GET_CODE (*memop
) == MEM
);
2741 gcc_assert (!MEM_VOLATILE_P (*memop
));
2743 mask
= s390_extract_part (*immop
, QImode
, def
);
2744 part
= s390_single_part (*immop
, GET_MODE (*memop
), QImode
, def
);
2745 gcc_assert (part
>= 0);
2747 *memop
= adjust_address (*memop
, QImode
, part
);
2748 *immop
= gen_int_mode (mask
, QImode
);
2752 /* How to allocate a 'struct machine_function'. */
2754 static struct machine_function
*
2755 s390_init_machine_status (void)
2757 return ggc_cleared_alloc
<machine_function
> ();
2760 /* Map for smallest class containing reg regno. */
2762 const enum reg_class regclass_map
[FIRST_PSEUDO_REGISTER
] =
2763 { GENERAL_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 0 */
2764 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 4 */
2765 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 8 */
2766 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 12 */
2767 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 16 */
2768 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 20 */
2769 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 24 */
2770 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 28 */
2771 ADDR_REGS
, CC_REGS
, ADDR_REGS
, ADDR_REGS
, /* 32 */
2772 ACCESS_REGS
, ACCESS_REGS
, VEC_REGS
, VEC_REGS
, /* 36 */
2773 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 40 */
2774 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 44 */
2775 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 48 */
2776 VEC_REGS
, VEC_REGS
/* 52 */
2779 /* Return attribute type of insn. */
2781 static enum attr_type
2782 s390_safe_attr_type (rtx_insn
*insn
)
2784 if (recog_memoized (insn
) >= 0)
2785 return get_attr_type (insn
);
2790 /* Return attribute relative_long of insn. */
2793 s390_safe_relative_long_p (rtx_insn
*insn
)
2795 if (recog_memoized (insn
) >= 0)
2796 return get_attr_relative_long (insn
) == RELATIVE_LONG_YES
;
2801 /* Return true if DISP is a valid short displacement. */
2804 s390_short_displacement (rtx disp
)
2806 /* No displacement is OK. */
2810 /* Without the long displacement facility we don't need to
2811 distingiush between long and short displacement. */
2812 if (!TARGET_LONG_DISPLACEMENT
)
2815 /* Integer displacement in range. */
2816 if (GET_CODE (disp
) == CONST_INT
)
2817 return INTVAL (disp
) >= 0 && INTVAL (disp
) < 4096;
2819 /* GOT offset is not OK, the GOT can be large. */
2820 if (GET_CODE (disp
) == CONST
2821 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
2822 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
2823 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTNTPOFF
))
2826 /* All other symbolic constants are literal pool references,
2827 which are OK as the literal pool must be small. */
2828 if (GET_CODE (disp
) == CONST
)
2834 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2835 If successful, also determines the
2836 following characteristics of `ref': `is_ptr' - whether it can be an
2837 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2838 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2839 considered a literal pool pointer for purposes of avoiding two different
2840 literal pool pointers per insn during or after reload (`B' constraint). */
2842 s390_decompose_constant_pool_ref (rtx
*ref
, rtx
*disp
, bool *is_ptr
,
2843 bool *is_base_ptr
, bool *is_pool_ptr
)
2848 if (GET_CODE (*ref
) == UNSPEC
)
2849 switch (XINT (*ref
, 1))
2853 *disp
= gen_rtx_UNSPEC (Pmode
,
2854 gen_rtvec (1, XVECEXP (*ref
, 0, 0)),
2855 UNSPEC_LTREL_OFFSET
);
2859 *ref
= XVECEXP (*ref
, 0, 1);
2866 if (!REG_P (*ref
) || GET_MODE (*ref
) != Pmode
)
2869 if (REGNO (*ref
) == STACK_POINTER_REGNUM
2870 || REGNO (*ref
) == FRAME_POINTER_REGNUM
2871 || ((reload_completed
|| reload_in_progress
)
2872 && frame_pointer_needed
2873 && REGNO (*ref
) == HARD_FRAME_POINTER_REGNUM
)
2874 || REGNO (*ref
) == ARG_POINTER_REGNUM
2876 && REGNO (*ref
) == PIC_OFFSET_TABLE_REGNUM
))
2877 *is_ptr
= *is_base_ptr
= true;
2879 if ((reload_completed
|| reload_in_progress
)
2880 && *ref
== cfun
->machine
->base_reg
)
2881 *is_ptr
= *is_base_ptr
= *is_pool_ptr
= true;
2886 /* Decompose a RTL expression ADDR for a memory address into
2887 its components, returned in OUT.
2889 Returns false if ADDR is not a valid memory address, true
2890 otherwise. If OUT is NULL, don't return the components,
2891 but check for validity only.
2893 Note: Only addresses in canonical form are recognized.
2894 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2895 canonical form so that they will be recognized. */
2898 s390_decompose_address (rtx addr
, struct s390_address
*out
)
2900 HOST_WIDE_INT offset
= 0;
2901 rtx base
= NULL_RTX
;
2902 rtx indx
= NULL_RTX
;
2903 rtx disp
= NULL_RTX
;
2905 bool pointer
= false;
2906 bool base_ptr
= false;
2907 bool indx_ptr
= false;
2908 bool literal_pool
= false;
2910 /* We may need to substitute the literal pool base register into the address
2911 below. However, at this point we do not know which register is going to
2912 be used as base, so we substitute the arg pointer register. This is going
2913 to be treated as holding a pointer below -- it shouldn't be used for any
2915 rtx fake_pool_base
= gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
);
2917 /* Decompose address into base + index + displacement. */
2919 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == UNSPEC
)
2922 else if (GET_CODE (addr
) == PLUS
)
2924 rtx op0
= XEXP (addr
, 0);
2925 rtx op1
= XEXP (addr
, 1);
2926 enum rtx_code code0
= GET_CODE (op0
);
2927 enum rtx_code code1
= GET_CODE (op1
);
2929 if (code0
== REG
|| code0
== UNSPEC
)
2931 if (code1
== REG
|| code1
== UNSPEC
)
2933 indx
= op0
; /* index + base */
2939 base
= op0
; /* base + displacement */
2944 else if (code0
== PLUS
)
2946 indx
= XEXP (op0
, 0); /* index + base + disp */
2947 base
= XEXP (op0
, 1);
2958 disp
= addr
; /* displacement */
2960 /* Extract integer part of displacement. */
2964 if (GET_CODE (disp
) == CONST_INT
)
2966 offset
= INTVAL (disp
);
2969 else if (GET_CODE (disp
) == CONST
2970 && GET_CODE (XEXP (disp
, 0)) == PLUS
2971 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
2973 offset
= INTVAL (XEXP (XEXP (disp
, 0), 1));
2974 disp
= XEXP (XEXP (disp
, 0), 0);
2978 /* Strip off CONST here to avoid special case tests later. */
2979 if (disp
&& GET_CODE (disp
) == CONST
)
2980 disp
= XEXP (disp
, 0);
2982 /* We can convert literal pool addresses to
2983 displacements by basing them off the base register. */
2984 if (disp
&& GET_CODE (disp
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (disp
))
2989 base
= fake_pool_base
, literal_pool
= true;
2991 /* Mark up the displacement. */
2992 disp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, disp
),
2993 UNSPEC_LTREL_OFFSET
);
2996 /* Validate base register. */
2997 if (!s390_decompose_constant_pool_ref (&base
, &disp
, &pointer
, &base_ptr
,
3001 /* Validate index register. */
3002 if (!s390_decompose_constant_pool_ref (&indx
, &disp
, &pointer
, &indx_ptr
,
3006 /* Prefer to use pointer as base, not index. */
3007 if (base
&& indx
&& !base_ptr
3008 && (indx_ptr
|| (!REG_POINTER (base
) && REG_POINTER (indx
))))
3015 /* Validate displacement. */
3018 /* If virtual registers are involved, the displacement will change later
3019 anyway as the virtual registers get eliminated. This could make a
3020 valid displacement invalid, but it is more likely to make an invalid
3021 displacement valid, because we sometimes access the register save area
3022 via negative offsets to one of those registers.
3023 Thus we don't check the displacement for validity here. If after
3024 elimination the displacement turns out to be invalid after all,
3025 this is fixed up by reload in any case. */
3026 /* LRA maintains always displacements up to date and we need to
3027 know the displacement is right during all LRA not only at the
3028 final elimination. */
3030 || (base
!= arg_pointer_rtx
3031 && indx
!= arg_pointer_rtx
3032 && base
!= return_address_pointer_rtx
3033 && indx
!= return_address_pointer_rtx
3034 && base
!= frame_pointer_rtx
3035 && indx
!= frame_pointer_rtx
3036 && base
!= virtual_stack_vars_rtx
3037 && indx
!= virtual_stack_vars_rtx
))
3038 if (!DISP_IN_RANGE (offset
))
3043 /* All the special cases are pointers. */
3046 /* In the small-PIC case, the linker converts @GOT
3047 and @GOTNTPOFF offsets to possible displacements. */
3048 if (GET_CODE (disp
) == UNSPEC
3049 && (XINT (disp
, 1) == UNSPEC_GOT
3050 || XINT (disp
, 1) == UNSPEC_GOTNTPOFF
)
3056 /* Accept pool label offsets. */
3057 else if (GET_CODE (disp
) == UNSPEC
3058 && XINT (disp
, 1) == UNSPEC_POOL_OFFSET
)
3061 /* Accept literal pool references. */
3062 else if (GET_CODE (disp
) == UNSPEC
3063 && XINT (disp
, 1) == UNSPEC_LTREL_OFFSET
)
3065 /* In case CSE pulled a non literal pool reference out of
3066 the pool we have to reject the address. This is
3067 especially important when loading the GOT pointer on non
3068 zarch CPUs. In this case the literal pool contains an lt
3069 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3070 will most likely exceed the displacement. */
3071 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
3072 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp
, 0, 0)))
3075 orig_disp
= gen_rtx_CONST (Pmode
, disp
);
3078 /* If we have an offset, make sure it does not
3079 exceed the size of the constant pool entry.
3080 Otherwise we might generate an out-of-range
3081 displacement for the base register form. */
3082 rtx sym
= XVECEXP (disp
, 0, 0);
3083 if (offset
>= GET_MODE_SIZE (get_pool_mode (sym
)))
3086 orig_disp
= plus_constant (Pmode
, orig_disp
, offset
);
3101 out
->disp
= orig_disp
;
3102 out
->pointer
= pointer
;
3103 out
->literal_pool
= literal_pool
;
3109 /* Decompose a RTL expression OP for an address style operand into its
3110 components, and return the base register in BASE and the offset in
3111 OFFSET. While OP looks like an address it is never supposed to be
3114 Return true if OP is a valid address operand, false if not. */
3117 s390_decompose_addrstyle_without_index (rtx op
, rtx
*base
,
3118 HOST_WIDE_INT
*offset
)
3122 /* We can have an integer constant, an address register,
3123 or a sum of the two. */
3124 if (CONST_SCALAR_INT_P (op
))
3129 if (op
&& GET_CODE (op
) == PLUS
&& CONST_SCALAR_INT_P (XEXP (op
, 1)))
3134 while (op
&& GET_CODE (op
) == SUBREG
)
3135 op
= SUBREG_REG (op
);
3137 if (op
&& GET_CODE (op
) != REG
)
3142 if (off
== NULL_RTX
)
3144 else if (CONST_INT_P (off
))
3145 *offset
= INTVAL (off
);
3146 else if (CONST_WIDE_INT_P (off
))
3147 /* The offset will anyway be cut down to 12 bits so take just
3148 the lowest order chunk of the wide int. */
3149 *offset
= CONST_WIDE_INT_ELT (off
, 0);
3159 /* Check that OP is a valid shift count operand.
3160 It should be of the following structure:
3161 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3162 where subreg, and and plus are optional.
3164 If IMPLICIT_MASK is > 0 and OP contains and
3166 it is checked whether IMPLICIT_MASK and the immediate match.
3167 Otherwise, no checking is performed.
3170 s390_valid_shift_count (rtx op
, HOST_WIDE_INT implicit_mask
)
3173 while (GET_CODE (op
) == SUBREG
&& subreg_lowpart_p (op
))
3176 /* Check for an and with proper constant. */
3177 if (GET_CODE (op
) == AND
)
3179 rtx op1
= XEXP (op
, 0);
3180 rtx imm
= XEXP (op
, 1);
3182 if (GET_CODE (op1
) == SUBREG
&& subreg_lowpart_p (op1
))
3183 op1
= XEXP (op1
, 0);
3185 if (!(register_operand (op1
, GET_MODE (op1
)) || GET_CODE (op1
) == PLUS
))
3188 if (!immediate_operand (imm
, GET_MODE (imm
)))
3191 HOST_WIDE_INT val
= INTVAL (imm
);
3192 if (implicit_mask
> 0
3193 && (val
& implicit_mask
) != implicit_mask
)
3199 /* Check the rest. */
3200 return s390_decompose_addrstyle_without_index (op
, NULL
, NULL
);
3203 /* Return true if CODE is a valid address without index. */
3206 s390_legitimate_address_without_index_p (rtx op
)
3208 struct s390_address addr
;
3210 if (!s390_decompose_address (XEXP (op
, 0), &addr
))
3219 /* Return TRUE if ADDR is an operand valid for a load/store relative
3220 instruction. Be aware that the alignment of the operand needs to
3221 be checked separately.
3222 Valid addresses are single references or a sum of a reference and a
3223 constant integer. Return these parts in SYMREF and ADDEND. You can
3224 pass NULL in REF and/or ADDEND if you are not interested in these
3228 s390_loadrelative_operand_p (rtx addr
, rtx
*symref
, HOST_WIDE_INT
*addend
)
3230 HOST_WIDE_INT tmpaddend
= 0;
3232 if (GET_CODE (addr
) == CONST
)
3233 addr
= XEXP (addr
, 0);
3235 if (GET_CODE (addr
) == PLUS
)
3237 if (!CONST_INT_P (XEXP (addr
, 1)))
3240 tmpaddend
= INTVAL (XEXP (addr
, 1));
3241 addr
= XEXP (addr
, 0);
3244 if (GET_CODE (addr
) == SYMBOL_REF
3245 || (GET_CODE (addr
) == UNSPEC
3246 && (XINT (addr
, 1) == UNSPEC_GOTENT
3247 || XINT (addr
, 1) == UNSPEC_PLT
)))
3252 *addend
= tmpaddend
;
3259 /* Return true if the address in OP is valid for constraint letter C
3260 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3261 pool MEMs should be accepted. Only the Q, R, S, T constraint
3262 letters are allowed for C. */
3265 s390_check_qrst_address (char c
, rtx op
, bool lit_pool_ok
)
3268 struct s390_address addr
;
3269 bool decomposed
= false;
3271 if (!address_operand (op
, GET_MODE (op
)))
3274 /* This check makes sure that no symbolic address (except literal
3275 pool references) are accepted by the R or T constraints. */
3276 if (s390_loadrelative_operand_p (op
, &symref
, NULL
)
3278 || !SYMBOL_REF_P (symref
)
3279 || !CONSTANT_POOL_ADDRESS_P (symref
)))
3282 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3285 if (!s390_decompose_address (op
, &addr
))
3287 if (addr
.literal_pool
)
3292 /* With reload, we sometimes get intermediate address forms that are
3293 actually invalid as-is, but we need to accept them in the most
3294 generic cases below ('R' or 'T'), since reload will in fact fix
3295 them up. LRA behaves differently here; we never see such forms,
3296 but on the other hand, we need to strictly reject every invalid
3297 address form. After both reload and LRA invalid address forms
3298 must be rejected, because nothing will fix them up later. Perform
3299 this check right up front. */
3300 if (lra_in_progress
|| reload_completed
)
3302 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3309 case 'Q': /* no index short displacement */
3310 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3314 if (!s390_short_displacement (addr
.disp
))
3318 case 'R': /* with index short displacement */
3319 if (TARGET_LONG_DISPLACEMENT
)
3321 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3323 if (!s390_short_displacement (addr
.disp
))
3326 /* Any invalid address here will be fixed up by reload,
3327 so accept it for the most generic constraint. */
3330 case 'S': /* no index long displacement */
3331 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3337 case 'T': /* with index long displacement */
3338 /* Any invalid address here will be fixed up by reload,
3339 so accept it for the most generic constraint. */
3349 /* Evaluates constraint strings described by the regular expression
3350 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3351 the constraint given in STR, or 0 else. */
3354 s390_mem_constraint (const char *str
, rtx op
)
3361 /* Check for offsettable variants of memory constraints. */
3362 if (!MEM_P (op
) || MEM_VOLATILE_P (op
))
3364 if ((reload_completed
|| reload_in_progress
)
3365 ? !offsettable_memref_p (op
) : !offsettable_nonstrict_memref_p (op
))
3367 return s390_check_qrst_address (str
[1], XEXP (op
, 0), true);
3369 /* Check for non-literal-pool variants of memory constraints. */
3372 return s390_check_qrst_address (str
[1], XEXP (op
, 0), false);
3377 if (GET_CODE (op
) != MEM
)
3379 return s390_check_qrst_address (c
, XEXP (op
, 0), true);
3381 /* Simply check for the basic form of a shift count. Reload will
3382 take care of making sure we have a proper base register. */
3383 if (!s390_decompose_addrstyle_without_index (op
, NULL
, NULL
))
3387 return s390_check_qrst_address (str
[1], op
, true);
3395 /* Evaluates constraint strings starting with letter O. Input
3396 parameter C is the second letter following the "O" in the constraint
3397 string. Returns 1 if VALUE meets the respective constraint and 0
3401 s390_O_constraint_str (const char c
, HOST_WIDE_INT value
)
3409 return trunc_int_for_mode (value
, SImode
) == value
;
3413 || s390_single_part (GEN_INT (value
), DImode
, SImode
, 0) == 1;
3416 return s390_single_part (GEN_INT (value
- 1), DImode
, SImode
, -1) == 1;
3424 /* Evaluates constraint strings starting with letter N. Parameter STR
3425 contains the letters following letter "N" in the constraint string.
3426 Returns true if VALUE matches the constraint. */
3429 s390_N_constraint_str (const char *str
, HOST_WIDE_INT value
)
3431 machine_mode mode
, part_mode
;
3433 int part
, part_goal
;
3439 part_goal
= str
[0] - '0';
3483 if (GET_MODE_SIZE (mode
) <= GET_MODE_SIZE (part_mode
))
3486 part
= s390_single_part (GEN_INT (value
), mode
, part_mode
, def
);
3489 if (part_goal
!= -1 && part_goal
!= part
)
3496 /* Returns true if the input parameter VALUE is a float zero. */
3499 s390_float_const_zero_p (rtx value
)
3501 return (GET_MODE_CLASS (GET_MODE (value
)) == MODE_FLOAT
3502 && value
== CONST0_RTX (GET_MODE (value
)));
3505 /* Implement TARGET_REGISTER_MOVE_COST. */
3508 s390_register_move_cost (machine_mode mode
,
3509 reg_class_t from
, reg_class_t to
)
3511 /* On s390, copy between fprs and gprs is expensive. */
3513 /* It becomes somewhat faster having ldgr/lgdr. */
3514 if (TARGET_Z10
&& GET_MODE_SIZE (mode
) == 8)
3516 /* ldgr is single cycle. */
3517 if (reg_classes_intersect_p (from
, GENERAL_REGS
)
3518 && reg_classes_intersect_p (to
, FP_REGS
))
3520 /* lgdr needs 3 cycles. */
3521 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
3522 && reg_classes_intersect_p (from
, FP_REGS
))
3526 /* Otherwise copying is done via memory. */
3527 if ((reg_classes_intersect_p (from
, GENERAL_REGS
)
3528 && reg_classes_intersect_p (to
, FP_REGS
))
3529 || (reg_classes_intersect_p (from
, FP_REGS
)
3530 && reg_classes_intersect_p (to
, GENERAL_REGS
)))
3533 /* We usually do not want to copy via CC. */
3534 if (reg_classes_intersect_p (from
, CC_REGS
)
3535 || reg_classes_intersect_p (to
, CC_REGS
))
3541 /* Implement TARGET_MEMORY_MOVE_COST. */
3544 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
3545 reg_class_t rclass ATTRIBUTE_UNUSED
,
3546 bool in ATTRIBUTE_UNUSED
)
3551 /* Compute a (partial) cost for rtx X. Return true if the complete
3552 cost has been computed, and false if subexpressions should be
3553 scanned. In either case, *TOTAL contains the cost result. The
3554 initial value of *TOTAL is the default value computed by
3555 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3556 code of the superexpression of x. */
3559 s390_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
3560 int opno ATTRIBUTE_UNUSED
,
3561 int *total
, bool speed ATTRIBUTE_UNUSED
)
3563 int code
= GET_CODE (x
);
3571 case CONST_WIDE_INT
:
3578 /* Without this a conditional move instruction would be
3579 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3580 comparison operator). That's a bit pessimistic. */
3582 if (!TARGET_Z196
|| GET_CODE (SET_SRC (x
)) != IF_THEN_ELSE
)
3585 rtx cond
= XEXP (SET_SRC (x
), 0);
3587 if (!CC_REG_P (XEXP (cond
, 0)) || !CONST_INT_P (XEXP (cond
, 1)))
3590 /* It is going to be a load/store on condition. Make it
3591 slightly more expensive than a normal load. */
3592 *total
= COSTS_N_INSNS (1) + 1;
3594 rtx dst
= SET_DEST (x
);
3595 rtx then
= XEXP (SET_SRC (x
), 1);
3596 rtx els
= XEXP (SET_SRC (x
), 2);
3598 /* It is a real IF-THEN-ELSE. An additional move will be
3599 needed to implement that. */
3602 && !rtx_equal_p (dst
, then
)
3603 && !rtx_equal_p (dst
, els
))
3604 *total
+= COSTS_N_INSNS (1) / 2;
3606 /* A minor penalty for constants we cannot directly handle. */
3607 if ((CONST_INT_P (then
) || CONST_INT_P (els
))
3608 && (!TARGET_Z13
|| MEM_P (dst
)
3609 || (CONST_INT_P (then
) && !satisfies_constraint_K (then
))
3610 || (CONST_INT_P (els
) && !satisfies_constraint_K (els
))))
3611 *total
+= COSTS_N_INSNS (1) / 2;
3613 /* A store on condition can only handle register src operands. */
3614 if (MEM_P (dst
) && (!REG_P (then
) || !REG_P (els
)))
3615 *total
+= COSTS_N_INSNS (1) / 2;
3623 && (mode
== SImode
|| mode
== DImode
)
3624 && GET_CODE (XEXP (x
, 0)) == NOT
3625 && GET_CODE (XEXP (x
, 1)) == NOT
)
3627 *total
= COSTS_N_INSNS (1);
3628 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3630 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3636 if (GET_CODE (XEXP (x
, 0)) == AND
3637 && GET_CODE (XEXP (x
, 1)) == ASHIFT
3638 && REG_P (XEXP (XEXP (x
, 0), 0))
3639 && REG_P (XEXP (XEXP (x
, 1), 0))
3640 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3641 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3642 && (UINTVAL (XEXP (XEXP (x
, 0), 1)) ==
3643 (HOST_WIDE_INT_1U
<< UINTVAL (XEXP (XEXP (x
, 1), 1))) - 1))
3645 *total
= COSTS_N_INSNS (2);
3649 /* ~AND on a 128 bit mode. This can be done using a vector
3652 && GET_CODE (XEXP (x
, 0)) == NOT
3653 && GET_CODE (XEXP (x
, 1)) == NOT
3654 && REG_P (XEXP (XEXP (x
, 0), 0))
3655 && REG_P (XEXP (XEXP (x
, 1), 0))
3656 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x
, 0), 0))) == 16
3657 && s390_hard_regno_mode_ok (VR0_REGNUM
,
3658 GET_MODE (XEXP (XEXP (x
, 0), 0))))
3660 *total
= COSTS_N_INSNS (1);
3664 *total
= COSTS_N_INSNS (1);
3670 && (mode
== SImode
|| mode
== DImode
)
3671 && GET_CODE (XEXP (x
, 0)) == NOT
3672 && GET_CODE (XEXP (x
, 1)) == NOT
)
3674 *total
= COSTS_N_INSNS (1);
3675 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3677 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3692 *total
= COSTS_N_INSNS (1);
3700 rtx left
= XEXP (x
, 0);
3701 rtx right
= XEXP (x
, 1);
3702 if (GET_CODE (right
) == CONST_INT
3703 && CONST_OK_FOR_K (INTVAL (right
)))
3704 *total
= s390_cost
->mhi
;
3705 else if (GET_CODE (left
) == SIGN_EXTEND
)
3706 *total
= s390_cost
->mh
;
3708 *total
= s390_cost
->ms
; /* msr, ms, msy */
3713 rtx left
= XEXP (x
, 0);
3714 rtx right
= XEXP (x
, 1);
3717 if (GET_CODE (right
) == CONST_INT
3718 && CONST_OK_FOR_K (INTVAL (right
)))
3719 *total
= s390_cost
->mghi
;
3720 else if (GET_CODE (left
) == SIGN_EXTEND
)
3721 *total
= s390_cost
->msgf
;
3723 *total
= s390_cost
->msg
; /* msgr, msg */
3725 else /* TARGET_31BIT */
3727 if (GET_CODE (left
) == SIGN_EXTEND
3728 && GET_CODE (right
) == SIGN_EXTEND
)
3729 /* mulsidi case: mr, m */
3730 *total
= s390_cost
->m
;
3731 else if (GET_CODE (left
) == ZERO_EXTEND
3732 && GET_CODE (right
) == ZERO_EXTEND
)
3733 /* umulsidi case: ml, mlr */
3734 *total
= s390_cost
->ml
;
3736 /* Complex calculation is required. */
3737 *total
= COSTS_N_INSNS (40);
3743 *total
= s390_cost
->mult_df
;
3746 *total
= s390_cost
->mxbr
;
3757 *total
= s390_cost
->madbr
;
3760 *total
= s390_cost
->maebr
;
3765 /* Negate in the third argument is free: FMSUB. */
3766 if (GET_CODE (XEXP (x
, 2)) == NEG
)
3768 *total
+= (rtx_cost (XEXP (x
, 0), mode
, FMA
, 0, speed
)
3769 + rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
)
3770 + rtx_cost (XEXP (XEXP (x
, 2), 0), mode
, FMA
, 2, speed
));
3777 if (mode
== TImode
) /* 128 bit division */
3778 *total
= s390_cost
->dlgr
;
3779 else if (mode
== DImode
)
3781 rtx right
= XEXP (x
, 1);
3782 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3783 *total
= s390_cost
->dlr
;
3784 else /* 64 by 64 bit division */
3785 *total
= s390_cost
->dlgr
;
3787 else if (mode
== SImode
) /* 32 bit division */
3788 *total
= s390_cost
->dlr
;
3795 rtx right
= XEXP (x
, 1);
3796 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3798 *total
= s390_cost
->dsgfr
;
3800 *total
= s390_cost
->dr
;
3801 else /* 64 by 64 bit division */
3802 *total
= s390_cost
->dsgr
;
3804 else if (mode
== SImode
) /* 32 bit division */
3805 *total
= s390_cost
->dlr
;
3806 else if (mode
== SFmode
)
3808 *total
= s390_cost
->debr
;
3810 else if (mode
== DFmode
)
3812 *total
= s390_cost
->ddbr
;
3814 else if (mode
== TFmode
)
3816 *total
= s390_cost
->dxbr
;
3822 *total
= s390_cost
->sqebr
;
3823 else if (mode
== DFmode
)
3824 *total
= s390_cost
->sqdbr
;
3826 *total
= s390_cost
->sqxbr
;
3831 if (outer_code
== MULT
|| outer_code
== DIV
|| outer_code
== MOD
3832 || outer_code
== PLUS
|| outer_code
== MINUS
3833 || outer_code
== COMPARE
)
3838 *total
= COSTS_N_INSNS (1);
3840 /* nxrk, nxgrk ~(a^b)==0 */
3842 && GET_CODE (XEXP (x
, 0)) == NOT
3843 && XEXP (x
, 1) == const0_rtx
3844 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == XOR
3845 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
3848 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
3850 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
3855 /* nnrk, nngrk, nork, nogrk */
3857 && (GET_CODE (XEXP (x
, 0)) == AND
|| GET_CODE (XEXP (x
, 0)) == IOR
)
3858 && XEXP (x
, 1) == const0_rtx
3859 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
3860 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == NOT
3861 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == NOT
3864 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
3866 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 1), 0)))
3871 if (GET_CODE (XEXP (x
, 0)) == AND
3872 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3873 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
3875 rtx op0
= XEXP (XEXP (x
, 0), 0);
3876 rtx op1
= XEXP (XEXP (x
, 0), 1);
3877 rtx op2
= XEXP (x
, 1);
3879 if (memory_operand (op0
, GET_MODE (op0
))
3880 && s390_tm_ccmode (op1
, op2
, 0) != VOIDmode
)
3882 if (register_operand (op0
, GET_MODE (op0
))
3883 && s390_tm_ccmode (op1
, op2
, 1) != VOIDmode
)
3893 /* Return the cost of an address rtx ADDR. */
3896 s390_address_cost (rtx addr
, machine_mode mode ATTRIBUTE_UNUSED
,
3897 addr_space_t as ATTRIBUTE_UNUSED
,
3898 bool speed ATTRIBUTE_UNUSED
)
3900 struct s390_address ad
;
3901 if (!s390_decompose_address (addr
, &ad
))
3904 return ad
.indx
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3907 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3909 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
3911 int misalign ATTRIBUTE_UNUSED
)
3913 switch (type_of_cost
)
3921 case vector_gather_load
:
3922 case vector_scatter_store
:
3925 case cond_branch_not_taken
:
3927 case vec_promote_demote
:
3928 case unaligned_load
:
3929 case unaligned_store
:
3932 case cond_branch_taken
:
3936 return TYPE_VECTOR_SUBPARTS (vectype
) - 1;
3943 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3944 otherwise return 0. */
3947 tls_symbolic_operand (rtx op
)
3949 if (GET_CODE (op
) != SYMBOL_REF
)
3951 return SYMBOL_REF_TLS_MODEL (op
);
3954 /* Split DImode access register reference REG (on 64-bit) into its constituent
3955 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3956 gen_highpart cannot be used as they assume all registers are word-sized,
3957 while our access registers have only half that size. */
3960 s390_split_access_reg (rtx reg
, rtx
*lo
, rtx
*hi
)
3962 gcc_assert (TARGET_64BIT
);
3963 gcc_assert (ACCESS_REG_P (reg
));
3964 gcc_assert (GET_MODE (reg
) == DImode
);
3965 gcc_assert (!(REGNO (reg
) & 1));
3967 *lo
= gen_rtx_REG (SImode
, REGNO (reg
) + 1);
3968 *hi
= gen_rtx_REG (SImode
, REGNO (reg
));
3971 /* Return true if OP contains a symbol reference */
3974 symbolic_reference_mentioned_p (rtx op
)
3979 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3982 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3983 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3989 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3990 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3994 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4001 /* Return true if OP contains a reference to a thread-local symbol. */
4004 tls_symbolic_reference_mentioned_p (rtx op
)
4009 if (GET_CODE (op
) == SYMBOL_REF
)
4010 return tls_symbolic_operand (op
);
4012 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4013 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4019 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4020 if (tls_symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4024 else if (fmt
[i
] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op
, i
)))
4032 /* Return true if OP is a legitimate general operand when
4033 generating PIC code. It is given that flag_pic is on
4034 and that OP satisfies CONSTANT_P. */
4037 legitimate_pic_operand_p (rtx op
)
4039 /* Accept all non-symbolic constants. */
4040 if (!SYMBOLIC_CONST (op
))
4043 /* Accept addresses that can be expressed relative to (pc). */
4044 if (larl_operand (op
, VOIDmode
))
4047 /* Reject everything else; must be handled
4048 via emit_symbolic_move. */
4052 /* Returns true if the constant value OP is a legitimate general operand.
4053 It is given that OP satisfies CONSTANT_P. */
4056 s390_legitimate_constant_p (machine_mode mode
, rtx op
)
4058 if (TARGET_VX
&& VECTOR_MODE_P (mode
) && GET_CODE (op
) == CONST_VECTOR
)
4060 if (GET_MODE_SIZE (mode
) != 16)
4063 if (!satisfies_constraint_j00 (op
)
4064 && !satisfies_constraint_jm1 (op
)
4065 && !satisfies_constraint_jKK (op
)
4066 && !satisfies_constraint_jxx (op
)
4067 && !satisfies_constraint_jyy (op
))
4071 /* Accept all non-symbolic constants. */
4072 if (!SYMBOLIC_CONST (op
))
4075 /* Accept immediate LARL operands. */
4076 if (larl_operand (op
, mode
))
4079 /* Thread-local symbols are never legal constants. This is
4080 so that emit_call knows that computing such addresses
4081 might require a function call. */
4082 if (TLS_SYMBOLIC_CONST (op
))
4085 /* In the PIC case, symbolic constants must *not* be
4086 forced into the literal pool. We accept them here,
4087 so that they will be handled by emit_symbolic_move. */
4091 /* All remaining non-PIC symbolic constants are
4092 forced into the literal pool. */
4096 /* Determine if it's legal to put X into the constant pool. This
4097 is not possible if X contains the address of a symbol that is
4098 not constant (TLS) or not known at final link time (PIC). */
4101 s390_cannot_force_const_mem (machine_mode mode
, rtx x
)
4103 switch (GET_CODE (x
))
4107 case CONST_WIDE_INT
:
4109 /* Accept all non-symbolic constants. */
4113 /* Accept an unary '-' only on scalar numeric constants. */
4114 switch (GET_CODE (XEXP (x
, 0)))
4118 case CONST_WIDE_INT
:
4125 /* Labels are OK iff we are non-PIC. */
4126 return flag_pic
!= 0;
4129 /* 'Naked' TLS symbol references are never OK,
4130 non-TLS symbols are OK iff we are non-PIC. */
4131 if (tls_symbolic_operand (x
))
4134 return flag_pic
!= 0;
4137 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0));
4140 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0))
4141 || s390_cannot_force_const_mem (mode
, XEXP (x
, 1));
4144 switch (XINT (x
, 1))
4146 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4147 case UNSPEC_LTREL_OFFSET
:
4155 case UNSPEC_GOTNTPOFF
:
4156 case UNSPEC_INDNTPOFF
:
4159 /* If the literal pool shares the code section, be put
4160 execute template placeholders into the pool as well. */
4172 /* Returns true if the constant value OP is a legitimate general
4173 operand during and after reload. The difference to
4174 legitimate_constant_p is that this function will not accept
4175 a constant that would need to be forced to the literal pool
4176 before it can be used as operand.
4177 This function accepts all constants which can be loaded directly
4181 legitimate_reload_constant_p (rtx op
)
4183 /* Accept la(y) operands. */
4184 if (GET_CODE (op
) == CONST_INT
4185 && DISP_IN_RANGE (INTVAL (op
)))
4188 /* Accept l(g)hi/l(g)fi operands. */
4189 if (GET_CODE (op
) == CONST_INT
4190 && (CONST_OK_FOR_K (INTVAL (op
)) || CONST_OK_FOR_Os (INTVAL (op
))))
4193 /* Accept lliXX operands. */
4195 && GET_CODE (op
) == CONST_INT
4196 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4197 && s390_single_part (op
, word_mode
, HImode
, 0) >= 0)
4201 && GET_CODE (op
) == CONST_INT
4202 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4203 && s390_single_part (op
, word_mode
, SImode
, 0) >= 0)
4206 /* Accept larl operands. */
4207 if (larl_operand (op
, VOIDmode
))
4210 /* Accept floating-point zero operands that fit into a single GPR. */
4211 if (GET_CODE (op
) == CONST_DOUBLE
4212 && s390_float_const_zero_p (op
)
4213 && GET_MODE_SIZE (GET_MODE (op
)) <= UNITS_PER_WORD
)
4216 /* Accept double-word operands that can be split. */
4217 if (GET_CODE (op
) == CONST_WIDE_INT
4218 || (GET_CODE (op
) == CONST_INT
4219 && trunc_int_for_mode (INTVAL (op
), word_mode
) != INTVAL (op
)))
4221 machine_mode dword_mode
= word_mode
== SImode
? DImode
: TImode
;
4222 rtx hi
= operand_subword (op
, 0, 0, dword_mode
);
4223 rtx lo
= operand_subword (op
, 1, 0, dword_mode
);
4224 return legitimate_reload_constant_p (hi
)
4225 && legitimate_reload_constant_p (lo
);
4228 /* Everything else cannot be handled without reload. */
4232 /* Returns true if the constant value OP is a legitimate fp operand
4233 during and after reload.
4234 This function accepts all constants which can be loaded directly
4238 legitimate_reload_fp_constant_p (rtx op
)
4240 /* Accept floating-point zero operands if the load zero instruction
4241 can be used. Prior to z196 the load fp zero instruction caused a
4242 performance penalty if the result is used as BFP number. */
4244 && GET_CODE (op
) == CONST_DOUBLE
4245 && s390_float_const_zero_p (op
))
4251 /* Returns true if the constant value OP is a legitimate vector operand
4252 during and after reload.
4253 This function accepts all constants which can be loaded directly
4257 legitimate_reload_vector_constant_p (rtx op
)
4259 if (TARGET_VX
&& GET_MODE_SIZE (GET_MODE (op
)) == 16
4260 && (satisfies_constraint_j00 (op
)
4261 || satisfies_constraint_jm1 (op
)
4262 || satisfies_constraint_jKK (op
)
4263 || satisfies_constraint_jxx (op
)
4264 || satisfies_constraint_jyy (op
)))
4270 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4271 return the class of reg to actually use. */
4274 s390_preferred_reload_class (rtx op
, reg_class_t rclass
)
4276 switch (GET_CODE (op
))
4278 /* Constants we cannot reload into general registers
4279 must be forced into the literal pool. */
4283 case CONST_WIDE_INT
:
4284 if (reg_class_subset_p (GENERAL_REGS
, rclass
)
4285 && legitimate_reload_constant_p (op
))
4286 return GENERAL_REGS
;
4287 else if (reg_class_subset_p (ADDR_REGS
, rclass
)
4288 && legitimate_reload_constant_p (op
))
4290 else if (reg_class_subset_p (FP_REGS
, rclass
)
4291 && legitimate_reload_fp_constant_p (op
))
4293 else if (reg_class_subset_p (VEC_REGS
, rclass
)
4294 && legitimate_reload_vector_constant_p (op
))
4299 /* If a symbolic constant or a PLUS is reloaded,
4300 it is most likely being used as an address, so
4301 prefer ADDR_REGS. If 'class' is not a superset
4302 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4304 /* Symrefs cannot be pushed into the literal pool with -fPIC
4305 so we *MUST NOT* return NO_REGS for these cases
4306 (s390_cannot_force_const_mem will return true).
4308 On the other hand we MUST return NO_REGS for symrefs with
4309 invalid addend which might have been pushed to the literal
4310 pool (no -fPIC). Usually we would expect them to be
4311 handled via secondary reload but this does not happen if
4312 they are used as literal pool slot replacement in reload
4313 inheritance (see emit_input_reload_insns). */
4314 if (GET_CODE (XEXP (op
, 0)) == PLUS
4315 && GET_CODE (XEXP (XEXP(op
, 0), 0)) == SYMBOL_REF
4316 && GET_CODE (XEXP (XEXP(op
, 0), 1)) == CONST_INT
)
4318 if (flag_pic
&& reg_class_subset_p (ADDR_REGS
, rclass
))
4326 if (!legitimate_reload_constant_p (op
))
4330 /* load address will be used. */
4331 if (reg_class_subset_p (ADDR_REGS
, rclass
))
4343 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4344 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4348 s390_check_symref_alignment (rtx addr
, HOST_WIDE_INT alignment
)
4350 HOST_WIDE_INT addend
;
4353 /* The "required alignment" might be 0 (e.g. for certain structs
4354 accessed via BLKmode). Early abort in this case, as well as when
4355 an alignment > 8 is required. */
4356 if (alignment
< 2 || alignment
> 8)
4359 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4362 if (addend
& (alignment
- 1))
4365 if (GET_CODE (symref
) == SYMBOL_REF
)
4367 /* s390_encode_section_info is not called for anchors, since they don't
4368 have corresponding VAR_DECLs. Therefore, we cannot rely on
4369 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4370 if (SYMBOL_REF_ANCHOR_P (symref
))
4372 HOST_WIDE_INT block_offset
= SYMBOL_REF_BLOCK_OFFSET (symref
);
4373 unsigned int block_alignment
= (SYMBOL_REF_BLOCK (symref
)->alignment
4376 gcc_assert (block_offset
>= 0);
4377 return ((block_offset
& (alignment
- 1)) == 0
4378 && block_alignment
>= alignment
);
4381 /* We have load-relative instructions for 2-byte, 4-byte, and
4382 8-byte alignment so allow only these. */
4385 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref
);
4386 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref
);
4387 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref
);
4388 default: return false;
4392 if (GET_CODE (symref
) == UNSPEC
4393 && alignment
<= UNITS_PER_LONG
)
4399 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4400 operand SCRATCH is used to reload the even part of the address and
4404 s390_reload_larl_operand (rtx reg
, rtx addr
, rtx scratch
)
4406 HOST_WIDE_INT addend
;
4409 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4413 /* Easy case. The addend is even so larl will do fine. */
4414 emit_move_insn (reg
, addr
);
4417 /* We can leave the scratch register untouched if the target
4418 register is a valid base register. */
4419 if (REGNO (reg
) < FIRST_PSEUDO_REGISTER
4420 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
)
4423 gcc_assert (REGNO (scratch
) < FIRST_PSEUDO_REGISTER
);
4424 gcc_assert (REGNO_REG_CLASS (REGNO (scratch
)) == ADDR_REGS
);
4427 emit_move_insn (scratch
,
4428 gen_rtx_CONST (Pmode
,
4429 gen_rtx_PLUS (Pmode
, symref
,
4430 GEN_INT (addend
- 1))));
4432 emit_move_insn (scratch
, symref
);
4434 /* Increment the address using la in order to avoid clobbering cc. */
4435 s390_load_address (reg
, gen_rtx_PLUS (Pmode
, scratch
, const1_rtx
));
4439 /* Generate what is necessary to move between REG and MEM using
4440 SCRATCH. The direction is given by TOMEM. */
4443 s390_reload_symref_address (rtx reg
, rtx mem
, rtx scratch
, bool tomem
)
4445 /* Reload might have pulled a constant out of the literal pool.
4446 Force it back in. */
4447 if (CONST_INT_P (mem
) || GET_CODE (mem
) == CONST_DOUBLE
4448 || GET_CODE (mem
) == CONST_WIDE_INT
4449 || GET_CODE (mem
) == CONST_VECTOR
4450 || GET_CODE (mem
) == CONST
)
4451 mem
= force_const_mem (GET_MODE (reg
), mem
);
4453 gcc_assert (MEM_P (mem
));
4455 /* For a load from memory we can leave the scratch register
4456 untouched if the target register is a valid base register. */
4458 && REGNO (reg
) < FIRST_PSEUDO_REGISTER
4459 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
4460 && GET_MODE (reg
) == GET_MODE (scratch
))
4463 /* Load address into scratch register. Since we can't have a
4464 secondary reload for a secondary reload we have to cover the case
4465 where larl would need a secondary reload here as well. */
4466 s390_reload_larl_operand (scratch
, XEXP (mem
, 0), scratch
);
4468 /* Now we can use a standard load/store to do the move. */
4470 emit_move_insn (replace_equiv_address (mem
, scratch
), reg
);
4472 emit_move_insn (reg
, replace_equiv_address (mem
, scratch
));
4475 /* Inform reload about cases where moving X with a mode MODE to a register in
4476 RCLASS requires an extra scratch or immediate register. Return the class
4477 needed for the immediate register. */
4480 s390_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
4481 machine_mode mode
, secondary_reload_info
*sri
)
4483 enum reg_class rclass
= (enum reg_class
) rclass_i
;
4485 /* Intermediate register needed. */
4486 if (reg_classes_intersect_p (CC_REGS
, rclass
))
4487 return GENERAL_REGS
;
4491 /* The vst/vl vector move instructions allow only for short
4494 && GET_CODE (XEXP (x
, 0)) == PLUS
4495 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4496 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x
, 0), 1)))
4497 && reg_class_subset_p (rclass
, VEC_REGS
)
4498 && (!reg_class_subset_p (rclass
, FP_REGS
)
4499 || (GET_MODE_SIZE (mode
) > 8
4500 && s390_class_max_nregs (FP_REGS
, mode
) == 1)))
4503 sri
->icode
= (TARGET_64BIT
?
4504 CODE_FOR_reloaddi_la_in
:
4505 CODE_FOR_reloadsi_la_in
);
4507 sri
->icode
= (TARGET_64BIT
?
4508 CODE_FOR_reloaddi_la_out
:
4509 CODE_FOR_reloadsi_la_out
);
4515 HOST_WIDE_INT offset
;
4518 /* On z10 several optimizer steps may generate larl operands with
4521 && s390_loadrelative_operand_p (x
, &symref
, &offset
)
4523 && !SYMBOL_FLAG_NOTALIGN2_P (symref
)
4524 && (offset
& 1) == 1)
4525 sri
->icode
= ((mode
== DImode
) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4526 : CODE_FOR_reloadsi_larl_odd_addend_z10
);
4528 /* Handle all the (mem (symref)) accesses we cannot use the z10
4529 instructions for. */
4531 && s390_loadrelative_operand_p (XEXP (x
, 0), NULL
, NULL
)
4533 || !reg_class_subset_p (rclass
, GENERAL_REGS
)
4534 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
4535 || !s390_check_symref_alignment (XEXP (x
, 0),
4536 GET_MODE_SIZE (mode
))))
4538 #define __SECONDARY_RELOAD_CASE(M,m) \
4541 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4542 CODE_FOR_reload##m##di_tomem_z10; \
4544 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4545 CODE_FOR_reload##m##si_tomem_z10; \
4548 switch (GET_MODE (x
))
4550 __SECONDARY_RELOAD_CASE (QI
, qi
);
4551 __SECONDARY_RELOAD_CASE (HI
, hi
);
4552 __SECONDARY_RELOAD_CASE (SI
, si
);
4553 __SECONDARY_RELOAD_CASE (DI
, di
);
4554 __SECONDARY_RELOAD_CASE (TI
, ti
);
4555 __SECONDARY_RELOAD_CASE (SF
, sf
);
4556 __SECONDARY_RELOAD_CASE (DF
, df
);
4557 __SECONDARY_RELOAD_CASE (TF
, tf
);
4558 __SECONDARY_RELOAD_CASE (SD
, sd
);
4559 __SECONDARY_RELOAD_CASE (DD
, dd
);
4560 __SECONDARY_RELOAD_CASE (TD
, td
);
4561 __SECONDARY_RELOAD_CASE (V1QI
, v1qi
);
4562 __SECONDARY_RELOAD_CASE (V2QI
, v2qi
);
4563 __SECONDARY_RELOAD_CASE (V4QI
, v4qi
);
4564 __SECONDARY_RELOAD_CASE (V8QI
, v8qi
);
4565 __SECONDARY_RELOAD_CASE (V16QI
, v16qi
);
4566 __SECONDARY_RELOAD_CASE (V1HI
, v1hi
);
4567 __SECONDARY_RELOAD_CASE (V2HI
, v2hi
);
4568 __SECONDARY_RELOAD_CASE (V4HI
, v4hi
);
4569 __SECONDARY_RELOAD_CASE (V8HI
, v8hi
);
4570 __SECONDARY_RELOAD_CASE (V1SI
, v1si
);
4571 __SECONDARY_RELOAD_CASE (V2SI
, v2si
);
4572 __SECONDARY_RELOAD_CASE (V4SI
, v4si
);
4573 __SECONDARY_RELOAD_CASE (V1DI
, v1di
);
4574 __SECONDARY_RELOAD_CASE (V2DI
, v2di
);
4575 __SECONDARY_RELOAD_CASE (V1TI
, v1ti
);
4576 __SECONDARY_RELOAD_CASE (V1SF
, v1sf
);
4577 __SECONDARY_RELOAD_CASE (V2SF
, v2sf
);
4578 __SECONDARY_RELOAD_CASE (V4SF
, v4sf
);
4579 __SECONDARY_RELOAD_CASE (V1DF
, v1df
);
4580 __SECONDARY_RELOAD_CASE (V2DF
, v2df
);
4581 __SECONDARY_RELOAD_CASE (V1TF
, v1tf
);
4585 #undef __SECONDARY_RELOAD_CASE
4589 /* We need a scratch register when loading a PLUS expression which
4590 is not a legitimate operand of the LOAD ADDRESS instruction. */
4591 /* LRA can deal with transformation of plus op very well -- so we
4592 don't need to prompt LRA in this case. */
4593 if (! lra_in_progress
&& in_p
&& s390_plus_operand (x
, mode
))
4594 sri
->icode
= (TARGET_64BIT
?
4595 CODE_FOR_reloaddi_plus
: CODE_FOR_reloadsi_plus
);
4597 /* Performing a multiword move from or to memory we have to make sure the
4598 second chunk in memory is addressable without causing a displacement
4599 overflow. If that would be the case we calculate the address in
4600 a scratch register. */
4602 && GET_CODE (XEXP (x
, 0)) == PLUS
4603 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4604 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x
, 0), 1))
4605 + GET_MODE_SIZE (mode
) - 1))
4607 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4608 in a s_operand address since we may fallback to lm/stm. So we only
4609 have to care about overflows in the b+i+d case. */
4610 if ((reg_classes_intersect_p (GENERAL_REGS
, rclass
)
4611 && s390_class_max_nregs (GENERAL_REGS
, mode
) > 1
4612 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == PLUS
)
4613 /* For FP_REGS no lm/stm is available so this check is triggered
4614 for displacement overflows in b+i+d and b+d like addresses. */
4615 || (reg_classes_intersect_p (FP_REGS
, rclass
)
4616 && s390_class_max_nregs (FP_REGS
, mode
) > 1))
4619 sri
->icode
= (TARGET_64BIT
?
4620 CODE_FOR_reloaddi_la_in
:
4621 CODE_FOR_reloadsi_la_in
);
4623 sri
->icode
= (TARGET_64BIT
?
4624 CODE_FOR_reloaddi_la_out
:
4625 CODE_FOR_reloadsi_la_out
);
4629 /* A scratch address register is needed when a symbolic constant is
4630 copied to r0 compiling with -fPIC. In other cases the target
4631 register might be used as temporary (see legitimize_pic_address). */
4632 if (in_p
&& SYMBOLIC_CONST (x
) && flag_pic
== 2 && rclass
!= ADDR_REGS
)
4633 sri
->icode
= (TARGET_64BIT
?
4634 CODE_FOR_reloaddi_PIC_addr
:
4635 CODE_FOR_reloadsi_PIC_addr
);
4637 /* Either scratch or no register needed. */
4641 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4643 We need secondary memory to move data between GPRs and FPRs.
4645 - With DFP the ldgr lgdr instructions are available. Due to the
4646 different alignment we cannot use them for SFmode. For 31 bit a
4647 64 bit value in GPR would be a register pair so here we still
4648 need to go via memory.
4650 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4651 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4652 in full VRs so as before also on z13 we do these moves via
4655 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4658 s390_secondary_memory_needed (machine_mode mode
,
4659 reg_class_t class1
, reg_class_t class2
)
4661 return (((reg_classes_intersect_p (class1
, VEC_REGS
)
4662 && reg_classes_intersect_p (class2
, GENERAL_REGS
))
4663 || (reg_classes_intersect_p (class1
, GENERAL_REGS
)
4664 && reg_classes_intersect_p (class2
, VEC_REGS
)))
4665 && (TARGET_TPF
|| !TARGET_DFP
|| !TARGET_64BIT
4666 || GET_MODE_SIZE (mode
) != 8)
4667 && (!TARGET_VX
|| (SCALAR_FLOAT_MODE_P (mode
)
4668 && GET_MODE_SIZE (mode
) > 8)));
4671 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4673 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4674 because the movsi and movsf patterns don't handle r/f moves. */
4677 s390_secondary_memory_needed_mode (machine_mode mode
)
4679 if (GET_MODE_BITSIZE (mode
) < 32)
4680 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
4684 /* Generate code to load SRC, which is PLUS that is not a
4685 legitimate operand for the LA instruction, into TARGET.
4686 SCRATCH may be used as scratch register. */
4689 s390_expand_plus_operand (rtx target
, rtx src
,
4693 struct s390_address ad
;
4695 /* src must be a PLUS; get its two operands. */
4696 gcc_assert (GET_CODE (src
) == PLUS
);
4697 gcc_assert (GET_MODE (src
) == Pmode
);
4699 /* Check if any of the two operands is already scheduled
4700 for replacement by reload. This can happen e.g. when
4701 float registers occur in an address. */
4702 sum1
= find_replacement (&XEXP (src
, 0));
4703 sum2
= find_replacement (&XEXP (src
, 1));
4704 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4706 /* If the address is already strictly valid, there's nothing to do. */
4707 if (!s390_decompose_address (src
, &ad
)
4708 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4709 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
4711 /* Otherwise, one of the operands cannot be an address register;
4712 we reload its value into the scratch register. */
4713 if (true_regnum (sum1
) < 1 || true_regnum (sum1
) > 15)
4715 emit_move_insn (scratch
, sum1
);
4718 if (true_regnum (sum2
) < 1 || true_regnum (sum2
) > 15)
4720 emit_move_insn (scratch
, sum2
);
4724 /* According to the way these invalid addresses are generated
4725 in reload.c, it should never happen (at least on s390) that
4726 *neither* of the PLUS components, after find_replacements
4727 was applied, is an address register. */
4728 if (sum1
== scratch
&& sum2
== scratch
)
4734 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4737 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4738 is only ever performed on addresses, so we can mark the
4739 sum as legitimate for LA in any case. */
4740 s390_load_address (target
, src
);
4744 /* Return true if ADDR is a valid memory address.
4745 STRICT specifies whether strict register checking applies. */
4748 s390_legitimate_address_p (machine_mode mode
, rtx addr
, bool strict
)
4750 struct s390_address ad
;
4753 && larl_operand (addr
, VOIDmode
)
4754 && (mode
== VOIDmode
4755 || s390_check_symref_alignment (addr
, GET_MODE_SIZE (mode
))))
4758 if (!s390_decompose_address (addr
, &ad
))
4761 /* The vector memory instructions only support short displacements.
4762 Reject invalid displacements early to prevent plenty of lay
4763 instructions to be generated later which then cannot be merged
4766 && VECTOR_MODE_P (mode
)
4767 && ad
.disp
!= NULL_RTX
4768 && CONST_INT_P (ad
.disp
)
4769 && !SHORT_DISP_IN_RANGE (INTVAL (ad
.disp
)))
4774 if (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4777 if (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
)))
4783 && !(REGNO (ad
.base
) >= FIRST_PSEUDO_REGISTER
4784 || REGNO_REG_CLASS (REGNO (ad
.base
)) == ADDR_REGS
))
4788 && !(REGNO (ad
.indx
) >= FIRST_PSEUDO_REGISTER
4789 || REGNO_REG_CLASS (REGNO (ad
.indx
)) == ADDR_REGS
))
4795 /* Return true if OP is a valid operand for the LA instruction.
4796 In 31-bit, we need to prove that the result is used as an
4797 address, as LA performs only a 31-bit addition. */
4800 legitimate_la_operand_p (rtx op
)
4802 struct s390_address addr
;
4803 if (!s390_decompose_address (op
, &addr
))
4806 return (TARGET_64BIT
|| addr
.pointer
);
4809 /* Return true if it is valid *and* preferable to use LA to
4810 compute the sum of OP1 and OP2. */
4813 preferred_la_operand_p (rtx op1
, rtx op2
)
4815 struct s390_address addr
;
4817 if (op2
!= const0_rtx
)
4818 op1
= gen_rtx_PLUS (Pmode
, op1
, op2
);
4820 if (!s390_decompose_address (op1
, &addr
))
4822 if (addr
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (addr
.base
)))
4824 if (addr
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (addr
.indx
)))
4827 /* Avoid LA instructions with index (and base) register on z196 or
4828 later; it is preferable to use regular add instructions when
4829 possible. Starting with zEC12 the la with index register is
4830 "uncracked" again but still slower than a regular add. */
4831 if (addr
.indx
&& s390_tune
>= PROCESSOR_2817_Z196
)
4834 if (!TARGET_64BIT
&& !addr
.pointer
)
4840 if ((addr
.base
&& REG_P (addr
.base
) && REG_POINTER (addr
.base
))
4841 || (addr
.indx
&& REG_P (addr
.indx
) && REG_POINTER (addr
.indx
)))
4847 /* Emit a forced load-address operation to load SRC into DST.
4848 This will use the LOAD ADDRESS instruction even in situations
4849 where legitimate_la_operand_p (SRC) returns false. */
4852 s390_load_address (rtx dst
, rtx src
)
4855 emit_move_insn (dst
, src
);
4857 emit_insn (gen_force_la_31 (dst
, src
));
4860 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4863 s390_rel_address_ok_p (rtx symbol_ref
)
4867 if (symbol_ref
== s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref
))
4870 decl
= SYMBOL_REF_DECL (symbol_ref
);
4872 if (!flag_pic
|| SYMBOL_REF_LOCAL_P (symbol_ref
))
4873 return (s390_pic_data_is_text_relative
4875 && TREE_CODE (decl
) == FUNCTION_DECL
));
4880 /* Return a legitimate reference for ORIG (an address) using the
4881 register REG. If REG is 0, a new pseudo is generated.
4883 There are two types of references that must be handled:
4885 1. Global data references must load the address from the GOT, via
4886 the PIC reg. An insn is emitted to do this load, and the reg is
4889 2. Static data references, constant pool addresses, and code labels
4890 compute the address as an offset from the GOT, whose base is in
4891 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4892 differentiate them from global data objects. The returned
4893 address is the PIC reg + an unspec constant.
4895 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4896 reg also appears in the address. */
4899 legitimize_pic_address (rtx orig
, rtx reg
)
4902 rtx addend
= const0_rtx
;
4905 gcc_assert (!TLS_SYMBOLIC_CONST (addr
));
4907 if (GET_CODE (addr
) == CONST
)
4908 addr
= XEXP (addr
, 0);
4910 if (GET_CODE (addr
) == PLUS
)
4912 addend
= XEXP (addr
, 1);
4913 addr
= XEXP (addr
, 0);
4916 if ((GET_CODE (addr
) == LABEL_REF
4917 || (SYMBOL_REF_P (addr
) && s390_rel_address_ok_p (addr
))
4918 || (GET_CODE (addr
) == UNSPEC
&&
4919 (XINT (addr
, 1) == UNSPEC_GOTENT
4920 || XINT (addr
, 1) == UNSPEC_PLT
)))
4921 && GET_CODE (addend
) == CONST_INT
)
4923 /* This can be locally addressed. */
4925 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4926 rtx const_addr
= (GET_CODE (addr
) == UNSPEC
?
4927 gen_rtx_CONST (Pmode
, addr
) : addr
);
4929 if (larl_operand (const_addr
, VOIDmode
)
4930 && INTVAL (addend
) < HOST_WIDE_INT_1
<< 31
4931 && INTVAL (addend
) >= -(HOST_WIDE_INT_1
<< 31))
4933 if (INTVAL (addend
) & 1)
4935 /* LARL can't handle odd offsets, so emit a pair of LARL
4937 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4939 if (!DISP_IN_RANGE (INTVAL (addend
)))
4941 HOST_WIDE_INT even
= INTVAL (addend
) - 1;
4942 addr
= gen_rtx_PLUS (Pmode
, addr
, GEN_INT (even
));
4943 addr
= gen_rtx_CONST (Pmode
, addr
);
4944 addend
= const1_rtx
;
4947 emit_move_insn (temp
, addr
);
4948 new_rtx
= gen_rtx_PLUS (Pmode
, temp
, addend
);
4952 s390_load_address (reg
, new_rtx
);
4958 /* If the offset is even, we can just use LARL. This
4959 will happen automatically. */
4964 /* No larl - Access local symbols relative to the GOT. */
4966 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4968 if (reload_in_progress
|| reload_completed
)
4969 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4971 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
4972 if (addend
!= const0_rtx
)
4973 addr
= gen_rtx_PLUS (Pmode
, addr
, addend
);
4974 addr
= gen_rtx_CONST (Pmode
, addr
);
4975 addr
= force_const_mem (Pmode
, addr
);
4976 emit_move_insn (temp
, addr
);
4978 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
4981 s390_load_address (reg
, new_rtx
);
4986 else if (GET_CODE (addr
) == SYMBOL_REF
&& addend
== const0_rtx
)
4988 /* A non-local symbol reference without addend.
4990 The symbol ref is wrapped into an UNSPEC to make sure the
4991 proper operand modifier (@GOT or @GOTENT) will be emitted.
4992 This will tell the linker to put the symbol into the GOT.
4994 Additionally the code dereferencing the GOT slot is emitted here.
4996 An addend to the symref needs to be added afterwards.
4997 legitimize_pic_address calls itself recursively to handle
4998 that case. So no need to do it here. */
5001 reg
= gen_reg_rtx (Pmode
);
5005 /* Use load relative if possible.
5006 lgrl <target>, sym@GOTENT */
5007 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
5008 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5009 new_rtx
= gen_const_mem (GET_MODE (reg
), new_rtx
);
5011 emit_move_insn (reg
, new_rtx
);
5014 else if (flag_pic
== 1)
5016 /* Assume GOT offset is a valid displacement operand (< 4k
5017 or < 512k with z990). This is handled the same way in
5018 both 31- and 64-bit code (@GOT).
5019 lg <target>, sym@GOT(r12) */
5021 if (reload_in_progress
|| reload_completed
)
5022 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5024 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5025 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5026 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5027 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5028 emit_move_insn (reg
, new_rtx
);
5033 /* If the GOT offset might be >= 4k, we determine the position
5034 of the GOT entry via a PC-relative LARL (@GOTENT).
5035 larl temp, sym@GOTENT
5036 lg <target>, 0(temp) */
5038 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
5040 gcc_assert (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
5041 || REGNO_REG_CLASS (REGNO (temp
)) == ADDR_REGS
);
5043 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
5044 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5045 emit_move_insn (temp
, new_rtx
);
5046 new_rtx
= gen_const_mem (Pmode
, temp
);
5047 emit_move_insn (reg
, new_rtx
);
5052 else if (GET_CODE (addr
) == UNSPEC
&& GET_CODE (addend
) == CONST_INT
)
5054 gcc_assert (XVECLEN (addr
, 0) == 1);
5055 switch (XINT (addr
, 1))
5057 /* These address symbols (or PLT slots) relative to the GOT
5058 (not GOT slots!). In general this will exceed the
5059 displacement range so these value belong into the literal
5063 new_rtx
= force_const_mem (Pmode
, orig
);
5066 /* For -fPIC the GOT size might exceed the displacement
5067 range so make sure the value is in the literal pool. */
5070 new_rtx
= force_const_mem (Pmode
, orig
);
5073 /* For @GOTENT larl is used. This is handled like local
5079 /* For @PLT larl is used. This is handled like local
5085 /* Everything else cannot happen. */
5090 else if (addend
!= const0_rtx
)
5092 /* Otherwise, compute the sum. */
5094 rtx base
= legitimize_pic_address (addr
, reg
);
5095 new_rtx
= legitimize_pic_address (addend
,
5096 base
== reg
? NULL_RTX
: reg
);
5097 if (GET_CODE (new_rtx
) == CONST_INT
)
5098 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
5101 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
5103 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
5104 new_rtx
= XEXP (new_rtx
, 1);
5106 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
5109 if (GET_CODE (new_rtx
) == CONST
)
5110 new_rtx
= XEXP (new_rtx
, 0);
5111 new_rtx
= force_operand (new_rtx
, 0);
5117 /* Load the thread pointer into a register. */
5120 s390_get_thread_pointer (void)
5122 rtx tp
= gen_reg_rtx (Pmode
);
5124 emit_insn (gen_get_thread_pointer (Pmode
, tp
));
5126 mark_reg_pointer (tp
, BITS_PER_WORD
);
5131 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5132 in s390_tls_symbol which always refers to __tls_get_offset.
5133 The returned offset is written to RESULT_REG and an USE rtx is
5134 generated for TLS_CALL. */
5136 static GTY(()) rtx s390_tls_symbol
;
5139 s390_emit_tls_call_insn (rtx result_reg
, rtx tls_call
)
5144 emit_insn (s390_load_got ());
5146 if (!s390_tls_symbol
)
5147 s390_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tls_get_offset");
5149 insn
= s390_emit_call (s390_tls_symbol
, tls_call
, result_reg
,
5150 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
5152 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), result_reg
);
5153 RTL_CONST_CALL_P (insn
) = 1;
5156 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5157 this (thread-local) address. REG may be used as temporary. */
5160 legitimize_tls_address (rtx addr
, rtx reg
)
5162 rtx new_rtx
, tls_call
, temp
, base
, r2
;
5165 if (GET_CODE (addr
) == SYMBOL_REF
)
5166 switch (tls_symbolic_operand (addr
))
5168 case TLS_MODEL_GLOBAL_DYNAMIC
:
5170 r2
= gen_rtx_REG (Pmode
, 2);
5171 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_TLSGD
);
5172 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5173 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5174 emit_move_insn (r2
, new_rtx
);
5175 s390_emit_tls_call_insn (r2
, tls_call
);
5176 insn
= get_insns ();
5179 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5180 temp
= gen_reg_rtx (Pmode
);
5181 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5183 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5186 s390_load_address (reg
, new_rtx
);
5191 case TLS_MODEL_LOCAL_DYNAMIC
:
5193 r2
= gen_rtx_REG (Pmode
, 2);
5194 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM
);
5195 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5196 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5197 emit_move_insn (r2
, new_rtx
);
5198 s390_emit_tls_call_insn (r2
, tls_call
);
5199 insn
= get_insns ();
5202 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM_NTPOFF
);
5203 temp
= gen_reg_rtx (Pmode
);
5204 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5206 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5207 base
= gen_reg_rtx (Pmode
);
5208 s390_load_address (base
, new_rtx
);
5210 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_DTPOFF
);
5211 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5212 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5213 temp
= gen_reg_rtx (Pmode
);
5214 emit_move_insn (temp
, new_rtx
);
5216 new_rtx
= gen_rtx_PLUS (Pmode
, base
, temp
);
5219 s390_load_address (reg
, new_rtx
);
5224 case TLS_MODEL_INITIAL_EXEC
:
5227 /* Assume GOT offset < 4k. This is handled the same way
5228 in both 31- and 64-bit code. */
5230 if (reload_in_progress
|| reload_completed
)
5231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5233 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTNTPOFF
);
5234 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5235 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5236 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5237 temp
= gen_reg_rtx (Pmode
);
5238 emit_move_insn (temp
, new_rtx
);
5242 /* If the GOT offset might be >= 4k, we determine the position
5243 of the GOT entry via a PC-relative LARL. */
5245 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_INDNTPOFF
);
5246 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5247 temp
= gen_reg_rtx (Pmode
);
5248 emit_move_insn (temp
, new_rtx
);
5250 new_rtx
= gen_const_mem (Pmode
, temp
);
5251 temp
= gen_reg_rtx (Pmode
);
5252 emit_move_insn (temp
, new_rtx
);
5255 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5258 s390_load_address (reg
, new_rtx
);
5263 case TLS_MODEL_LOCAL_EXEC
:
5264 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5265 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5266 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5267 temp
= gen_reg_rtx (Pmode
);
5268 emit_move_insn (temp
, new_rtx
);
5270 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5273 s390_load_address (reg
, new_rtx
);
5282 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == UNSPEC
)
5284 switch (XINT (XEXP (addr
, 0), 1))
5287 case UNSPEC_INDNTPOFF
:
5296 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
5297 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
5299 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5300 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5301 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5303 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5304 new_rtx
= plus_constant (Pmode
, new_rtx
,
5305 INTVAL (XEXP (XEXP (addr
, 0), 1)));
5306 new_rtx
= force_operand (new_rtx
, 0);
5309 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5310 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == NEG
)
5312 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5313 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5314 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5316 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5317 new_rtx
= gen_rtx_NEG (Pmode
, new_rtx
);
5318 new_rtx
= force_operand (new_rtx
, 0);
5322 gcc_unreachable (); /* for now ... */
5327 /* Emit insns making the address in operands[1] valid for a standard
5328 move to operands[0]. operands[1] is replaced by an address which
5329 should be used instead of the former RTX to emit the move
5333 emit_symbolic_move (rtx
*operands
)
5335 rtx temp
= !can_create_pseudo_p () ? operands
[0] : gen_reg_rtx (Pmode
);
5337 if (GET_CODE (operands
[0]) == MEM
)
5338 operands
[1] = force_reg (Pmode
, operands
[1]);
5339 else if (TLS_SYMBOLIC_CONST (operands
[1]))
5340 operands
[1] = legitimize_tls_address (operands
[1], temp
);
5342 operands
[1] = legitimize_pic_address (operands
[1], temp
);
5345 /* Try machine-dependent ways of modifying an illegitimate address X
5346 to be legitimate. If we find one, return the new, valid address.
5348 OLDX is the address as it was before break_out_memory_refs was called.
5349 In some cases it is useful to look at this to decide what needs to be done.
5351 MODE is the mode of the operand pointed to by X.
5353 When -fpic is used, special handling is needed for symbolic references.
5354 See comments by legitimize_pic_address for details. */
5357 s390_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
5358 machine_mode mode ATTRIBUTE_UNUSED
)
5360 rtx constant_term
= const0_rtx
;
5362 if (TLS_SYMBOLIC_CONST (x
))
5364 x
= legitimize_tls_address (x
, 0);
5366 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5369 else if (GET_CODE (x
) == PLUS
5370 && (TLS_SYMBOLIC_CONST (XEXP (x
, 0))
5371 || TLS_SYMBOLIC_CONST (XEXP (x
, 1))))
5377 if (SYMBOLIC_CONST (x
)
5378 || (GET_CODE (x
) == PLUS
5379 && (SYMBOLIC_CONST (XEXP (x
, 0))
5380 || SYMBOLIC_CONST (XEXP (x
, 1)))))
5381 x
= legitimize_pic_address (x
, 0);
5383 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5387 x
= eliminate_constant_term (x
, &constant_term
);
5389 /* Optimize loading of large displacements by splitting them
5390 into the multiple of 4K and the rest; this allows the
5391 former to be CSE'd if possible.
5393 Don't do this if the displacement is added to a register
5394 pointing into the stack frame, as the offsets will
5395 change later anyway. */
5397 if (GET_CODE (constant_term
) == CONST_INT
5398 && !TARGET_LONG_DISPLACEMENT
5399 && !DISP_IN_RANGE (INTVAL (constant_term
))
5400 && !(REG_P (x
) && REGNO_PTR_FRAME_P (REGNO (x
))))
5402 HOST_WIDE_INT lower
= INTVAL (constant_term
) & 0xfff;
5403 HOST_WIDE_INT upper
= INTVAL (constant_term
) ^ lower
;
5405 rtx temp
= gen_reg_rtx (Pmode
);
5406 rtx val
= force_operand (GEN_INT (upper
), temp
);
5408 emit_move_insn (temp
, val
);
5410 x
= gen_rtx_PLUS (Pmode
, x
, temp
);
5411 constant_term
= GEN_INT (lower
);
5414 if (GET_CODE (x
) == PLUS
)
5416 if (GET_CODE (XEXP (x
, 0)) == REG
)
5418 rtx temp
= gen_reg_rtx (Pmode
);
5419 rtx val
= force_operand (XEXP (x
, 1), temp
);
5421 emit_move_insn (temp
, val
);
5423 x
= gen_rtx_PLUS (Pmode
, XEXP (x
, 0), temp
);
5426 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5428 rtx temp
= gen_reg_rtx (Pmode
);
5429 rtx val
= force_operand (XEXP (x
, 0), temp
);
5431 emit_move_insn (temp
, val
);
5433 x
= gen_rtx_PLUS (Pmode
, temp
, XEXP (x
, 1));
5437 if (constant_term
!= const0_rtx
)
5438 x
= gen_rtx_PLUS (Pmode
, x
, constant_term
);
5443 /* Try a machine-dependent way of reloading an illegitimate address AD
5444 operand. If we find one, push the reload and return the new address.
5446 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5447 and TYPE is the reload type of the current reload. */
5450 legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
5451 int opnum
, int type
)
5453 if (!optimize
|| TARGET_LONG_DISPLACEMENT
)
5456 if (GET_CODE (ad
) == PLUS
)
5458 rtx tem
= simplify_binary_operation (PLUS
, Pmode
,
5459 XEXP (ad
, 0), XEXP (ad
, 1));
5464 if (GET_CODE (ad
) == PLUS
5465 && GET_CODE (XEXP (ad
, 0)) == REG
5466 && GET_CODE (XEXP (ad
, 1)) == CONST_INT
5467 && !DISP_IN_RANGE (INTVAL (XEXP (ad
, 1))))
5469 HOST_WIDE_INT lower
= INTVAL (XEXP (ad
, 1)) & 0xfff;
5470 HOST_WIDE_INT upper
= INTVAL (XEXP (ad
, 1)) ^ lower
;
5471 rtx cst
, tem
, new_rtx
;
5473 cst
= GEN_INT (upper
);
5474 if (!legitimate_reload_constant_p (cst
))
5475 cst
= force_const_mem (Pmode
, cst
);
5477 tem
= gen_rtx_PLUS (Pmode
, XEXP (ad
, 0), cst
);
5478 new_rtx
= gen_rtx_PLUS (Pmode
, tem
, GEN_INT (lower
));
5480 push_reload (XEXP (tem
, 1), 0, &XEXP (tem
, 1), 0,
5481 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
5482 opnum
, (enum reload_type
) type
);
5489 /* Emit code to move LEN bytes from DST to SRC. */
5492 s390_expand_cpymem (rtx dst
, rtx src
, rtx len
)
5494 /* When tuning for z10 or higher we rely on the Glibc functions to
5495 do the right thing. Only for constant lengths below 64k we will
5496 generate inline code. */
5497 if (s390_tune
>= PROCESSOR_2097_Z10
5498 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5501 /* Expand memcpy for constant length operands without a loop if it
5502 is shorter that way.
5504 With a constant length argument a
5505 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5506 if (GET_CODE (len
) == CONST_INT
5507 && INTVAL (len
) >= 0
5508 && INTVAL (len
) <= 256 * 6
5509 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5513 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5515 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5516 rtx newsrc
= adjust_address (src
, BLKmode
, o
);
5517 emit_insn (gen_cpymem_short (newdst
, newsrc
,
5518 GEN_INT (l
> 256 ? 255 : l
- 1)));
5522 else if (TARGET_MVCLE
)
5524 emit_insn (gen_cpymem_long (dst
, src
, convert_to_mode (Pmode
, len
, 1)));
5529 rtx dst_addr
, src_addr
, count
, blocks
, temp
;
5530 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5531 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5532 rtx_code_label
*end_label
= gen_label_rtx ();
5535 mode
= GET_MODE (len
);
5536 if (mode
== VOIDmode
)
5539 dst_addr
= gen_reg_rtx (Pmode
);
5540 src_addr
= gen_reg_rtx (Pmode
);
5541 count
= gen_reg_rtx (mode
);
5542 blocks
= gen_reg_rtx (mode
);
5544 convert_move (count
, len
, 1);
5545 emit_cmp_and_jump_insns (count
, const0_rtx
,
5546 EQ
, NULL_RTX
, mode
, 1, end_label
);
5548 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5549 emit_move_insn (src_addr
, force_operand (XEXP (src
, 0), NULL_RTX
));
5550 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5551 src
= change_address (src
, VOIDmode
, src_addr
);
5553 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5556 emit_move_insn (count
, temp
);
5558 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5561 emit_move_insn (blocks
, temp
);
5563 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5564 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5566 emit_label (loop_start_label
);
5569 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 768))
5573 /* Issue a read prefetch for the +3 cache line. */
5574 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (768)),
5575 const0_rtx
, const0_rtx
);
5576 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5577 emit_insn (prefetch
);
5579 /* Issue a write prefetch for the +3 cache line. */
5580 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (768)),
5581 const1_rtx
, const0_rtx
);
5582 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5583 emit_insn (prefetch
);
5586 emit_insn (gen_cpymem_short (dst
, src
, GEN_INT (255)));
5587 s390_load_address (dst_addr
,
5588 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5589 s390_load_address (src_addr
,
5590 gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (256)));
5592 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5595 emit_move_insn (blocks
, temp
);
5597 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5598 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5600 emit_jump (loop_start_label
);
5601 emit_label (loop_end_label
);
5603 emit_insn (gen_cpymem_short (dst
, src
,
5604 convert_to_mode (Pmode
, count
, 1)));
5605 emit_label (end_label
);
5610 /* Emit code to set LEN bytes at DST to VAL.
5611 Make use of clrmem if VAL is zero. */
5614 s390_expand_setmem (rtx dst
, rtx len
, rtx val
)
5616 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) <= 0)
5619 gcc_assert (GET_CODE (val
) == CONST_INT
|| GET_MODE (val
) == QImode
);
5621 /* Expand setmem/clrmem for a constant length operand without a
5622 loop if it will be shorter that way.
5623 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5624 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5625 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5626 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5627 if (GET_CODE (len
) == CONST_INT
5628 && ((val
== const0_rtx
5629 && (INTVAL (len
) <= 256 * 4
5630 || (INTVAL (len
) <= 256 * 5 && TARGET_SETMEM_PFD(val
,len
))))
5631 || (val
!= const0_rtx
&& INTVAL (len
) <= 257 * 4))
5632 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5636 if (val
== const0_rtx
)
5637 /* clrmem: emit 256 byte blockwise XCs. */
5638 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5640 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5641 emit_insn (gen_clrmem_short (newdst
,
5642 GEN_INT (l
> 256 ? 255 : l
- 1)));
5645 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5646 setting first byte to val and using a 256 byte mvc with one
5647 byte overlap to propagate the byte. */
5648 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 257, o
+= 257)
5650 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5651 emit_move_insn (adjust_address (dst
, QImode
, o
), val
);
5654 rtx newdstp1
= adjust_address (dst
, BLKmode
, o
+ 1);
5655 emit_insn (gen_cpymem_short (newdstp1
, newdst
,
5656 GEN_INT (l
> 257 ? 255 : l
- 2)));
5661 else if (TARGET_MVCLE
)
5663 val
= force_not_mem (convert_modes (Pmode
, QImode
, val
, 1));
5665 emit_insn (gen_setmem_long_di (dst
, convert_to_mode (Pmode
, len
, 1),
5668 emit_insn (gen_setmem_long_si (dst
, convert_to_mode (Pmode
, len
, 1),
5674 rtx dst_addr
, count
, blocks
, temp
, dstp1
= NULL_RTX
;
5675 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5676 rtx_code_label
*onebyte_end_label
= gen_label_rtx ();
5677 rtx_code_label
*zerobyte_end_label
= gen_label_rtx ();
5678 rtx_code_label
*restbyte_end_label
= gen_label_rtx ();
5681 mode
= GET_MODE (len
);
5682 if (mode
== VOIDmode
)
5685 dst_addr
= gen_reg_rtx (Pmode
);
5686 count
= gen_reg_rtx (mode
);
5687 blocks
= gen_reg_rtx (mode
);
5689 convert_move (count
, len
, 1);
5690 emit_cmp_and_jump_insns (count
, const0_rtx
,
5691 EQ
, NULL_RTX
, mode
, 1, zerobyte_end_label
,
5692 profile_probability::very_unlikely ());
5694 /* We need to make a copy of the target address since memset is
5695 supposed to return it unmodified. We have to make it here
5696 already since the new reg is used at onebyte_end_label. */
5697 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5698 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5700 if (val
!= const0_rtx
)
5702 /* When using the overlapping mvc the original target
5703 address is only accessed as single byte entity (even by
5704 the mvc reading this value). */
5705 set_mem_size (dst
, 1);
5706 dstp1
= adjust_address (dst
, VOIDmode
, 1);
5707 emit_cmp_and_jump_insns (count
,
5708 const1_rtx
, EQ
, NULL_RTX
, mode
, 1,
5710 profile_probability::very_unlikely ());
5713 /* There is one unconditional (mvi+mvc)/xc after the loop
5714 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5715 or one (xc) here leaves this number of bytes to be handled by
5717 temp
= expand_binop (mode
, add_optab
, count
,
5718 val
== const0_rtx
? constm1_rtx
: GEN_INT (-2),
5719 count
, 1, OPTAB_DIRECT
);
5721 emit_move_insn (count
, temp
);
5723 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5726 emit_move_insn (blocks
, temp
);
5728 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5729 EQ
, NULL_RTX
, mode
, 1, restbyte_end_label
);
5731 emit_jump (loop_start_label
);
5733 if (val
!= const0_rtx
)
5735 /* The 1 byte != 0 special case. Not handled efficiently
5736 since we require two jumps for that. However, this
5737 should be very rare. */
5738 emit_label (onebyte_end_label
);
5739 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5740 emit_jump (zerobyte_end_label
);
5743 emit_label (loop_start_label
);
5745 if (TARGET_SETMEM_PFD (val
, len
))
5747 /* Issue a write prefetch. */
5748 rtx distance
= GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE
);
5749 rtx prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, distance
),
5750 const1_rtx
, const0_rtx
);
5751 emit_insn (prefetch
);
5752 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5755 if (val
== const0_rtx
)
5756 emit_insn (gen_clrmem_short (dst
, GEN_INT (255)));
5759 /* Set the first byte in the block to the value and use an
5760 overlapping mvc for the block. */
5761 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5762 emit_insn (gen_cpymem_short (dstp1
, dst
, GEN_INT (254)));
5764 s390_load_address (dst_addr
,
5765 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5767 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5770 emit_move_insn (blocks
, temp
);
5772 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5773 NE
, NULL_RTX
, mode
, 1, loop_start_label
);
5775 emit_label (restbyte_end_label
);
5777 if (val
== const0_rtx
)
5778 emit_insn (gen_clrmem_short (dst
, convert_to_mode (Pmode
, count
, 1)));
5781 /* Set the first byte in the block to the value and use an
5782 overlapping mvc for the block. */
5783 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5784 /* execute only uses the lowest 8 bits of count that's
5785 exactly what we need here. */
5786 emit_insn (gen_cpymem_short (dstp1
, dst
,
5787 convert_to_mode (Pmode
, count
, 1)));
5790 emit_label (zerobyte_end_label
);
5794 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5795 and return the result in TARGET. */
5798 s390_expand_cmpmem (rtx target
, rtx op0
, rtx op1
, rtx len
)
5800 rtx ccreg
= gen_rtx_REG (CCUmode
, CC_REGNUM
);
5803 /* When tuning for z10 or higher we rely on the Glibc functions to
5804 do the right thing. Only for constant lengths below 64k we will
5805 generate inline code. */
5806 if (s390_tune
>= PROCESSOR_2097_Z10
5807 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5810 /* As the result of CMPINT is inverted compared to what we need,
5811 we have to swap the operands. */
5812 tmp
= op0
; op0
= op1
; op1
= tmp
;
5814 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) >= 0 && INTVAL (len
) <= 256)
5816 if (INTVAL (len
) > 0)
5818 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (INTVAL (len
) - 1)));
5819 emit_insn (gen_cmpint (target
, ccreg
));
5822 emit_move_insn (target
, const0_rtx
);
5824 else if (TARGET_MVCLE
)
5826 emit_insn (gen_cmpmem_long (op0
, op1
, convert_to_mode (Pmode
, len
, 1)));
5827 emit_insn (gen_cmpint (target
, ccreg
));
5831 rtx addr0
, addr1
, count
, blocks
, temp
;
5832 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5833 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5834 rtx_code_label
*end_label
= gen_label_rtx ();
5837 mode
= GET_MODE (len
);
5838 if (mode
== VOIDmode
)
5841 addr0
= gen_reg_rtx (Pmode
);
5842 addr1
= gen_reg_rtx (Pmode
);
5843 count
= gen_reg_rtx (mode
);
5844 blocks
= gen_reg_rtx (mode
);
5846 convert_move (count
, len
, 1);
5847 emit_cmp_and_jump_insns (count
, const0_rtx
,
5848 EQ
, NULL_RTX
, mode
, 1, end_label
);
5850 emit_move_insn (addr0
, force_operand (XEXP (op0
, 0), NULL_RTX
));
5851 emit_move_insn (addr1
, force_operand (XEXP (op1
, 0), NULL_RTX
));
5852 op0
= change_address (op0
, VOIDmode
, addr0
);
5853 op1
= change_address (op1
, VOIDmode
, addr1
);
5855 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5858 emit_move_insn (count
, temp
);
5860 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5863 emit_move_insn (blocks
, temp
);
5865 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5866 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5868 emit_label (loop_start_label
);
5871 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 512))
5875 /* Issue a read prefetch for the +2 cache line of operand 1. */
5876 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (512)),
5877 const0_rtx
, const0_rtx
);
5878 emit_insn (prefetch
);
5879 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5881 /* Issue a read prefetch for the +2 cache line of operand 2. */
5882 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (512)),
5883 const0_rtx
, const0_rtx
);
5884 emit_insn (prefetch
);
5885 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5888 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (255)));
5889 temp
= gen_rtx_NE (VOIDmode
, ccreg
, const0_rtx
);
5890 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5891 gen_rtx_LABEL_REF (VOIDmode
, end_label
), pc_rtx
);
5892 temp
= gen_rtx_SET (pc_rtx
, temp
);
5893 emit_jump_insn (temp
);
5895 s390_load_address (addr0
,
5896 gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (256)));
5897 s390_load_address (addr1
,
5898 gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (256)));
5900 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5903 emit_move_insn (blocks
, temp
);
5905 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5906 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5908 emit_jump (loop_start_label
);
5909 emit_label (loop_end_label
);
5911 emit_insn (gen_cmpmem_short (op0
, op1
,
5912 convert_to_mode (Pmode
, count
, 1)));
5913 emit_label (end_label
);
5915 emit_insn (gen_cmpint (target
, ccreg
));
5920 /* Emit a conditional jump to LABEL for condition code mask MASK using
5921 comparsion operator COMPARISON. Return the emitted jump insn. */
5924 s390_emit_ccraw_jump (HOST_WIDE_INT mask
, enum rtx_code comparison
, rtx label
)
5928 gcc_assert (comparison
== EQ
|| comparison
== NE
);
5929 gcc_assert (mask
> 0 && mask
< 15);
5931 temp
= gen_rtx_fmt_ee (comparison
, VOIDmode
,
5932 gen_rtx_REG (CCRAWmode
, CC_REGNUM
), GEN_INT (mask
));
5933 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5934 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
5935 temp
= gen_rtx_SET (pc_rtx
, temp
);
5936 return emit_jump_insn (temp
);
5939 /* Emit the instructions to implement strlen of STRING and store the
5940 result in TARGET. The string has the known ALIGNMENT. This
5941 version uses vector instructions and is therefore not appropriate
5942 for targets prior to z13. */
5945 s390_expand_vec_strlen (rtx target
, rtx string
, rtx alignment
)
5947 rtx highest_index_to_load_reg
= gen_reg_rtx (Pmode
);
5948 rtx str_reg
= gen_reg_rtx (V16QImode
);
5949 rtx str_addr_base_reg
= gen_reg_rtx (Pmode
);
5950 rtx str_idx_reg
= gen_reg_rtx (Pmode
);
5951 rtx result_reg
= gen_reg_rtx (V16QImode
);
5952 rtx is_aligned_label
= gen_label_rtx ();
5953 rtx into_loop_label
= NULL_RTX
;
5954 rtx loop_start_label
= gen_label_rtx ();
5956 rtx len
= gen_reg_rtx (QImode
);
5960 s390_load_address (str_addr_base_reg
, XEXP (string
, 0));
5961 emit_move_insn (str_idx_reg
, const0_rtx
);
5963 if (INTVAL (alignment
) < 16)
5965 /* Check whether the address happens to be aligned properly so
5966 jump directly to the aligned loop. */
5967 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode
,
5968 str_addr_base_reg
, GEN_INT (15)),
5969 const0_rtx
, EQ
, NULL_RTX
,
5970 Pmode
, 1, is_aligned_label
);
5972 temp
= gen_reg_rtx (Pmode
);
5973 temp
= expand_binop (Pmode
, and_optab
, str_addr_base_reg
,
5974 GEN_INT (15), temp
, 1, OPTAB_DIRECT
);
5975 gcc_assert (REG_P (temp
));
5976 highest_index_to_load_reg
=
5977 expand_binop (Pmode
, sub_optab
, GEN_INT (15), temp
,
5978 highest_index_to_load_reg
, 1, OPTAB_DIRECT
);
5979 gcc_assert (REG_P (highest_index_to_load_reg
));
5980 emit_insn (gen_vllv16qi (str_reg
,
5981 convert_to_mode (SImode
, highest_index_to_load_reg
, 1),
5982 gen_rtx_MEM (BLKmode
, str_addr_base_reg
)));
5984 into_loop_label
= gen_label_rtx ();
5985 s390_emit_jump (into_loop_label
, NULL_RTX
);
5989 emit_label (is_aligned_label
);
5990 LABEL_NUSES (is_aligned_label
) = INTVAL (alignment
) < 16 ? 2 : 1;
5992 /* Reaching this point we are only performing 16 bytes aligned
5994 emit_move_insn (highest_index_to_load_reg
, GEN_INT (15));
5996 emit_label (loop_start_label
);
5997 LABEL_NUSES (loop_start_label
) = 1;
5999 /* Load 16 bytes of the string into VR. */
6000 mem
= gen_rtx_MEM (V16QImode
,
6001 gen_rtx_PLUS (Pmode
, str_idx_reg
, str_addr_base_reg
));
6002 set_mem_align (mem
, 128);
6003 emit_move_insn (str_reg
, mem
);
6004 if (into_loop_label
!= NULL_RTX
)
6006 emit_label (into_loop_label
);
6007 LABEL_NUSES (into_loop_label
) = 1;
6010 /* Increment string index by 16 bytes. */
6011 expand_binop (Pmode
, add_optab
, str_idx_reg
, GEN_INT (16),
6012 str_idx_reg
, 1, OPTAB_DIRECT
);
6014 emit_insn (gen_vec_vfenesv16qi (result_reg
, str_reg
, str_reg
,
6015 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
6017 add_int_reg_note (s390_emit_ccraw_jump (8, NE
, loop_start_label
),
6019 profile_probability::very_likely ().to_reg_br_prob_note ());
6020 emit_insn (gen_vec_extractv16qiqi (len
, result_reg
, GEN_INT (7)));
6022 /* If the string pointer wasn't aligned we have loaded less then 16
6023 bytes and the remaining bytes got filled with zeros (by vll).
6024 Now we have to check whether the resulting index lies within the
6025 bytes actually part of the string. */
6027 cond
= s390_emit_compare (GT
, convert_to_mode (Pmode
, len
, 1),
6028 highest_index_to_load_reg
);
6029 s390_load_address (highest_index_to_load_reg
,
6030 gen_rtx_PLUS (Pmode
, highest_index_to_load_reg
,
6033 emit_insn (gen_movdicc (str_idx_reg
, cond
,
6034 highest_index_to_load_reg
, str_idx_reg
));
6036 emit_insn (gen_movsicc (str_idx_reg
, cond
,
6037 highest_index_to_load_reg
, str_idx_reg
));
6039 add_reg_br_prob_note (s390_emit_jump (is_aligned_label
, cond
),
6040 profile_probability::very_unlikely ());
6042 expand_binop (Pmode
, add_optab
, str_idx_reg
,
6043 GEN_INT (-16), str_idx_reg
, 1, OPTAB_DIRECT
);
6044 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6046 temp
= expand_binop (Pmode
, add_optab
, str_idx_reg
,
6047 convert_to_mode (Pmode
, len
, 1),
6048 target
, 1, OPTAB_DIRECT
);
6050 emit_move_insn (target
, temp
);
6054 s390_expand_vec_movstr (rtx result
, rtx dst
, rtx src
)
6056 rtx temp
= gen_reg_rtx (Pmode
);
6057 rtx src_addr
= XEXP (src
, 0);
6058 rtx dst_addr
= XEXP (dst
, 0);
6059 rtx src_addr_reg
= gen_reg_rtx (Pmode
);
6060 rtx dst_addr_reg
= gen_reg_rtx (Pmode
);
6061 rtx offset
= gen_reg_rtx (Pmode
);
6062 rtx vsrc
= gen_reg_rtx (V16QImode
);
6063 rtx vpos
= gen_reg_rtx (V16QImode
);
6064 rtx loadlen
= gen_reg_rtx (SImode
);
6065 rtx gpos_qi
= gen_reg_rtx(QImode
);
6066 rtx gpos
= gen_reg_rtx (SImode
);
6067 rtx done_label
= gen_label_rtx ();
6068 rtx loop_label
= gen_label_rtx ();
6069 rtx exit_label
= gen_label_rtx ();
6070 rtx full_label
= gen_label_rtx ();
6072 /* Perform a quick check for string ending on the first up to 16
6073 bytes and exit early if successful. */
6075 emit_insn (gen_vlbb (vsrc
, src
, GEN_INT (6)));
6076 emit_insn (gen_lcbb (loadlen
, src_addr
, GEN_INT (6)));
6077 emit_insn (gen_vfenezv16qi (vpos
, vsrc
, vsrc
));
6078 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6079 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6080 /* gpos is the byte index if a zero was found and 16 otherwise.
6081 So if it is lower than the loaded bytes we have a hit. */
6082 emit_cmp_and_jump_insns (gpos
, loadlen
, GE
, NULL_RTX
, SImode
, 1,
6084 emit_insn (gen_vstlv16qi (vsrc
, gpos
, dst
));
6086 force_expand_binop (Pmode
, add_optab
, dst_addr
, gpos
, result
,
6088 emit_jump (exit_label
);
6091 emit_label (full_label
);
6092 LABEL_NUSES (full_label
) = 1;
6094 /* Calculate `offset' so that src + offset points to the last byte
6095 before 16 byte alignment. */
6097 /* temp = src_addr & 0xf */
6098 force_expand_binop (Pmode
, and_optab
, src_addr
, GEN_INT (15), temp
,
6101 /* offset = 0xf - temp */
6102 emit_move_insn (offset
, GEN_INT (15));
6103 force_expand_binop (Pmode
, sub_optab
, offset
, temp
, offset
,
6106 /* Store `offset' bytes in the dstination string. The quick check
6107 has loaded at least `offset' bytes into vsrc. */
6109 emit_insn (gen_vstlv16qi (vsrc
, gen_lowpart (SImode
, offset
), dst
));
6111 /* Advance to the next byte to be loaded. */
6112 force_expand_binop (Pmode
, add_optab
, offset
, const1_rtx
, offset
,
6115 /* Make sure the addresses are single regs which can be used as a
6117 emit_move_insn (src_addr_reg
, src_addr
);
6118 emit_move_insn (dst_addr_reg
, dst_addr
);
6122 emit_label (loop_label
);
6123 LABEL_NUSES (loop_label
) = 1;
6125 emit_move_insn (vsrc
,
6126 gen_rtx_MEM (V16QImode
,
6127 gen_rtx_PLUS (Pmode
, src_addr_reg
, offset
)));
6129 emit_insn (gen_vec_vfenesv16qi (vpos
, vsrc
, vsrc
,
6130 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
6131 add_int_reg_note (s390_emit_ccraw_jump (8, EQ
, done_label
),
6132 REG_BR_PROB
, profile_probability::very_unlikely ()
6133 .to_reg_br_prob_note ());
6135 emit_move_insn (gen_rtx_MEM (V16QImode
,
6136 gen_rtx_PLUS (Pmode
, dst_addr_reg
, offset
)),
6139 force_expand_binop (Pmode
, add_optab
, offset
, GEN_INT (16),
6140 offset
, 1, OPTAB_DIRECT
);
6142 emit_jump (loop_label
);
6147 /* We are done. Add the offset of the zero character to the dst_addr
6148 pointer to get the result. */
6150 emit_label (done_label
);
6151 LABEL_NUSES (done_label
) = 1;
6153 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, offset
, dst_addr_reg
,
6156 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6157 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6159 emit_insn (gen_vstlv16qi (vsrc
, gpos
, gen_rtx_MEM (BLKmode
, dst_addr_reg
)));
6161 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, gpos
, result
,
6166 emit_label (exit_label
);
6167 LABEL_NUSES (exit_label
) = 1;
6171 /* Expand conditional increment or decrement using alc/slb instructions.
6172 Should generate code setting DST to either SRC or SRC + INCREMENT,
6173 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6174 Returns true if successful, false otherwise.
6176 That makes it possible to implement some if-constructs without jumps e.g.:
6177 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6178 unsigned int a, b, c;
6179 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6180 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6181 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6182 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6184 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6185 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6186 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6187 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6188 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6191 s390_expand_addcc (enum rtx_code cmp_code
, rtx cmp_op0
, rtx cmp_op1
,
6192 rtx dst
, rtx src
, rtx increment
)
6194 machine_mode cmp_mode
;
6195 machine_mode cc_mode
;
6201 if ((GET_MODE (cmp_op0
) == SImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6202 && (GET_MODE (cmp_op1
) == SImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6204 else if ((GET_MODE (cmp_op0
) == DImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6205 && (GET_MODE (cmp_op1
) == DImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6210 /* Try ADD LOGICAL WITH CARRY. */
6211 if (increment
== const1_rtx
)
6213 /* Determine CC mode to use. */
6214 if (cmp_code
== EQ
|| cmp_code
== NE
)
6216 if (cmp_op1
!= const0_rtx
)
6218 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6219 NULL_RTX
, 0, OPTAB_WIDEN
);
6220 cmp_op1
= const0_rtx
;
6223 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6226 if (cmp_code
== LTU
|| cmp_code
== LEU
)
6231 cmp_code
= swap_condition (cmp_code
);
6248 /* Emit comparison instruction pattern. */
6249 if (!register_operand (cmp_op0
, cmp_mode
))
6250 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6252 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6253 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6254 /* We use insn_invalid_p here to add clobbers if required. */
6255 ret
= insn_invalid_p (emit_insn (insn
), false);
6258 /* Emit ALC instruction pattern. */
6259 op_res
= gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6260 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6263 if (src
!= const0_rtx
)
6265 if (!register_operand (src
, GET_MODE (dst
)))
6266 src
= force_reg (GET_MODE (dst
), src
);
6268 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, src
);
6269 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, const0_rtx
);
6272 p
= rtvec_alloc (2);
6274 gen_rtx_SET (dst
, op_res
);
6276 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6277 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6282 /* Try SUBTRACT LOGICAL WITH BORROW. */
6283 if (increment
== constm1_rtx
)
6285 /* Determine CC mode to use. */
6286 if (cmp_code
== EQ
|| cmp_code
== NE
)
6288 if (cmp_op1
!= const0_rtx
)
6290 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6291 NULL_RTX
, 0, OPTAB_WIDEN
);
6292 cmp_op1
= const0_rtx
;
6295 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6298 if (cmp_code
== GTU
|| cmp_code
== GEU
)
6303 cmp_code
= swap_condition (cmp_code
);
6320 /* Emit comparison instruction pattern. */
6321 if (!register_operand (cmp_op0
, cmp_mode
))
6322 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6324 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6325 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6326 /* We use insn_invalid_p here to add clobbers if required. */
6327 ret
= insn_invalid_p (emit_insn (insn
), false);
6330 /* Emit SLB instruction pattern. */
6331 if (!register_operand (src
, GET_MODE (dst
)))
6332 src
= force_reg (GET_MODE (dst
), src
);
6334 op_res
= gen_rtx_MINUS (GET_MODE (dst
),
6335 gen_rtx_MINUS (GET_MODE (dst
), src
, const0_rtx
),
6336 gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6337 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6339 p
= rtvec_alloc (2);
6341 gen_rtx_SET (dst
, op_res
);
6343 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6344 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6352 /* Expand code for the insv template. Return true if successful. */
6355 s390_expand_insv (rtx dest
, rtx op1
, rtx op2
, rtx src
)
6357 int bitsize
= INTVAL (op1
);
6358 int bitpos
= INTVAL (op2
);
6359 machine_mode mode
= GET_MODE (dest
);
6361 int smode_bsize
, mode_bsize
;
6364 if (bitsize
+ bitpos
> GET_MODE_BITSIZE (mode
))
6367 /* Generate INSERT IMMEDIATE (IILL et al). */
6368 /* (set (ze (reg)) (const_int)). */
6370 && register_operand (dest
, word_mode
)
6371 && (bitpos
% 16) == 0
6372 && (bitsize
% 16) == 0
6373 && const_int_operand (src
, VOIDmode
))
6375 HOST_WIDE_INT val
= INTVAL (src
);
6376 int regpos
= bitpos
+ bitsize
;
6378 while (regpos
> bitpos
)
6380 machine_mode putmode
;
6383 if (TARGET_EXTIMM
&& (regpos
% 32 == 0) && (regpos
>= bitpos
+ 32))
6388 putsize
= GET_MODE_BITSIZE (putmode
);
6390 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6393 gen_int_mode (val
, putmode
));
6396 gcc_assert (regpos
== bitpos
);
6400 smode
= smallest_int_mode_for_size (bitsize
);
6401 smode_bsize
= GET_MODE_BITSIZE (smode
);
6402 mode_bsize
= GET_MODE_BITSIZE (mode
);
6404 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6406 && (bitsize
% BITS_PER_UNIT
) == 0
6408 && (register_operand (src
, word_mode
)
6409 || const_int_operand (src
, VOIDmode
)))
6411 /* Emit standard pattern if possible. */
6412 if (smode_bsize
== bitsize
)
6414 emit_move_insn (adjust_address (dest
, smode
, 0),
6415 gen_lowpart (smode
, src
));
6419 /* (set (ze (mem)) (const_int)). */
6420 else if (const_int_operand (src
, VOIDmode
))
6422 int size
= bitsize
/ BITS_PER_UNIT
;
6423 rtx src_mem
= adjust_address (force_const_mem (word_mode
, src
),
6425 UNITS_PER_WORD
- size
);
6427 dest
= adjust_address (dest
, BLKmode
, 0);
6428 set_mem_size (dest
, size
);
6429 s390_expand_cpymem (dest
, src_mem
, GEN_INT (size
));
6433 /* (set (ze (mem)) (reg)). */
6434 else if (register_operand (src
, word_mode
))
6437 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
, op1
,
6441 /* Emit st,stcmh sequence. */
6442 int stcmh_width
= bitsize
- 32;
6443 int size
= stcmh_width
/ BITS_PER_UNIT
;
6445 emit_move_insn (adjust_address (dest
, SImode
, size
),
6446 gen_lowpart (SImode
, src
));
6447 set_mem_size (dest
, size
);
6448 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6449 GEN_INT (stcmh_width
),
6451 gen_rtx_LSHIFTRT (word_mode
, src
, GEN_INT (32)));
6457 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6458 if ((bitpos
% BITS_PER_UNIT
) == 0
6459 && (bitsize
% BITS_PER_UNIT
) == 0
6460 && (bitpos
& 32) == ((bitpos
+ bitsize
- 1) & 32)
6462 && (mode
== DImode
|| mode
== SImode
)
6463 && register_operand (dest
, mode
))
6465 /* Emit a strict_low_part pattern if possible. */
6466 if (smode_bsize
== bitsize
&& bitpos
== mode_bsize
- smode_bsize
)
6468 rtx low_dest
= gen_lowpart (smode
, dest
);
6469 rtx low_src
= gen_lowpart (smode
, src
);
6473 case E_QImode
: emit_insn (gen_movstrictqi (low_dest
, low_src
)); return true;
6474 case E_HImode
: emit_insn (gen_movstricthi (low_dest
, low_src
)); return true;
6475 case E_SImode
: emit_insn (gen_movstrictsi (low_dest
, low_src
)); return true;
6480 /* ??? There are more powerful versions of ICM that are not
6481 completely represented in the md file. */
6484 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6485 if (TARGET_Z10
&& (mode
== DImode
|| mode
== SImode
))
6487 machine_mode mode_s
= GET_MODE (src
);
6489 if (CONSTANT_P (src
))
6491 /* For constant zero values the representation with AND
6492 appears to be folded in more situations than the (set
6493 (zero_extract) ...).
6494 We only do this when the start and end of the bitfield
6495 remain in the same SImode chunk. That way nihf or nilf
6497 The AND patterns might still generate a risbg for this. */
6498 if (src
== const0_rtx
&& bitpos
/ 32 == (bitpos
+ bitsize
- 1) / 32)
6501 src
= force_reg (mode
, src
);
6503 else if (mode_s
!= mode
)
6505 gcc_assert (GET_MODE_BITSIZE (mode_s
) >= bitsize
);
6506 src
= force_reg (mode_s
, src
);
6507 src
= gen_lowpart (mode
, src
);
6510 op
= gen_rtx_ZERO_EXTRACT (mode
, dest
, op1
, op2
),
6511 op
= gen_rtx_SET (op
, src
);
6515 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6516 op
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
));
6526 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6527 register that holds VAL of mode MODE shifted by COUNT bits. */
6530 s390_expand_mask_and_shift (rtx val
, machine_mode mode
, rtx count
)
6532 val
= expand_simple_binop (SImode
, AND
, val
, GEN_INT (GET_MODE_MASK (mode
)),
6533 NULL_RTX
, 1, OPTAB_DIRECT
);
6534 return expand_simple_binop (SImode
, ASHIFT
, val
, count
,
6535 NULL_RTX
, 1, OPTAB_DIRECT
);
6538 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6539 the result in TARGET. */
6542 s390_expand_vec_compare (rtx target
, enum rtx_code cond
,
6543 rtx cmp_op1
, rtx cmp_op2
)
6545 machine_mode mode
= GET_MODE (target
);
6546 bool neg_p
= false, swap_p
= false;
6549 if (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_VECTOR_FLOAT
)
6553 /* NE a != b -> !(a == b) */
6554 case NE
: cond
= EQ
; neg_p
= true; break;
6556 emit_insn (gen_vec_cmpungt (target
, cmp_op1
, cmp_op2
));
6559 emit_insn (gen_vec_cmpunge (target
, cmp_op1
, cmp_op2
));
6561 case LE
: cond
= GE
; swap_p
= true; break;
6562 /* UNLE: (a u<= b) -> (b u>= a). */
6564 emit_insn (gen_vec_cmpunge (target
, cmp_op2
, cmp_op1
));
6566 /* LT: a < b -> b > a */
6567 case LT
: cond
= GT
; swap_p
= true; break;
6568 /* UNLT: (a u< b) -> (b u> a). */
6570 emit_insn (gen_vec_cmpungt (target
, cmp_op2
, cmp_op1
));
6573 emit_insn (gen_vec_cmpuneq (target
, cmp_op1
, cmp_op2
));
6576 emit_insn (gen_vec_cmpltgt (target
, cmp_op1
, cmp_op2
));
6579 emit_insn (gen_vec_cmpordered (target
, cmp_op1
, cmp_op2
));
6582 emit_insn (gen_vec_cmpunordered (target
, cmp_op1
, cmp_op2
));
6591 /* NE: a != b -> !(a == b) */
6592 case NE
: cond
= EQ
; neg_p
= true; break;
6593 /* GE: a >= b -> !(b > a) */
6594 case GE
: cond
= GT
; neg_p
= true; swap_p
= true; break;
6595 /* GEU: a >= b -> !(b > a) */
6596 case GEU
: cond
= GTU
; neg_p
= true; swap_p
= true; break;
6597 /* LE: a <= b -> !(a > b) */
6598 case LE
: cond
= GT
; neg_p
= true; break;
6599 /* LEU: a <= b -> !(a > b) */
6600 case LEU
: cond
= GTU
; neg_p
= true; break;
6601 /* LT: a < b -> b > a */
6602 case LT
: cond
= GT
; swap_p
= true; break;
6603 /* LTU: a < b -> b > a */
6604 case LTU
: cond
= GTU
; swap_p
= true; break;
6611 tmp
= cmp_op1
; cmp_op1
= cmp_op2
; cmp_op2
= tmp
;
6614 emit_insn (gen_rtx_SET (target
, gen_rtx_fmt_ee (cond
,
6616 cmp_op1
, cmp_op2
)));
6618 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (mode
, target
)));
6621 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6622 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6623 elements in CMP1 and CMP2 fulfill the comparison.
6624 This function is only used to emit patterns for the vx builtins and
6625 therefore only handles comparison codes required by the
6628 s390_expand_vec_compare_cc (rtx target
, enum rtx_code code
,
6629 rtx cmp1
, rtx cmp2
, bool all_p
)
6631 machine_mode cc_producer_mode
, cc_consumer_mode
, scratch_mode
;
6632 rtx tmp_reg
= gen_reg_rtx (SImode
);
6633 bool swap_p
= false;
6635 if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_INT
)
6641 cc_producer_mode
= CCVEQmode
;
6645 code
= swap_condition (code
);
6650 cc_producer_mode
= CCVIHmode
;
6654 code
= swap_condition (code
);
6659 cc_producer_mode
= CCVIHUmode
;
6665 scratch_mode
= GET_MODE (cmp1
);
6666 /* These codes represent inverted CC interpretations. Inverting
6667 an ALL CC mode results in an ANY CC mode and the other way
6668 around. Invert the all_p flag here to compensate for
6670 if (code
== NE
|| code
== LE
|| code
== LEU
)
6673 cc_consumer_mode
= all_p
? CCVIALLmode
: CCVIANYmode
;
6675 else if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_FLOAT
)
6681 case EQ
: cc_producer_mode
= CCVEQmode
; break;
6682 case NE
: cc_producer_mode
= CCVEQmode
; inv_p
= true; break;
6683 case GT
: cc_producer_mode
= CCVFHmode
; break;
6684 case GE
: cc_producer_mode
= CCVFHEmode
; break;
6685 case UNLE
: cc_producer_mode
= CCVFHmode
; inv_p
= true; break;
6686 case UNLT
: cc_producer_mode
= CCVFHEmode
; inv_p
= true; break;
6687 case LT
: cc_producer_mode
= CCVFHmode
; code
= GT
; swap_p
= true; break;
6688 case LE
: cc_producer_mode
= CCVFHEmode
; code
= GE
; swap_p
= true; break;
6689 default: gcc_unreachable ();
6691 scratch_mode
= related_int_vector_mode (GET_MODE (cmp1
)).require ();
6696 cc_consumer_mode
= all_p
? CCVFALLmode
: CCVFANYmode
;
6708 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6709 gen_rtvec (2, gen_rtx_SET (
6710 gen_rtx_REG (cc_producer_mode
, CC_REGNUM
),
6711 gen_rtx_COMPARE (cc_producer_mode
, cmp1
, cmp2
)),
6712 gen_rtx_CLOBBER (VOIDmode
,
6713 gen_rtx_SCRATCH (scratch_mode
)))));
6714 emit_move_insn (target
, const0_rtx
);
6715 emit_move_insn (tmp_reg
, const1_rtx
);
6717 emit_move_insn (target
,
6718 gen_rtx_IF_THEN_ELSE (SImode
,
6719 gen_rtx_fmt_ee (code
, VOIDmode
,
6720 gen_rtx_REG (cc_consumer_mode
, CC_REGNUM
),
6725 /* Invert the comparison CODE applied to a CC mode. This is only safe
6726 if we know whether there result was created by a floating point
6727 compare or not. For the CCV modes this is encoded as part of the
6730 s390_reverse_condition (machine_mode mode
, enum rtx_code code
)
6732 /* Reversal of FP compares takes care -- an ordered compare
6733 becomes an unordered compare and vice versa. */
6734 if (mode
== CCVFALLmode
|| mode
== CCVFANYmode
|| mode
== CCSFPSmode
)
6735 return reverse_condition_maybe_unordered (code
);
6736 else if (mode
== CCVIALLmode
|| mode
== CCVIANYmode
)
6737 return reverse_condition (code
);
6742 /* Generate a vector comparison expression loading either elements of
6743 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6747 s390_expand_vcond (rtx target
, rtx then
, rtx els
,
6748 enum rtx_code cond
, rtx cmp_op1
, rtx cmp_op2
)
6751 machine_mode result_mode
;
6754 machine_mode target_mode
= GET_MODE (target
);
6755 machine_mode cmp_mode
= GET_MODE (cmp_op1
);
6756 rtx op
= (cond
== LT
) ? els
: then
;
6758 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6759 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6760 for short and byte (x >> 15 and x >> 7 respectively). */
6761 if ((cond
== LT
|| cond
== GE
)
6762 && target_mode
== cmp_mode
6763 && cmp_op2
== CONST0_RTX (cmp_mode
)
6764 && op
== CONST0_RTX (target_mode
)
6765 && s390_vector_mode_supported_p (target_mode
)
6766 && GET_MODE_CLASS (target_mode
) == MODE_VECTOR_INT
)
6768 rtx negop
= (cond
== LT
) ? then
: els
;
6770 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (target_mode
)) - 1;
6772 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6773 if (negop
== CONST1_RTX (target_mode
))
6775 rtx res
= expand_simple_binop (cmp_mode
, LSHIFTRT
, cmp_op1
,
6776 GEN_INT (shift
), target
,
6779 emit_move_insn (target
, res
);
6783 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6784 else if (all_ones_operand (negop
, target_mode
))
6786 rtx res
= expand_simple_binop (cmp_mode
, ASHIFTRT
, cmp_op1
,
6787 GEN_INT (shift
), target
,
6790 emit_move_insn (target
, res
);
6795 /* We always use an integral type vector to hold the comparison
6797 result_mode
= related_int_vector_mode (cmp_mode
).require ();
6798 result_target
= gen_reg_rtx (result_mode
);
6800 /* We allow vector immediates as comparison operands that
6801 can be handled by the optimization above but not by the
6802 following code. Hence, force them into registers here. */
6803 if (!REG_P (cmp_op1
))
6804 cmp_op1
= force_reg (GET_MODE (cmp_op1
), cmp_op1
);
6806 if (!REG_P (cmp_op2
))
6807 cmp_op2
= force_reg (GET_MODE (cmp_op2
), cmp_op2
);
6809 s390_expand_vec_compare (result_target
, cond
,
6812 /* If the results are supposed to be either -1 or 0 we are done
6813 since this is what our compare instructions generate anyway. */
6814 if (all_ones_operand (then
, GET_MODE (then
))
6815 && const0_operand (els
, GET_MODE (els
)))
6817 emit_move_insn (target
, gen_rtx_SUBREG (target_mode
,
6822 /* Otherwise we will do a vsel afterwards. */
6823 /* This gets triggered e.g.
6824 with gcc.c-torture/compile/pr53410-1.c */
6826 then
= force_reg (target_mode
, then
);
6829 els
= force_reg (target_mode
, els
);
6831 tmp
= gen_rtx_fmt_ee (EQ
, VOIDmode
,
6833 CONST0_RTX (result_mode
));
6835 /* We compared the result against zero above so we have to swap then
6837 tmp
= gen_rtx_IF_THEN_ELSE (target_mode
, tmp
, els
, then
);
6839 gcc_assert (target_mode
== GET_MODE (then
));
6840 emit_insn (gen_rtx_SET (target
, tmp
));
6843 /* Emit the RTX necessary to initialize the vector TARGET with values
6846 s390_expand_vec_init (rtx target
, rtx vals
)
6848 machine_mode mode
= GET_MODE (target
);
6849 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6850 int n_elts
= GET_MODE_NUNITS (mode
);
6851 bool all_same
= true, all_regs
= true, all_const_int
= true;
6855 for (i
= 0; i
< n_elts
; ++i
)
6857 x
= XVECEXP (vals
, 0, i
);
6859 if (!CONST_INT_P (x
))
6860 all_const_int
= false;
6862 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6869 /* Use vector gen mask or vector gen byte mask if possible. */
6870 if (all_same
&& all_const_int
)
6872 rtx vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6873 if (XVECEXP (vals
, 0, 0) == const0_rtx
6874 || s390_contiguous_bitmask_vector_p (vec
, NULL
, NULL
)
6875 || s390_bytemask_vector_p (vec
, NULL
))
6877 emit_insn (gen_rtx_SET (target
, vec
));
6882 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6885 rtx elem
= XVECEXP (vals
, 0, 0);
6887 /* vec_splats accepts general_operand as source. */
6888 if (!general_operand (elem
, GET_MODE (elem
)))
6889 elem
= force_reg (inner_mode
, elem
);
6891 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, elem
)));
6898 && GET_MODE_SIZE (inner_mode
) == 8)
6900 /* Use vector load pair. */
6901 emit_insn (gen_rtx_SET (target
,
6902 gen_rtx_VEC_CONCAT (mode
,
6903 XVECEXP (vals
, 0, 0),
6904 XVECEXP (vals
, 0, 1))));
6908 /* Use vector load logical element and zero. */
6909 if (TARGET_VXE
&& (mode
== V4SImode
|| mode
== V4SFmode
))
6913 x
= XVECEXP (vals
, 0, 0);
6914 if (memory_operand (x
, inner_mode
))
6916 for (i
= 1; i
< n_elts
; ++i
)
6917 found
= found
&& XVECEXP (vals
, 0, i
) == const0_rtx
;
6921 machine_mode half_mode
= (inner_mode
== SFmode
6922 ? V2SFmode
: V2SImode
);
6923 emit_insn (gen_rtx_SET (target
,
6924 gen_rtx_VEC_CONCAT (mode
,
6925 gen_rtx_VEC_CONCAT (half_mode
,
6928 gen_rtx_VEC_CONCAT (half_mode
,
6936 /* We are about to set the vector elements one by one. Zero out the
6937 full register first in order to help the data flow framework to
6938 detect it as full VR set. */
6939 emit_insn (gen_rtx_SET (target
, CONST0_RTX (mode
)));
6941 /* Unfortunately the vec_init expander is not allowed to fail. So
6942 we have to implement the fallback ourselves. */
6943 for (i
= 0; i
< n_elts
; i
++)
6945 rtx elem
= XVECEXP (vals
, 0, i
);
6946 if (!general_operand (elem
, GET_MODE (elem
)))
6947 elem
= force_reg (inner_mode
, elem
);
6949 emit_insn (gen_rtx_SET (target
,
6950 gen_rtx_UNSPEC (mode
,
6952 GEN_INT (i
), target
),
6957 /* Emit a vector constant that contains 1s in each element's sign bit position
6958 and 0s in other positions. MODE is the desired constant's mode. */
6960 s390_build_signbit_mask (machine_mode mode
)
6962 /* Generate the integral element mask value. */
6963 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6964 int inner_bitsize
= GET_MODE_BITSIZE (inner_mode
);
6965 wide_int mask_val
= wi::set_bit_in_zero (inner_bitsize
- 1, inner_bitsize
);
6967 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
6968 value to the desired mode. */
6969 machine_mode int_mode
= related_int_vector_mode (mode
).require ();
6970 rtx mask
= immed_wide_int_const (mask_val
, GET_MODE_INNER (int_mode
));
6971 mask
= gen_lowpart (inner_mode
, mask
);
6973 /* Emit the vector mask rtx by mode the element mask rtx. */
6974 int nunits
= GET_MODE_NUNITS (mode
);
6975 rtvec v
= rtvec_alloc (nunits
);
6976 for (int i
= 0; i
< nunits
; i
++)
6977 RTVEC_ELT (v
, i
) = mask
;
6978 return gen_rtx_CONST_VECTOR (mode
, v
);
6981 /* Structure to hold the initial parameters for a compare_and_swap operation
6982 in HImode and QImode. */
6984 struct alignment_context
6986 rtx memsi
; /* SI aligned memory location. */
6987 rtx shift
; /* Bit offset with regard to lsb. */
6988 rtx modemask
; /* Mask of the HQImode shifted by SHIFT bits. */
6989 rtx modemaski
; /* ~modemask */
6990 bool aligned
; /* True if memory is aligned, false else. */
6993 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6994 structure AC for transparent simplifying, if the memory alignment is known
6995 to be at least 32bit. MEM is the memory location for the actual operation
6996 and MODE its mode. */
6999 init_alignment_context (struct alignment_context
*ac
, rtx mem
,
7002 ac
->shift
= GEN_INT (GET_MODE_SIZE (SImode
) - GET_MODE_SIZE (mode
));
7003 ac
->aligned
= (MEM_ALIGN (mem
) >= GET_MODE_BITSIZE (SImode
));
7006 ac
->memsi
= adjust_address (mem
, SImode
, 0); /* Memory is aligned. */
7009 /* Alignment is unknown. */
7010 rtx byteoffset
, addr
, align
;
7012 /* Force the address into a register. */
7013 addr
= force_reg (Pmode
, XEXP (mem
, 0));
7015 /* Align it to SImode. */
7016 align
= expand_simple_binop (Pmode
, AND
, addr
,
7017 GEN_INT (-GET_MODE_SIZE (SImode
)),
7018 NULL_RTX
, 1, OPTAB_DIRECT
);
7020 ac
->memsi
= gen_rtx_MEM (SImode
, align
);
7021 MEM_VOLATILE_P (ac
->memsi
) = MEM_VOLATILE_P (mem
);
7022 set_mem_alias_set (ac
->memsi
, ALIAS_SET_MEMORY_BARRIER
);
7023 set_mem_align (ac
->memsi
, GET_MODE_BITSIZE (SImode
));
7025 /* Calculate shiftcount. */
7026 byteoffset
= expand_simple_binop (Pmode
, AND
, addr
,
7027 GEN_INT (GET_MODE_SIZE (SImode
) - 1),
7028 NULL_RTX
, 1, OPTAB_DIRECT
);
7029 /* As we already have some offset, evaluate the remaining distance. */
7030 ac
->shift
= expand_simple_binop (SImode
, MINUS
, ac
->shift
, byteoffset
,
7031 NULL_RTX
, 1, OPTAB_DIRECT
);
7034 /* Shift is the byte count, but we need the bitcount. */
7035 ac
->shift
= expand_simple_binop (SImode
, ASHIFT
, ac
->shift
, GEN_INT (3),
7036 NULL_RTX
, 1, OPTAB_DIRECT
);
7038 /* Calculate masks. */
7039 ac
->modemask
= expand_simple_binop (SImode
, ASHIFT
,
7040 GEN_INT (GET_MODE_MASK (mode
)),
7041 ac
->shift
, NULL_RTX
, 1, OPTAB_DIRECT
);
7042 ac
->modemaski
= expand_simple_unop (SImode
, NOT
, ac
->modemask
,
7046 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7047 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7048 perform the merge in SEQ2. */
7051 s390_two_part_insv (struct alignment_context
*ac
, rtx
*seq1
, rtx
*seq2
,
7052 machine_mode mode
, rtx val
, rtx ins
)
7059 tmp
= copy_to_mode_reg (SImode
, val
);
7060 if (s390_expand_insv (tmp
, GEN_INT (GET_MODE_BITSIZE (mode
)),
7064 *seq2
= get_insns ();
7071 /* Failed to use insv. Generate a two part shift and mask. */
7073 tmp
= s390_expand_mask_and_shift (ins
, mode
, ac
->shift
);
7074 *seq1
= get_insns ();
7078 tmp
= expand_simple_binop (SImode
, IOR
, tmp
, val
, NULL_RTX
, 1, OPTAB_DIRECT
);
7079 *seq2
= get_insns ();
7085 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7086 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7087 value to set if CMP == MEM. */
7090 s390_expand_cs_hqi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7091 rtx cmp
, rtx new_rtx
, bool is_weak
)
7093 struct alignment_context ac
;
7094 rtx cmpv
, newv
, val
, cc
, seq0
, seq1
, seq2
, seq3
;
7095 rtx res
= gen_reg_rtx (SImode
);
7096 rtx_code_label
*csloop
= NULL
, *csend
= NULL
;
7098 gcc_assert (MEM_P (mem
));
7100 init_alignment_context (&ac
, mem
, mode
);
7102 /* Load full word. Subsequent loads are performed by CS. */
7103 val
= expand_simple_binop (SImode
, AND
, ac
.memsi
, ac
.modemaski
,
7104 NULL_RTX
, 1, OPTAB_DIRECT
);
7106 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7107 possible, we try to use insv to make this happen efficiently. If
7108 that fails we'll generate code both inside and outside the loop. */
7109 cmpv
= s390_two_part_insv (&ac
, &seq0
, &seq2
, mode
, val
, cmp
);
7110 newv
= s390_two_part_insv (&ac
, &seq1
, &seq3
, mode
, val
, new_rtx
);
7117 /* Start CS loop. */
7120 /* Begin assuming success. */
7121 emit_move_insn (btarget
, const1_rtx
);
7123 csloop
= gen_label_rtx ();
7124 csend
= gen_label_rtx ();
7125 emit_label (csloop
);
7128 /* val = "<mem>00..0<mem>"
7129 * cmp = "00..0<cmp>00..0"
7130 * new = "00..0<new>00..0"
7136 cc
= s390_emit_compare_and_swap (EQ
, res
, ac
.memsi
, cmpv
, newv
, CCZ1mode
);
7138 emit_insn (gen_cstorecc4 (btarget
, cc
, XEXP (cc
, 0), XEXP (cc
, 1)));
7143 /* Jump to end if we're done (likely?). */
7144 s390_emit_jump (csend
, cc
);
7146 /* Check for changes outside mode, and loop internal if so.
7147 Arrange the moves so that the compare is adjacent to the
7148 branch so that we can generate CRJ. */
7149 tmp
= copy_to_reg (val
);
7150 force_expand_binop (SImode
, and_optab
, res
, ac
.modemaski
, val
,
7152 cc
= s390_emit_compare (NE
, val
, tmp
);
7153 s390_emit_jump (csloop
, cc
);
7156 emit_move_insn (btarget
, const0_rtx
);
7160 /* Return the correct part of the bitfield. */
7161 convert_move (vtarget
, expand_simple_binop (SImode
, LSHIFTRT
, res
, ac
.shift
,
7162 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7165 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7167 s390_expand_cs_tdsi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7168 rtx cmp
, rtx new_rtx
, bool is_weak
)
7170 rtx output
= vtarget
;
7171 rtx_code_label
*skip_cs_label
= NULL
;
7172 bool do_const_opt
= false;
7174 if (!register_operand (output
, mode
))
7175 output
= gen_reg_rtx (mode
);
7177 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7178 with the constant first and skip the compare_and_swap because its very
7179 expensive and likely to fail anyway.
7180 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7181 cause spurious in that case.
7182 Note 2: It may be useful to do this also for non-constant INPUT.
7183 Note 3: Currently only targets with "load on condition" are supported
7184 (z196 and newer). */
7187 && (mode
== SImode
|| mode
== DImode
))
7188 do_const_opt
= (is_weak
&& CONST_INT_P (cmp
));
7192 rtx cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7194 skip_cs_label
= gen_label_rtx ();
7195 emit_move_insn (btarget
, const0_rtx
);
7196 if (CONST_INT_P (cmp
) && INTVAL (cmp
) == 0)
7198 rtvec lt
= rtvec_alloc (2);
7200 /* Load-and-test + conditional jump. */
7202 = gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, mem
, cmp
));
7203 RTVEC_ELT (lt
, 1) = gen_rtx_SET (output
, mem
);
7204 emit_insn (gen_rtx_PARALLEL (VOIDmode
, lt
));
7208 emit_move_insn (output
, mem
);
7209 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, output
, cmp
)));
7211 s390_emit_jump (skip_cs_label
, gen_rtx_NE (VOIDmode
, cc
, const0_rtx
));
7212 add_reg_br_prob_note (get_last_insn (),
7213 profile_probability::very_unlikely ());
7214 /* If the jump is not taken, OUTPUT is the expected value. */
7216 /* Reload newval to a register manually, *after* the compare and jump
7217 above. Otherwise Reload might place it before the jump. */
7220 cmp
= force_reg (mode
, cmp
);
7221 new_rtx
= force_reg (mode
, new_rtx
);
7222 s390_emit_compare_and_swap (EQ
, output
, mem
, cmp
, new_rtx
,
7223 (do_const_opt
) ? CCZmode
: CCZ1mode
);
7224 if (skip_cs_label
!= NULL
)
7225 emit_label (skip_cs_label
);
7227 /* We deliberately accept non-register operands in the predicate
7228 to ensure the write back to the output operand happens *before*
7229 the store-flags code below. This makes it easier for combine
7230 to merge the store-flags code with a potential test-and-branch
7231 pattern following (immediately!) afterwards. */
7232 if (output
!= vtarget
)
7233 emit_move_insn (vtarget
, output
);
7239 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7240 btarget has already been initialized with 0 above. */
7241 cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7242 cond
= gen_rtx_EQ (VOIDmode
, cc
, const0_rtx
);
7243 ite
= gen_rtx_IF_THEN_ELSE (SImode
, cond
, const1_rtx
, btarget
);
7244 emit_insn (gen_rtx_SET (btarget
, ite
));
7250 cc
= gen_rtx_REG (CCZ1mode
, CC_REGNUM
);
7251 cond
= gen_rtx_EQ (SImode
, cc
, const0_rtx
);
7252 emit_insn (gen_cstorecc4 (btarget
, cond
, cc
, const0_rtx
));
7256 /* Expand an atomic compare and swap operation. MEM is the memory location,
7257 CMP the old value to compare MEM with and NEW_RTX the value to set if
7261 s390_expand_cs (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7262 rtx cmp
, rtx new_rtx
, bool is_weak
)
7269 s390_expand_cs_tdsi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7273 s390_expand_cs_hqi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7280 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7281 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7285 s390_expand_atomic_exchange_tdsi (rtx output
, rtx mem
, rtx input
)
7287 machine_mode mode
= GET_MODE (mem
);
7288 rtx_code_label
*csloop
;
7291 && (mode
== DImode
|| mode
== SImode
)
7292 && CONST_INT_P (input
) && INTVAL (input
) == 0)
7294 emit_move_insn (output
, const0_rtx
);
7296 emit_insn (gen_atomic_fetch_anddi (output
, mem
, const0_rtx
, input
));
7298 emit_insn (gen_atomic_fetch_andsi (output
, mem
, const0_rtx
, input
));
7302 input
= force_reg (mode
, input
);
7303 emit_move_insn (output
, mem
);
7304 csloop
= gen_label_rtx ();
7305 emit_label (csloop
);
7306 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, output
, mem
, output
,
7310 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7311 and VAL the value to play with. If AFTER is true then store the value
7312 MEM holds after the operation, if AFTER is false then store the value MEM
7313 holds before the operation. If TARGET is zero then discard that value, else
7314 store it to TARGET. */
7317 s390_expand_atomic (machine_mode mode
, enum rtx_code code
,
7318 rtx target
, rtx mem
, rtx val
, bool after
)
7320 struct alignment_context ac
;
7322 rtx new_rtx
= gen_reg_rtx (SImode
);
7323 rtx orig
= gen_reg_rtx (SImode
);
7324 rtx_code_label
*csloop
= gen_label_rtx ();
7326 gcc_assert (!target
|| register_operand (target
, VOIDmode
));
7327 gcc_assert (MEM_P (mem
));
7329 init_alignment_context (&ac
, mem
, mode
);
7331 /* Shift val to the correct bit positions.
7332 Preserve "icm", but prevent "ex icm". */
7333 if (!(ac
.aligned
&& code
== SET
&& MEM_P (val
)))
7334 val
= s390_expand_mask_and_shift (val
, mode
, ac
.shift
);
7336 /* Further preparation insns. */
7337 if (code
== PLUS
|| code
== MINUS
)
7338 emit_move_insn (orig
, val
);
7339 else if (code
== MULT
|| code
== AND
) /* val = "11..1<val>11..1" */
7340 val
= expand_simple_binop (SImode
, XOR
, val
, ac
.modemaski
,
7341 NULL_RTX
, 1, OPTAB_DIRECT
);
7343 /* Load full word. Subsequent loads are performed by CS. */
7344 cmp
= force_reg (SImode
, ac
.memsi
);
7346 /* Start CS loop. */
7347 emit_label (csloop
);
7348 emit_move_insn (new_rtx
, cmp
);
7350 /* Patch new with val at correct position. */
7355 val
= expand_simple_binop (SImode
, code
, new_rtx
, orig
,
7356 NULL_RTX
, 1, OPTAB_DIRECT
);
7357 val
= expand_simple_binop (SImode
, AND
, val
, ac
.modemask
,
7358 NULL_RTX
, 1, OPTAB_DIRECT
);
7361 if (ac
.aligned
&& MEM_P (val
))
7362 store_bit_field (new_rtx
, GET_MODE_BITSIZE (mode
), 0,
7363 0, 0, SImode
, val
, false);
7366 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, ac
.modemaski
,
7367 NULL_RTX
, 1, OPTAB_DIRECT
);
7368 new_rtx
= expand_simple_binop (SImode
, IOR
, new_rtx
, val
,
7369 NULL_RTX
, 1, OPTAB_DIRECT
);
7375 new_rtx
= expand_simple_binop (SImode
, code
, new_rtx
, val
,
7376 NULL_RTX
, 1, OPTAB_DIRECT
);
7378 case MULT
: /* NAND */
7379 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, val
,
7380 NULL_RTX
, 1, OPTAB_DIRECT
);
7381 new_rtx
= expand_simple_binop (SImode
, XOR
, new_rtx
, ac
.modemask
,
7382 NULL_RTX
, 1, OPTAB_DIRECT
);
7388 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, cmp
,
7389 ac
.memsi
, cmp
, new_rtx
,
7392 /* Return the correct part of the bitfield. */
7394 convert_move (target
, expand_simple_binop (SImode
, LSHIFTRT
,
7395 after
? new_rtx
: cmp
, ac
.shift
,
7396 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7399 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7400 We need to emit DTP-relative relocations. */
7402 static void s390_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
7405 s390_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7410 fputs ("\t.long\t", file
);
7413 fputs ("\t.quad\t", file
);
7418 output_addr_const (file
, x
);
7419 fputs ("@DTPOFF", file
);
7422 /* Return the proper mode for REGNO being represented in the dwarf
7425 s390_dwarf_frame_reg_mode (int regno
)
7427 machine_mode save_mode
= default_dwarf_frame_reg_mode (regno
);
7429 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7430 if (GENERAL_REGNO_P (regno
))
7433 /* The rightmost 64 bits of vector registers are call-clobbered. */
7434 if (GET_MODE_SIZE (save_mode
) > 8)
7440 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7441 /* Implement TARGET_MANGLE_TYPE. */
7444 s390_mangle_type (const_tree type
)
7446 type
= TYPE_MAIN_VARIANT (type
);
7448 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
7449 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
7452 if (type
== s390_builtin_types
[BT_BV16QI
]) return "U6__boolc";
7453 if (type
== s390_builtin_types
[BT_BV8HI
]) return "U6__bools";
7454 if (type
== s390_builtin_types
[BT_BV4SI
]) return "U6__booli";
7455 if (type
== s390_builtin_types
[BT_BV2DI
]) return "U6__booll";
7457 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
7458 && TARGET_LONG_DOUBLE_128
)
7461 /* For all other types, use normal C++ mangling. */
/* TARGET_DELEGITIMIZE_ADDRESS hook: undo PIC/TLS legitimization by peeling
   UNSPEC_GOT/GOTOFF/PLTOFF/GOTENT/PLT wrappers off ORIG_X and recovering the
   underlying SYMBOL_REF (plus any constant offset).
   NOTE(review): incomplete listing — the declaration/initialization of the
   local X and Y, several returns, and the final fallback are missing.  */
7466 /* In the name of slightly smaller debug output, and to cater to
7467 general assembler lossage, recognize various UNSPEC sequences
7468 and turn them back into a direct symbol reference. */
7471 s390_delegitimize_address (rtx orig_x
)
7475 orig_x
= delegitimize_mem_from_attrs (orig_x
);
7478 /* Extract the symbol ref from:
7479 (plus:SI (reg:SI 12 %r12)
7480 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7481 UNSPEC_GOTOFF/PLTOFF)))
7483 (plus:SI (reg:SI 12 %r12)
7484 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7485 UNSPEC_GOTOFF/PLTOFF)
7486 (const_int 4 [0x4])))) */
7487 if (GET_CODE (x
) == PLUS
7488 && REG_P (XEXP (x
, 0))
7489 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
7490 && GET_CODE (XEXP (x
, 1)) == CONST
)
7492 HOST_WIDE_INT offset
= 0;
7494 /* The const operand. */
7495 y
= XEXP (XEXP (x
, 1), 0);
7497 if (GET_CODE (y
) == PLUS
7498 && GET_CODE (XEXP (y
, 1)) == CONST_INT
)
7500 offset
= INTVAL (XEXP (y
, 1));
7504 if (GET_CODE (y
) == UNSPEC
7505 && (XINT (y
, 1) == UNSPEC_GOTOFF
7506 || XINT (y
, 1) == UNSPEC_PLTOFF
))
7507 return plus_constant (Pmode
, XVECEXP (y
, 0, 0), offset
);
7510 if (GET_CODE (x
) != MEM
)
7514 if (GET_CODE (x
) == PLUS
7515 && GET_CODE (XEXP (x
, 1)) == CONST
7516 && GET_CODE (XEXP (x
, 0)) == REG
7517 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7519 y
= XEXP (XEXP (x
, 1), 0);
7520 if (GET_CODE (y
) == UNSPEC
7521 && XINT (y
, 1) == UNSPEC_GOT
)
7522 y
= XVECEXP (y
, 0, 0);
7526 else if (GET_CODE (x
) == CONST
)
7528 /* Extract the symbol ref from:
7529 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7530 UNSPEC_PLT/GOTENT))) */
7533 if (GET_CODE (y
) == UNSPEC
7534 && (XINT (y
, 1) == UNSPEC_GOTENT
7535 || XINT (y
, 1) == UNSPEC_PLT
))
7536 y
= XVECEXP (y
, 0, 0);
7543 if (GET_MODE (orig_x
) != Pmode
)
7545 if (GET_MODE (orig_x
) == BLKmode
)
/* Result must be returned in ORIG_X's original mode, so narrow it back.  */
7547 y
= lowpart_subreg (GET_MODE (orig_x
), y
, Pmode
);
/* Print OP (a base register plus displacement used as a value, e.g. a shift
   count) as "<offset>(<reg>)"; the offset is masked to 12 bits.
   NOTE(review): the declaration of BASE and the error path after the failed
   decompose are not visible in this extraction.  */
7554 /* Output operand OP to stdio stream FILE.
7555 OP is an address (register + offset) which is not used to address data;
7556 instead the rightmost bits are interpreted as the value. */
7559 print_addrstyle_operand (FILE *file
, rtx op
)
7561 HOST_WIDE_INT offset
;
7564 /* Extract base register and offset. */
7565 if (!s390_decompose_addrstyle_without_index (op
, &base
, &offset
))
7571 gcc_assert (GET_CODE (base
) == REG
);
7572 gcc_assert (REGNO (base
) < FIRST_PSEUDO_REGISTER
);
7573 gcc_assert (REGNO_REG_CLASS (REGNO (base
)) == ADDR_REGS
);
7576 /* Offsets are constricted to twelve bits. */
7577 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
& ((1 << 12) - 1));
7579 fprintf (file
, "(%s)", reg_names
[REGNO (base
)]);
/* Validate and print shift-count operand OP: strip SUBREGs and a no-op AND
   mask, then delegate to print_addrstyle_operand.
   NOTE(review): the body of the AND-stripping branch is missing from this
   extraction.  */
7582 /* Print the shift count operand OP to FILE.
7583 OP is an address-style operand in a form which
7584 s390_valid_shift_count permits. Subregs and no-op
7585 and-masking of the operand are stripped. */
7588 print_shift_count_operand (FILE *file
, rtx op
)
7590 /* No checking of the and mask required here. */
7591 if (!s390_valid_shift_count (op
, 0))
7594 while (op
&& GET_CODE (op
) == SUBREG
)
7595 op
= SUBREG_REG (op
);
7597 if (GET_CODE (op
) == AND
)
7600 print_addrstyle_operand (file
, op
);
/* Compute hotpatch NOP counts for DECL: a "hotpatch" attribute on the
   function overrides the command-line -mhotpatch values.
   NOTE(review): the hw_before/hw_after parameter declarations and the
   if/else structure around the two assignment groups are elided here.  */
7603 /* Assigns the number of NOP halfwords to be emitted before and after the
7604 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7605 If hotpatching is disabled for the function, the values are set to zero.
7609 s390_function_num_hotpatch_hw (tree decl
,
7615 attr
= lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl
));
7617 /* Handle the arguments of the hotpatch attribute. The values
7618 specified via attribute might override the cmdline argument
7622 tree args
= TREE_VALUE (attr
);
7624 *hw_before
= TREE_INT_CST_LOW (TREE_VALUE (args
));
7625 *hw_after
= TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args
)));
7629 /* Use the values specified by the cmdline arguments. */
7630 *hw_before
= s390_hotpatch_hw_before_label
;
7631 *hw_after
= s390_hotpatch_hw_after_label
;
/* Emit ".machinemode zarch|esa" and a ".machine" directive naming the
   current -march CPU, appending +/-htm and +/-vx modifiers when the enabled
   facilities differ from the architecture's defaults.  Only built when the
   assembler supports .machine/.machinemode (HAVE_AS_MACHINE_MACHINEMODE).  */
7635 /* Write the current .machine and .machinemode specification to the assembler
7638 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7640 s390_asm_output_machine_for_arch (FILE *asm_out_file
)
7642 fprintf (asm_out_file
, "\t.machinemode %s\n",
7643 (TARGET_ZARCH
) ? "zarch" : "esa");
7644 fprintf (asm_out_file
, "\t.machine \"%s",
7645 processor_table
[s390_arch
].binutils_name
);
7646 if (S390_USE_ARCHITECTURE_MODIFIERS
)
7650 cpu_flags
= processor_flags_table
[(int) s390_arch
];
7651 if (TARGET_HTM
&& !(cpu_flags
& PF_TX
))
7652 fprintf (asm_out_file
, "+htm");
7653 else if (!TARGET_HTM
&& (cpu_flags
& PF_TX
))
7654 fprintf (asm_out_file
, "+nohtm");
7655 if (TARGET_VX
&& !(cpu_flags
& PF_VX
))
7656 fprintf (asm_out_file
, "+vx");
7657 else if (!TARGET_VX
&& (cpu_flags
& PF_VX
))
7658 fprintf (asm_out_file
, "+novx");
7660 fprintf (asm_out_file
, "\"\n");
/* For a function with its own target attribute/pragma, push the current
   .machine/.machinemode state and emit the function-specific one; paired
   with the pop in s390_asm_declare_function_size below.  */
7663 /* Write an extra function header before the very start of the function. */
7666 s390_asm_output_function_prefix (FILE *asm_out_file
,
7667 const char *fnname ATTRIBUTE_UNUSED
)
7669 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
) == NULL
)
7671 /* Since only the function specific options are saved but not the indications
7672 which options are set, it's too much work here to figure out which options
7673 have actually changed. Thus, generate .machine and .machinemode whenever a
7674 function has the target attribute or pragma. */
7675 fprintf (asm_out_file
, "\t.machinemode push\n");
7676 fprintf (asm_out_file
, "\t.machine push\n");
7677 s390_asm_output_machine_for_arch (asm_out_file
);
/* Emit the .size directive and, for functions with a target attribute,
   pop the .machine/.machinemode state pushed by the prefix hook above.  */
7680 /* Write an extra function footer after the very end of the function. */
7683 s390_asm_declare_function_size (FILE *asm_out_file
,
7684 const char *fnname
, tree decl
)
7686 if (!flag_inhibit_size_directive
)
7687 ASM_OUTPUT_MEASURED_SIZE (asm_out_file
, fnname
);
7688 if (DECL_FUNCTION_SPECIFIC_TARGET (decl
) == NULL
)
7690 fprintf (asm_out_file
, "\t.machine pop\n");
7691 fprintf (asm_out_file
, "\t.machinemode pop\n");
/* Emit the function label plus hotpatch machinery: pre-label two-byte NOPs
   forming a patchable trampoline area, re-alignment of the label (so the
   patch can be applied atomically), optional debug comments describing the
   per-function target options, the label itself, and post-label NOPs.
   NOTE(review): several structural lines (braces, the decl parameter, the
   post-label NOP loop body) are missing from this extraction.  */
7695 /* Write the extra assembler code needed to declare a function properly. */
7698 s390_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
7701 int hw_before
, hw_after
;
7703 s390_function_num_hotpatch_hw (decl
, &hw_before
, &hw_after
);
7706 unsigned int function_alignment
;
7709 /* Add a trampoline code area before the function label and initialize it
7710 with two-byte nop instructions. This area can be overwritten with code
7711 that jumps to a patched version of the function. */
7712 asm_fprintf (asm_out_file
, "\tnopr\t%%r0"
7713 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7715 for (i
= 1; i
< hw_before
; i
++)
7716 fputs ("\tnopr\t%r0\n", asm_out_file
);
7718 /* Note: The function label must be aligned so that (a) the bytes of the
7719 following nop do not cross a cacheline boundary, and (b) a jump address
7720 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7721 stored directly before the label without crossing a cacheline
7722 boundary. All this is necessary to make sure the trampoline code can
7723 be changed atomically.
7724 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7725 if there are NOPs before the function label, the alignment is placed
7726 before them. So it is necessary to duplicate the alignment after the
7728 function_alignment
= MAX (8, DECL_ALIGN (decl
) / BITS_PER_UNIT
);
7729 if (! DECL_USER_ALIGN (decl
))
7731 = MAX (function_alignment
,
7732 (unsigned int) align_functions
.levels
[0].get_value ());
7733 fputs ("\t# alignment for hotpatch\n", asm_out_file
);
7734 ASM_OUTPUT_ALIGN (asm_out_file
, align_functions
.levels
[0].log
);
7737 if (S390_USE_TARGET_ATTRIBUTE
&& TARGET_DEBUG_ARG
)
7739 asm_fprintf (asm_out_file
, "\t# fn:%s ar%d\n", fname
, s390_arch
);
7740 asm_fprintf (asm_out_file
, "\t# fn:%s tu%d\n", fname
, s390_tune
);
7741 asm_fprintf (asm_out_file
, "\t# fn:%s sg%d\n", fname
, s390_stack_guard
);
7742 asm_fprintf (asm_out_file
, "\t# fn:%s ss%d\n", fname
, s390_stack_size
);
7743 asm_fprintf (asm_out_file
, "\t# fn:%s bc%d\n", fname
, s390_branch_cost
);
7744 asm_fprintf (asm_out_file
, "\t# fn:%s wf%d\n", fname
,
7745 s390_warn_framesize
);
7746 asm_fprintf (asm_out_file
, "\t# fn:%s ba%d\n", fname
, TARGET_BACKCHAIN
);
7747 asm_fprintf (asm_out_file
, "\t# fn:%s hd%d\n", fname
, TARGET_HARD_DFP
);
7748 asm_fprintf (asm_out_file
, "\t# fn:%s hf%d\n", fname
, !TARGET_SOFT_FLOAT
);
7749 asm_fprintf (asm_out_file
, "\t# fn:%s ht%d\n", fname
, TARGET_OPT_HTM
);
7750 asm_fprintf (asm_out_file
, "\t# fn:%s vx%d\n", fname
, TARGET_OPT_VX
);
7751 asm_fprintf (asm_out_file
, "\t# fn:%s ps%d\n", fname
,
7752 TARGET_PACKED_STACK
);
7753 asm_fprintf (asm_out_file
, "\t# fn:%s se%d\n", fname
, TARGET_SMALL_EXEC
);
7754 asm_fprintf (asm_out_file
, "\t# fn:%s mv%d\n", fname
, TARGET_MVCLE
);
7755 asm_fprintf (asm_out_file
, "\t# fn:%s zv%d\n", fname
, TARGET_ZVECTOR
);
7756 asm_fprintf (asm_out_file
, "\t# fn:%s wd%d\n", fname
,
7757 s390_warn_dynamicstack_p
);
7759 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
7761 asm_fprintf (asm_out_file
,
7762 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
/* TARGET_OUTPUT_ADDR_CONST_EXTRA hook: prints a symbol followed by its
   relocation suffix (@GOT, @PLT, @TLSGD, ...) for one-operand UNSPECs, and
   a label difference for UNSPEC_POOL_OFFSET.  NOTE(review): the case labels
   for the first switch and the return statements are missing from this
   extraction — each pair of output calls presumably sits under the matching
   UNSPEC_* case.  */
7766 /* Output machine-dependent UNSPECs occurring in address constant X
7767 in assembler syntax to stdio stream FILE. Returns true if the
7768 constant X could be recognized, false otherwise. */
7771 s390_output_addr_const_extra (FILE *file
, rtx x
)
7773 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 1)
7774 switch (XINT (x
, 1))
7777 output_addr_const (file
, XVECEXP (x
, 0, 0));
7778 fprintf (file
, "@GOTENT");
7781 output_addr_const (file
, XVECEXP (x
, 0, 0));
7782 fprintf (file
, "@GOT");
7785 output_addr_const (file
, XVECEXP (x
, 0, 0));
7786 fprintf (file
, "@GOTOFF");
7789 output_addr_const (file
, XVECEXP (x
, 0, 0));
7790 fprintf (file
, "@PLT");
7793 output_addr_const (file
, XVECEXP (x
, 0, 0));
7794 fprintf (file
, "@PLTOFF");
7797 output_addr_const (file
, XVECEXP (x
, 0, 0));
7798 fprintf (file
, "@TLSGD");
7801 assemble_name (file
, get_some_local_dynamic_name ());
7802 fprintf (file
, "@TLSLDM");
7805 output_addr_const (file
, XVECEXP (x
, 0, 0));
7806 fprintf (file
, "@DTPOFF");
7809 output_addr_const (file
, XVECEXP (x
, 0, 0));
7810 fprintf (file
, "@NTPOFF");
7812 case UNSPEC_GOTNTPOFF
:
7813 output_addr_const (file
, XVECEXP (x
, 0, 0));
7814 fprintf (file
, "@GOTNTPOFF");
7816 case UNSPEC_INDNTPOFF
:
7817 output_addr_const (file
, XVECEXP (x
, 0, 0));
7818 fprintf (file
, "@INDNTPOFF");
7822 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 2)
7823 switch (XINT (x
, 1))
7825 case UNSPEC_POOL_OFFSET
:
/* Pool offsets print as the difference of the entry and the pool base.  */
7826 x
= gen_rtx_MINUS (GET_MODE (x
), XVECEXP (x
, 0, 0), XVECEXP (x
, 0, 1));
7827 output_addr_const (file
, x
);
/* TARGET_PRINT_OPERAND_ADDRESS: print ADDR as "disp(index,base)" /
   "disp(base)" / "disp", after decomposing it into s390_address parts.
   Load-relative (symbolic) operands are printed directly, with an error on
   targets lacking that support.  NOTE(review): the conditions guarding the
   "0" fallback and the z10 check are elided in this extraction.  */
7833 /* Output address operand ADDR in assembler syntax to
7834 stdio stream FILE. */
7837 print_operand_address (FILE *file
, rtx addr
)
7839 struct s390_address ad
;
7840 memset (&ad
, 0, sizeof (s390_address
));
7842 if (s390_loadrelative_operand_p (addr
, NULL
, NULL
))
7846 output_operand_lossage ("symbolic memory references are "
7847 "only supported on z10 or later");
7850 output_addr_const (file
, addr
);
7854 if (!s390_decompose_address (addr
, &ad
)
7855 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7856 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
7857 output_operand_lossage ("cannot decompose address");
7860 output_addr_const (file
, ad
.disp
);
7862 fprintf (file
, "0");
7864 if (ad
.base
&& ad
.indx
)
7865 fprintf (file
, "(%s,%s)", reg_names
[REGNO (ad
.indx
)],
7866 reg_names
[REGNO (ad
.base
)]);
7868 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
/* TARGET_PRINT_OPERAND: master dispatcher for the assembler-template format
   flags documented in the block comment below.  NOTE(review): this is an
   incomplete extraction — the outer switch on CODE, most case labels,
   break/return statements, and several local declarations (ival, start,
   end, ret, mask) are missing; the surviving lines show only the action of
   each case.  */
7871 /* Output operand X in assembler syntax to stdio stream FILE.
7872 CODE specified the format flag. The following format flags
7875 'A': On z14 or higher: If operand is a mem print the alignment
7876 hint usable with vl/vst prefixed by a comma.
7877 'C': print opcode suffix for branch condition.
7878 'D': print opcode suffix for inverse branch condition.
7879 'E': print opcode suffix for branch on index instruction.
7880 'G': print the size of the operand in bytes.
7881 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7882 'M': print the second word of a TImode operand.
7883 'N': print the second word of a DImode operand.
7884 'O': print only the displacement of a memory reference or address.
7885 'R': print only the base register of a memory reference or address.
7886 'S': print S-type memory reference (base+displacement).
7887 'Y': print address style operand without index (e.g. shift count or setmem
7890 'b': print integer X as if it's an unsigned byte.
7891 'c': print integer X as if it's an signed byte.
7892 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7893 'f': "end" contiguous bitmask X in SImode.
7894 'h': print integer X as if it's a signed halfword.
7895 'i': print the first nonzero HImode part of X.
7896 'j': print the first HImode part unequal to -1 of X.
7897 'k': print the first nonzero SImode part of X.
7898 'm': print the first SImode part unequal to -1 of X.
7899 'o': print integer X as if it's an unsigned 32bit word.
7900 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7901 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7902 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7903 'x': print integer X as if it's an unsigned halfword.
7904 'v': print register number as vector register (v1 instead of f1).
7908 print_operand (FILE *file
, rtx x
, int code
)
7915 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS
&& MEM_P (x
))
7917 if (MEM_ALIGN (x
) >= 128)
7918 fprintf (file
, ",4");
7919 else if (MEM_ALIGN (x
) == 64)
7920 fprintf (file
, ",3");
7924 fprintf (file
, s390_branch_condition_mnemonic (x
, FALSE
));
7928 fprintf (file
, s390_branch_condition_mnemonic (x
, TRUE
));
7932 if (GET_CODE (x
) == LE
)
7933 fprintf (file
, "l");
7934 else if (GET_CODE (x
) == GT
)
7935 fprintf (file
, "h");
7937 output_operand_lossage ("invalid comparison operator "
7938 "for 'E' output modifier");
7942 if (GET_CODE (x
) == SYMBOL_REF
)
7944 fprintf (file
, "%s", ":tls_load:");
7945 output_addr_const (file
, x
);
7947 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
7949 fprintf (file
, "%s", ":tls_gdcall:");
7950 output_addr_const (file
, XVECEXP (x
, 0, 0));
7952 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSLDM
)
7954 fprintf (file
, "%s", ":tls_ldcall:");
7955 const char *name
= get_some_local_dynamic_name ();
7957 assemble_name (file
, name
);
7960 output_operand_lossage ("invalid reference for 'J' output modifier");
7964 fprintf (file
, "%u", GET_MODE_SIZE (GET_MODE (x
)));
7969 struct s390_address ad
;
7972 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7975 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7978 output_operand_lossage ("invalid address for 'O' output modifier");
7983 output_addr_const (file
, ad
.disp
);
7985 fprintf (file
, "0");
7991 struct s390_address ad
;
7994 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7997 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8000 output_operand_lossage ("invalid address for 'R' output modifier");
8005 fprintf (file
, "%s", reg_names
[REGNO (ad
.base
)]);
8007 fprintf (file
, "0");
8013 struct s390_address ad
;
8018 output_operand_lossage ("memory reference expected for "
8019 "'S' output modifier");
8022 ret
= s390_decompose_address (XEXP (x
, 0), &ad
);
8025 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
8028 output_operand_lossage ("invalid address for 'S' output modifier");
8033 output_addr_const (file
, ad
.disp
);
8035 fprintf (file
, "0");
8038 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
8043 if (GET_CODE (x
) == REG
)
8044 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
8045 else if (GET_CODE (x
) == MEM
)
8046 x
= change_address (x
, VOIDmode
,
8047 plus_constant (Pmode
, XEXP (x
, 0), 4));
8049 output_operand_lossage ("register or memory expression expected "
8050 "for 'N' output modifier");
8054 if (GET_CODE (x
) == REG
)
8055 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
8056 else if (GET_CODE (x
) == MEM
)
8057 x
= change_address (x
, VOIDmode
,
8058 plus_constant (Pmode
, XEXP (x
, 0), 8));
8060 output_operand_lossage ("register or memory expression expected "
8061 "for 'M' output modifier");
8065 print_shift_count_operand (file
, x
);
8069 switch (GET_CODE (x
))
8072 /* Print FP regs as fx instead of vx when they are accessed
8073 through non-vector mode. */
8075 || VECTOR_NOFP_REG_P (x
)
8076 || (FP_REG_P (x
) && VECTOR_MODE_P (GET_MODE (x
)))
8077 || (VECTOR_REG_P (x
)
8078 && (GET_MODE_SIZE (GET_MODE (x
)) /
8079 s390_class_max_nregs (FP_REGS
, GET_MODE (x
))) > 8))
8080 fprintf (file
, "%%v%s", reg_names
[REGNO (x
)] + 2);
8082 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
8086 output_address (GET_MODE (x
), XEXP (x
, 0));
8093 output_addr_const (file
, x
);
8106 ival
= ((ival
& 0xff) ^ 0x80) - 0x80;
8112 ival
= ((ival
& 0xffff) ^ 0x8000) - 0x8000;
8115 ival
= s390_extract_part (x
, HImode
, 0);
8118 ival
= s390_extract_part (x
, HImode
, -1);
8121 ival
= s390_extract_part (x
, SImode
, 0);
8124 ival
= s390_extract_part (x
, SImode
, -1);
8136 len
= (code
== 's' || code
== 'e' ? 64 : 32);
8137 ok
= s390_contiguous_bitmask_p (ival
, true, len
, &start
, &end
);
8139 if (code
== 's' || code
== 't')
8146 output_operand_lossage ("invalid constant for output modifier '%c'", code
);
8148 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8151 case CONST_WIDE_INT
:
8153 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8154 CONST_WIDE_INT_ELT (x
, 0) & 0xff);
8155 else if (code
== 'x')
8156 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8157 CONST_WIDE_INT_ELT (x
, 0) & 0xffff);
8158 else if (code
== 'h')
8159 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8160 ((CONST_WIDE_INT_ELT (x
, 0) & 0xffff) ^ 0x8000) - 0x8000);
8164 output_operand_lossage ("invalid constant - try using "
8165 "an output modifier");
8167 output_operand_lossage ("invalid constant for output modifier '%c'",
8175 gcc_assert (const_vec_duplicate_p (x
));
8176 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8177 ((INTVAL (XVECEXP (x
, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8185 ok
= s390_contiguous_bitmask_vector_p (x
, &start
, &end
);
8187 ival
= (code
== 's') ? start
: end
;
8188 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8194 bool ok
= s390_bytemask_vector_p (x
, &mask
);
8196 fprintf (file
, "%u", mask
);
8201 output_operand_lossage ("invalid constant vector for output "
8202 "modifier '%c'", code
);
8208 output_operand_lossage ("invalid expression - try using "
8209 "an output modifier");
8211 output_operand_lossage ("invalid expression for output "
8212 "modifier '%c'", code
);
/* TARGET_ASM_INTEGER: emit 8-byte aligned CONST_INTs below INT_MIN in hex
   to sidestep a GAS decimal-parsing bug; everything else uses the default.
   NOTE(review): the fprintf's value argument and the return in the special
   case are elided in this extraction.  */
8217 /* Target hook for assembling integer objects. We need to define it
8218 here to work a round a bug in some versions of GAS, which couldn't
8219 handle values smaller than INT_MIN when printed in decimal. */
8222 s390_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
8224 if (size
== 8 && aligned_p
8225 && GET_CODE (x
) == CONST_INT
&& INTVAL (x
) < INT_MIN
)
8227 fprintf (asm_out_file
, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX
"\n",
8231 return default_assemble_integer (x
, size
, aligned_p
);
/* Recursive walk deciding whether REGNO participates in an address inside X:
   checks MEM addresses (and branch-target SETs) directly, then recurses over
   sub-rtxes via the rtx format string.  NOTE(review): the MEM case label,
   the local fmt/i/j declarations and the final return are missing here.  */
8234 /* Returns true if register REGNO is used for forming
8235 a memory address in expression X. */
8238 reg_used_in_mem_p (int regno
, rtx x
)
8240 enum rtx_code code
= GET_CODE (x
);
8246 if (refers_to_regno_p (regno
, XEXP (x
, 0)))
8249 else if (code
== SET
8250 && GET_CODE (SET_DEST (x
)) == PC
)
8252 if (refers_to_regno_p (regno
, SET_SRC (x
)))
8256 fmt
= GET_RTX_FORMAT (code
);
8257 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8260 && reg_used_in_mem_p (regno
, XEXP (x
, i
)))
8263 else if (fmt
[i
] == 'E')
8264 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8265 if (reg_used_in_mem_p (regno
, XVECEXP (x
, i
, j
)))
/* True if DEP_RTX writes a register that INSN later uses for address
   generation (AGI detection for scheduling).  LA-type insns are special:
   only their SET_SRC counts as address use.  NOTE(review): the target/pat
   declarations and several returns are elided in this extraction.  */
8271 /* Returns true if expression DEP_RTX sets an address register
8272 used by instruction INSN to address memory. */
8275 addr_generation_dependency_p (rtx dep_rtx
, rtx_insn
*insn
)
8279 if (NONJUMP_INSN_P (dep_rtx
))
8280 dep_rtx
= PATTERN (dep_rtx
);
8282 if (GET_CODE (dep_rtx
) == SET
)
8284 target
= SET_DEST (dep_rtx
);
8285 if (GET_CODE (target
) == STRICT_LOW_PART
)
8286 target
= XEXP (target
, 0);
8287 while (GET_CODE (target
) == SUBREG
)
8288 target
= SUBREG_REG (target
);
8290 if (GET_CODE (target
) == REG
)
8292 int regno
= REGNO (target
);
8294 if (s390_safe_attr_type (insn
) == TYPE_LA
)
8296 pat
= PATTERN (insn
);
8297 if (GET_CODE (pat
) == PARALLEL
)
8299 gcc_assert (XVECLEN (pat
, 0) == 2);
8300 pat
= XVECEXP (pat
, 0, 0);
8302 gcc_assert (GET_CODE (pat
) == SET
);
8303 return refers_to_regno_p (regno
, SET_SRC (pat
));
8305 else if (get_attr_atype (insn
) == ATYPE_AGEN
)
8306 return reg_used_in_mem_p (regno
, PATTERN (insn
));
/* Wrapper over addr_generation_dependency_p: also handles PARALLEL patterns
   in DEP_INSN by testing every element.  NOTE(review): the returns inside
   the branches and the loop variable declaration are elided here.  */
8312 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8315 s390_agen_dep_p (rtx_insn
*dep_insn
, rtx_insn
*insn
)
8317 rtx dep_rtx
= PATTERN (dep_insn
);
8320 if (GET_CODE (dep_rtx
) == SET
8321 && addr_generation_dependency_p (dep_rtx
, insn
))
8323 else if (GET_CODE (dep_rtx
) == PARALLEL
)
8325 for (i
= 0; i
< XVECLEN (dep_rtx
, 0); i
++)
8327 if (addr_generation_dependency_p (XVECEXP (dep_rtx
, 0, i
), insn
))
/* TARGET_SCHED_ADJUST_PRIORITY: on older CPUs (<= z900) boost the priority
   of certain insn types (shift by 3 resp. 1) so e.g. STD can reach its
   bypass.  NOTE(review): the case labels naming which TYPE_* get which
   boost and the return statement are elided in this extraction.  */
8335 /* A C statement (sans semicolon) to update the integer scheduling priority
8336 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8337 reduce the priority to execute INSN later. Do not define this macro if
8338 you do not need to adjust the scheduling priorities of insns.
8340 A STD instruction should be scheduled earlier,
8341 in order to use the bypass. */
8343 s390_adjust_priority (rtx_insn
*insn
, int priority
)
8345 if (! INSN_P (insn
))
8348 if (s390_tune
<= PROCESSOR_2064_Z900
)
8351 switch (s390_safe_attr_type (insn
))
8355 priority
= priority
<< 3;
8359 priority
= priority
<< 1;
/* TARGET_SCHED_ISSUE_RATE: per-CPU issue width (the per-case return values
   are missing from this extraction); EC12 and later deliberately report 1
   because sched_reorder models dispatch constraints itself.  The truncated
   s390_first_cycle_multipass_dfa_lookahead declaration follows.  */
8368 /* The number of instructions that can be issued per cycle. */
8371 s390_issue_rate (void)
8375 case PROCESSOR_2084_Z990
:
8376 case PROCESSOR_2094_Z9_109
:
8377 case PROCESSOR_2094_Z9_EC
:
8378 case PROCESSOR_2817_Z196
:
8380 case PROCESSOR_2097_Z10
:
8382 case PROCESSOR_2064_Z900
:
8383 /* Starting with EC12 we use the sched_reorder hook to take care
8384 of instruction dispatch constraints. The algorithm only
8385 picks the best instruction and assumes only a single
8386 instruction gets issued per cycle. */
8387 case PROCESSOR_2827_ZEC12
:
8388 case PROCESSOR_2964_Z13
:
8389 case PROCESSOR_3906_Z14
:
8396 s390_first_cycle_multipass_dfa_lookahead (void)
/* Recursive worker: rewrite every literal-pool SYMBOL_REF in *X (inside a
   MEM, or as the source of a load-address SET, optionally plus a constant
   offset) into an UNSPEC pairing the symbol with the function's base
   register, then recurse into sub-rtxes.  NOTE(review): the UNSPEC_LTREF
   code argument to gen_rtx_UNSPEC and several returns/braces are elided
   in this extraction.  */
8402 annotate_constant_pool_refs_1 (rtx
*x
)
8407 gcc_assert (GET_CODE (*x
) != SYMBOL_REF
8408 || !CONSTANT_POOL_ADDRESS_P (*x
));
8410 /* Literal pool references can only occur inside a MEM ... */
8411 if (GET_CODE (*x
) == MEM
)
8413 rtx memref
= XEXP (*x
, 0);
8415 if (GET_CODE (memref
) == SYMBOL_REF
8416 && CONSTANT_POOL_ADDRESS_P (memref
))
8418 rtx base
= cfun
->machine
->base_reg
;
8419 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, memref
, base
),
8422 *x
= replace_equiv_address (*x
, addr
);
8426 if (GET_CODE (memref
) == CONST
8427 && GET_CODE (XEXP (memref
, 0)) == PLUS
8428 && GET_CODE (XEXP (XEXP (memref
, 0), 1)) == CONST_INT
8429 && GET_CODE (XEXP (XEXP (memref
, 0), 0)) == SYMBOL_REF
8430 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref
, 0), 0)))
8432 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (memref
, 0), 1));
8433 rtx sym
= XEXP (XEXP (memref
, 0), 0);
8434 rtx base
= cfun
->machine
->base_reg
;
8435 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8438 *x
= replace_equiv_address (*x
, plus_constant (Pmode
, addr
, off
));
8443 /* ... or a load-address type pattern. */
8444 if (GET_CODE (*x
) == SET
)
8446 rtx addrref
= SET_SRC (*x
);
8448 if (GET_CODE (addrref
) == SYMBOL_REF
8449 && CONSTANT_POOL_ADDRESS_P (addrref
))
8451 rtx base
= cfun
->machine
->base_reg
;
8452 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addrref
, base
),
8455 SET_SRC (*x
) = addr
;
8459 if (GET_CODE (addrref
) == CONST
8460 && GET_CODE (XEXP (addrref
, 0)) == PLUS
8461 && GET_CODE (XEXP (XEXP (addrref
, 0), 1)) == CONST_INT
8462 && GET_CODE (XEXP (XEXP (addrref
, 0), 0)) == SYMBOL_REF
8463 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref
, 0), 0)))
8465 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (addrref
, 0), 1));
8466 rtx sym
= XEXP (XEXP (addrref
, 0), 0);
8467 rtx base
= cfun
->machine
->base_reg
;
8468 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8471 SET_SRC (*x
) = plus_constant (Pmode
, addr
, off
);
8476 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8477 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8481 annotate_constant_pool_refs_1 (&XEXP (*x
, i
));
8483 else if (fmt
[i
] == 'E')
8485 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8486 annotate_constant_pool_refs_1 (&XVECEXP (*x
, i
, j
));
/* Entry point: annotate INSN's whole pattern, unless the insn uses relative
   long addressing and therefore needs no literal-pool base register.  */
8491 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8492 Fix up MEMs as required.
8493 Skip insns which support relative addressing, because they do not use a base
8497 annotate_constant_pool_refs (rtx_insn
*insn
)
8499 if (s390_safe_relative_long_p (insn
))
8501 annotate_constant_pool_refs_1 (&PATTERN (insn
));
/* Recursive worker for find_constant_pool_ref: locate the single
   UNSPEC_LTREF pool symbol inside X, record it in *REF, and assert any
   further occurrence is the same symbol.  POOL_ENTRY unspec_volatiles are
   skipped.  NOTE(review): the early returns and the fmt/i/j declarations
   are elided in this extraction.  */
8505 find_constant_pool_ref_1 (rtx x
, rtx
*ref
)
8510 /* Likewise POOL_ENTRY insns. */
8511 if (GET_CODE (x
) == UNSPEC_VOLATILE
8512 && XINT (x
, 1) == UNSPECV_POOL_ENTRY
)
8515 gcc_assert (GET_CODE (x
) != SYMBOL_REF
8516 || !CONSTANT_POOL_ADDRESS_P (x
));
8518 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_LTREF
)
8520 rtx sym
= XVECEXP (x
, 0, 0);
8521 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
8522 && CONSTANT_POOL_ADDRESS_P (sym
));
8524 if (*ref
== NULL_RTX
)
8527 gcc_assert (*ref
== sym
);
8532 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8533 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8537 find_constant_pool_ref_1 (XEXP (x
, i
), ref
);
8539 else if (fmt
[i
] == 'E')
8541 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8542 find_constant_pool_ref_1 (XVECEXP (x
, i
, j
), ref
);
/* Entry point: scan INSN's pattern for its (unique) annotated pool symbol;
   relative-long insns are skipped since they never reference the pool via
   the base register.  */
8547 /* Find an annotated literal pool symbol referenced in INSN,
8548 and store it at REF. Will abort if INSN contains references to
8549 more than one such pool symbol; multiple references to the same
8550 symbol are allowed, however.
8552 The rtx pointed to by REF must be initialized to NULL_RTX
8553 by the caller before calling this routine.
8555 Skip insns which support relative addressing, because they do not use a base
8559 find_constant_pool_ref (rtx_insn
*insn
, rtx
*ref
)
8561 if (s390_safe_relative_long_p (insn
))
8563 find_constant_pool_ref_1 (PATTERN (insn
), ref
);
/* Recursive worker: substitute the UNSPEC_LTREF of pool symbol REF in *X by
   base register plus OFFSET, handling the bare and the plus-CONST_INT
   forms, then recurse.  NOTE(review): the early returns and the fmt/i/j
   declarations are elided in this extraction.  */
8567 replace_constant_pool_ref_1 (rtx
*x
, rtx ref
, rtx offset
)
8572 gcc_assert (*x
!= ref
);
8574 if (GET_CODE (*x
) == UNSPEC
8575 && XINT (*x
, 1) == UNSPEC_LTREF
8576 && XVECEXP (*x
, 0, 0) == ref
)
8578 *x
= gen_rtx_PLUS (Pmode
, XVECEXP (*x
, 0, 1), offset
);
8582 if (GET_CODE (*x
) == PLUS
8583 && GET_CODE (XEXP (*x
, 1)) == CONST_INT
8584 && GET_CODE (XEXP (*x
, 0)) == UNSPEC
8585 && XINT (XEXP (*x
, 0), 1) == UNSPEC_LTREF
8586 && XVECEXP (XEXP (*x
, 0), 0, 0) == ref
)
8588 rtx addr
= gen_rtx_PLUS (Pmode
, XVECEXP (XEXP (*x
, 0), 0, 1), offset
);
8589 *x
= plus_constant (Pmode
, addr
, INTVAL (XEXP (*x
, 1)));
8593 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8594 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8598 replace_constant_pool_ref_1 (&XEXP (*x
, i
), ref
, offset
);
8600 else if (fmt
[i
] == 'E')
8602 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8603 replace_constant_pool_ref_1 (&XVECEXP (*x
, i
, j
), ref
, offset
);
/* Entry point for the substitution above; skips relative-long insns which
   never use the pool base register.  */
8608 /* Replace every reference to the annotated literal pool
8609 symbol REF in INSN by its base plus OFFSET.
8610 Skip insns which support relative addressing, because they do not use a base
8614 replace_constant_pool_ref (rtx_insn
*insn
, rtx ref
, rtx offset
)
8616 if (s390_safe_relative_long_p (insn
))
8618 replace_constant_pool_ref_1 (&PATTERN (insn
), ref
, offset
);
/* Data structures for mid-function literal pools: the table of supported
   constant modes (ordered largest-alignment first) and the per-pool linked
   structures.  NOTE(review): struct constant's opening lines (tag, value
   field) and several fields/closing braces are elided in this extraction.  */
8621 /* We keep a list of constants which we have to add to internal
8622 constant tables in the middle of large functions. */
8624 #define NR_C_MODES 32
8625 machine_mode constant_modes
[NR_C_MODES
] =
8627 TFmode
, TImode
, TDmode
,
8628 V16QImode
, V8HImode
, V4SImode
, V2DImode
, V1TImode
,
8629 V4SFmode
, V2DFmode
, V1TFmode
,
8630 DFmode
, DImode
, DDmode
,
8631 V8QImode
, V4HImode
, V2SImode
, V1DImode
, V2SFmode
, V1DFmode
,
8632 SFmode
, SImode
, SDmode
,
8633 V4QImode
, V2HImode
, V1SImode
, V1SFmode
,
8642 struct constant
*next
;
8644 rtx_code_label
*label
;
8647 struct constant_pool
8649 struct constant_pool
*next
;
8650 rtx_insn
*first_insn
;
8651 rtx_insn
*pool_insn
;
8653 rtx_insn
*emit_pool_after
;
8655 struct constant
*constants
[NR_C_MODES
];
8656 struct constant
*execute
;
8657 rtx_code_label
*label
;
/* xmalloc a constant_pool and initialize all members to their empty state
   (fresh base label, empty per-mode constant lists, empty insn bitmap).  */
8661 /* Allocate new constant_pool structure. */
8663 static struct constant_pool
*
8664 s390_alloc_pool (void)
8666 struct constant_pool
*pool
;
8669 pool
= (struct constant_pool
*) xmalloc (sizeof *pool
);
8671 for (i
= 0; i
< NR_C_MODES
; i
++)
8672 pool
->constants
[i
] = NULL
;
8674 pool
->execute
= NULL
;
8675 pool
->label
= gen_label_rtx ();
8676 pool
->first_insn
= NULL
;
8677 pool
->pool_insn
= NULL
;
8678 pool
->insns
= BITMAP_ALLOC (NULL
);
8680 pool
->emit_pool_after
= NULL
;
/* Allocate a pool starting at INSN and append it to *POOL_LIST by walking
   to the list tail.  NOTE(review): the final link-in assignment and return
   are missing from this extraction.  */
8685 /* Create new constant pool covering instructions starting at INSN
8686 and chain it to the end of POOL_LIST. */
8688 static struct constant_pool
*
8689 s390_start_pool (struct constant_pool
**pool_list
, rtx_insn
*insn
)
8691 struct constant_pool
*pool
, **prev
;
8693 pool
= s390_alloc_pool ();
8694 pool
->first_insn
= insn
;
8696 for (prev
= pool_list
; *prev
; prev
= &(*prev
)->next
)
/* Close POOL at INSN (or the last insn when INSN is absent in the elided
   branch) and emit a placeholder "pool" insn sized with 8 bytes of
   alignment slop; register its address for INSN_ADDRESSES.  */
8703 /* End range of instructions covered by POOL at INSN and emit
8704 placeholder insn representing the pool. */
8707 s390_end_pool (struct constant_pool
*pool
, rtx_insn
*insn
)
8709 rtx pool_size
= GEN_INT (pool
->size
+ 8 /* alignment slop */);
8712 insn
= get_last_insn ();
8714 pool
->pool_insn
= emit_insn_after (gen_pool (pool_size
), insn
);
8715 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
/* Record INSN (by UID) in POOL's coverage bitmap.  */
8718 /* Add INSN to the list of insns covered by POOL. */
8721 s390_add_pool_insn (struct constant_pool
*pool
, rtx insn
)
8723 bitmap_set_bit (pool
->insns
, INSN_UID (insn
));
/* Linear search of POOL_LIST for the pool whose insn bitmap contains INSN.
   NOTE(review): the break/return lines are elided in this extraction.  */
8726 /* Return pool out of POOL_LIST that covers INSN. */
8728 static struct constant_pool
*
8729 s390_find_pool (struct constant_pool
*pool_list
, rtx insn
)
8731 struct constant_pool
*pool
;
8733 for (pool
= pool_list
; pool
; pool
= pool
->next
)
8734 if (bitmap_bit_p (pool
->insns
, INSN_UID (insn
)))
/* Intern VAL into POOL's per-mode list: locate MODE's slot, return early if
   an rtx_equal_p duplicate already exists (elided), otherwise prepend a new
   entry with a fresh label and grow the pool size.  NOTE(review): the
   c->value assignment and duplicate-found return are missing here.  */
8740 /* Add constant VAL of mode MODE to the constant pool POOL. */
8743 s390_add_constant (struct constant_pool
*pool
, rtx val
, machine_mode mode
)
8748 for (i
= 0; i
< NR_C_MODES
; i
++)
8749 if (constant_modes
[i
] == mode
)
8751 gcc_assert (i
!= NR_C_MODES
);
8753 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8754 if (rtx_equal_p (val
, c
->value
))
8759 c
= (struct constant
*) xmalloc (sizeof *c
);
8761 c
->label
= gen_label_rtx ();
8762 c
->next
= pool
->constants
[i
];
8763 pool
->constants
[i
] = c
;
8764 pool
->size
+= GET_MODE_SIZE (mode
);
/* Wrap X and POOL's base label in an UNSPEC_POOL_OFFSET constant; printed
   later (see s390_output_addr_const_extra) as the difference X - label.  */
8768 /* Return an rtx that represents the offset of X from the start of
8772 s390_pool_offset (struct constant_pool
*pool
, rtx x
)
8776 label
= gen_rtx_LABEL_REF (GET_MODE (x
), pool
->label
);
8777 x
= gen_rtx_UNSPEC (GET_MODE (x
), gen_rtvec (2, x
, label
),
8778 UNSPEC_POOL_OFFSET
);
8779 return gen_rtx_CONST (GET_MODE (x
), x
);
/* Look VAL up in POOL's per-mode list (it must have been added before —
   the loop-exit assert is elided) and return its offset from the pool
   start via s390_pool_offset.  */
8782 /* Find constant VAL of mode MODE in the constant pool POOL.
8783 Return an RTX describing the distance from the start of
8784 the pool to the location of the new constant. */
8787 s390_find_constant (struct constant_pool
*pool
, rtx val
,
8793 for (i
= 0; i
< NR_C_MODES
; i
++)
8794 if (constant_modes
[i
] == mode
)
8796 gcc_assert (i
!= NR_C_MODES
);
8798 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8799 if (rtx_equal_p (val
, c
->value
))
8804 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
/* Recognize EXECUTE/EXECUTE_JUMP insns and return their target-template
   label_ref; jump-type targets store the label inside an if_then_else to
   hide it from jump analysis.  NOTE(review): the leading INSN_P check and
   the NULL_RTX fallthrough return are elided in this extraction.  */
8807 /* Check whether INSN is an execute. Return the label_ref to its
8808 execute target template if so, NULL_RTX otherwise. */
8811 s390_execute_label (rtx insn
)
8814 && GET_CODE (PATTERN (insn
)) == PARALLEL
8815 && GET_CODE (XVECEXP (PATTERN (insn
), 0, 0)) == UNSPEC
8816 && (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
8817 || XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE_JUMP
))
8819 if (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
)
8820 return XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 2);
8823 gcc_assert (JUMP_P (insn
));
8824 /* For jump insns as execute target:
8825 - There is one operand less in the parallel (the
8826 modification register of the execute is always 0).
8827 - The execute target label is wrapped into an
8828 if_then_else in order to hide it from jump analysis. */
8829 return XEXP (XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 0), 0);
/* Find execute target for INSN in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the execute target.  */

static rtx
s390_find_execute (struct constant_pool *pool, rtx insn)
{
  struct constant *c;

  /* Execute templates are keyed by the UID of the insn using them.  */
  for (c = pool->execute; c != NULL; c = c->next)
    if (INSN_UID (insn) == INSN_UID (c->value))
      break;

  /* The template must have been added beforehand.  */
  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* For an execute INSN, extract the execute target template.
   Returns a copy of the insn's pattern with the leading
   modification-register operand stripped.  */

static rtx
s390_execute_target (rtx insn)
{
  rtx pattern = PATTERN (insn);
  gcc_assert (s390_execute_label (insn));

  if (XVECLEN (pattern, 0) == 2)
    {
      /* Only one payload element: the template is element 1.  */
      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
    }
  else
    {
      /* Rebuild the PARALLEL without element 0 (the modification
	 register of the execute).  */
      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
      int i;

      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));

      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
    }

  return pattern;
}
8880 /* Indicate that INSN cannot be duplicated. This is the case for
8881 execute insns that carry a unique label. */
8884 s390_cannot_copy_insn_p (rtx_insn
*insn
)
8886 rtx label
= s390_execute_label (insn
);
8887 return label
&& label
!= const0_rtx
;
/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
   do not emit the pool base label.  All emitted insns are placed
   after the pool placeholder insn, which is removed at the end.  */

static void
s390_dump_pool (struct constant_pool *pool, bool remote_label)
{
  struct constant *c;
  rtx_insn *insn = pool->pool_insn;
  int i;

  /* Switch to rodata section.  */
  insn = emit_insn_after (gen_pool_section_start (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Ensure minimum pool alignment.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Emit pool base label.  */
  if (!remote_label)
    {
      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Dump constants in descending alignment requirement order,
     ensuring proper alignment for every constant.  */
  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = c->next)
      {
	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative
	   references.  */
	rtx value = copy_rtx (c->value);
	if (GET_CODE (value) == CONST
	    && GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
	    && XVECLEN (XEXP (value, 0), 0) == 1)
	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));

	/* Each constant gets its own label so references can compute
	   a distance from the pool start.  */
	insn = emit_label_after (c->label, insn);
	INSN_ADDRESSES_NEW (insn, -1);

	/* Wrap the value so it is output literally as a pool entry.  */
	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
					 gen_rtvec (1, value),
					 UNSPECV_POOL_ENTRY);
	insn = emit_insn_after (value, insn);
	INSN_ADDRESSES_NEW (insn, -1);
      }

  /* Ensure minimum alignment for instructions.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Output in-pool execute template insns.  */
  for (c = pool->execute; c; c = c->next)
    {
      insn = emit_label_after (c->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      insn = emit_insn_after (s390_execute_target (c->value), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Switch back to previous section.  */
  insn = emit_insn_after (gen_pool_section_end (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  insn = emit_barrier_after (insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Remove placeholder insn.  */
  remove_insn (pool->pool_insn);
}
/* Free all memory used by POOL: every constant entry, every execute
   template entry, the insn bitmap, and the pool structure itself.  */

static void
s390_free_pool (struct constant_pool *pool)
{
  struct constant *c, *next;
  int i;

  /* Release the per-mode constant lists.  */
  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = next)
      {
	next = c->next;
	free (c);
      }

  /* Release the execute-template list.  */
  for (c = pool->execute; c; c = next)
    {
      next = c->next;
      free (c);
    }

  BITMAP_FREE (pool->insns);
  free (pool);
}
/* Collect main literal pool.  Return NULL on overflow (pool size
   >= 4096 bytes), in which case the caller must chunkify instead.  */

static struct constant_pool *
s390_mainpool_start (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;

  pool = s390_alloc_pool ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      /* Record the main_pool placeholder insn.  */
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
	{
	  /* There might be two main_pool instructions if base_reg
	     is call-clobbered; one for shrink-wrapped code and one
	     for the rest.  We want to keep the first.  */
	  if (pool->pool_insn)
	    {
	      /* Delete the duplicate and resume scanning before it.  */
	      insn = PREV_INSN (insn);
	      delete_insn (NEXT_INSN (insn));
	      continue;
	    }
	  pool->pool_insn = insn;
	}

      /* Collect every constant-pool reference into POOL.  */
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);
	      s390_add_constant (pool, constant, mode);
	    }
	}

      /* If hot/cold partitioning is enabled we have to make sure that
	 the literal pool is emitted in the same section where the
	 initialization of the literal pool base pointer takes place.
	 emit_pool_after is only used in the non-overflow case on non
	 Z cpus where we can emit the literal pool at the end of the
	 function body within the text section.  */
      if (NOTE_P (insn)
	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
	  && !pool->emit_pool_after)
	pool->emit_pool_after = PREV_INSN (insn);
    }

  gcc_assert (pool->pool_insn || pool->size == 0);

  if (pool->size >= 4096)
    {
      /* We're going to chunkify the pool, so remove the main
	 pool placeholder insn.  */
      remove_insn (pool->pool_insn);

      s390_free_pool (pool);
      pool = NULL;
    }

  /* If the functions ends with the section where the literal pool
     should be emitted set the marker to its end.  */
  if (pool && !pool->emit_pool_after)
    pool->emit_pool_after = get_last_insn ();

  return pool;
}
/* POOL holds the main literal pool as collected by s390_mainpool_start.
   Modify the current function to output the pool constants as well as
   the pool register setup instruction.  Frees POOL when done.  */

static void
s390_mainpool_finish (struct constant_pool *pool)
{
  rtx base_reg = cfun->machine->base_reg;
  rtx set;
  rtx_insn *insn;

  /* If the pool is empty, we're done.  */
  if (pool->size == 0)
    {
      /* We don't actually need a base register after all.  */
      cfun->machine->base_reg = NULL_RTX;

      if (pool->pool_insn)
	remove_insn (pool->pool_insn);
      s390_free_pool (pool);
      return;
    }

  /* We need correct insn addresses.  */
  shorten_branches (get_insns ());

  /* Use a LARL to load the pool register.  The pool is
     located in the .rodata section, so we emit it after the function.  */
  set = gen_main_base_64 (base_reg, pool->label);
  insn = emit_insn_after (set, pool->pool_insn);
  INSN_ADDRESSES_NEW (insn, -1);
  remove_insn (pool->pool_insn);

  /* Re-anchor the pool placeholder at the very end of the function
     so s390_dump_pool emits the constants after the body.  */
  insn = get_last_insn ();
  pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
  INSN_ADDRESSES_NEW (pool->pool_insn, -1);

  s390_dump_pool (pool, 0);

  /* Replace all literal pool references.  */

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      /* Execute insns reference their target template; all
		 others reference an ordinary pool constant.  */
	      if (s390_execute_label (insn))
		addr = s390_find_execute (pool, insn);
	      else
		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      /* Force re-recognition after the pattern change.  */
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Free the pool.  */
  s390_free_pool (pool);
}
9128 /* Chunkify the literal pool. */
9130 #define S390_POOL_CHUNK_MIN 0xc00
9131 #define S390_POOL_CHUNK_MAX 0xe00
/* Split the literal pool into a list of chunks, each small enough that
   every referencing insn stays within displacement range of its chunk
   base.  Returns the chunk list; base-register reload insns are
   inserted before each chunk and at labels reachable from a different
   chunk.  */

static struct constant_pool *
s390_chunkify_start (void)
{
  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
  bitmap far_labels;
  rtx_insn *insn;

  /* We need correct insn addresses.  */

  shorten_branches (get_insns ());

  /* Scan all insns and move literals to pool chunks.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);

	      /* Open a new chunk at the first reference.  */
	      if (!curr_pool)
		curr_pool = s390_start_pool (&pool_list, insn);

	      s390_add_constant (curr_pool, constant, mode);
	      s390_add_pool_insn (curr_pool, insn);
	    }
	}

      /* Remember control-flow relevant insns as belonging to the
	 current chunk so chunk membership can be decided later.  */
      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
	{
	  if (curr_pool)
	    s390_add_pool_insn (curr_pool, insn);
	}

      /* Debug notes never force a chunk break.  */
      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
	continue;

      /* Only break chunks at insns with known addresses.  */
      if (!curr_pool
	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
	continue;

      if (curr_pool->size < S390_POOL_CHUNK_MAX)
	continue;

      /* Chunk is full -- close it here.  */
      s390_end_pool (curr_pool, NULL);
      curr_pool = NULL;
    }

  if (curr_pool)
    s390_end_pool (curr_pool, NULL);

  /* Find all labels that are branched into
     from an insn belonging to a different chunk.  */

  far_labels = BITMAP_ALLOC (NULL);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx_jump_table_data *table;

      /* Labels marked with LABEL_PRESERVE_P can be target
	 of non-local jumps, so we have to mark them.
	 The same holds for named labels.

	 Don't do that, however, if it is the label before
	 a jump table.  */

      if (LABEL_P (insn)
	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
	{
	  rtx_insn *vec_insn = NEXT_INSN (insn);
	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
	}
      /* Check potential targets in a table jump (casesi_jump).  */
      else if (tablejump_p (insn, NULL, &table))
	{
	  rtx vec_pat = PATTERN (table);
	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;

	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
	    {
	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);

	      if (s390_find_pool (pool_list, label)
		  != s390_find_pool (pool_list, insn))
		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
	    }
	}
      /* If we have a direct jump (conditional or unconditional),
	 check all potential targets.  */
      else if (JUMP_P (insn))
	{
	  rtx pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) == SET)
	    {
	      rtx label = JUMP_LABEL (insn);
	      if (label && !ANY_RETURN_P (label))
		{
		  if (s390_find_pool (pool_list, label)
		      != s390_find_pool (pool_list, insn))
		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
		}
	    }
	}
    }

  /* Insert base register reload insns before every pool.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    {
      rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					 curr_pool->label);
      rtx_insn *insn = curr_pool->first_insn;
      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
    }

  /* Insert base register reload insns at every far label.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (LABEL_P (insn)
	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
      {
	struct constant_pool *pool = s390_find_pool (pool_list, insn);
	if (pool)
	  {
	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					       pool->label);
	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
	  }
      }

  BITMAP_FREE (far_labels);

  /* Recompute insn addresses.  */

  init_insn_lengths ();
  shorten_branches (get_insns ());

  return pool_list;
}
/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
   After we have decided to use this list, finish implementing
   all changes to the current function as required: rewrite pool
   references to chunk-relative form, emit each chunk, and free
   the list.  */

static void
s390_chunkify_finish (struct constant_pool *pool_list)
{
  struct constant_pool *curr_pool = NULL;
  rtx_insn *insn;

  /* Replace all literal pool references.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      /* Only insns inside some chunk need rewriting.  */
      curr_pool = s390_find_pool (pool_list, insn);
      if (!curr_pool)
	continue;

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (curr_pool, insn);
	      else
		addr = s390_find_constant (curr_pool,
					   get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      /* Force re-recognition after the pattern change.  */
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Dump out all literal pools.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    s390_dump_pool (curr_pool, 0);

  /* Free pool list.  */

  while (pool_list)
    {
      struct constant_pool *next = pool_list->next;
      s390_free_pool (pool_list);
      pool_list = next;
    }
}
/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.
   Vectors are emitted element-wise, recursing with the inner mode.  */

static void
s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
{
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);

      assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
		     as_a <scalar_float_mode> (mode), align);
      break;

    case MODE_INT:
      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
      /* Keep referenced symbols alive for the linker.  */
      mark_symbol_refs_as_used (exp);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      {
	int i;
	machine_mode inner_mode;
	gcc_assert (GET_CODE (exp) == CONST_VECTOR);

	inner_mode = GET_MODE_INNER (GET_MODE (exp));
	/* Only the first element carries the outer alignment; the
	   rest are naturally aligned on the inner mode.  */
	for (i = 0; i < XVECLEN (exp, 0); i++)
	  s390_output_pool_entry (XVECEXP (exp, 0, i),
				  inner_mode,
				  i == 0
				  ? align
				  : GET_MODE_BITSIZE (inner_mode));
      }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Return an RTL expression representing the value of the return address
   for the frame COUNT steps up from the current frame.  FRAME is the
   frame pointer of that frame.  Returns NULL_RTX when the address
   cannot be determined (no backchain and COUNT > 0).  */

rtx
s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
{
  int offset;
  rtx addr;

  /* Without backchain, we fail for all but the current frame.  */

  if (!TARGET_BACKCHAIN && count > 0)
    return NULL_RTX;

  /* For the current frame, we need to make sure the initial
     value of RETURN_REGNUM is actually saved.  */

  if (count == 0)
    return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);

  /* Packed stack: r14 slot sits two words below the frame end;
     otherwise the register save area is indexed by regno.  */
  if (TARGET_PACKED_STACK)
    offset = -2 * UNITS_PER_LONG;
  else
    offset = RETURN_REGNUM * UNITS_PER_LONG;

  addr = plus_constant (Pmode, frame, offset);
  addr = memory_address (Pmode, addr);
  return gen_rtx_MEM (Pmode, addr);
}
9413 /* Return an RTL expression representing the back chain stored in
9414 the current stack frame. */
9417 s390_back_chain_rtx (void)
9421 gcc_assert (TARGET_BACKCHAIN
);
9423 if (TARGET_PACKED_STACK
)
9424 chain
= plus_constant (Pmode
, stack_pointer_rtx
,
9425 STACK_POINTER_OFFSET
- UNITS_PER_LONG
);
9427 chain
= stack_pointer_rtx
;
9429 chain
= gen_rtx_MEM (Pmode
, chain
);
/* Find first call clobbered register unused in a function.
   This could be used as base register in a leaf function
   or for holding the return address before epilogue.
   NOTE(review): fallback return value reconstructed as 0 --
   confirm against the repository copy.  */

static int
find_unused_clobbered_reg (void)
{
  int i;

  /* r0..r5 are the call-clobbered GPRs.  */
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;

  return 0;
}
/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA
   (a char[32] array) for all hard regs clobbered by SETREG.  Only GPRs
   and FPRs are tracked; other register classes are ignored.  */

static void
s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED,
			void *data)
{
  char *regs_ever_clobbered = (char *)data;
  unsigned int i, regno;
  machine_mode mode = GET_MODE (setreg);

  if (GET_CODE (setreg) == SUBREG)
    {
      rtx inner = SUBREG_REG (setreg);
      /* Only GPR/FPR subregs are of interest.  */
      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
	return;
      regno = subreg_regno (setreg);
    }
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  /* Mark every hard register covered by the (possibly multi-word)
     mode as clobbered.  */
  for (i = regno;
       i < end_hard_regno (mode, regno);
       i++)
    regs_ever_clobbered[i] = 1;
}
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed integer array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */

static void
s390_regs_ever_clobbered (char regs_ever_clobbered[])
{
  basic_block cur_bb;
  rtx_insn *cur_insn;
  unsigned int i;

  memset (regs_ever_clobbered, 0, 32);

  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    {
      for (i = 0; i < 32; i++)
	regs_ever_clobbered[i] = call_used_regs[i];
    }

  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
     this work is done by liveness analysis (mark_regs_live_at_end).
     Special care is needed for functions containing landing pads.  Landing pads
     may use the eh registers, but the code which sets these registers is not
     contained in that function.  Hence s390_regs_ever_clobbered is not able to
     deal with this automatically.  */
  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
      if (crtl->calls_eh_return
	  || (cfun->machine->has_landing_pad_p
	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;

  /* For nonlocal gotos all call-saved registers have to be saved.
     This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
  if (crtl->saves_all_registers)
    for (i = 0; i < 32; i++)
      if (!call_used_regs[i])
	regs_ever_clobbered[i] = 1;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      FOR_BB_INSNS (cur_bb, cur_insn)
	{
	  rtx pat;

	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr: restore from an FPR save slot.  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;

		  /* l / lg: restore from a stack slot.  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}

	      /* lm / lmg: multi-register restore.  */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (cur_insn,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
	}
    }
}
/* Determine the frame area which actually has to be accessed
   in the function epilogue.  The values are stored at the
   given pointers AREA_BOTTOM (address of the lowest used stack
   address) and AREA_TOP (address of the first item which does
   not belong to the stack frame).  */

static void
s390_frame_area (int *area_bottom, int *area_top)
{
  int b, t;

  b = 0;
  t = 0;

  /* GPR restore range.  */
  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      b = (cfun_frame_layout.gprs_offset
	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
      t = b + (cfun_frame_layout.last_restore_gpr
	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
    }

  /* High FPR (f8-f15) save area, 64 bit only.  */
  if (TARGET_64BIT && cfun_save_high_fprs_p)
    {
      b = MIN (b, cfun_frame_layout.f8_offset);
      t = MAX (t, (cfun_frame_layout.f8_offset
		   + cfun_frame_layout.high_fprs * 8));
    }

  /* Call-saved FPRs f4/f6 on 31 bit.  */
  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }
  *area_bottom = b;
  *area_top = t;
}
/* Update gpr_save_slots in the frame layout trying to make use of
   FPRs as GPR save slots.
   This is a helper routine of s390_register_info.  */

static void
s390_register_info_gprtofpr ()
{
  int save_reg_slot = FPR0_REGNUM;
  int i, j;

  /* ldgr/lgdr require z10; TPF and soft-float can't use FPR slots,
     and in non-leaf functions FPRs may be clobbered by callees.  */
  if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* builtin_eh_return needs to be able to modify the return address
     on the stack.  It could also adjust the FPR save slot instead but
     is it worth the trouble?!  */
  if (crtl->calls_eh_return)
    return;

  for (i = 15; i >= 6; i--)
    {
      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
	  break;
	}
      cfun_gpr_save_slot (i) = save_reg_slot++;
    }
}
/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_fpr ()
{
  int i;
  int min_fpr;
  int max_fpr;

  /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
     f0-f4 for 64 bit.  */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;

  /* Save only the argument FPRs not already consumed by named
     arguments, bounded by va_list_fpr_size.  */
  min_fpr = crtl->args.info.fprs;
  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
  if (max_fpr >= FP_ARG_NUM_REG)
    max_fpr = FP_ARG_NUM_REG - 1;

  /* FPR argument regs start at f0.  */
  min_fpr += FPR0_REGNUM;
  max_fpr += FPR0_REGNUM;

  for (i = min_fpr; i <= max_fpr; i++)
    cfun_set_fpr_save (i);
}
/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  /* Save only the argument GPRs not already consumed by named
     arguments, bounded by va_list_gpr_size.  */
  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  /* Widen the save range to cover all vararg regs.  */
  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}
/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as save to stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  /* i == 16 means no stack slot is used at all.  */
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}
/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  /* Global registers are managed by the user; never save them.  */
  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_used_regs[i])
      {
	cfun_set_fpr_save (i);
	if (i >= FPR8_REGNUM)
	  cfun_frame_layout.high_fprs++;
      }

  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	|| (flag_split_stack && cfun->stdarg
	    && (crtl->is_leaf || TARGET_TPF_PROFILING
		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  /* Call-saved GPRs (r6-r15) default to a stack save slot.  */
  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}
9847 /* Return true if REGNO is a global register, but not one
9848 of the special ones that need to be saved/restored in anyway. */
9851 global_not_special_regno_p (int regno
)
9853 return (global_regs
[regno
]
9854 /* These registers are special and need to be
9855 restored in any case. */
9856 && !(regno
== STACK_POINTER_REGNUM
9857 || regno
== RETURN_REGNUM
9858 || regno
== BASE_REGNUM
9859 || (flag_pic
&& regno
== (int)PIC_OFFSET_TABLE_REGNUM
)));
/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);

  s390_regs_ever_clobbered (clobbered_regs);

  /* Global registers do not need to be saved and restored unless it
     is one of our special regs.  (r12, r13, r14, or r15).  */
  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
	/* When expanding builtin_return_addr in ESA mode we do not
	   know whether r14 will later be needed as scratch reg when
	   doing branch splitting.  So the builtin always accesses the
	   r14 save slot and we need to stick to the save/restore
	   decision for r14 even if it turns out that it didn't get
	   clobbered.  */
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  /* Only the argument GPR slots (r0-r5) are cleared; r6-r15 keep the
     decision made before and are only downgraded below.  */
  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}
/* Fill cfun->machine with info about frame of current function:
   save-area offsets (backchain, GPRs, f0/f4/f8 groups) and the total
   frame size, for one of three layouts: fixed stack, packed stack
   with backchain (kernel), or packed stack without backchain.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
		 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
	 save slot must not change.  So we always need a slot for the
	 stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
	 keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
	cfun_frame_layout.f4_offset =
	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR4_REGNUM) +
		     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR0_REGNUM)
		     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
				     - cfun_frame_layout.high_fprs * 8);
    }

  /* High FPRs (f8-f15) are saved in the frame proper, not the
     caller-provided register save area.  */
  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
10027 /* Generate frame layout. Fills in register and frame data for the current
10028 function in cfun->machine. This routine can be called multiple times;
10029 it will re-do the complete frame layout every time. */
10032 s390_init_frame_layout (void)
10034 HOST_WIDE_INT frame_size
;
10037 /* After LRA the frame layout is supposed to be read-only and should
10038 not be re-computed. */
10039 if (reload_completed
)
10044 frame_size
= cfun_frame_layout
.frame_size
;
10046 /* Try to predict whether we'll need the base register. */
10047 base_used
= crtl
->uses_const_pool
10048 || (!DISP_IN_RANGE (frame_size
)
10049 && !CONST_OK_FOR_K (frame_size
));
10051 /* Decide which register to use as literal pool base. In small
10052 leaf functions, try to use an unused call-clobbered register
10053 as base register to avoid save/restore overhead. */
10055 cfun
->machine
->base_reg
= NULL_RTX
;
10061 /* Prefer r5 (most likely to be free). */
10062 for (br
= 5; br
>= 2 && df_regs_ever_live_p (br
); br
--)
10064 cfun
->machine
->base_reg
=
10065 gen_rtx_REG (Pmode
, (br
>= 2) ? br
: BASE_REGNUM
);
10068 s390_register_info ();
10069 s390_frame_info ();
10071 while (frame_size
!= cfun_frame_layout
.frame_size
);
10074 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10075 the TX is nonescaping. A transaction is considered escaping if
10076 there is at least one path from tbegin returning CC0 to the
10077 function exit block without an tend.
10079 The check so far has some limitations:
10080 - only single tbegin/tend BBs are supported
10081 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10082 - when CC is copied to a GPR and the CC0 check is done with the GPR
10083 this is not supported
10087 s390_optimize_nonescaping_tx (void)
10089 const unsigned int CC0
= 1 << 3;
10090 basic_block tbegin_bb
= NULL
;
10091 basic_block tend_bb
= NULL
;
10094 bool result
= true;
10096 rtx_insn
*tbegin_insn
= NULL
;
10098 if (!cfun
->machine
->tbegin_p
)
10101 for (bb_index
= 0; bb_index
< n_basic_blocks_for_fn (cfun
); bb_index
++)
10103 bb
= BASIC_BLOCK_FOR_FN (cfun
, bb_index
);
10108 FOR_BB_INSNS (bb
, insn
)
10110 rtx ite
, cc
, pat
, target
;
10111 unsigned HOST_WIDE_INT mask
;
10113 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
10116 pat
= PATTERN (insn
);
10118 if (GET_CODE (pat
) == PARALLEL
)
10119 pat
= XVECEXP (pat
, 0, 0);
10121 if (GET_CODE (pat
) != SET
10122 || GET_CODE (SET_SRC (pat
)) != UNSPEC_VOLATILE
)
10125 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TBEGIN
)
10129 tbegin_insn
= insn
;
10131 /* Just return if the tbegin doesn't have clobbers. */
10132 if (GET_CODE (PATTERN (insn
)) != PARALLEL
)
10135 if (tbegin_bb
!= NULL
)
10138 /* Find the next conditional jump. */
10139 for (tmp
= NEXT_INSN (insn
);
10141 tmp
= NEXT_INSN (tmp
))
10143 if (reg_set_p (gen_rtx_REG (CCmode
, CC_REGNUM
), tmp
))
10148 ite
= SET_SRC (PATTERN (tmp
));
10149 if (GET_CODE (ite
) != IF_THEN_ELSE
)
10152 cc
= XEXP (XEXP (ite
, 0), 0);
10153 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
))
10154 || GET_MODE (cc
) != CCRAWmode
10155 || GET_CODE (XEXP (XEXP (ite
, 0), 1)) != CONST_INT
)
10158 if (bb
->succs
->length () != 2)
10161 mask
= INTVAL (XEXP (XEXP (ite
, 0), 1));
10162 if (GET_CODE (XEXP (ite
, 0)) == NE
)
10166 target
= XEXP (ite
, 1);
10167 else if (mask
== (CC0
^ 0xf))
10168 target
= XEXP (ite
, 2);
10176 ei
= ei_start (bb
->succs
);
10177 e1
= ei_safe_edge (ei
);
10179 e2
= ei_safe_edge (ei
);
10181 if (e2
->flags
& EDGE_FALLTHRU
)
10184 e1
= ei_safe_edge (ei
);
10187 if (!(e1
->flags
& EDGE_FALLTHRU
))
10190 tbegin_bb
= (target
== pc_rtx
) ? e1
->dest
: e2
->dest
;
10192 if (tmp
== BB_END (bb
))
10197 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TEND
)
10199 if (tend_bb
!= NULL
)
10206 /* Either we successfully remove the FPR clobbers here or we are not
10207 able to do anything for this TX. Both cases don't qualify for
10209 cfun
->machine
->tbegin_p
= false;
10211 if (tbegin_bb
== NULL
|| tend_bb
== NULL
)
10214 calculate_dominance_info (CDI_POST_DOMINATORS
);
10215 result
= dominated_by_p (CDI_POST_DOMINATORS
, tbegin_bb
, tend_bb
);
10216 free_dominance_info (CDI_POST_DOMINATORS
);
10221 PATTERN (tbegin_insn
) = gen_rtx_PARALLEL (VOIDmode
,
10223 XVECEXP (PATTERN (tbegin_insn
), 0, 0),
10224 XVECEXP (PATTERN (tbegin_insn
), 0, 1)));
10225 INSN_CODE (tbegin_insn
) = -1;
10226 df_insn_rescan (tbegin_insn
);
10231 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10232 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10234 static unsigned int
10235 s390_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
10237 return s390_class_max_nregs (REGNO_REG_CLASS (regno
), mode
);
10240 /* Implement TARGET_HARD_REGNO_MODE_OK.
10242 Integer modes <= word size fit into any GPR.
10243 Integer modes > word size fit into successive GPRs, starting with
10244 an even-numbered register.
10245 SImode and DImode fit into FPRs as well.
10247 Floating point modes <= word size fit into any FPR or GPR.
10248 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10249 into any FPR, or an even-odd GPR pair.
10250 TFmode fits only into an even-odd FPR pair.
10252 Complex floating point modes fit either into two FPRs, or into
10253 successive GPRs (again starting with an even number).
10254 TCmode fits only into two successive even-odd FPR pairs.
10256 Condition code modes fit only into the CC register. */
10259 s390_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10261 if (!TARGET_VX
&& VECTOR_NOFP_REGNO_P (regno
))
10264 switch (REGNO_REG_CLASS (regno
))
10267 return ((GET_MODE_CLASS (mode
) == MODE_INT
10268 && s390_class_max_nregs (VEC_REGS
, mode
) == 1)
10270 || (TARGET_VXE
&& mode
== SFmode
)
10271 || s390_vector_mode_supported_p (mode
));
10275 && ((GET_MODE_CLASS (mode
) == MODE_INT
10276 && s390_class_max_nregs (FP_REGS
, mode
) == 1)
10278 || s390_vector_mode_supported_p (mode
)))
10281 if (REGNO_PAIR_OK (regno
, mode
))
10283 if (mode
== SImode
|| mode
== DImode
)
10286 if (FLOAT_MODE_P (mode
) && GET_MODE_CLASS (mode
) != MODE_VECTOR_FLOAT
)
10291 if (FRAME_REGNO_P (regno
) && mode
== Pmode
)
10296 if (REGNO_PAIR_OK (regno
, mode
))
10299 || (mode
!= TFmode
&& mode
!= TCmode
&& mode
!= TDmode
))
10304 if (GET_MODE_CLASS (mode
) == MODE_CC
)
10308 if (REGNO_PAIR_OK (regno
, mode
))
10310 if (mode
== SImode
|| mode
== Pmode
)
10321 /* Implement TARGET_MODES_TIEABLE_P. */
10324 s390_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10326 return ((mode1
== SFmode
|| mode1
== DFmode
)
10327 == (mode2
== SFmode
|| mode2
== DFmode
));
10330 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10333 s390_hard_regno_rename_ok (unsigned int old_reg
, unsigned int new_reg
)
10335 /* Once we've decided upon a register to use as base register, it must
10336 no longer be used for any other purpose. */
10337 if (cfun
->machine
->base_reg
)
10338 if (REGNO (cfun
->machine
->base_reg
) == old_reg
10339 || REGNO (cfun
->machine
->base_reg
) == new_reg
)
10342 /* Prevent regrename from using call-saved regs which haven't
10343 actually been saved. This is necessary since regrename assumes
10344 the backend save/restore decisions are based on
10345 df_regs_ever_live. Since we have our own routine we have to tell
10346 regrename manually about it. */
10347 if (GENERAL_REGNO_P (new_reg
)
10348 && !call_used_regs
[new_reg
]
10349 && cfun_gpr_save_slot (new_reg
) == SAVE_SLOT_NONE
)
10355 /* Return nonzero if register REGNO can be used as a scratch register
10359 s390_hard_regno_scratch_ok (unsigned int regno
)
10361 /* See s390_hard_regno_rename_ok. */
10362 if (GENERAL_REGNO_P (regno
)
10363 && !call_used_regs
[regno
]
10364 && cfun_gpr_save_slot (regno
) == SAVE_SLOT_NONE
)
10370 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10371 code that runs in z/Architecture mode, but conforms to the 31-bit
10372 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10373 bytes are saved across calls, however. */
10376 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
10381 && GET_MODE_SIZE (mode
) > 4
10382 && ((regno
>= 6 && regno
<= 15) || regno
== 32))
10386 && GET_MODE_SIZE (mode
) > 8
10387 && (((TARGET_64BIT
&& regno
>= 24 && regno
<= 31))
10388 || (!TARGET_64BIT
&& (regno
== 18 || regno
== 19))))
10394 /* Maximum number of registers to represent a value of mode MODE
10395 in a register of class RCLASS. */
10398 s390_class_max_nregs (enum reg_class rclass
, machine_mode mode
)
10401 bool reg_pair_required_p
= false;
10407 reg_size
= TARGET_VX
? 16 : 8;
10409 /* TF and TD modes would fit into a VR but we put them into a
10410 register pair since we do not have 128bit FP instructions on
10413 && SCALAR_FLOAT_MODE_P (mode
)
10414 && GET_MODE_SIZE (mode
) >= 16)
10415 reg_pair_required_p
= true;
10417 /* Even if complex types would fit into a single FPR/VR we force
10418 them into a register pair to deal with the parts more easily.
10419 (FIXME: What about complex ints?) */
10420 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
10421 reg_pair_required_p
= true;
10427 reg_size
= UNITS_PER_WORD
;
10431 if (reg_pair_required_p
)
10432 return 2 * ((GET_MODE_SIZE (mode
) / 2 + reg_size
- 1) / reg_size
);
10434 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
10437 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10440 s390_can_change_mode_class (machine_mode from_mode
,
10441 machine_mode to_mode
,
10442 reg_class_t rclass
)
10444 machine_mode small_mode
;
10445 machine_mode big_mode
;
10447 /* V1TF and TF have different representations in vector
10449 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10450 && ((from_mode
== V1TFmode
&& to_mode
== TFmode
)
10451 || (from_mode
== TFmode
&& to_mode
== V1TFmode
)))
10454 if (GET_MODE_SIZE (from_mode
) == GET_MODE_SIZE (to_mode
))
10457 if (GET_MODE_SIZE (from_mode
) < GET_MODE_SIZE (to_mode
))
10459 small_mode
= from_mode
;
10460 big_mode
= to_mode
;
10464 small_mode
= to_mode
;
10465 big_mode
= from_mode
;
10468 /* Values residing in VRs are little-endian style. All modes are
10469 placed left-aligned in an VR. This means that we cannot allow
10470 switching between modes with differing sizes. Also if the vector
10471 facility is available we still place TFmode values in VR register
10472 pairs, since the only instructions we have operating on TFmodes
10473 only deal with register pairs. Therefore we have to allow DFmode
10474 subregs of TFmodes to enable the TFmode splitters. */
10475 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10476 && (GET_MODE_SIZE (small_mode
) < 8
10477 || s390_class_max_nregs (VEC_REGS
, big_mode
) == 1))
10480 /* Likewise for access registers, since they have only half the
10481 word size on 64-bit. */
10482 if (reg_classes_intersect_p (ACCESS_REGS
, rclass
))
10488 /* Return true if we use LRA instead of reload pass. */
10492 return s390_lra_flag
;
10495 /* Return true if register FROM can be eliminated via register TO. */
10498 s390_can_eliminate (const int from
, const int to
)
10500 /* We have not marked the base register as fixed.
10501 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10502 If a function requires the base register, we say here that this
10503 elimination cannot be performed. This will cause reload to free
10504 up the base register (as if it were fixed). On the other hand,
10505 if the current function does *not* require the base register, we
10506 say here the elimination succeeds, which in turn allows reload
10507 to allocate the base register for any other purpose. */
10508 if (from
== BASE_REGNUM
&& to
== BASE_REGNUM
)
10510 s390_init_frame_layout ();
10511 return cfun
->machine
->base_reg
== NULL_RTX
;
10514 /* Everything else must point into the stack frame. */
10515 gcc_assert (to
== STACK_POINTER_REGNUM
10516 || to
== HARD_FRAME_POINTER_REGNUM
);
10518 gcc_assert (from
== FRAME_POINTER_REGNUM
10519 || from
== ARG_POINTER_REGNUM
10520 || from
== RETURN_ADDRESS_POINTER_REGNUM
);
10522 /* Make sure we actually saved the return address. */
10523 if (from
== RETURN_ADDRESS_POINTER_REGNUM
)
10524 if (!crtl
->calls_eh_return
10526 && !cfun_frame_layout
.save_return_addr_p
)
10532 /* Return offset between register FROM and TO initially after prolog. */
10535 s390_initial_elimination_offset (int from
, int to
)
10537 HOST_WIDE_INT offset
;
10539 /* ??? Why are we called for non-eliminable pairs? */
10540 if (!s390_can_eliminate (from
, to
))
10545 case FRAME_POINTER_REGNUM
:
10546 offset
= (get_frame_size()
10547 + STACK_POINTER_OFFSET
10548 + crtl
->outgoing_args_size
);
10551 case ARG_POINTER_REGNUM
:
10552 s390_init_frame_layout ();
10553 offset
= cfun_frame_layout
.frame_size
+ STACK_POINTER_OFFSET
;
10556 case RETURN_ADDRESS_POINTER_REGNUM
:
10557 s390_init_frame_layout ();
10559 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
10561 /* If it turns out that for stdarg nothing went into the reg
10562 save area we also do not need the return address
10564 if (cfun
->stdarg
&& !cfun_save_arg_fprs_p
)
10567 gcc_unreachable ();
10570 /* In order to make the following work it is not necessary for
10571 r14 to have a save slot. It is sufficient if one other GPR
10572 got one. Since the GPRs are always stored without gaps we
10573 are able to calculate where the r14 save slot would
10575 offset
= (cfun_frame_layout
.frame_size
+ cfun_frame_layout
.gprs_offset
+
10576 (RETURN_REGNUM
- cfun_frame_layout
.first_save_gpr_slot
) *
10585 gcc_unreachable ();
10591 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10592 to register BASE. Return generated insn. */
10595 save_fpr (rtx base
, int offset
, int regnum
)
10598 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10600 if (regnum
>= 16 && regnum
<= (16 + FP_ARG_NUM_REG
))
10601 set_mem_alias_set (addr
, get_varargs_alias_set ());
10603 set_mem_alias_set (addr
, get_frame_alias_set ());
10605 return emit_move_insn (addr
, gen_rtx_REG (DFmode
, regnum
));
10608 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10609 to register BASE. Return generated insn. */
10612 restore_fpr (rtx base
, int offset
, int regnum
)
10615 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10616 set_mem_alias_set (addr
, get_frame_alias_set ());
10618 return emit_move_insn (gen_rtx_REG (DFmode
, regnum
), addr
);
10621 /* Generate insn to save registers FIRST to LAST into
10622 the register save area located at offset OFFSET
10623 relative to register BASE. */
10626 save_gprs (rtx base
, int offset
, int first
, int last
)
10628 rtx addr
, insn
, note
;
10631 addr
= plus_constant (Pmode
, base
, offset
);
10632 addr
= gen_rtx_MEM (Pmode
, addr
);
10634 set_mem_alias_set (addr
, get_frame_alias_set ());
10636 /* Special-case single register. */
10640 insn
= gen_movdi (addr
, gen_rtx_REG (Pmode
, first
));
10642 insn
= gen_movsi (addr
, gen_rtx_REG (Pmode
, first
));
10644 if (!global_not_special_regno_p (first
))
10645 RTX_FRAME_RELATED_P (insn
) = 1;
10650 insn
= gen_store_multiple (addr
,
10651 gen_rtx_REG (Pmode
, first
),
10652 GEN_INT (last
- first
+ 1));
10654 if (first
<= 6 && cfun
->stdarg
)
10655 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
10657 rtx mem
= XEXP (XVECEXP (PATTERN (insn
), 0, i
), 0);
10659 if (first
+ i
<= 6)
10660 set_mem_alias_set (mem
, get_varargs_alias_set ());
10663 /* We need to set the FRAME_RELATED flag on all SETs
10664 inside the store-multiple pattern.
10666 However, we must not emit DWARF records for registers 2..5
10667 if they are stored for use by variable arguments ...
10669 ??? Unfortunately, it is not enough to simply not the
10670 FRAME_RELATED flags for those SETs, because the first SET
10671 of the PARALLEL is always treated as if it had the flag
10672 set, even if it does not. Therefore we emit a new pattern
10673 without those registers as REG_FRAME_RELATED_EXPR note. */
10675 if (first
>= 6 && !global_not_special_regno_p (first
))
10677 rtx pat
= PATTERN (insn
);
10679 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
10680 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
10681 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat
,
10683 RTX_FRAME_RELATED_P (XVECEXP (pat
, 0, i
)) = 1;
10685 RTX_FRAME_RELATED_P (insn
) = 1;
10687 else if (last
>= 6)
10691 for (start
= first
>= 6 ? first
: 6; start
<= last
; start
++)
10692 if (!global_not_special_regno_p (start
))
10698 addr
= plus_constant (Pmode
, base
,
10699 offset
+ (start
- first
) * UNITS_PER_LONG
);
10704 note
= gen_movdi (gen_rtx_MEM (Pmode
, addr
),
10705 gen_rtx_REG (Pmode
, start
));
10707 note
= gen_movsi (gen_rtx_MEM (Pmode
, addr
),
10708 gen_rtx_REG (Pmode
, start
));
10709 note
= PATTERN (note
);
10711 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10712 RTX_FRAME_RELATED_P (insn
) = 1;
10717 note
= gen_store_multiple (gen_rtx_MEM (Pmode
, addr
),
10718 gen_rtx_REG (Pmode
, start
),
10719 GEN_INT (last
- start
+ 1));
10720 note
= PATTERN (note
);
10722 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10724 for (i
= 0; i
< XVECLEN (note
, 0); i
++)
10725 if (GET_CODE (XVECEXP (note
, 0, i
)) == SET
10726 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note
,
10728 RTX_FRAME_RELATED_P (XVECEXP (note
, 0, i
)) = 1;
10730 RTX_FRAME_RELATED_P (insn
) = 1;
10736 /* Generate insn to restore registers FIRST to LAST from
10737 the register save area located at offset OFFSET
10738 relative to register BASE. */
10741 restore_gprs (rtx base
, int offset
, int first
, int last
)
10745 addr
= plus_constant (Pmode
, base
, offset
);
10746 addr
= gen_rtx_MEM (Pmode
, addr
);
10747 set_mem_alias_set (addr
, get_frame_alias_set ());
10749 /* Special-case single register. */
10753 insn
= gen_movdi (gen_rtx_REG (Pmode
, first
), addr
);
10755 insn
= gen_movsi (gen_rtx_REG (Pmode
, first
), addr
);
10757 RTX_FRAME_RELATED_P (insn
) = 1;
10761 insn
= gen_load_multiple (gen_rtx_REG (Pmode
, first
),
10763 GEN_INT (last
- first
+ 1));
10764 RTX_FRAME_RELATED_P (insn
) = 1;
10768 /* Return insn sequence to load the GOT register. */
10771 s390_load_got (void)
10775 /* We cannot use pic_offset_table_rtx here since we use this
10776 function also for non-pic if __tls_get_offset is called and in
10777 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10779 rtx got_rtx
= gen_rtx_REG (Pmode
, 12);
10783 emit_move_insn (got_rtx
, s390_got_symbol ());
10785 insns
= get_insns ();
10790 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10791 and the change to the stack pointer. */
10794 s390_emit_stack_tie (void)
10796 rtx mem
= gen_frame_mem (BLKmode
,
10797 gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
));
10799 emit_insn (gen_stack_tie (mem
));
10802 /* Copy GPRS into FPR save slots. */
10805 s390_save_gprs_to_fprs (void)
10809 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
10812 for (i
= 6; i
< 16; i
++)
10814 if (FP_REGNO_P (cfun_gpr_save_slot (i
)))
10817 emit_move_insn (gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
)),
10818 gen_rtx_REG (DImode
, i
));
10819 RTX_FRAME_RELATED_P (insn
) = 1;
10820 /* This prevents dwarf2cfi from interpreting the set. Doing
10821 so it might emit def_cfa_register infos setting an FPR as
10823 add_reg_note (insn
, REG_CFA_REGISTER
, copy_rtx (PATTERN (insn
)));
10828 /* Restore GPRs from FPR save slots. */
10831 s390_restore_gprs_from_fprs (void)
10835 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
10838 /* Restore the GPRs starting with the stack pointer. That way the
10839 stack pointer already has its original value when it comes to
10840 restoring the hard frame pointer. So we can set the cfa reg back
10841 to the stack pointer. */
10842 for (i
= STACK_POINTER_REGNUM
; i
>= 6; i
--)
10846 if (!FP_REGNO_P (cfun_gpr_save_slot (i
)))
10849 rtx fpr
= gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
));
10851 if (i
== STACK_POINTER_REGNUM
)
10852 insn
= emit_insn (gen_stack_restore_from_fpr (fpr
));
10854 insn
= emit_move_insn (gen_rtx_REG (DImode
, i
), fpr
);
10856 df_set_regs_ever_live (i
, true);
10857 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, i
));
10859 /* If either the stack pointer or the frame pointer get restored
10860 set the CFA value to its value at function start. Doing this
10861 for the frame pointer results in .cfi_def_cfa_register 15
10862 what is ok since if the stack pointer got modified it has
10863 been restored already. */
10864 if (i
== STACK_POINTER_REGNUM
|| i
== HARD_FRAME_POINTER_REGNUM
)
10865 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10866 plus_constant (Pmode
, stack_pointer_rtx
,
10867 STACK_POINTER_OFFSET
));
10868 RTX_FRAME_RELATED_P (insn
) = 1;
10873 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10878 const pass_data pass_data_s390_early_mach
=
10880 RTL_PASS
, /* type */
10881 "early_mach", /* name */
10882 OPTGROUP_NONE
, /* optinfo_flags */
10883 TV_MACH_DEP
, /* tv_id */
10884 0, /* properties_required */
10885 0, /* properties_provided */
10886 0, /* properties_destroyed */
10887 0, /* todo_flags_start */
10888 ( TODO_df_verify
| TODO_df_finish
), /* todo_flags_finish */
10891 class pass_s390_early_mach
: public rtl_opt_pass
10894 pass_s390_early_mach (gcc::context
*ctxt
)
10895 : rtl_opt_pass (pass_data_s390_early_mach
, ctxt
)
10898 /* opt_pass methods: */
10899 virtual unsigned int execute (function
*);
10901 }; // class pass_s390_early_mach
10904 pass_s390_early_mach::execute (function
*fun
)
10908 /* Try to get rid of the FPR clobbers. */
10909 s390_optimize_nonescaping_tx ();
10911 /* Re-compute register info. */
10912 s390_register_info ();
10914 /* If we're using a base register, ensure that it is always valid for
10915 the first non-prologue instruction. */
10916 if (fun
->machine
->base_reg
)
10917 emit_insn_at_entry (gen_main_pool (fun
->machine
->base_reg
));
10919 /* Annotate all constant pool references to let the scheduler know
10920 they implicitly use the base register. */
10921 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
10924 annotate_constant_pool_refs (insn
);
10925 df_insn_rescan (insn
);
10930 } // anon namespace
10933 make_pass_s390_early_mach (gcc::context
*ctxt
)
10935 return new pass_s390_early_mach (ctxt
);
10938 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10939 - push too big immediates to the literal pool and annotate the refs
10940 - emit frame related notes for stack pointer changes. */
10943 s390_prologue_plus_offset (rtx target
, rtx reg
, rtx offset
, bool frame_related_p
)
10946 rtx orig_offset
= offset
;
10948 gcc_assert (REG_P (target
));
10949 gcc_assert (REG_P (reg
));
10950 gcc_assert (CONST_INT_P (offset
));
10952 if (offset
== const0_rtx
) /* lr/lgr */
10954 insn
= emit_move_insn (target
, reg
);
10956 else if (DISP_IN_RANGE (INTVAL (offset
))) /* la */
10958 insn
= emit_move_insn (target
, gen_rtx_PLUS (Pmode
, reg
,
10963 if (!satisfies_constraint_K (offset
) /* ahi/aghi */
10965 || (!satisfies_constraint_Op (offset
) /* alfi/algfi */
10966 && !satisfies_constraint_On (offset
)))) /* slfi/slgfi */
10967 offset
= force_const_mem (Pmode
, offset
);
10971 insn
= emit_move_insn (target
, reg
);
10972 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
10975 insn
= emit_insn (gen_add2_insn (target
, offset
));
10977 if (!CONST_INT_P (offset
))
10979 annotate_constant_pool_refs (insn
);
10981 if (frame_related_p
)
10982 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10983 gen_rtx_SET (target
,
10984 gen_rtx_PLUS (Pmode
, target
,
10989 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
10991 /* If this is a stack adjustment and we are generating a stack clash
10992 prologue, then add a REG_STACK_CHECK note to signal that this insn
10993 should be left alone. */
10994 if (flag_stack_clash_protection
&& target
== stack_pointer_rtx
)
10995 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
11000 /* Emit a compare instruction with a volatile memory access as stack
11001 probe. It does not waste store tags and does not clobber any
11002 registers apart from the condition code. */
11004 s390_emit_stack_probe (rtx addr
)
11006 rtx mem
= gen_rtx_MEM (Pmode
, addr
);
11007 MEM_VOLATILE_P (mem
) = 1;
11008 emit_insn (gen_probe_stack (mem
));
11011 /* Use a runtime loop if we have to emit more probes than this. */
11012 #define MIN_UNROLL_PROBES 3
11014 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11015 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11016 probe relative to the stack pointer.
11018 Note that SIZE is negative.
11020 The return value is true if TEMP_REG has been clobbered. */
11022 allocate_stack_space (rtx size
, HOST_WIDE_INT last_probe_offset
,
11025 bool temp_reg_clobbered_p
= false;
11026 HOST_WIDE_INT probe_interval
11027 = 1 << param_stack_clash_protection_probe_interval
;
11028 HOST_WIDE_INT guard_size
11029 = 1 << param_stack_clash_protection_guard_size
;
11031 if (flag_stack_clash_protection
)
11033 if (last_probe_offset
+ -INTVAL (size
) < guard_size
)
11034 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
11037 rtx offset
= GEN_INT (probe_interval
- UNITS_PER_LONG
);
11038 HOST_WIDE_INT rounded_size
= -INTVAL (size
) & -probe_interval
;
11039 HOST_WIDE_INT num_probes
= rounded_size
/ probe_interval
;
11040 HOST_WIDE_INT residual
= -INTVAL (size
) - rounded_size
;
11042 if (num_probes
< MIN_UNROLL_PROBES
)
11044 /* Emit unrolled probe statements. */
11046 for (unsigned int i
= 0; i
< num_probes
; i
++)
11048 s390_prologue_plus_offset (stack_pointer_rtx
,
11050 GEN_INT (-probe_interval
), true);
11051 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11055 if (num_probes
> 0)
11056 last_probe_offset
= INTVAL (offset
);
11057 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
11061 /* Emit a loop probing the pages. */
11063 rtx_code_label
*loop_start_label
= gen_label_rtx ();
11065 /* From now on temp_reg will be the CFA register. */
11066 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11067 GEN_INT (-rounded_size
), true);
11068 emit_label (loop_start_label
);
11070 s390_prologue_plus_offset (stack_pointer_rtx
,
11072 GEN_INT (-probe_interval
), false);
11073 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11076 emit_cmp_and_jump_insns (stack_pointer_rtx
, temp_reg
,
11078 Pmode
, 1, loop_start_label
);
11080 /* Without this make_edges ICEes. */
11081 JUMP_LABEL (get_last_insn ()) = loop_start_label
;
11082 LABEL_NUSES (loop_start_label
) = 1;
11084 /* That's going to be a NOP since stack pointer and
11085 temp_reg are supposed to be the same here. We just
11086 emit it to set the CFA reg back to r15. */
11087 s390_prologue_plus_offset (stack_pointer_rtx
, temp_reg
,
11089 temp_reg_clobbered_p
= true;
11090 last_probe_offset
= INTVAL (offset
);
11091 dump_stack_clash_frame_info (PROBE_LOOP
, residual
!= 0);
11094 /* Handle any residual allocation request. */
11095 s390_prologue_plus_offset (stack_pointer_rtx
,
11097 GEN_INT (-residual
), true);
11098 last_probe_offset
+= residual
;
11099 if (last_probe_offset
>= probe_interval
)
11100 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11103 - UNITS_PER_LONG
)));
11105 return temp_reg_clobbered_p
;
11109 /* Subtract frame size from stack pointer. */
11110 s390_prologue_plus_offset (stack_pointer_rtx
,
11114 return temp_reg_clobbered_p
;
11117 /* Expand the prologue into a bunch of separate insns. */
11120 s390_emit_prologue (void)
11128 /* Choose best register to use for temp use within prologue.
11129 TPF with profiling must avoid the register 14 - the tracing function
11130 needs the original contents of r14 to be preserved. */
11132 if (!has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
)
11134 && !TARGET_TPF_PROFILING
)
11135 temp_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11136 else if (flag_split_stack
&& cfun
->stdarg
)
11137 temp_reg
= gen_rtx_REG (Pmode
, 12);
11139 temp_reg
= gen_rtx_REG (Pmode
, 1);
11141 /* When probing for stack-clash mitigation, we have to track the distance
11142 between the stack pointer and closest known reference.
11144 Most of the time we have to make a worst case assumption. The
11145 only exception is when TARGET_BACKCHAIN is active, in which case
11146 we know *sp (offset 0) was written. */
11147 HOST_WIDE_INT probe_interval
11148 = 1 << param_stack_clash_protection_probe_interval
;
11149 HOST_WIDE_INT last_probe_offset
11150 = (TARGET_BACKCHAIN
11151 ? (TARGET_PACKED_STACK
? STACK_POINTER_OFFSET
- UNITS_PER_LONG
: 0)
11152 : probe_interval
- (STACK_BOUNDARY
/ UNITS_PER_WORD
));
11154 s390_save_gprs_to_fprs ();
11156 /* Save call saved gprs. */
11157 if (cfun_frame_layout
.first_save_gpr
!= -1)
11159 insn
= save_gprs (stack_pointer_rtx
,
11160 cfun_frame_layout
.gprs_offset
+
11161 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11162 - cfun_frame_layout
.first_save_gpr_slot
),
11163 cfun_frame_layout
.first_save_gpr
,
11164 cfun_frame_layout
.last_save_gpr
);
11166 /* This is not 100% correct. If we have more than one register saved,
11167 then LAST_PROBE_OFFSET can move even closer to sp. */
11169 = (cfun_frame_layout
.gprs_offset
+
11170 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11171 - cfun_frame_layout
.first_save_gpr_slot
));
11176 /* Dummy insn to mark literal pool slot. */
11178 if (cfun
->machine
->base_reg
)
11179 emit_insn (gen_main_pool (cfun
->machine
->base_reg
));
11181 offset
= cfun_frame_layout
.f0_offset
;
11183 /* Save f0 and f2. */
11184 for (i
= FPR0_REGNUM
; i
<= FPR0_REGNUM
+ 1; i
++)
11186 if (cfun_fpr_save_p (i
))
11188 save_fpr (stack_pointer_rtx
, offset
, i
);
11189 if (offset
< last_probe_offset
)
11190 last_probe_offset
= offset
;
11193 else if (!TARGET_PACKED_STACK
|| cfun
->stdarg
)
11197 /* Save f4 and f6. */
11198 offset
= cfun_frame_layout
.f4_offset
;
11199 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11201 if (cfun_fpr_save_p (i
))
11203 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11204 if (offset
< last_probe_offset
)
11205 last_probe_offset
= offset
;
11208 /* If f4 and f6 are call clobbered they are saved due to
11209 stdargs and therefore are not frame related. */
11210 if (!call_used_regs
[i
])
11211 RTX_FRAME_RELATED_P (insn
) = 1;
11213 else if (!TARGET_PACKED_STACK
|| call_used_regs
[i
])
11217 if (TARGET_PACKED_STACK
11218 && cfun_save_high_fprs_p
11219 && cfun_frame_layout
.f8_offset
+ cfun_frame_layout
.high_fprs
* 8 > 0)
11221 offset
= (cfun_frame_layout
.f8_offset
11222 + (cfun_frame_layout
.high_fprs
- 1) * 8);
11224 for (i
= FPR15_REGNUM
; i
>= FPR8_REGNUM
&& offset
>= 0; i
--)
11225 if (cfun_fpr_save_p (i
))
11227 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11228 if (offset
< last_probe_offset
)
11229 last_probe_offset
= offset
;
11231 RTX_FRAME_RELATED_P (insn
) = 1;
11234 if (offset
>= cfun_frame_layout
.f8_offset
)
11238 if (!TARGET_PACKED_STACK
)
11239 next_fpr
= cfun_save_high_fprs_p
? FPR15_REGNUM
: 0;
11241 if (flag_stack_usage_info
)
11242 current_function_static_stack_size
= cfun_frame_layout
.frame_size
;
11244 /* Decrement stack pointer. */
11246 if (cfun_frame_layout
.frame_size
> 0)
11248 rtx frame_off
= GEN_INT (-cfun_frame_layout
.frame_size
);
11249 rtx_insn
*stack_pointer_backup_loc
;
11250 bool temp_reg_clobbered_p
;
11252 if (s390_stack_size
)
11254 HOST_WIDE_INT stack_guard
;
11256 if (s390_stack_guard
)
11257 stack_guard
= s390_stack_guard
;
11260 /* If no value for stack guard is provided the smallest power of 2
11261 larger than the current frame size is chosen. */
11263 while (stack_guard
< cfun_frame_layout
.frame_size
)
11267 if (cfun_frame_layout
.frame_size
>= s390_stack_size
)
11269 warning (0, "frame size of function %qs is %wd"
11270 " bytes exceeding user provided stack limit of "
11272 "An unconditional trap is added.",
11273 current_function_name(), cfun_frame_layout
.frame_size
,
11275 emit_insn (gen_trap ());
11280 /* stack_guard has to be smaller than s390_stack_size.
11281 Otherwise we would emit an AND with zero which would
11282 not match the test under mask pattern. */
11283 if (stack_guard
>= s390_stack_size
)
11285 warning (0, "frame size of function %qs is %wd"
11286 " bytes which is more than half the stack size. "
11287 "The dynamic check would not be reliable. "
11288 "No check emitted for this function.",
11289 current_function_name(),
11290 cfun_frame_layout
.frame_size
);
11294 HOST_WIDE_INT stack_check_mask
= ((s390_stack_size
- 1)
11295 & ~(stack_guard
- 1));
11297 rtx t
= gen_rtx_AND (Pmode
, stack_pointer_rtx
,
11298 GEN_INT (stack_check_mask
));
11300 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode
,
11302 t
, const0_rtx
, const0_rtx
));
11304 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode
,
11306 t
, const0_rtx
, const0_rtx
));
11311 if (s390_warn_framesize
> 0
11312 && cfun_frame_layout
.frame_size
>= s390_warn_framesize
)
11313 warning (0, "frame size of %qs is %wd bytes",
11314 current_function_name (), cfun_frame_layout
.frame_size
);
11316 if (s390_warn_dynamicstack_p
&& cfun
->calls_alloca
)
11317 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11319 /* Save the location where we could backup the incoming stack
11321 stack_pointer_backup_loc
= get_last_insn ();
11323 temp_reg_clobbered_p
= allocate_stack_space (frame_off
, last_probe_offset
,
11326 if (TARGET_BACKCHAIN
|| next_fpr
)
11328 if (temp_reg_clobbered_p
)
11330 /* allocate_stack_space had to make use of temp_reg and
11331 we need it to hold a backup of the incoming stack
11332 pointer. Calculate back that value from the current
11334 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11335 GEN_INT (cfun_frame_layout
.frame_size
),
11340 /* allocate_stack_space didn't actually required
11341 temp_reg. Insert the stack pointer backup insn
11342 before the stack pointer decrement code - knowing now
11343 that the value will survive. */
11344 emit_insn_after (gen_move_insn (temp_reg
, stack_pointer_rtx
),
11345 stack_pointer_backup_loc
);
11349 /* Set backchain. */
11351 if (TARGET_BACKCHAIN
)
11353 if (cfun_frame_layout
.backchain_offset
)
11354 addr
= gen_rtx_MEM (Pmode
,
11355 plus_constant (Pmode
, stack_pointer_rtx
,
11356 cfun_frame_layout
.backchain_offset
));
11358 addr
= gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
11359 set_mem_alias_set (addr
, get_frame_alias_set ());
11360 insn
= emit_insn (gen_move_insn (addr
, temp_reg
));
11363 /* If we support non-call exceptions (e.g. for Java),
11364 we need to make sure the backchain pointer is set up
11365 before any possibly trapping memory access. */
11366 if (TARGET_BACKCHAIN
&& cfun
->can_throw_non_call_exceptions
)
11368 addr
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
11369 emit_clobber (addr
);
11372 else if (flag_stack_clash_protection
)
11373 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
11375 /* Save fprs 8 - 15 (64 bit ABI). */
11377 if (cfun_save_high_fprs_p
&& next_fpr
)
11379 /* If the stack might be accessed through a different register
11380 we have to make sure that the stack pointer decrement is not
11381 moved below the use of the stack slots. */
11382 s390_emit_stack_tie ();
11384 insn
= emit_insn (gen_add2_insn (temp_reg
,
11385 GEN_INT (cfun_frame_layout
.f8_offset
)));
11389 for (i
= FPR8_REGNUM
; i
<= next_fpr
; i
++)
11390 if (cfun_fpr_save_p (i
))
11392 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
,
11393 cfun_frame_layout
.frame_size
11394 + cfun_frame_layout
.f8_offset
11397 insn
= save_fpr (temp_reg
, offset
, i
);
11399 RTX_FRAME_RELATED_P (insn
) = 1;
11400 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
11401 gen_rtx_SET (gen_rtx_MEM (DFmode
, addr
),
11402 gen_rtx_REG (DFmode
, i
)));
11406 /* Set frame pointer, if needed. */
11408 if (frame_pointer_needed
)
11410 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
11411 RTX_FRAME_RELATED_P (insn
) = 1;
11414 /* Set up got pointer, if needed. */
11416 if (flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
11418 rtx_insn
*insns
= s390_load_got ();
11420 for (rtx_insn
*insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
11421 annotate_constant_pool_refs (insn
);
11426 #if TARGET_TPF != 0
11427 if (TARGET_TPF_PROFILING
)
11429 /* Generate a BAS instruction to serve as a function entry
11430 intercept to facilitate the use of tracing algorithms located
11431 at the branch target. */
11432 emit_insn (gen_prologue_tpf (
11433 GEN_INT (s390_tpf_trace_hook_prologue_check
),
11434 GEN_INT (s390_tpf_trace_hook_prologue_target
)));
11436 /* Emit a blockage here so that all code lies between the
11437 profiling mechanisms. */
11438 emit_insn (gen_blockage ());
11443 /* Expand the epilogue into a bunch of separate insns. */
11446 s390_emit_epilogue (bool sibcall
)
11448 rtx frame_pointer
, return_reg
= NULL_RTX
, cfa_restores
= NULL_RTX
;
11449 int area_bottom
, area_top
, offset
= 0;
11453 #if TARGET_TPF != 0
11454 if (TARGET_TPF_PROFILING
)
11456 /* Generate a BAS instruction to serve as a function entry
11457 intercept to facilitate the use of tracing algorithms located
11458 at the branch target. */
11460 /* Emit a blockage here so that all code lies between the
11461 profiling mechanisms. */
11462 emit_insn (gen_blockage ());
11464 emit_insn (gen_epilogue_tpf (
11465 GEN_INT (s390_tpf_trace_hook_epilogue_check
),
11466 GEN_INT (s390_tpf_trace_hook_epilogue_target
)));
11470 /* Check whether to use frame or stack pointer for restore. */
11472 frame_pointer
= (frame_pointer_needed
11473 ? hard_frame_pointer_rtx
: stack_pointer_rtx
);
11475 s390_frame_area (&area_bottom
, &area_top
);
11477 /* Check whether we can access the register save area.
11478 If not, increment the frame pointer as required. */
11480 if (area_top
<= area_bottom
)
11482 /* Nothing to restore. */
11484 else if (DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_bottom
)
11485 && DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_top
- 1))
11487 /* Area is in range. */
11488 offset
= cfun_frame_layout
.frame_size
;
11493 rtx frame_off
, cfa
;
11495 offset
= area_bottom
< 0 ? -area_bottom
: 0;
11496 frame_off
= GEN_INT (cfun_frame_layout
.frame_size
- offset
);
11498 cfa
= gen_rtx_SET (frame_pointer
,
11499 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11500 if (DISP_IN_RANGE (INTVAL (frame_off
)))
11504 set
= gen_rtx_SET (frame_pointer
,
11505 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11506 insn
= emit_insn (set
);
11510 if (!CONST_OK_FOR_K (INTVAL (frame_off
)))
11511 frame_off
= force_const_mem (Pmode
, frame_off
);
11513 insn
= emit_insn (gen_add2_insn (frame_pointer
, frame_off
));
11514 annotate_constant_pool_refs (insn
);
11516 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, cfa
);
11517 RTX_FRAME_RELATED_P (insn
) = 1;
11520 /* Restore call saved fprs. */
11524 if (cfun_save_high_fprs_p
)
11526 next_offset
= cfun_frame_layout
.f8_offset
;
11527 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
11529 if (cfun_fpr_save_p (i
))
11531 restore_fpr (frame_pointer
,
11532 offset
+ next_offset
, i
);
11534 = alloc_reg_note (REG_CFA_RESTORE
,
11535 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11544 next_offset
= cfun_frame_layout
.f4_offset
;
11546 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11548 if (cfun_fpr_save_p (i
))
11550 restore_fpr (frame_pointer
,
11551 offset
+ next_offset
, i
);
11553 = alloc_reg_note (REG_CFA_RESTORE
,
11554 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11557 else if (!TARGET_PACKED_STACK
)
11563 /* Restore call saved gprs. */
11565 if (cfun_frame_layout
.first_restore_gpr
!= -1)
11570 /* Check for global register and save them
11571 to stack location from where they get restored. */
11573 for (i
= cfun_frame_layout
.first_restore_gpr
;
11574 i
<= cfun_frame_layout
.last_restore_gpr
;
11577 if (global_not_special_regno_p (i
))
11579 addr
= plus_constant (Pmode
, frame_pointer
,
11580 offset
+ cfun_frame_layout
.gprs_offset
11581 + (i
- cfun_frame_layout
.first_save_gpr_slot
)
11583 addr
= gen_rtx_MEM (Pmode
, addr
);
11584 set_mem_alias_set (addr
, get_frame_alias_set ());
11585 emit_move_insn (addr
, gen_rtx_REG (Pmode
, i
));
11589 = alloc_reg_note (REG_CFA_RESTORE
,
11590 gen_rtx_REG (Pmode
, i
), cfa_restores
);
11593 /* Fetch return address from stack before load multiple,
11594 this will do good for scheduling.
11596 Only do this if we already decided that r14 needs to be
11597 saved to a stack slot. (And not just because r14 happens to
11598 be in between two GPRs which need saving.) Otherwise it
11599 would be difficult to take that decision back in
11600 s390_optimize_prologue.
11602 This optimization is only helpful on in-order machines. */
11604 && cfun_gpr_save_slot (RETURN_REGNUM
) == SAVE_SLOT_STACK
11605 && s390_tune
<= PROCESSOR_2097_Z10
)
11607 int return_regnum
= find_unused_clobbered_reg();
11609 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11611 && return_regnum
== INDIRECT_BRANCH_THUNK_REGNUM
))
11613 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM
!= 4);
11616 return_reg
= gen_rtx_REG (Pmode
, return_regnum
);
11618 addr
= plus_constant (Pmode
, frame_pointer
,
11619 offset
+ cfun_frame_layout
.gprs_offset
11621 - cfun_frame_layout
.first_save_gpr_slot
)
11623 addr
= gen_rtx_MEM (Pmode
, addr
);
11624 set_mem_alias_set (addr
, get_frame_alias_set ());
11625 emit_move_insn (return_reg
, addr
);
11627 /* Once we did that optimization we have to make sure
11628 s390_optimize_prologue does not try to remove the store
11629 of r14 since we will not be able to find the load issued
11631 cfun_frame_layout
.save_return_addr_p
= true;
11634 insn
= restore_gprs (frame_pointer
,
11635 offset
+ cfun_frame_layout
.gprs_offset
11636 + (cfun_frame_layout
.first_restore_gpr
11637 - cfun_frame_layout
.first_save_gpr_slot
)
11639 cfun_frame_layout
.first_restore_gpr
,
11640 cfun_frame_layout
.last_restore_gpr
);
11641 insn
= emit_insn (insn
);
11642 REG_NOTES (insn
) = cfa_restores
;
11643 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11644 plus_constant (Pmode
, stack_pointer_rtx
,
11645 STACK_POINTER_OFFSET
));
11646 RTX_FRAME_RELATED_P (insn
) = 1;
11649 s390_restore_gprs_from_fprs ();
11653 if (!return_reg
&& !s390_can_use_return_insn ())
11654 /* We planned to emit (return), be we are not allowed to. */
11655 return_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11658 /* Emit (return) and (use). */
11659 emit_jump_insn (gen_return_use (return_reg
));
11661 /* The fact that RETURN_REGNUM is used is already reflected by
11662 EPILOGUE_USES. Emit plain (return). */
11663 emit_jump_insn (gen_return ());
11667 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11670 s300_set_up_by_prologue (hard_reg_set_container
*regs
)
11672 if (cfun
->machine
->base_reg
11673 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
11674 SET_HARD_REG_BIT (regs
->set
, REGNO (cfun
->machine
->base_reg
));
11677 /* -fsplit-stack support. */
11679 /* A SYMBOL_REF for __morestack. */
11680 static GTY(()) rtx morestack_ref
;
11682 /* When using -fsplit-stack, the allocation routines set a field in
11683 the TCB to the bottom of the stack plus this much space, measured
11686 #define SPLIT_STACK_AVAILABLE 1024
11688 /* Emit the parmblock for __morestack into .rodata section. It
11689 consists of 3 pointer size entries:
11691 - size of stack arguments
11692 - offset between parm block and __morestack return label */
11695 s390_output_split_stack_data (rtx parm_block
, rtx call_done
,
11696 rtx frame_size
, rtx args_size
)
11698 rtx ops
[] = { parm_block
, call_done
};
11700 switch_to_section (targetm
.asm_out
.function_rodata_section
11701 (current_function_decl
));
11704 output_asm_insn (".align\t8", NULL
);
11706 output_asm_insn (".align\t4", NULL
);
11708 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
11709 CODE_LABEL_NUMBER (parm_block
));
11712 output_asm_insn (".quad\t%0", &frame_size
);
11713 output_asm_insn (".quad\t%0", &args_size
);
11714 output_asm_insn (".quad\t%1-%0", ops
);
11718 output_asm_insn (".long\t%0", &frame_size
);
11719 output_asm_insn (".long\t%0", &args_size
);
11720 output_asm_insn (".long\t%1-%0", ops
);
11723 switch_to_section (current_function_section ());
11726 /* Emit -fsplit-stack prologue, which goes before the regular function
11730 s390_expand_split_stack_prologue (void)
11732 rtx r1
, guard
, cc
= NULL
;
11734 /* Offset from thread pointer to __private_ss. */
11735 int psso
= TARGET_64BIT
? 0x38 : 0x20;
11736 /* Pointer size in bytes. */
11737 /* Frame size and argument size - the two parameters to __morestack. */
11738 HOST_WIDE_INT frame_size
= cfun_frame_layout
.frame_size
;
11739 /* Align argument size to 8 bytes - simplifies __morestack code. */
11740 HOST_WIDE_INT args_size
= crtl
->args
.size
>= 0
11741 ? ((crtl
->args
.size
+ 7) & ~7)
11743 /* Label to be called by __morestack. */
11744 rtx_code_label
*call_done
= NULL
;
11745 rtx_code_label
*parm_base
= NULL
;
11748 gcc_assert (flag_split_stack
&& reload_completed
);
11750 r1
= gen_rtx_REG (Pmode
, 1);
11752 /* If no stack frame will be allocated, don't do anything. */
11755 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11757 /* If va_start is used, just use r15. */
11758 emit_move_insn (r1
,
11759 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11760 GEN_INT (STACK_POINTER_OFFSET
)));
11766 if (morestack_ref
== NULL_RTX
)
11768 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11769 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
11770 | SYMBOL_FLAG_FUNCTION
);
11773 if (CONST_OK_FOR_K (frame_size
) || CONST_OK_FOR_Op (frame_size
))
11775 /* If frame_size will fit in an add instruction, do a stack space
11776 check, and only call __morestack if there's not enough space. */
11778 /* Get thread pointer. r1 is the only register we can always destroy - r0
11779 could contain a static chain (and cannot be used to address memory
11780 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11781 emit_insn (gen_get_thread_pointer (Pmode
, r1
));
11782 /* Aim at __private_ss. */
11783 guard
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, r1
, psso
));
11785 /* If less that 1kiB used, skip addition and compare directly with
11787 if (frame_size
> SPLIT_STACK_AVAILABLE
)
11789 emit_move_insn (r1
, guard
);
11791 emit_insn (gen_adddi3 (r1
, r1
, GEN_INT (frame_size
)));
11793 emit_insn (gen_addsi3 (r1
, r1
, GEN_INT (frame_size
)));
11797 /* Compare the (maybe adjusted) guard with the stack pointer. */
11798 cc
= s390_emit_compare (LT
, stack_pointer_rtx
, guard
);
11801 call_done
= gen_label_rtx ();
11802 parm_base
= gen_label_rtx ();
11803 LABEL_NUSES (parm_base
)++;
11804 LABEL_NUSES (call_done
)++;
11806 /* %r1 = litbase. */
11807 insn
= emit_move_insn (r1
, gen_rtx_LABEL_REF (VOIDmode
, parm_base
));
11808 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11809 LABEL_NUSES (parm_base
)++;
11811 /* Now, we need to call __morestack. It has very special calling
11812 conventions: it preserves param/return/static chain registers for
11813 calling main function body, and looks for its own parameters at %r1. */
11815 tmp
= gen_split_stack_cond_call (Pmode
,
11819 GEN_INT (frame_size
),
11820 GEN_INT (args_size
),
11823 tmp
= gen_split_stack_call (Pmode
,
11827 GEN_INT (frame_size
),
11828 GEN_INT (args_size
));
11830 insn
= emit_jump_insn (tmp
);
11831 JUMP_LABEL (insn
) = call_done
;
11832 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11833 add_reg_note (insn
, REG_LABEL_OPERAND
, call_done
);
11837 /* Mark the jump as very unlikely to be taken. */
11838 add_reg_br_prob_note (insn
,
11839 profile_probability::very_unlikely ());
11841 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11843 /* If va_start is used, and __morestack was not called, just use
11845 emit_move_insn (r1
,
11846 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11847 GEN_INT (STACK_POINTER_OFFSET
)));
11855 /* __morestack will call us here. */
11857 emit_label (call_done
);
11860 /* We may have to tell the dataflow pass that the split stack prologue
11861 is initializing a register. */
11864 s390_live_on_entry (bitmap regs
)
11866 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11868 gcc_assert (flag_split_stack
);
11869 bitmap_set_bit (regs
, 1);
11873 /* Return true if the function can use simple_return to return outside
11874 of a shrink-wrapped region. At present shrink-wrapping is supported
11878 s390_can_use_simple_return_insn (void)
11883 /* Return true if the epilogue is guaranteed to contain only a return
11884 instruction and if a direct return can therefore be used instead.
11885 One of the main advantages of using direct return instructions
11886 is that we can then use conditional returns. */
11889 s390_can_use_return_insn (void)
11893 if (!reload_completed
)
11899 if (TARGET_TPF_PROFILING
)
11902 for (i
= 0; i
< 16; i
++)
11903 if (cfun_gpr_save_slot (i
) != SAVE_SLOT_NONE
)
11906 /* For 31 bit this is not covered by the frame_size check below
11907 since f4, f6 are saved in the register save area without needing
11908 additional stack space. */
11910 && (cfun_fpr_save_p (FPR4_REGNUM
) || cfun_fpr_save_p (FPR6_REGNUM
)))
11913 if (cfun
->machine
->base_reg
11914 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
11917 return cfun_frame_layout
.frame_size
== 0;
11920 /* The VX ABI differs for vararg functions. Therefore we need the
11921 prototype of the callee to be available when passing vector type
11923 static const char *
11924 s390_invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
11926 return ((TARGET_VX_ABI
11928 && VECTOR_TYPE_P (TREE_TYPE (val
))
11929 && (funcdecl
== NULL_TREE
11930 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
11931 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
11932 ? N_("vector argument passed to unprototyped function")
11937 /* Return the size in bytes of a function argument of
11938 type TYPE and/or mode MODE. At least one of TYPE or
11939 MODE must be specified. */
11942 s390_function_arg_size (machine_mode mode
, const_tree type
)
11945 return int_size_in_bytes (type
);
11947 /* No type info available for some library calls ... */
11948 if (mode
!= BLKmode
)
11949 return GET_MODE_SIZE (mode
);
11951 /* If we have neither type nor mode, abort */
11952 gcc_unreachable ();
11955 /* Return true if a function argument of type TYPE and mode MODE
11956 is to be passed in a vector register, if available. */
11959 s390_function_arg_vector (machine_mode mode
, const_tree type
)
11961 if (!TARGET_VX_ABI
)
11964 if (s390_function_arg_size (mode
, type
) > 16)
11967 /* No type info available for some library calls ... */
11969 return VECTOR_MODE_P (mode
);
11971 /* The ABI says that record types with a single member are treated
11972 just like that member would be. */
11973 int empty_base_seen
= 0;
11974 const_tree orig_type
= type
;
11975 while (TREE_CODE (type
) == RECORD_TYPE
)
11977 tree field
, single
= NULL_TREE
;
11979 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
11981 if (TREE_CODE (field
) != FIELD_DECL
)
11984 if (DECL_FIELD_ABI_IGNORED (field
))
11986 if (lookup_attribute ("no_unique_address",
11987 DECL_ATTRIBUTES (field
)))
11988 empty_base_seen
|= 2;
11990 empty_base_seen
|= 1;
11994 if (single
== NULL_TREE
)
11995 single
= TREE_TYPE (field
);
12000 if (single
== NULL_TREE
)
12004 /* If the field declaration adds extra byte due to
12005 e.g. padding this is not accepted as vector type. */
12006 if (int_size_in_bytes (single
) <= 0
12007 || int_size_in_bytes (single
) != int_size_in_bytes (type
))
12013 if (!VECTOR_TYPE_P (type
))
12016 if (warn_psabi
&& empty_base_seen
)
12018 static unsigned last_reported_type_uid
;
12019 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (orig_type
));
12020 if (uid
!= last_reported_type_uid
)
12022 const char *url
= CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
12023 last_reported_type_uid
= uid
;
12024 if (empty_base_seen
& 1)
12025 inform (input_location
,
12026 "parameter passing for argument of type %qT when C++17 "
12027 "is enabled changed to match C++14 %{in GCC 10.1%}",
12030 inform (input_location
,
12031 "parameter passing for argument of type %qT with "
12032 "%<[[no_unique_address]]%> members changed "
12033 "%{in GCC 10.1%}", orig_type
, url
);
12039 /* Return true if a function argument of type TYPE and mode MODE
12040 is to be passed in a floating-point register, if available. */
12043 s390_function_arg_float (machine_mode mode
, const_tree type
)
12045 if (s390_function_arg_size (mode
, type
) > 8)
12048 /* Soft-float changes the ABI: no floating-point registers are used. */
12049 if (TARGET_SOFT_FLOAT
)
12052 /* No type info available for some library calls ... */
12054 return mode
== SFmode
|| mode
== DFmode
|| mode
== SDmode
|| mode
== DDmode
;
12056 /* The ABI says that record types with a single member are treated
12057 just like that member would be. */
12058 int empty_base_seen
= 0;
12059 const_tree orig_type
= type
;
12060 while (TREE_CODE (type
) == RECORD_TYPE
)
12062 tree field
, single
= NULL_TREE
;
12064 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
12066 if (TREE_CODE (field
) != FIELD_DECL
)
12068 if (DECL_FIELD_ABI_IGNORED (field
))
12070 if (lookup_attribute ("no_unique_address",
12071 DECL_ATTRIBUTES (field
)))
12072 empty_base_seen
|= 2;
12074 empty_base_seen
|= 1;
12078 if (single
== NULL_TREE
)
12079 single
= TREE_TYPE (field
);
12084 if (single
== NULL_TREE
)
12090 if (TREE_CODE (type
) != REAL_TYPE
)
12093 if (warn_psabi
&& empty_base_seen
)
12095 static unsigned last_reported_type_uid
;
12096 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (orig_type
));
12097 if (uid
!= last_reported_type_uid
)
12099 const char *url
= CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
12100 last_reported_type_uid
= uid
;
12101 if (empty_base_seen
& 1)
12102 inform (input_location
,
12103 "parameter passing for argument of type %qT when C++17 "
12104 "is enabled changed to match C++14 %{in GCC 10.1%}",
12107 inform (input_location
,
12108 "parameter passing for argument of type %qT with "
12109 "%<[[no_unique_address]]%> members changed "
12110 "%{in GCC 10.1%}", orig_type
, url
);
12117 /* Return true if a function argument of type TYPE and mode MODE
12118 is to be passed in an integer register, or a pair of integer
12119 registers, if available. */
12122 s390_function_arg_integer (machine_mode mode
, const_tree type
)
12124 int size
= s390_function_arg_size (mode
, type
);
12128 /* No type info available for some library calls ... */
12130 return GET_MODE_CLASS (mode
) == MODE_INT
12131 || (TARGET_SOFT_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
));
12133 /* We accept small integral (and similar) types. */
12134 if (INTEGRAL_TYPE_P (type
)
12135 || POINTER_TYPE_P (type
)
12136 || TREE_CODE (type
) == NULLPTR_TYPE
12137 || TREE_CODE (type
) == OFFSET_TYPE
12138 || (TARGET_SOFT_FLOAT
&& TREE_CODE (type
) == REAL_TYPE
))
12141 /* We also accept structs of size 1, 2, 4, 8 that are not
12142 passed in floating-point registers. */
12143 if (AGGREGATE_TYPE_P (type
)
12144 && exact_log2 (size
) >= 0
12145 && !s390_function_arg_float (mode
, type
))
12151 /* Return 1 if a function argument ARG is to be passed by reference.
12152 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12153 are passed by value, all other structures (and complex numbers) are
12154 passed by reference. */
12157 s390_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
12159 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12161 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12167 if (tree type
= arg
.type
)
12169 if (AGGREGATE_TYPE_P (type
) && exact_log2 (size
) < 0)
12172 if (TREE_CODE (type
) == COMPLEX_TYPE
12173 || TREE_CODE (type
) == VECTOR_TYPE
)
12180 /* Update the data in CUM to advance over argument ARG. */
12183 s390_function_arg_advance (cumulative_args_t cum_v
,
12184 const function_arg_info
&arg
)
12186 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12188 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12190 /* We are called for unnamed vector stdarg arguments which are
12191 passed on the stack. In this case this hook does not have to
12192 do anything since stack arguments are tracked by common
12198 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12202 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12204 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12205 cum
->gprs
+= ((size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
);
12208 gcc_unreachable ();
12211 /* Define where to put the arguments to a function.
12212 Value is zero to push the argument on the stack,
12213 or a hard register in which to store the argument.
12215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12216 the preceding args and about the function being called.
12217 ARG is a description of the argument.
12219 On S/390, we use general purpose registers 2 through 6 to
12220 pass integer, pointer, and certain structure arguments, and
12221 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12222 to pass floating point arguments. All remaining arguments
12223 are pushed to the stack. */
12226 s390_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
12228 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12231 s390_check_type_for_vector_abi (arg
.type
, true, false);
12233 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12235 /* Vector arguments being part of the ellipsis are passed on the
12237 if (!arg
.named
|| (cum
->vrs
+ 1 > VEC_ARG_NUM_REG
))
12240 return gen_rtx_REG (arg
.mode
, cum
->vrs
+ FIRST_VEC_ARG_REGNO
);
12242 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12244 if (cum
->fprs
+ 1 > FP_ARG_NUM_REG
)
12247 return gen_rtx_REG (arg
.mode
, cum
->fprs
+ 16);
12249 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12251 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12252 int n_gprs
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12254 if (cum
->gprs
+ n_gprs
> GP_ARG_NUM_REG
)
12256 else if (n_gprs
== 1 || UNITS_PER_WORD
== UNITS_PER_LONG
)
12257 return gen_rtx_REG (arg
.mode
, cum
->gprs
+ 2);
12258 else if (n_gprs
== 2)
12260 rtvec p
= rtvec_alloc (2);
12263 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 2),
12266 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 3),
12269 return gen_rtx_PARALLEL (arg
.mode
, p
);
12273 /* After the real arguments, expand_call calls us once again with an
12274 end marker. Whatever we return here is passed as operand 2 to the
12277 We don't need this feature ... */
12278 else if (arg
.end_marker_p ())
12281 gcc_unreachable ();
12284 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12285 left-justified when placed on the stack during parameter passing. */
12287 static pad_direction
12288 s390_function_arg_padding (machine_mode mode
, const_tree type
)
12290 if (s390_function_arg_vector (mode
, type
))
12293 return default_function_arg_padding (mode
, type
);
12296 /* Return true if return values of type TYPE should be returned
12297 in a memory buffer whose address is passed by the caller as
12298 hidden first argument. */
12301 s390_return_in_memory (const_tree type
, const_tree fundecl ATTRIBUTE_UNUSED
)
12303 /* We accept small integral (and similar) types. */
12304 if (INTEGRAL_TYPE_P (type
)
12305 || POINTER_TYPE_P (type
)
12306 || TREE_CODE (type
) == OFFSET_TYPE
12307 || TREE_CODE (type
) == REAL_TYPE
)
12308 return int_size_in_bytes (type
) > 8;
12310 /* vector types which fit into a VR. */
12312 && VECTOR_TYPE_P (type
)
12313 && int_size_in_bytes (type
) <= 16)
12316 /* Aggregates and similar constructs are always returned
12318 if (AGGREGATE_TYPE_P (type
)
12319 || TREE_CODE (type
) == COMPLEX_TYPE
12320 || VECTOR_TYPE_P (type
))
12323 /* ??? We get called on all sorts of random stuff from
12324 aggregate_value_p. We can't abort, but it's not clear
12325 what's safe to return. Pretend it's a struct I guess. */
12329 /* Function arguments and return values are promoted to word size. */
12331 static machine_mode
12332 s390_promote_function_mode (const_tree type
, machine_mode mode
,
12334 const_tree fntype ATTRIBUTE_UNUSED
,
12335 int for_return ATTRIBUTE_UNUSED
)
12337 if (INTEGRAL_MODE_P (mode
)
12338 && GET_MODE_SIZE (mode
) < UNITS_PER_LONG
)
12340 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
12341 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
12348 /* Define where to return a (scalar) value of type RET_TYPE.
12349 If RET_TYPE is null, define where to return a (scalar)
12350 value of mode MODE from a libcall. */
12353 s390_function_and_libcall_value (machine_mode mode
,
12354 const_tree ret_type
,
12355 const_tree fntype_or_decl
,
12356 bool outgoing ATTRIBUTE_UNUSED
)
12358 /* For vector return types it is important to use the RET_TYPE
12359 argument whenever available since the middle-end might have
12360 changed the mode to a scalar mode. */
12361 bool vector_ret_type_p
= ((ret_type
&& VECTOR_TYPE_P (ret_type
))
12362 || (!ret_type
&& VECTOR_MODE_P (mode
)));
12364 /* For normal functions perform the promotion as
12365 promote_function_mode would do. */
12368 int unsignedp
= TYPE_UNSIGNED (ret_type
);
12369 mode
= promote_function_mode (ret_type
, mode
, &unsignedp
,
12370 fntype_or_decl
, 1);
12373 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
12374 || SCALAR_FLOAT_MODE_P (mode
)
12375 || (TARGET_VX_ABI
&& vector_ret_type_p
));
12376 gcc_assert (GET_MODE_SIZE (mode
) <= (TARGET_VX_ABI
? 16 : 8));
12378 if (TARGET_VX_ABI
&& vector_ret_type_p
)
12379 return gen_rtx_REG (mode
, FIRST_VEC_ARG_REGNO
);
12380 else if (TARGET_HARD_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
))
12381 return gen_rtx_REG (mode
, 16);
12382 else if (GET_MODE_SIZE (mode
) <= UNITS_PER_LONG
12383 || UNITS_PER_LONG
== UNITS_PER_WORD
)
12384 return gen_rtx_REG (mode
, 2);
12385 else if (GET_MODE_SIZE (mode
) == 2 * UNITS_PER_LONG
)
12387 /* This case is triggered when returning a 64 bit value with
12388 -m31 -mzarch. Although the value would fit into a single
12389 register it has to be forced into a 32 bit register pair in
12390 order to match the ABI. */
12391 rtvec p
= rtvec_alloc (2);
12394 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 2), const0_rtx
);
12396 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 3), GEN_INT (4));
12398 return gen_rtx_PARALLEL (mode
, p
);
12401 gcc_unreachable ();
12404 /* Define where to return a scalar return value of type RET_TYPE. */
12407 s390_function_value (const_tree ret_type
, const_tree fn_decl_or_type
,
12410 return s390_function_and_libcall_value (TYPE_MODE (ret_type
), ret_type
,
12411 fn_decl_or_type
, outgoing
);
12414 /* Define where to return a scalar libcall return value of mode
12418 s390_libcall_value (machine_mode mode
, const_rtx fun ATTRIBUTE_UNUSED
)
12420 return s390_function_and_libcall_value (mode
, NULL_TREE
,
12425 /* Create and return the va_list datatype.
12427 On S/390, va_list is an array type equivalent to
12429 typedef struct __va_list_tag
12433 void *__overflow_arg_area;
12434 void *__reg_save_area;
12437 where __gpr and __fpr hold the number of general purpose
12438 or floating point arguments used up to now, respectively,
12439 __overflow_arg_area points to the stack location of the
12440 next argument passed on the stack, and __reg_save_area
12441 always points to the start of the register area in the
12442 call frame of the current function. The function prologue
12443 saves all registers used for argument passing into this
12444 area if the function uses variable arguments. */
12447 s390_build_builtin_va_list (void)
12449 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
12451 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
12454 build_decl (BUILTINS_LOCATION
,
12455 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
12457 f_gpr
= build_decl (BUILTINS_LOCATION
,
12458 FIELD_DECL
, get_identifier ("__gpr"),
12459 long_integer_type_node
);
12460 f_fpr
= build_decl (BUILTINS_LOCATION
,
12461 FIELD_DECL
, get_identifier ("__fpr"),
12462 long_integer_type_node
);
12463 f_ovf
= build_decl (BUILTINS_LOCATION
,
12464 FIELD_DECL
, get_identifier ("__overflow_arg_area"),
12466 f_sav
= build_decl (BUILTINS_LOCATION
,
12467 FIELD_DECL
, get_identifier ("__reg_save_area"),
12470 va_list_gpr_counter_field
= f_gpr
;
12471 va_list_fpr_counter_field
= f_fpr
;
12473 DECL_FIELD_CONTEXT (f_gpr
) = record
;
12474 DECL_FIELD_CONTEXT (f_fpr
) = record
;
12475 DECL_FIELD_CONTEXT (f_ovf
) = record
;
12476 DECL_FIELD_CONTEXT (f_sav
) = record
;
12478 TYPE_STUB_DECL (record
) = type_decl
;
12479 TYPE_NAME (record
) = type_decl
;
12480 TYPE_FIELDS (record
) = f_gpr
;
12481 DECL_CHAIN (f_gpr
) = f_fpr
;
12482 DECL_CHAIN (f_fpr
) = f_ovf
;
12483 DECL_CHAIN (f_ovf
) = f_sav
;
12485 layout_type (record
);
12487 /* The correct type is an array type of one element. */
12488 return build_array_type (record
, build_index_type (size_zero_node
));
12491 /* Implement va_start by filling the va_list structure VALIST.
12492 STDARG_P is always true, and ignored.
12493 NEXTARG points to the first anonymous stack argument.
12495 The following global variables are used to initialize
12496 the va_list structure:
12499 holds number of gprs and fprs used for named arguments.
12500 crtl->args.arg_offset_rtx:
12501 holds the offset of the first anonymous stack argument
12502 (relative to the virtual arg pointer). */
12505 s390_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
12507 HOST_WIDE_INT n_gpr
, n_fpr
;
12509 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12510 tree gpr
, fpr
, ovf
, sav
, t
;
12512 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12513 f_fpr
= DECL_CHAIN (f_gpr
);
12514 f_ovf
= DECL_CHAIN (f_fpr
);
12515 f_sav
= DECL_CHAIN (f_ovf
);
12517 valist
= build_simple_mem_ref (valist
);
12518 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12519 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12520 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12521 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12523 /* Count number of gp and fp argument registers used. */
12525 n_gpr
= crtl
->args
.info
.gprs
;
12526 n_fpr
= crtl
->args
.info
.fprs
;
12528 if (cfun
->va_list_gpr_size
)
12530 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
12531 build_int_cst (NULL_TREE
, n_gpr
));
12532 TREE_SIDE_EFFECTS (t
) = 1;
12533 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12536 if (cfun
->va_list_fpr_size
)
12538 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
12539 build_int_cst (NULL_TREE
, n_fpr
));
12540 TREE_SIDE_EFFECTS (t
) = 1;
12541 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12544 if (flag_split_stack
12545 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
12547 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12552 reg
= gen_reg_rtx (Pmode
);
12553 cfun
->machine
->split_stack_varargs_pointer
= reg
;
12556 emit_move_insn (reg
, gen_rtx_REG (Pmode
, 1));
12557 seq
= get_insns ();
12560 push_topmost_sequence ();
12561 emit_insn_after (seq
, entry_of_function ());
12562 pop_topmost_sequence ();
12565 /* Find the overflow area.
12566 FIXME: This currently is too pessimistic when the vector ABI is
12567 enabled. In that case we *always* set up the overflow area
12569 if (n_gpr
+ cfun
->va_list_gpr_size
> GP_ARG_NUM_REG
12570 || n_fpr
+ cfun
->va_list_fpr_size
> FP_ARG_NUM_REG
12573 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12574 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
12576 t
= make_tree (TREE_TYPE (ovf
), cfun
->machine
->split_stack_varargs_pointer
);
12578 off
= INTVAL (crtl
->args
.arg_offset_rtx
);
12579 off
= off
< 0 ? 0 : off
;
12580 if (TARGET_DEBUG_ARG
)
12581 fprintf (stderr
, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12582 (int)n_gpr
, (int)n_fpr
, off
);
12584 t
= fold_build_pointer_plus_hwi (t
, off
);
12586 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
12587 TREE_SIDE_EFFECTS (t
) = 1;
12588 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12591 /* Find the register save area. */
12592 if ((cfun
->va_list_gpr_size
&& n_gpr
< GP_ARG_NUM_REG
)
12593 || (cfun
->va_list_fpr_size
&& n_fpr
< FP_ARG_NUM_REG
))
12595 t
= make_tree (TREE_TYPE (sav
), return_address_pointer_rtx
);
12596 t
= fold_build_pointer_plus_hwi (t
, -RETURN_REGNUM
* UNITS_PER_LONG
);
12598 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
12599 TREE_SIDE_EFFECTS (t
) = 1;
12600 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12604 /* Implement va_arg by updating the va_list structure
12605 VALIST as required to retrieve an argument of type
12606 TYPE, and returning that argument.
12608 Generates code equivalent to:
12610 if (integral value) {
12611 if (size <= 4 && args.gpr < 5 ||
12612 size > 4 && args.gpr < 4 )
12613 ret = args.reg_save_area[args.gpr+8]
12615 ret = *args.overflow_arg_area++;
12616 } else if (vector value) {
12617 ret = *args.overflow_arg_area;
12618 args.overflow_arg_area += size / 8;
12619 } else if (float value) {
12621 ret = args.reg_save_area[args.fpr+64]
12623 ret = *args.overflow_arg_area++;
12624 } else if (aggregate value) {
12626 ret = *args.reg_save_area[args.gpr]
12628 ret = **args.overflow_arg_area++;
12632 s390_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
12633 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
12635 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12636 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
12637 int indirect_p
, size
, n_reg
, sav_ofs
, sav_scale
, max_reg
;
12638 tree lab_false
, lab_over
= NULL_TREE
;
12639 tree addr
= create_tmp_var (ptr_type_node
, "addr");
12640 bool left_align_p
; /* How a value < UNITS_PER_LONG is aligned within
12643 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12644 f_fpr
= DECL_CHAIN (f_gpr
);
12645 f_ovf
= DECL_CHAIN (f_fpr
);
12646 f_sav
= DECL_CHAIN (f_ovf
);
12648 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12649 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12650 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12652 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12653 both appear on a lhs. */
12654 valist
= unshare_expr (valist
);
12655 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12657 size
= int_size_in_bytes (type
);
12659 s390_check_type_for_vector_abi (type
, true, false);
12661 if (pass_va_arg_by_reference (type
))
12663 if (TARGET_DEBUG_ARG
)
12665 fprintf (stderr
, "va_arg: aggregate type");
12669 /* Aggregates are passed by reference. */
12674 /* kernel stack layout on 31 bit: It is assumed here that no padding
12675 will be added by s390_frame_info because for va_args always an even
12676 number of gprs has to be saved r15-r2 = 14 regs. */
12677 sav_ofs
= 2 * UNITS_PER_LONG
;
12678 sav_scale
= UNITS_PER_LONG
;
12679 size
= UNITS_PER_LONG
;
12680 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12681 left_align_p
= false;
12683 else if (s390_function_arg_vector (TYPE_MODE (type
), type
))
12685 if (TARGET_DEBUG_ARG
)
12687 fprintf (stderr
, "va_arg: vector type");
12697 left_align_p
= true;
12699 else if (s390_function_arg_float (TYPE_MODE (type
), type
))
12701 if (TARGET_DEBUG_ARG
)
12703 fprintf (stderr
, "va_arg: float type");
12707 /* FP args go in FP registers, if present. */
12711 sav_ofs
= 16 * UNITS_PER_LONG
;
12713 max_reg
= FP_ARG_NUM_REG
- n_reg
;
12714 left_align_p
= false;
12718 if (TARGET_DEBUG_ARG
)
12720 fprintf (stderr
, "va_arg: other type");
12724 /* Otherwise into GP registers. */
12727 n_reg
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12729 /* kernel stack layout on 31 bit: It is assumed here that no padding
12730 will be added by s390_frame_info because for va_args always an even
12731 number of gprs has to be saved r15-r2 = 14 regs. */
12732 sav_ofs
= 2 * UNITS_PER_LONG
;
12734 if (size
< UNITS_PER_LONG
)
12735 sav_ofs
+= UNITS_PER_LONG
- size
;
12737 sav_scale
= UNITS_PER_LONG
;
12738 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12739 left_align_p
= false;
12742 /* Pull the value out of the saved registers ... */
12744 if (reg
!= NULL_TREE
)
12747 if (reg > ((typeof (reg))max_reg))
12750 addr = sav + sav_ofs + reg * save_scale;
12757 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
12758 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
12760 t
= fold_convert (TREE_TYPE (reg
), size_int (max_reg
));
12761 t
= build2 (GT_EXPR
, boolean_type_node
, reg
, t
);
12762 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
12763 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
12764 gimplify_and_add (t
, pre_p
);
12766 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
12767 u
= build2 (MULT_EXPR
, TREE_TYPE (reg
), reg
,
12768 fold_convert (TREE_TYPE (reg
), size_int (sav_scale
)));
12769 t
= fold_build_pointer_plus (t
, u
);
12771 gimplify_assign (addr
, t
, pre_p
);
12773 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
12775 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
12778 /* ... Otherwise out of the overflow area. */
12781 if (size
< UNITS_PER_LONG
&& !left_align_p
)
12782 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
- size
);
12784 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
12786 gimplify_assign (addr
, t
, pre_p
);
12788 if (size
< UNITS_PER_LONG
&& left_align_p
)
12789 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
);
12791 t
= fold_build_pointer_plus_hwi (t
, size
);
12793 gimplify_assign (ovf
, t
, pre_p
);
12795 if (reg
!= NULL_TREE
)
12796 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
12799 /* Increment register save count. */
12803 u
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (reg
), reg
,
12804 fold_convert (TREE_TYPE (reg
), size_int (n_reg
)));
12805 gimplify_and_add (u
, pre_p
);
12810 t
= build_pointer_type_for_mode (build_pointer_type (type
),
12812 addr
= fold_convert (t
, addr
);
12813 addr
= build_va_arg_indirect_ref (addr
);
12817 t
= build_pointer_type_for_mode (type
, ptr_mode
, true);
12818 addr
= fold_convert (t
, addr
);
12821 return build_va_arg_indirect_ref (addr
);
12824 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12826 DEST - Register location where CC will be stored.
12827 TDB - Pointer to a 256 byte area where to store the transaction.
12828 diagnostic block. NULL if TDB is not needed.
12829 RETRY - Retry count value. If non-NULL a retry loop for CC2
12831 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12832 of the tbegin instruction pattern. */
12835 s390_expand_tbegin (rtx dest
, rtx tdb
, rtx retry
, bool clobber_fprs_p
)
12837 rtx retry_plus_two
= gen_reg_rtx (SImode
);
12838 rtx retry_reg
= gen_reg_rtx (SImode
);
12839 rtx_code_label
*retry_label
= NULL
;
12841 if (retry
!= NULL_RTX
)
12843 emit_move_insn (retry_reg
, retry
);
12844 emit_insn (gen_addsi3 (retry_plus_two
, retry_reg
, const2_rtx
));
12845 emit_insn (gen_addsi3 (retry_reg
, retry_reg
, const1_rtx
));
12846 retry_label
= gen_label_rtx ();
12847 emit_label (retry_label
);
12850 if (clobber_fprs_p
)
12853 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12856 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12860 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12863 emit_move_insn (dest
, gen_rtx_UNSPEC (SImode
,
12864 gen_rtvec (1, gen_rtx_REG (CCRAWmode
,
12866 UNSPEC_CC_TO_INT
));
12867 if (retry
!= NULL_RTX
)
12869 const int CC0
= 1 << 3;
12870 const int CC1
= 1 << 2;
12871 const int CC3
= 1 << 0;
12873 rtx count
= gen_reg_rtx (SImode
);
12874 rtx_code_label
*leave_label
= gen_label_rtx ();
12876 /* Exit for success and permanent failures. */
12877 jump
= s390_emit_jump (leave_label
,
12878 gen_rtx_EQ (VOIDmode
,
12879 gen_rtx_REG (CCRAWmode
, CC_REGNUM
),
12880 gen_rtx_CONST_INT (VOIDmode
, CC0
| CC1
| CC3
)));
12881 LABEL_NUSES (leave_label
) = 1;
12883 /* CC2 - transient failure. Perform retry with ppa. */
12884 emit_move_insn (count
, retry_plus_two
);
12885 emit_insn (gen_subsi3 (count
, count
, retry_reg
));
12886 emit_insn (gen_tx_assist (count
));
12887 jump
= emit_jump_insn (gen_doloop_si64 (retry_label
,
12890 JUMP_LABEL (jump
) = retry_label
;
12891 LABEL_NUSES (retry_label
) = 1;
12892 emit_label (leave_label
);
12897 /* Return the decl for the target specific builtin with the function
12901 s390_builtin_decl (unsigned fcode
, bool initialized_p ATTRIBUTE_UNUSED
)
12903 if (fcode
>= S390_BUILTIN_MAX
)
12904 return error_mark_node
;
12906 return s390_builtin_decls
[fcode
];
12909 /* We call mcount before the function prologue. So a profiled leaf
12910 function should stay a leaf function. */
12913 s390_keep_leaf_when_profiled ()
12918 /* Output assembly code for the trampoline template to
12921 On S/390, we use gpr 1 internally in the trampoline code;
12922 gpr 0 is used to hold the static chain. */
12925 s390_asm_trampoline_template (FILE *file
)
12928 op
[0] = gen_rtx_REG (Pmode
, 0);
12929 op
[1] = gen_rtx_REG (Pmode
, 1);
12933 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
12934 output_asm_insn ("lmg\t%0,%1,14(%1)", op
); /* 6 byte */
12935 output_asm_insn ("br\t%1", op
); /* 2 byte */
12936 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 10));
12940 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
12941 output_asm_insn ("lm\t%0,%1,6(%1)", op
); /* 4 byte */
12942 output_asm_insn ("br\t%1", op
); /* 2 byte */
12943 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 8));
12947 /* Emit RTL insns to initialize the variable parts of a trampoline.
12948 FNADDR is an RTX for the address of the function's pure code.
12949 CXT is an RTX for the static chain value for the function. */
12952 s390_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
12954 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
12957 emit_block_move (m_tramp
, assemble_trampoline_template (),
12958 GEN_INT (2 * UNITS_PER_LONG
), BLOCK_OP_NORMAL
);
12960 mem
= adjust_address (m_tramp
, Pmode
, 2 * UNITS_PER_LONG
);
12961 emit_move_insn (mem
, cxt
);
12962 mem
= adjust_address (m_tramp
, Pmode
, 3 * UNITS_PER_LONG
);
12963 emit_move_insn (mem
, fnaddr
);
12967 output_asm_nops (const char *user
, int hw
)
12969 asm_fprintf (asm_out_file
, "\t# NOPs for %s (%d halfwords)\n", user
, hw
);
12974 output_asm_insn ("brcl\t0,0", NULL
);
12979 output_asm_insn ("bc\t0,0", NULL
);
12984 output_asm_insn ("bcr\t0,0", NULL
);
12990 /* Output assembler code to FILE to increment profiler label # LABELNO
12991 for profiling a function entry. */
12994 s390_function_profiler (FILE *file
, int labelno
)
12999 ASM_GENERATE_INTERNAL_LABEL (label
, "LP", labelno
);
13001 fprintf (file
, "# function profiler \n");
13003 op
[0] = gen_rtx_REG (Pmode
, RETURN_REGNUM
);
13004 op
[1] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
13005 op
[1] = gen_rtx_MEM (Pmode
, plus_constant (Pmode
, op
[1], UNITS_PER_LONG
));
13006 op
[7] = GEN_INT (UNITS_PER_LONG
);
13008 op
[2] = gen_rtx_REG (Pmode
, 1);
13009 op
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
13010 SYMBOL_REF_FLAGS (op
[3]) = SYMBOL_FLAG_LOCAL
;
13012 op
[4] = gen_rtx_SYMBOL_REF (Pmode
, flag_fentry
? "__fentry__" : "_mcount");
13015 op
[4] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[4]), UNSPEC_PLT
);
13016 op
[4] = gen_rtx_CONST (Pmode
, op
[4]);
13019 if (flag_record_mcount
)
13020 fprintf (file
, "1:\n");
13024 if (flag_nop_mcount
)
13025 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13026 else if (cfun
->static_chain_decl
)
13027 warning (OPT_Wcannot_profile
, "nested functions cannot be profiled "
13028 "with %<-mfentry%> on s390");
13030 output_asm_insn ("brasl\t0,%4", op
);
13032 else if (TARGET_64BIT
)
13034 if (flag_nop_mcount
)
13035 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
13036 /* brasl */ 3 + /* lg */ 3);
13039 output_asm_insn ("stg\t%0,%1", op
);
13040 if (flag_dwarf2_cfi_asm
)
13041 output_asm_insn (".cfi_rel_offset\t%0,%7", op
);
13042 output_asm_insn ("larl\t%2,%3", op
);
13043 output_asm_insn ("brasl\t%0,%4", op
);
13044 output_asm_insn ("lg\t%0,%1", op
);
13045 if (flag_dwarf2_cfi_asm
)
13046 output_asm_insn (".cfi_restore\t%0", op
);
13051 if (flag_nop_mcount
)
13052 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
13053 /* brasl */ 3 + /* l */ 2);
13056 output_asm_insn ("st\t%0,%1", op
);
13057 if (flag_dwarf2_cfi_asm
)
13058 output_asm_insn (".cfi_rel_offset\t%0,%7", op
);
13059 output_asm_insn ("larl\t%2,%3", op
);
13060 output_asm_insn ("brasl\t%0,%4", op
);
13061 output_asm_insn ("l\t%0,%1", op
);
13062 if (flag_dwarf2_cfi_asm
)
13063 output_asm_insn (".cfi_restore\t%0", op
);
13067 if (flag_record_mcount
)
13069 fprintf (file
, "\t.section __mcount_loc, \"a\",@progbits\n");
13070 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
13071 fprintf (file
, "\t.previous\n");
13075 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13076 into its SYMBOL_REF_FLAGS. */
13079 s390_encode_section_info (tree decl
, rtx rtl
, int first
)
13081 default_encode_section_info (decl
, rtl
, first
);
13083 if (TREE_CODE (decl
) == VAR_DECL
)
13085 /* Store the alignment to be able to check if we can use
13086 a larl/load-relative instruction. We only handle the cases
13087 that can go wrong (i.e. no FUNC_DECLs). */
13088 if (DECL_ALIGN (decl
) == 0 || DECL_ALIGN (decl
) % 16)
13089 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13090 else if (DECL_ALIGN (decl
) % 32)
13091 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13092 else if (DECL_ALIGN (decl
) % 64)
13093 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13096 /* Literal pool references don't have a decl so they are handled
13097 differently here. We rely on the information in the MEM_ALIGN
13098 entry to decide upon the alignment. */
13100 && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
13101 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl
, 0)))
13103 if (MEM_ALIGN (rtl
) == 0 || MEM_ALIGN (rtl
) % 16)
13104 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13105 else if (MEM_ALIGN (rtl
) % 32)
13106 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13107 else if (MEM_ALIGN (rtl
) % 64)
13108 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13112 /* Output thunk to FILE that implements a C++ virtual function call (with
13113 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13114 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13115 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13116 relative to the resulting this pointer. */
13119 s390_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
13120 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
13123 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
13127 assemble_start_function (thunk
, fnname
);
13128 /* Make sure unwind info is emitted for the thunk if needed. */
13129 final_start_function (emit_barrier (), file
, 1);
13131 /* Operand 0 is the target function. */
13132 op
[0] = XEXP (DECL_RTL (function
), 0);
13133 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (op
[0]))
13136 op
[0] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[0]),
13137 TARGET_64BIT
? UNSPEC_PLT
: UNSPEC_GOT
);
13138 op
[0] = gen_rtx_CONST (Pmode
, op
[0]);
13141 /* Operand 1 is the 'this' pointer. */
13142 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
13143 op
[1] = gen_rtx_REG (Pmode
, 3);
13145 op
[1] = gen_rtx_REG (Pmode
, 2);
13147 /* Operand 2 is the delta. */
13148 op
[2] = GEN_INT (delta
);
13150 /* Operand 3 is the vcall_offset. */
13151 op
[3] = GEN_INT (vcall_offset
);
13153 /* Operand 4 is the temporary register. */
13154 op
[4] = gen_rtx_REG (Pmode
, 1);
13156 /* Operands 5 to 8 can be used as labels. */
13162 /* Operand 9 can be used for temporary register. */
13165 /* Generate code. */
13168 /* Setup literal pool pointer if required. */
13169 if ((!DISP_IN_RANGE (delta
)
13170 && !CONST_OK_FOR_K (delta
)
13171 && !CONST_OK_FOR_Os (delta
))
13172 || (!DISP_IN_RANGE (vcall_offset
)
13173 && !CONST_OK_FOR_K (vcall_offset
)
13174 && !CONST_OK_FOR_Os (vcall_offset
)))
13176 op
[5] = gen_label_rtx ();
13177 output_asm_insn ("larl\t%4,%5", op
);
13180 /* Add DELTA to this pointer. */
13183 if (CONST_OK_FOR_J (delta
))
13184 output_asm_insn ("la\t%1,%2(%1)", op
);
13185 else if (DISP_IN_RANGE (delta
))
13186 output_asm_insn ("lay\t%1,%2(%1)", op
);
13187 else if (CONST_OK_FOR_K (delta
))
13188 output_asm_insn ("aghi\t%1,%2", op
);
13189 else if (CONST_OK_FOR_Os (delta
))
13190 output_asm_insn ("agfi\t%1,%2", op
);
13193 op
[6] = gen_label_rtx ();
13194 output_asm_insn ("agf\t%1,%6-%5(%4)", op
);
13198 /* Perform vcall adjustment. */
13201 if (DISP_IN_RANGE (vcall_offset
))
13203 output_asm_insn ("lg\t%4,0(%1)", op
);
13204 output_asm_insn ("ag\t%1,%3(%4)", op
);
13206 else if (CONST_OK_FOR_K (vcall_offset
))
13208 output_asm_insn ("lghi\t%4,%3", op
);
13209 output_asm_insn ("ag\t%4,0(%1)", op
);
13210 output_asm_insn ("ag\t%1,0(%4)", op
);
13212 else if (CONST_OK_FOR_Os (vcall_offset
))
13214 output_asm_insn ("lgfi\t%4,%3", op
);
13215 output_asm_insn ("ag\t%4,0(%1)", op
);
13216 output_asm_insn ("ag\t%1,0(%4)", op
);
13220 op
[7] = gen_label_rtx ();
13221 output_asm_insn ("llgf\t%4,%7-%5(%4)", op
);
13222 output_asm_insn ("ag\t%4,0(%1)", op
);
13223 output_asm_insn ("ag\t%1,0(%4)", op
);
13227 /* Jump to target. */
13228 output_asm_insn ("jg\t%0", op
);
13230 /* Output literal pool if required. */
13233 output_asm_insn (".align\t4", op
);
13234 targetm
.asm_out
.internal_label (file
, "L",
13235 CODE_LABEL_NUMBER (op
[5]));
13239 targetm
.asm_out
.internal_label (file
, "L",
13240 CODE_LABEL_NUMBER (op
[6]));
13241 output_asm_insn (".long\t%2", op
);
13245 targetm
.asm_out
.internal_label (file
, "L",
13246 CODE_LABEL_NUMBER (op
[7]));
13247 output_asm_insn (".long\t%3", op
);
13252 /* Setup base pointer if required. */
13254 || (!DISP_IN_RANGE (delta
)
13255 && !CONST_OK_FOR_K (delta
)
13256 && !CONST_OK_FOR_Os (delta
))
13257 || (!DISP_IN_RANGE (delta
)
13258 && !CONST_OK_FOR_K (vcall_offset
)
13259 && !CONST_OK_FOR_Os (vcall_offset
)))
13261 op
[5] = gen_label_rtx ();
13262 output_asm_insn ("basr\t%4,0", op
);
13263 targetm
.asm_out
.internal_label (file
, "L",
13264 CODE_LABEL_NUMBER (op
[5]));
13267 /* Add DELTA to this pointer. */
13270 if (CONST_OK_FOR_J (delta
))
13271 output_asm_insn ("la\t%1,%2(%1)", op
);
13272 else if (DISP_IN_RANGE (delta
))
13273 output_asm_insn ("lay\t%1,%2(%1)", op
);
13274 else if (CONST_OK_FOR_K (delta
))
13275 output_asm_insn ("ahi\t%1,%2", op
);
13276 else if (CONST_OK_FOR_Os (delta
))
13277 output_asm_insn ("afi\t%1,%2", op
);
13280 op
[6] = gen_label_rtx ();
13281 output_asm_insn ("a\t%1,%6-%5(%4)", op
);
13285 /* Perform vcall adjustment. */
13288 if (CONST_OK_FOR_J (vcall_offset
))
13290 output_asm_insn ("l\t%4,0(%1)", op
);
13291 output_asm_insn ("a\t%1,%3(%4)", op
);
13293 else if (DISP_IN_RANGE (vcall_offset
))
13295 output_asm_insn ("l\t%4,0(%1)", op
);
13296 output_asm_insn ("ay\t%1,%3(%4)", op
);
13298 else if (CONST_OK_FOR_K (vcall_offset
))
13300 output_asm_insn ("lhi\t%4,%3", op
);
13301 output_asm_insn ("a\t%4,0(%1)", op
);
13302 output_asm_insn ("a\t%1,0(%4)", op
);
13304 else if (CONST_OK_FOR_Os (vcall_offset
))
13306 output_asm_insn ("iilf\t%4,%3", op
);
13307 output_asm_insn ("a\t%4,0(%1)", op
);
13308 output_asm_insn ("a\t%1,0(%4)", op
);
13312 op
[7] = gen_label_rtx ();
13313 output_asm_insn ("l\t%4,%7-%5(%4)", op
);
13314 output_asm_insn ("a\t%4,0(%1)", op
);
13315 output_asm_insn ("a\t%1,0(%4)", op
);
13318 /* We had to clobber the base pointer register.
13319 Re-setup the base pointer (with a different base). */
13320 op
[5] = gen_label_rtx ();
13321 output_asm_insn ("basr\t%4,0", op
);
13322 targetm
.asm_out
.internal_label (file
, "L",
13323 CODE_LABEL_NUMBER (op
[5]));
13326 /* Jump to target. */
13327 op
[8] = gen_label_rtx ();
13330 output_asm_insn ("l\t%4,%8-%5(%4)", op
);
13331 else if (!nonlocal
)
13332 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13333 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13334 else if (flag_pic
== 1)
13336 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13337 output_asm_insn ("l\t%4,%0(%4)", op
);
13339 else if (flag_pic
== 2)
13341 op
[9] = gen_rtx_REG (Pmode
, 0);
13342 output_asm_insn ("l\t%9,%8-4-%5(%4)", op
);
13343 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13344 output_asm_insn ("ar\t%4,%9", op
);
13345 output_asm_insn ("l\t%4,0(%4)", op
);
13348 output_asm_insn ("br\t%4", op
);
13350 /* Output literal pool. */
13351 output_asm_insn (".align\t4", op
);
13353 if (nonlocal
&& flag_pic
== 2)
13354 output_asm_insn (".long\t%0", op
);
13357 op
[0] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
13358 SYMBOL_REF_FLAGS (op
[0]) = SYMBOL_FLAG_LOCAL
;
13361 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[8]));
13363 output_asm_insn (".long\t%0", op
);
13365 output_asm_insn (".long\t%0-%5", op
);
13369 targetm
.asm_out
.internal_label (file
, "L",
13370 CODE_LABEL_NUMBER (op
[6]));
13371 output_asm_insn (".long\t%2", op
);
13375 targetm
.asm_out
.internal_label (file
, "L",
13376 CODE_LABEL_NUMBER (op
[7]));
13377 output_asm_insn (".long\t%3", op
);
13380 final_end_function ();
13381 assemble_end_function (thunk
, fnname
);
13384 /* Output either an indirect jump or an indirect call
13385 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13386 using a branch trampoline disabling branch target prediction. */
13389 s390_indirect_branch_via_thunk (unsigned int regno
,
13390 unsigned int return_addr_regno
,
13391 rtx comparison_operator
,
13392 enum s390_indirect_branch_type type
)
13394 enum s390_indirect_branch_option option
;
13396 if (type
== s390_indirect_branch_type_return
)
13398 if (s390_return_addr_from_memory ())
13399 option
= s390_opt_function_return_mem
;
13401 option
= s390_opt_function_return_reg
;
13403 else if (type
== s390_indirect_branch_type_jump
)
13404 option
= s390_opt_indirect_branch_jump
;
13405 else if (type
== s390_indirect_branch_type_call
)
13406 option
= s390_opt_indirect_branch_call
;
13408 gcc_unreachable ();
13410 if (TARGET_INDIRECT_BRANCH_TABLE
)
13414 ASM_GENERATE_INTERNAL_LABEL (label
,
13415 indirect_branch_table_label
[option
],
13416 indirect_branch_table_label_no
[option
]++);
13417 ASM_OUTPUT_LABEL (asm_out_file
, label
);
13420 if (return_addr_regno
!= INVALID_REGNUM
)
13422 gcc_assert (comparison_operator
== NULL_RTX
);
13423 fprintf (asm_out_file
, " \tbrasl\t%%r%d,", return_addr_regno
);
13427 fputs (" \tjg", asm_out_file
);
13428 if (comparison_operator
!= NULL_RTX
)
13429 print_operand (asm_out_file
, comparison_operator
, 'C');
13431 fputs ("\t", asm_out_file
);
13434 if (TARGET_CPU_Z10
)
13435 fprintf (asm_out_file
,
13436 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
"\n",
13439 fprintf (asm_out_file
,
13440 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
"\n",
13441 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
13443 if ((option
== s390_opt_indirect_branch_jump
13444 && cfun
->machine
->indirect_branch_jump
== indirect_branch_thunk
)
13445 || (option
== s390_opt_indirect_branch_call
13446 && cfun
->machine
->indirect_branch_call
== indirect_branch_thunk
)
13447 || (option
== s390_opt_function_return_reg
13448 && cfun
->machine
->function_return_reg
== indirect_branch_thunk
)
13449 || (option
== s390_opt_function_return_mem
13450 && cfun
->machine
->function_return_mem
== indirect_branch_thunk
))
13452 if (TARGET_CPU_Z10
)
13453 indirect_branch_z10thunk_mask
|= (1 << regno
);
13455 indirect_branch_prez10thunk_mask
|= (1 << regno
);
13459 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13460 either be an address register or a label pointing to the location
13461 of the jump instruction. */
13464 s390_indirect_branch_via_inline_thunk (rtx execute_target
)
13466 if (TARGET_INDIRECT_BRANCH_TABLE
)
13470 ASM_GENERATE_INTERNAL_LABEL (label
,
13471 indirect_branch_table_label
[s390_opt_indirect_branch_jump
],
13472 indirect_branch_table_label_no
[s390_opt_indirect_branch_jump
]++);
13473 ASM_OUTPUT_LABEL (asm_out_file
, label
);
13477 fputs ("\t.machinemode zarch\n", asm_out_file
);
13479 if (REG_P (execute_target
))
13480 fprintf (asm_out_file
, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target
));
13482 output_asm_insn ("\texrl\t%%r0,%0", &execute_target
);
13485 fputs ("\t.machinemode esa\n", asm_out_file
);
13487 fputs ("0:\tj\t0b\n", asm_out_file
);
13491 s390_valid_pointer_mode (scalar_int_mode mode
)
13493 return (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
));
13496 /* Checks whether the given CALL_EXPR would use a caller
13497 saved register. This is used to decide whether sibling call
13498 optimization could be performed on the respective function
13502 s390_call_saved_register_used (tree call_expr
)
13504 CUMULATIVE_ARGS cum_v
;
13505 cumulative_args_t cum
;
13510 INIT_CUMULATIVE_ARGS (cum_v
, NULL
, NULL
, 0, 0);
13511 cum
= pack_cumulative_args (&cum_v
);
13513 for (i
= 0; i
< call_expr_nargs (call_expr
); i
++)
13515 parameter
= CALL_EXPR_ARG (call_expr
, i
);
13516 gcc_assert (parameter
);
13518 /* For an undeclared variable passed as parameter we will get
13519 an ERROR_MARK node here. */
13520 if (TREE_CODE (parameter
) == ERROR_MARK
)
13523 /* We assume that in the target function all parameters are
13524 named. This only has an impact on vector argument register
13525 usage none of which is call-saved. */
13526 function_arg_info
arg (TREE_TYPE (parameter
), /*named=*/true);
13527 apply_pass_by_reference_rules (&cum_v
, arg
);
13529 parm_rtx
= s390_function_arg (cum
, arg
);
13531 s390_function_arg_advance (cum
, arg
);
13536 if (REG_P (parm_rtx
))
13538 for (reg
= 0; reg
< REG_NREGS (parm_rtx
); reg
++)
13539 if (!call_used_or_fixed_reg_p (reg
+ REGNO (parm_rtx
)))
13543 if (GET_CODE (parm_rtx
) == PARALLEL
)
13547 for (i
= 0; i
< XVECLEN (parm_rtx
, 0); i
++)
13549 rtx r
= XEXP (XVECEXP (parm_rtx
, 0, i
), 0);
13551 gcc_assert (REG_P (r
));
13553 for (reg
= 0; reg
< REG_NREGS (r
); reg
++)
13554 if (!call_used_or_fixed_reg_p (reg
+ REGNO (r
)))
13563 /* Return true if the given call expression can be
13564 turned into a sibling call.
13565 DECL holds the declaration of the function to be called whereas
13566 EXP is the call expression itself. */
13569 s390_function_ok_for_sibcall (tree decl
, tree exp
)
13571 /* The TPF epilogue uses register 1. */
13572 if (TARGET_TPF_PROFILING
)
13575 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13576 which would have to be restored before the sibcall. */
13577 if (!TARGET_64BIT
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
13580 /* The thunks for indirect branches require r1 if no exrl is
13581 available. r1 might not be available when doing a sibling
13583 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13588 /* Register 6 on s390 is available as an argument register but unfortunately
13589 "caller saved". This makes functions needing this register for arguments
13590 not suitable for sibcalls. */
13591 return !s390_call_saved_register_used (exp
);
13594 /* Return the fixed registers used for condition codes. */
13597 s390_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
13600 *p2
= INVALID_REGNUM
;
13605 /* This function is used by the call expanders of the machine description.
13606 It emits the call insn itself together with the necessary operations
13607 to adjust the target address and returns the emitted insn.
13608 ADDR_LOCATION is the target address rtx
13609 TLS_CALL the location of the thread-local symbol
13610 RESULT_REG the register where the result of the call should be stored
13611 RETADDR_REG the register where the return address should be stored
13612 If this parameter is NULL_RTX the call is considered
13613 to be a sibling call. */
13616 s390_emit_call (rtx addr_location
, rtx tls_call
, rtx result_reg
,
13619 bool plt_call
= false;
13621 rtx vec
[4] = { NULL_RTX
};
13623 rtx
*call
= &vec
[0];
13624 rtx
*clobber_ret_reg
= &vec
[1];
13625 rtx
*use
= &vec
[2];
13626 rtx
*clobber_thunk_reg
= &vec
[3];
13629 /* Direct function calls need special treatment. */
13630 if (GET_CODE (addr_location
) == SYMBOL_REF
)
13632 /* When calling a global routine in PIC mode, we must
13633 replace the symbol itself with the PLT stub. */
13634 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (addr_location
))
13636 if (TARGET_64BIT
|| retaddr_reg
!= NULL_RTX
)
13638 addr_location
= gen_rtx_UNSPEC (Pmode
,
13639 gen_rtvec (1, addr_location
),
13641 addr_location
= gen_rtx_CONST (Pmode
, addr_location
);
13645 /* For -fpic code the PLT entries might use r12 which is
13646 call-saved. Therefore we cannot do a sibcall when
13647 calling directly using a symbol ref. When reaching
13648 this point we decided (in s390_function_ok_for_sibcall)
13649 to do a sibcall for a function pointer but one of the
13650 optimizers was able to get rid of the function pointer
13651 by propagating the symbol ref into the call. This
13652 optimization is illegal for S/390 so we turn the direct
13653 call into a indirect call again. */
13654 addr_location
= force_reg (Pmode
, addr_location
);
13658 /* If it is already an indirect call or the code above moved the
13659 SYMBOL_REF to somewhere else make sure the address can be found in
13661 if (retaddr_reg
== NULL_RTX
13662 && GET_CODE (addr_location
) != SYMBOL_REF
13665 emit_move_insn (gen_rtx_REG (Pmode
, SIBCALL_REGNUM
), addr_location
);
13666 addr_location
= gen_rtx_REG (Pmode
, SIBCALL_REGNUM
);
13669 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13670 && GET_CODE (addr_location
) != SYMBOL_REF
13673 /* Indirect branch thunks require the target to be a single GPR. */
13674 addr_location
= force_reg (Pmode
, addr_location
);
13676 /* Without exrl the indirect branch thunks need an additional
13677 register for larl;ex */
13678 if (!TARGET_CPU_Z10
)
13680 *clobber_thunk_reg
= gen_rtx_REG (Pmode
, INDIRECT_BRANCH_THUNK_REGNUM
);
13681 *clobber_thunk_reg
= gen_rtx_CLOBBER (VOIDmode
, *clobber_thunk_reg
);
13685 addr_location
= gen_rtx_MEM (QImode
, addr_location
);
13686 *call
= gen_rtx_CALL (VOIDmode
, addr_location
, const0_rtx
);
13688 if (result_reg
!= NULL_RTX
)
13689 *call
= gen_rtx_SET (result_reg
, *call
);
13691 if (retaddr_reg
!= NULL_RTX
)
13693 *clobber_ret_reg
= gen_rtx_CLOBBER (VOIDmode
, retaddr_reg
);
13695 if (tls_call
!= NULL_RTX
)
13696 *use
= gen_rtx_USE (VOIDmode
, tls_call
);
13700 for (i
= 0; i
< 4; i
++)
13701 if (vec
[i
] != NULL_RTX
)
13709 v
= rtvec_alloc (elts
);
13710 for (i
= 0; i
< 4; i
++)
13711 if (vec
[i
] != NULL_RTX
)
13713 RTVEC_ELT (v
, e
) = vec
[i
];
13717 *call
= gen_rtx_PARALLEL (VOIDmode
, v
);
13720 insn
= emit_call_insn (*call
);
13722 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13723 if ((!TARGET_64BIT
&& plt_call
) || tls_call
!= NULL_RTX
)
13725 /* s390_function_ok_for_sibcall should
13726 have denied sibcalls in this case. */
13727 gcc_assert (retaddr_reg
!= NULL_RTX
);
13728 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, 12));
13733 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13736 s390_conditional_register_usage (void)
13741 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
13742 fixed_regs
[BASE_REGNUM
] = 0;
13743 fixed_regs
[RETURN_REGNUM
] = 0;
13746 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13747 call_used_regs
[i
] = 0;
13751 call_used_regs
[FPR4_REGNUM
] = 0;
13752 call_used_regs
[FPR6_REGNUM
] = 0;
13755 if (TARGET_SOFT_FLOAT
)
13757 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13761 /* Disable v16 - v31 for non-vector target. */
13764 for (i
= VR16_REGNUM
; i
<= VR31_REGNUM
; i
++)
13765 fixed_regs
[i
] = call_used_regs
[i
] = 1;
13769 /* Corresponding function to eh_return expander. */
13771 static GTY(()) rtx s390_tpf_eh_return_symbol
;
13773 s390_emit_tpf_eh_return (rtx target
)
13778 if (!s390_tpf_eh_return_symbol
)
13779 s390_tpf_eh_return_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tpf_eh_return");
13781 reg
= gen_rtx_REG (Pmode
, 2);
13782 orig_ra
= gen_rtx_REG (Pmode
, 3);
13784 emit_move_insn (reg
, target
);
13785 emit_move_insn (orig_ra
, get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
));
13786 insn
= s390_emit_call (s390_tpf_eh_return_symbol
, NULL_RTX
, reg
,
13787 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
13788 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), reg
);
13789 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), orig_ra
);
13791 emit_move_insn (EH_RETURN_HANDLER_RTX
, reg
);
13794 /* Rework the prologue/epilogue to avoid saving/restoring
13795 registers unnecessarily. */
13798 s390_optimize_prologue (void)
13800 rtx_insn
*insn
, *new_insn
, *next_insn
;
13802 /* Do a final recompute of the frame-related data. */
13803 s390_optimize_register_info ();
13805 /* If all special registers are in fact used, there's nothing we
13806 can do, so no point in walking the insn list. */
13808 if (cfun_frame_layout
.first_save_gpr
<= BASE_REGNUM
13809 && cfun_frame_layout
.last_save_gpr
>= BASE_REGNUM
)
13812 /* Search for prologue/epilogue insns and replace them. */
13813 for (insn
= get_insns (); insn
; insn
= next_insn
)
13815 int first
, last
, off
;
13816 rtx set
, base
, offset
;
13819 next_insn
= NEXT_INSN (insn
);
13821 if (! NONJUMP_INSN_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
13824 pat
= PATTERN (insn
);
13826 /* Remove ldgr/lgdr instructions used for saving and restore
13827 GPRs if possible. */
13832 if (INSN_CODE (insn
) == CODE_FOR_stack_restore_from_fpr
)
13833 tmp_pat
= XVECEXP (pat
, 0, 0);
13835 if (GET_CODE (tmp_pat
) == SET
13836 && GET_MODE (SET_SRC (tmp_pat
)) == DImode
13837 && REG_P (SET_SRC (tmp_pat
))
13838 && REG_P (SET_DEST (tmp_pat
)))
13840 int src_regno
= REGNO (SET_SRC (tmp_pat
));
13841 int dest_regno
= REGNO (SET_DEST (tmp_pat
));
13845 if (!((GENERAL_REGNO_P (src_regno
)
13846 && FP_REGNO_P (dest_regno
))
13847 || (FP_REGNO_P (src_regno
)
13848 && GENERAL_REGNO_P (dest_regno
))))
13851 gpr_regno
= GENERAL_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13852 fpr_regno
= FP_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13854 /* GPR must be call-saved, FPR must be call-clobbered. */
13855 if (!call_used_regs
[fpr_regno
]
13856 || call_used_regs
[gpr_regno
])
13859 /* It must not happen that what we once saved in an FPR now
13860 needs a stack slot. */
13861 gcc_assert (cfun_gpr_save_slot (gpr_regno
) != SAVE_SLOT_STACK
);
13863 if (cfun_gpr_save_slot (gpr_regno
) == SAVE_SLOT_NONE
)
13865 remove_insn (insn
);
13871 if (GET_CODE (pat
) == PARALLEL
13872 && store_multiple_operation (pat
, VOIDmode
))
13874 set
= XVECEXP (pat
, 0, 0);
13875 first
= REGNO (SET_SRC (set
));
13876 last
= first
+ XVECLEN (pat
, 0) - 1;
13877 offset
= const0_rtx
;
13878 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13879 off
= INTVAL (offset
);
13881 if (GET_CODE (base
) != REG
|| off
< 0)
13883 if (cfun_frame_layout
.first_save_gpr
!= -1
13884 && (cfun_frame_layout
.first_save_gpr
< first
13885 || cfun_frame_layout
.last_save_gpr
> last
))
13887 if (REGNO (base
) != STACK_POINTER_REGNUM
13888 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13890 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13893 if (cfun_frame_layout
.first_save_gpr
!= -1)
13895 rtx s_pat
= save_gprs (base
,
13896 off
+ (cfun_frame_layout
.first_save_gpr
13897 - first
) * UNITS_PER_LONG
,
13898 cfun_frame_layout
.first_save_gpr
,
13899 cfun_frame_layout
.last_save_gpr
);
13900 new_insn
= emit_insn_before (s_pat
, insn
);
13901 INSN_ADDRESSES_NEW (new_insn
, -1);
13904 remove_insn (insn
);
13908 if (cfun_frame_layout
.first_save_gpr
== -1
13909 && GET_CODE (pat
) == SET
13910 && GENERAL_REG_P (SET_SRC (pat
))
13911 && GET_CODE (SET_DEST (pat
)) == MEM
)
13914 first
= REGNO (SET_SRC (set
));
13915 offset
= const0_rtx
;
13916 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13917 off
= INTVAL (offset
);
13919 if (GET_CODE (base
) != REG
|| off
< 0)
13921 if (REGNO (base
) != STACK_POINTER_REGNUM
13922 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13925 remove_insn (insn
);
13929 if (GET_CODE (pat
) == PARALLEL
13930 && load_multiple_operation (pat
, VOIDmode
))
13932 set
= XVECEXP (pat
, 0, 0);
13933 first
= REGNO (SET_DEST (set
));
13934 last
= first
+ XVECLEN (pat
, 0) - 1;
13935 offset
= const0_rtx
;
13936 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13937 off
= INTVAL (offset
);
13939 if (GET_CODE (base
) != REG
|| off
< 0)
13942 if (cfun_frame_layout
.first_restore_gpr
!= -1
13943 && (cfun_frame_layout
.first_restore_gpr
< first
13944 || cfun_frame_layout
.last_restore_gpr
> last
))
13946 if (REGNO (base
) != STACK_POINTER_REGNUM
13947 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13949 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13952 if (cfun_frame_layout
.first_restore_gpr
!= -1)
13954 rtx rpat
= restore_gprs (base
,
13955 off
+ (cfun_frame_layout
.first_restore_gpr
13956 - first
) * UNITS_PER_LONG
,
13957 cfun_frame_layout
.first_restore_gpr
,
13958 cfun_frame_layout
.last_restore_gpr
);
13960 /* Remove REG_CFA_RESTOREs for registers that we no
13961 longer need to save. */
13962 REG_NOTES (rpat
) = REG_NOTES (insn
);
13963 for (rtx
*ptr
= ®_NOTES (rpat
); *ptr
; )
13964 if (REG_NOTE_KIND (*ptr
) == REG_CFA_RESTORE
13965 && ((int) REGNO (XEXP (*ptr
, 0))
13966 < cfun_frame_layout
.first_restore_gpr
))
13967 *ptr
= XEXP (*ptr
, 1);
13969 ptr
= &XEXP (*ptr
, 1);
13970 new_insn
= emit_insn_before (rpat
, insn
);
13971 RTX_FRAME_RELATED_P (new_insn
) = 1;
13972 INSN_ADDRESSES_NEW (new_insn
, -1);
13975 remove_insn (insn
);
13979 if (cfun_frame_layout
.first_restore_gpr
== -1
13980 && GET_CODE (pat
) == SET
13981 && GENERAL_REG_P (SET_DEST (pat
))
13982 && GET_CODE (SET_SRC (pat
)) == MEM
)
13985 first
= REGNO (SET_DEST (set
));
13986 offset
= const0_rtx
;
13987 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13988 off
= INTVAL (offset
);
13990 if (GET_CODE (base
) != REG
|| off
< 0)
13993 if (REGNO (base
) != STACK_POINTER_REGNUM
13994 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13997 remove_insn (insn
);
14003 /* On z10 and later the dynamic branch prediction must see the
14004 backward jump within a certain windows. If not it falls back to
14005 the static prediction. This function rearranges the loop backward
14006 branch in a way which makes the static prediction always correct.
14007 The function returns true if it added an instruction. */
14009 s390_fix_long_loop_prediction (rtx_insn
*insn
)
14011 rtx set
= single_set (insn
);
14012 rtx code_label
, label_ref
;
14013 rtx_insn
*uncond_jump
;
14014 rtx_insn
*cur_insn
;
14018 /* This will exclude branch on count and branch on index patterns
14019 since these are correctly statically predicted.
14021 The additional check for a PARALLEL is required here since
14022 single_set might be != NULL for PARALLELs where the set of the
14023 iteration variable is dead. */
14024 if (GET_CODE (PATTERN (insn
)) == PARALLEL
14026 || SET_DEST (set
) != pc_rtx
14027 || GET_CODE (SET_SRC(set
)) != IF_THEN_ELSE
)
14030 /* Skip conditional returns. */
14031 if (ANY_RETURN_P (XEXP (SET_SRC (set
), 1))
14032 && XEXP (SET_SRC (set
), 2) == pc_rtx
)
14035 label_ref
= (GET_CODE (XEXP (SET_SRC (set
), 1)) == LABEL_REF
?
14036 XEXP (SET_SRC (set
), 1) : XEXP (SET_SRC (set
), 2));
14038 gcc_assert (GET_CODE (label_ref
) == LABEL_REF
);
14040 code_label
= XEXP (label_ref
, 0);
14042 if (INSN_ADDRESSES (INSN_UID (code_label
)) == -1
14043 || INSN_ADDRESSES (INSN_UID (insn
)) == -1
14044 || (INSN_ADDRESSES (INSN_UID (insn
))
14045 - INSN_ADDRESSES (INSN_UID (code_label
)) < PREDICT_DISTANCE
))
14048 for (distance
= 0, cur_insn
= PREV_INSN (insn
);
14049 distance
< PREDICT_DISTANCE
- 6;
14050 distance
+= get_attr_length (cur_insn
), cur_insn
= PREV_INSN (cur_insn
))
14051 if (!cur_insn
|| JUMP_P (cur_insn
) || LABEL_P (cur_insn
))
14054 rtx_code_label
*new_label
= gen_label_rtx ();
14055 uncond_jump
= emit_jump_insn_after (
14056 gen_rtx_SET (pc_rtx
,
14057 gen_rtx_LABEL_REF (VOIDmode
, code_label
)),
14059 emit_label_after (new_label
, uncond_jump
);
14061 tmp
= XEXP (SET_SRC (set
), 1);
14062 XEXP (SET_SRC (set
), 1) = XEXP (SET_SRC (set
), 2);
14063 XEXP (SET_SRC (set
), 2) = tmp
;
14064 INSN_CODE (insn
) = -1;
14066 XEXP (label_ref
, 0) = new_label
;
14067 JUMP_LABEL (insn
) = new_label
;
14068 JUMP_LABEL (uncond_jump
) = code_label
;
14073 /* Returns 1 if INSN reads the value of REG for purposes not related
14074 to addressing of memory, and 0 otherwise. */
14076 s390_non_addr_reg_read_p (rtx reg
, rtx_insn
*insn
)
14078 return reg_referenced_p (reg
, PATTERN (insn
))
14079 && !reg_used_in_mem_p (REGNO (reg
), PATTERN (insn
));
14082 /* Starting from INSN find_cond_jump looks downwards in the insn
14083 stream for a single jump insn which is the last user of the
14084 condition code set in INSN. */
14086 find_cond_jump (rtx_insn
*insn
)
14088 for (; insn
; insn
= NEXT_INSN (insn
))
14092 if (LABEL_P (insn
))
14095 if (!JUMP_P (insn
))
14097 if (reg_mentioned_p (gen_rtx_REG (CCmode
, CC_REGNUM
), insn
))
14102 /* This will be triggered by a return. */
14103 if (GET_CODE (PATTERN (insn
)) != SET
)
14106 gcc_assert (SET_DEST (PATTERN (insn
)) == pc_rtx
);
14107 ite
= SET_SRC (PATTERN (insn
));
14109 if (GET_CODE (ite
) != IF_THEN_ELSE
)
14112 cc
= XEXP (XEXP (ite
, 0), 0);
14113 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
)))
14116 if (find_reg_note (insn
, REG_DEAD
, cc
))
14124 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14125 the semantics does not change. If NULL_RTX is passed as COND the
14126 function tries to find the conditional jump starting with INSN. */
14128 s390_swap_cmp (rtx cond
, rtx
*op0
, rtx
*op1
, rtx_insn
*insn
)
14132 if (cond
== NULL_RTX
)
14134 rtx_insn
*jump
= find_cond_jump (NEXT_INSN (insn
));
14135 rtx set
= jump
? single_set (jump
) : NULL_RTX
;
14137 if (set
== NULL_RTX
)
14140 cond
= XEXP (SET_SRC (set
), 0);
14145 PUT_CODE (cond
, swap_condition (GET_CODE (cond
)));
14148 /* On z10, instructions of the compare-and-branch family have the
14149 property to access the register occurring as second operand with
14150 its bits complemented. If such a compare is grouped with a second
14151 instruction that accesses the same register non-complemented, and
14152 if that register's value is delivered via a bypass, then the
14153 pipeline recycles, thereby causing significant performance decline.
14154 This function locates such situations and exchanges the two
14155 operands of the compare. The function return true whenever it
14158 s390_z10_optimize_cmp (rtx_insn
*insn
)
14160 rtx_insn
*prev_insn
, *next_insn
;
14161 bool insn_added_p
= false;
14162 rtx cond
, *op0
, *op1
;
14164 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
14166 /* Handle compare and branch and branch on count
14168 rtx pattern
= single_set (insn
);
14171 || SET_DEST (pattern
) != pc_rtx
14172 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
)
14175 cond
= XEXP (SET_SRC (pattern
), 0);
14176 op0
= &XEXP (cond
, 0);
14177 op1
= &XEXP (cond
, 1);
14179 else if (GET_CODE (PATTERN (insn
)) == SET
)
14183 /* Handle normal compare instructions. */
14184 src
= SET_SRC (PATTERN (insn
));
14185 dest
= SET_DEST (PATTERN (insn
));
14188 || !CC_REGNO_P (REGNO (dest
))
14189 || GET_CODE (src
) != COMPARE
)
14192 /* s390_swap_cmp will try to find the conditional
14193 jump when passing NULL_RTX as condition. */
14195 op0
= &XEXP (src
, 0);
14196 op1
= &XEXP (src
, 1);
14201 if (!REG_P (*op0
) || !REG_P (*op1
))
14204 if (GET_MODE_CLASS (GET_MODE (*op0
)) != MODE_INT
)
14207 /* Swap the COMPARE arguments and its mask if there is a
14208 conflicting access in the previous insn. */
14209 prev_insn
= prev_active_insn (insn
);
14210 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14211 && reg_referenced_p (*op1
, PATTERN (prev_insn
)))
14212 s390_swap_cmp (cond
, op0
, op1
, insn
);
14214 /* Check if there is a conflict with the next insn. If there
14215 was no conflict with the previous insn, then swap the
14216 COMPARE arguments and its mask. If we already swapped
14217 the operands, or if swapping them would cause a conflict
14218 with the previous insn, issue a NOP after the COMPARE in
14219 order to separate the two instuctions. */
14220 next_insn
= next_active_insn (insn
);
14221 if (next_insn
!= NULL_RTX
&& INSN_P (next_insn
)
14222 && s390_non_addr_reg_read_p (*op1
, next_insn
))
14224 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14225 && s390_non_addr_reg_read_p (*op0
, prev_insn
))
14227 if (REGNO (*op1
) == 0)
14228 emit_insn_after (gen_nop_lr1 (), insn
);
14230 emit_insn_after (gen_nop_lr0 (), insn
);
14231 insn_added_p
= true;
14234 s390_swap_cmp (cond
, op0
, op1
, insn
);
14236 return insn_added_p
;
14239 /* Number of INSNs to be scanned backward in the last BB of the loop
14240 and forward in the first BB of the loop. This usually should be a
14241 bit more than the number of INSNs which could go into one
14243 #define S390_OSC_SCAN_INSN_NUM 5
14245 /* Scan LOOP for static OSC collisions and return true if a osc_break
14246 should be issued for this loop. */
14248 s390_adjust_loop_scan_osc (struct loop
* loop
)
14251 HARD_REG_SET modregs
, newregs
;
14252 rtx_insn
*insn
, *store_insn
= NULL
;
14254 struct s390_address addr_store
, addr_load
;
14255 subrtx_iterator::array_type array
;
14258 CLEAR_HARD_REG_SET (modregs
);
14261 FOR_BB_INSNS_REVERSE (loop
->latch
, insn
)
14263 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14267 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14270 find_all_hard_reg_sets (insn
, &newregs
, true);
14271 modregs
|= newregs
;
14273 set
= single_set (insn
);
14277 if (MEM_P (SET_DEST (set
))
14278 && s390_decompose_address (XEXP (SET_DEST (set
), 0), &addr_store
))
14285 if (store_insn
== NULL_RTX
)
14289 FOR_BB_INSNS (loop
->header
, insn
)
14291 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14294 if (insn
== store_insn
)
14298 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14301 find_all_hard_reg_sets (insn
, &newregs
, true);
14302 modregs
|= newregs
;
14304 set
= single_set (insn
);
14308 /* An intermediate store disrupts static OSC checking
14310 if (MEM_P (SET_DEST (set
))
14311 && s390_decompose_address (XEXP (SET_DEST (set
), 0), NULL
))
14314 FOR_EACH_SUBRTX (iter
, array
, SET_SRC (set
), NONCONST
)
14316 && s390_decompose_address (XEXP (*iter
, 0), &addr_load
)
14317 && rtx_equal_p (addr_load
.base
, addr_store
.base
)
14318 && rtx_equal_p (addr_load
.indx
, addr_store
.indx
)
14319 && rtx_equal_p (addr_load
.disp
, addr_store
.disp
))
14321 if ((addr_load
.base
!= NULL_RTX
14322 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.base
)))
14323 || (addr_load
.indx
!= NULL_RTX
14324 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.indx
))))
14331 /* Look for adjustments which can be done on simple innermost
14334 s390_adjust_loops ()
14336 struct loop
*loop
= NULL
;
14339 compute_bb_for_insn ();
14341 /* Find the loops. */
14342 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
14344 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
14348 flow_loop_dump (loop
, dump_file
, NULL
, 0);
14349 fprintf (dump_file
, ";; OSC loop scan Loop: ");
14351 if (loop
->latch
== NULL
14352 || pc_set (BB_END (loop
->latch
)) == NULL_RTX
14353 || !s390_adjust_loop_scan_osc (loop
))
14357 if (loop
->latch
== NULL
)
14358 fprintf (dump_file
, " muliple backward jumps\n");
14361 fprintf (dump_file
, " header insn: %d latch insn: %d ",
14362 INSN_UID (BB_HEAD (loop
->header
)),
14363 INSN_UID (BB_END (loop
->latch
)));
14364 if (pc_set (BB_END (loop
->latch
)) == NULL_RTX
)
14365 fprintf (dump_file
, " loop does not end with jump\n");
14367 fprintf (dump_file
, " not instrumented\n");
14373 rtx_insn
*new_insn
;
14376 fprintf (dump_file
, " adding OSC break insn: ");
14377 new_insn
= emit_insn_before (gen_osc_break (),
14378 BB_END (loop
->latch
));
14379 INSN_ADDRESSES_NEW (new_insn
, -1);
14383 loop_optimizer_finalize ();
14385 df_finish_pass (false);
14388 /* Perform machine-dependent processing. */
14393 struct constant_pool
*pool
;
14395 int hw_before
, hw_after
;
14397 if (s390_tune
== PROCESSOR_2964_Z13
)
14398 s390_adjust_loops ();
14400 /* Make sure all splits have been performed; splits after
14401 machine_dependent_reorg might confuse insn length counts. */
14402 split_all_insns_noflow ();
14404 /* Install the main literal pool and the associated base
14405 register load insns. The literal pool might be > 4096 bytes in
14406 size, so that some of its elements cannot be directly accessed.
14408 To fix this, we split the single literal pool into multiple
14409 pool chunks, reloading the pool base register at various
14410 points throughout the function to ensure it always points to
14411 the pool chunk the following code expects. */
14413 /* Collect the literal pool. */
14414 pool
= s390_mainpool_start ();
14417 /* Finish up literal pool related changes. */
14418 s390_mainpool_finish (pool
);
14422 /* If literal pool overflowed, chunkify it. */
14423 pool
= s390_chunkify_start ();
14424 s390_chunkify_finish (pool
);
14427 /* Generate out-of-pool execute target insns. */
14428 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14433 label
= s390_execute_label (insn
);
14437 gcc_assert (label
!= const0_rtx
);
14439 target
= emit_label (XEXP (label
, 0));
14440 INSN_ADDRESSES_NEW (target
, -1);
14444 target
= emit_jump_insn (s390_execute_target (insn
));
14445 /* This is important in order to keep a table jump
14446 pointing at the jump table label. Only this makes it
14447 being recognized as table jump. */
14448 JUMP_LABEL (target
) = JUMP_LABEL (insn
);
14451 target
= emit_insn (s390_execute_target (insn
));
14452 INSN_ADDRESSES_NEW (target
, -1);
14455 /* Try to optimize prologue and epilogue further. */
14456 s390_optimize_prologue ();
14458 /* Walk over the insns and do some >=z10 specific changes. */
14459 if (s390_tune
>= PROCESSOR_2097_Z10
)
14462 bool insn_added_p
= false;
14464 /* The insn lengths and addresses have to be up to date for the
14465 following manipulations. */
14466 shorten_branches (get_insns ());
14468 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14470 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14474 insn_added_p
|= s390_fix_long_loop_prediction (insn
);
14476 if ((GET_CODE (PATTERN (insn
)) == PARALLEL
14477 || GET_CODE (PATTERN (insn
)) == SET
)
14478 && s390_tune
== PROCESSOR_2097_Z10
)
14479 insn_added_p
|= s390_z10_optimize_cmp (insn
);
14482 /* Adjust branches if we added new instructions. */
14484 shorten_branches (get_insns ());
14487 s390_function_num_hotpatch_hw (current_function_decl
, &hw_before
, &hw_after
);
14492 /* Insert NOPs for hotpatching. */
14493 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14495 1. inside the area covered by debug information to allow setting
14496 breakpoints at the NOPs,
14497 2. before any insn which results in an asm instruction,
14498 3. before in-function labels to avoid jumping to the NOPs, for
14499 example as part of a loop,
14500 4. before any barrier in case the function is completely empty
14501 (__builtin_unreachable ()) and has neither internal labels nor
14504 if (active_insn_p (insn
) || BARRIER_P (insn
) || LABEL_P (insn
))
14506 /* Output a series of NOPs before the first active insn. */
14507 while (insn
&& hw_after
> 0)
14511 emit_insn_before (gen_nop_6_byte (), insn
);
14514 else if (hw_after
>= 2)
14516 emit_insn_before (gen_nop_4_byte (), insn
);
14521 emit_insn_before (gen_nop_2_byte (), insn
);
14528 /* Return true if INSN is a fp load insn writing register REGNO. */
14530 s390_fpload_toreg (rtx_insn
*insn
, unsigned int regno
)
14533 enum attr_type flag
= s390_safe_attr_type (insn
);
14535 if (flag
!= TYPE_FLOADSF
&& flag
!= TYPE_FLOADDF
)
14538 set
= single_set (insn
);
14540 if (set
== NULL_RTX
)
14543 if (!REG_P (SET_DEST (set
)) || !MEM_P (SET_SRC (set
)))
14546 if (REGNO (SET_DEST (set
)) != regno
)
14552 /* This value describes the distance to be avoided between an
14553 arithmetic fp instruction and an fp load writing the same register.
14554 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14555 fine but the exact value has to be avoided. Otherwise the FP
14556 pipeline will throw an exception causing a major penalty. */
14557 #define Z10_EARLYLOAD_DISTANCE 7
14559 /* Rearrange the ready list in order to avoid the situation described
14560 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14561 moved to the very end of the ready list. */
14563 s390_z10_prevent_earlyload_conflicts (rtx_insn
**ready
, int *nready_p
)
14565 unsigned int regno
;
14566 int nready
= *nready_p
;
14571 enum attr_type flag
;
14574 /* Skip DISTANCE - 1 active insns. */
14575 for (insn
= last_scheduled_insn
, distance
= Z10_EARLYLOAD_DISTANCE
- 1;
14576 distance
> 0 && insn
!= NULL_RTX
;
14577 distance
--, insn
= prev_active_insn (insn
))
14578 if (CALL_P (insn
) || JUMP_P (insn
))
14581 if (insn
== NULL_RTX
)
14584 set
= single_set (insn
);
14586 if (set
== NULL_RTX
|| !REG_P (SET_DEST (set
))
14587 || GET_MODE_CLASS (GET_MODE (SET_DEST (set
))) != MODE_FLOAT
)
14590 flag
= s390_safe_attr_type (insn
);
14592 if (flag
== TYPE_FLOADSF
|| flag
== TYPE_FLOADDF
)
14595 regno
= REGNO (SET_DEST (set
));
14598 while (!s390_fpload_toreg (ready
[i
], regno
) && i
> 0)
14605 memmove (&ready
[1], &ready
[0], sizeof (rtx_insn
*) * i
);
14609 /* Returns TRUE if BB is entered via a fallthru edge and all other
14610 incoming edges are less than likely. */
14612 s390_bb_fallthru_entry_likely (basic_block bb
)
14614 edge e
, fallthru_edge
;
14620 fallthru_edge
= find_fallthru_edge (bb
->preds
);
14621 if (!fallthru_edge
)
14624 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14625 if (e
!= fallthru_edge
14626 && e
->probability
>= profile_probability::likely ())
14632 struct s390_sched_state
14634 /* Number of insns in the group. */
14636 /* Execution side of the group. */
14638 /* Group can only hold two insns. */
14640 } s390_sched_state
;
14642 static struct s390_sched_state sched_state
= {0, 1, false};
14644 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14645 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14646 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14647 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14648 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14650 static unsigned int
14651 s390_get_sched_attrmask (rtx_insn
*insn
)
14653 unsigned int mask
= 0;
14657 case PROCESSOR_2827_ZEC12
:
14658 if (get_attr_zEC12_cracked (insn
))
14659 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14660 if (get_attr_zEC12_expanded (insn
))
14661 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14662 if (get_attr_zEC12_endgroup (insn
))
14663 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14664 if (get_attr_zEC12_groupalone (insn
))
14665 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14667 case PROCESSOR_2964_Z13
:
14668 if (get_attr_z13_cracked (insn
))
14669 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14670 if (get_attr_z13_expanded (insn
))
14671 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14672 if (get_attr_z13_endgroup (insn
))
14673 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14674 if (get_attr_z13_groupalone (insn
))
14675 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14676 if (get_attr_z13_groupoftwo (insn
))
14677 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14679 case PROCESSOR_3906_Z14
:
14680 if (get_attr_z14_cracked (insn
))
14681 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14682 if (get_attr_z14_expanded (insn
))
14683 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14684 if (get_attr_z14_endgroup (insn
))
14685 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14686 if (get_attr_z14_groupalone (insn
))
14687 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14688 if (get_attr_z14_groupoftwo (insn
))
14689 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14691 case PROCESSOR_8561_Z15
:
14692 if (get_attr_z15_cracked (insn
))
14693 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14694 if (get_attr_z15_expanded (insn
))
14695 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14696 if (get_attr_z15_endgroup (insn
))
14697 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14698 if (get_attr_z15_groupalone (insn
))
14699 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14700 if (get_attr_z15_groupoftwo (insn
))
14701 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14704 gcc_unreachable ();
14709 static unsigned int
14710 s390_get_unit_mask (rtx_insn
*insn
, int *units
)
14712 unsigned int mask
= 0;
14716 case PROCESSOR_2964_Z13
:
14718 if (get_attr_z13_unit_lsu (insn
))
14720 if (get_attr_z13_unit_fxa (insn
))
14722 if (get_attr_z13_unit_fxb (insn
))
14724 if (get_attr_z13_unit_vfu (insn
))
14727 case PROCESSOR_3906_Z14
:
14729 if (get_attr_z14_unit_lsu (insn
))
14731 if (get_attr_z14_unit_fxa (insn
))
14733 if (get_attr_z14_unit_fxb (insn
))
14735 if (get_attr_z14_unit_vfu (insn
))
14738 case PROCESSOR_8561_Z15
:
14740 if (get_attr_z15_unit_lsu (insn
))
14742 if (get_attr_z15_unit_fxa (insn
))
14744 if (get_attr_z15_unit_fxb (insn
))
14746 if (get_attr_z15_unit_vfu (insn
))
14750 gcc_unreachable ();
14756 s390_is_fpd (rtx_insn
*insn
)
14758 if (insn
== NULL_RTX
)
14761 return get_attr_z13_unit_fpd (insn
) || get_attr_z14_unit_fpd (insn
)
14762 || get_attr_z15_unit_fpd (insn
);
14766 s390_is_fxd (rtx_insn
*insn
)
14768 if (insn
== NULL_RTX
)
14771 return get_attr_z13_unit_fxd (insn
) || get_attr_z14_unit_fxd (insn
)
14772 || get_attr_z15_unit_fxd (insn
);
14775 /* Returns TRUE if INSN is a long-running instruction. */
14777 s390_is_longrunning (rtx_insn
*insn
)
14779 if (insn
== NULL_RTX
)
14782 return s390_is_fxd (insn
) || s390_is_fpd (insn
);
14786 /* Return the scheduling score for INSN. The higher the score the
14787 better. The score is calculated from the OOO scheduling attributes
14788 of INSN and the scheduling state sched_state. */
14790 s390_sched_score (rtx_insn
*insn
)
14792 unsigned int mask
= s390_get_sched_attrmask (insn
);
14795 switch (sched_state
.group_state
)
14798 /* Try to put insns into the first slot which would otherwise
14800 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14801 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
14803 if ((mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
14807 /* Prefer not cracked insns while trying to put together a
14809 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14810 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14811 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14813 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) == 0)
14815 /* If we are in a group of two already, try to schedule another
14816 group-of-two insn to avoid shortening another group. */
14817 if (sched_state
.group_of_two
14818 && (mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
14822 /* Prefer not cracked insns while trying to put together a
14824 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14825 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14826 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14828 /* Prefer endgroup insns in the last slot. */
14829 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
14831 /* Try to avoid group-of-two insns in the last slot as they will
14832 shorten this group as well as the next one. */
14833 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
14834 score
= MAX (0, score
- 15);
14838 if (s390_tune
>= PROCESSOR_2964_Z13
)
14841 unsigned unit_mask
, m
= 1;
14843 unit_mask
= s390_get_unit_mask (insn
, &units
);
14844 gcc_assert (units
<= MAX_SCHED_UNITS
);
14846 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14847 ago the last insn of this unit type got scheduled. This is
14848 supposed to help providing a proper instruction mix to the
14850 for (i
= 0; i
< units
; i
++, m
<<= 1)
14852 score
+= (last_scheduled_unit_distance
[i
][sched_state
.side
]
14853 * MAX_SCHED_MIX_SCORE
/ MAX_SCHED_MIX_DISTANCE
);
14855 int other_side
= 1 - sched_state
.side
;
14857 /* Try to delay long-running insns when side is busy. */
14858 if (s390_is_longrunning (insn
))
14860 if (s390_is_fxd (insn
))
14862 if (fxd_longrunning
[sched_state
.side
]
14863 && fxd_longrunning
[other_side
]
14864 <= fxd_longrunning
[sched_state
.side
])
14865 score
= MAX (0, score
- 10);
14867 else if (fxd_longrunning
[other_side
]
14868 >= fxd_longrunning
[sched_state
.side
])
14872 if (s390_is_fpd (insn
))
14874 if (fpd_longrunning
[sched_state
.side
]
14875 && fpd_longrunning
[other_side
]
14876 <= fpd_longrunning
[sched_state
.side
])
14877 score
= MAX (0, score
- 10);
14879 else if (fpd_longrunning
[other_side
]
14880 >= fpd_longrunning
[sched_state
.side
])
14889 /* This function is called via hook TARGET_SCHED_REORDER before
14890 issuing one insn from list READY which contains *NREADYP entries.
14891 For target z10 it reorders load instructions to avoid early load
14892 conflicts in the floating point pipeline */
14894 s390_sched_reorder (FILE *file
, int verbose
,
14895 rtx_insn
**ready
, int *nreadyp
, int clock ATTRIBUTE_UNUSED
)
14897 if (s390_tune
== PROCESSOR_2097_Z10
14898 && reload_completed
14900 s390_z10_prevent_earlyload_conflicts (ready
, nreadyp
);
14902 if (s390_tune
>= PROCESSOR_2827_ZEC12
14903 && reload_completed
14907 int last_index
= *nreadyp
- 1;
14908 int max_index
= -1;
14909 int max_score
= -1;
14912 /* Just move the insn with the highest score to the top (the
14913 end) of the list. A full sort is not needed since a conflict
14914 in the hazard recognition cannot happen. So the top insn in
14915 the ready list will always be taken. */
14916 for (i
= last_index
; i
>= 0; i
--)
14920 if (recog_memoized (ready
[i
]) < 0)
14923 score
= s390_sched_score (ready
[i
]);
14924 if (score
> max_score
)
14931 if (max_index
!= -1)
14933 if (max_index
!= last_index
)
14935 tmp
= ready
[max_index
];
14936 ready
[max_index
] = ready
[last_index
];
14937 ready
[last_index
] = tmp
;
14941 ";;\t\tBACKEND: move insn %d to the top of list\n",
14942 INSN_UID (ready
[last_index
]));
14944 else if (verbose
> 5)
14946 ";;\t\tBACKEND: best insn %d already on top\n",
14947 INSN_UID (ready
[last_index
]));
14952 fprintf (file
, "ready list ooo attributes - sched state: %d\n",
14953 sched_state
.group_state
);
14955 for (i
= last_index
; i
>= 0; i
--)
14957 unsigned int sched_mask
;
14958 rtx_insn
*insn
= ready
[i
];
14960 if (recog_memoized (insn
) < 0)
14963 sched_mask
= s390_get_sched_attrmask (insn
);
14964 fprintf (file
, ";;\t\tBACKEND: insn %d score: %d: ",
14966 s390_sched_score (insn
));
14967 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14968 ((M) & sched_mask) ? #ATTR : "");
14969 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
14970 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
14971 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
14972 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
14973 #undef PRINT_SCHED_ATTR
14974 if (s390_tune
>= PROCESSOR_2964_Z13
)
14976 unsigned int unit_mask
, m
= 1;
14979 unit_mask
= s390_get_unit_mask (insn
, &units
);
14980 fprintf (file
, "(units:");
14981 for (j
= 0; j
< units
; j
++, m
<<= 1)
14983 fprintf (file
, " u%d", j
);
14984 fprintf (file
, ")");
14986 fprintf (file
, "\n");
14991 return s390_issue_rate ();
14995 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14996 the scheduler has issued INSN. It stores the last issued insn into
14997 last_scheduled_insn in order to make it available for
14998 s390_sched_reorder. */
15000 s390_sched_variable_issue (FILE *file
, int verbose
, rtx_insn
*insn
, int more
)
15002 last_scheduled_insn
= insn
;
15004 bool ends_group
= false;
15006 if (s390_tune
>= PROCESSOR_2827_ZEC12
15007 && reload_completed
15008 && recog_memoized (insn
) >= 0)
15010 unsigned int mask
= s390_get_sched_attrmask (insn
);
15012 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
15013 sched_state
.group_of_two
= true;
15015 /* If this is a group-of-two insn, we actually ended the last group
15016 and this insn is the first one of the new group. */
15017 if (sched_state
.group_state
== 2 && sched_state
.group_of_two
)
15019 sched_state
.side
= sched_state
.side
? 0 : 1;
15020 sched_state
.group_state
= 0;
15023 /* Longrunning and side bookkeeping. */
15024 for (int i
= 0; i
< 2; i
++)
15026 fxd_longrunning
[i
] = MAX (0, fxd_longrunning
[i
] - 1);
15027 fpd_longrunning
[i
] = MAX (0, fpd_longrunning
[i
] - 1);
15030 unsigned latency
= insn_default_latency (insn
);
15031 if (s390_is_longrunning (insn
))
15033 if (s390_is_fxd (insn
))
15034 fxd_longrunning
[sched_state
.side
] = latency
;
15036 fpd_longrunning
[sched_state
.side
] = latency
;
15039 if (s390_tune
>= PROCESSOR_2964_Z13
)
15042 unsigned unit_mask
, m
= 1;
15044 unit_mask
= s390_get_unit_mask (insn
, &units
);
15045 gcc_assert (units
<= MAX_SCHED_UNITS
);
15047 for (i
= 0; i
< units
; i
++, m
<<= 1)
15049 last_scheduled_unit_distance
[i
][sched_state
.side
] = 0;
15050 else if (last_scheduled_unit_distance
[i
][sched_state
.side
]
15051 < MAX_SCHED_MIX_DISTANCE
)
15052 last_scheduled_unit_distance
[i
][sched_state
.side
]++;
15055 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
15056 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0
15057 || (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0
15058 || (mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
15060 sched_state
.group_state
= 0;
15065 switch (sched_state
.group_state
)
15068 sched_state
.group_state
++;
15071 sched_state
.group_state
++;
15072 if (sched_state
.group_of_two
)
15074 sched_state
.group_state
= 0;
15079 sched_state
.group_state
++;
15087 unsigned int sched_mask
;
15089 sched_mask
= s390_get_sched_attrmask (insn
);
15091 fprintf (file
, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn
));
15092 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15093 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
15094 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
15095 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
15096 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
15097 #undef PRINT_SCHED_ATTR
15099 if (s390_tune
>= PROCESSOR_2964_Z13
)
15101 unsigned int unit_mask
, m
= 1;
15104 unit_mask
= s390_get_unit_mask (insn
, &units
);
15105 fprintf (file
, "(units:");
15106 for (j
= 0; j
< units
; j
++, m
<<= 1)
15108 fprintf (file
, " %d", j
);
15109 fprintf (file
, ")");
15111 fprintf (file
, " sched state: %d\n", sched_state
.group_state
);
15113 if (s390_tune
>= PROCESSOR_2964_Z13
)
15117 s390_get_unit_mask (insn
, &units
);
15119 fprintf (file
, ";;\t\tBACKEND: units on this side unused for: ");
15120 for (j
= 0; j
< units
; j
++)
15121 fprintf (file
, "%d:%d ", j
,
15122 last_scheduled_unit_distance
[j
][sched_state
.side
]);
15123 fprintf (file
, "\n");
15127 /* If this insn ended a group, the next will be on the other side. */
15130 sched_state
.group_state
= 0;
15131 sched_state
.side
= sched_state
.side
? 0 : 1;
15132 sched_state
.group_of_two
= false;
15136 if (GET_CODE (PATTERN (insn
)) != USE
15137 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
15144 s390_sched_init (FILE *file ATTRIBUTE_UNUSED
,
15145 int verbose ATTRIBUTE_UNUSED
,
15146 int max_ready ATTRIBUTE_UNUSED
)
15148 /* If the next basic block is most likely entered via a fallthru edge
15149 we keep the last sched state. Otherwise we start a new group.
15150 The scheduler traverses basic blocks in "instruction stream" ordering
15151 so if we see a fallthru edge here, sched_state will be of its
15154 current_sched_info->prev_head is the insn before the first insn of the
15155 block of insns to be scheduled.
15157 rtx_insn
*insn
= current_sched_info
->prev_head
15158 ? NEXT_INSN (current_sched_info
->prev_head
) : NULL
;
15159 basic_block bb
= insn
? BLOCK_FOR_INSN (insn
) : NULL
;
15160 if (s390_tune
< PROCESSOR_2964_Z13
|| !s390_bb_fallthru_entry_likely (bb
))
15162 last_scheduled_insn
= NULL
;
15163 memset (last_scheduled_unit_distance
, 0,
15164 MAX_SCHED_UNITS
* NUM_SIDES
* sizeof (int));
15165 sched_state
.group_state
= 0;
15166 sched_state
.group_of_two
= false;
15170 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15171 a new number struct loop *loop should be unrolled if tuned for cpus with
15172 a built-in stride prefetcher.
15173 The loop is analyzed for memory accesses by calling check_dpu for
15174 each rtx of the loop. Depending on the loop_depth and the amount of
15175 memory accesses a new number <=nunroll is returned to improve the
15176 behavior of the hardware prefetch unit. */
15178 s390_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
15183 unsigned mem_count
= 0;
15185 if (s390_tune
< PROCESSOR_2097_Z10
)
15188 /* Count the number of memory references within the loop body. */
15189 bbs
= get_loop_body (loop
);
15190 subrtx_iterator::array_type array
;
15191 for (i
= 0; i
< loop
->num_nodes
; i
++)
15192 FOR_BB_INSNS (bbs
[i
], insn
)
15193 if (INSN_P (insn
) && INSN_CODE (insn
) != -1)
15197 /* The runtime of small loops with memory block operations
15198 will be determined by the memory operation. Doing
15199 unrolling doesn't help here. Measurements to confirm
15200 this where only done on recent CPU levels. So better do
15201 not change anything for older CPUs. */
15202 if (s390_tune
>= PROCESSOR_2964_Z13
15203 && loop
->ninsns
<= BLOCK_MEM_OPS_LOOP_INSNS
15204 && ((set
= single_set (insn
)) != NULL_RTX
)
15205 && ((GET_MODE (SET_DEST (set
)) == BLKmode
15206 && (GET_MODE (SET_SRC (set
)) == BLKmode
15207 || SET_SRC (set
) == const0_rtx
))
15208 || (GET_CODE (SET_SRC (set
)) == COMPARE
15209 && GET_MODE (XEXP (SET_SRC (set
), 0)) == BLKmode
15210 && GET_MODE (XEXP (SET_SRC (set
), 1)) == BLKmode
)))
15213 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
15219 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15220 if (mem_count
== 0)
15223 switch (loop_depth(loop
))
15226 return MIN (nunroll
, 28 / mem_count
);
15228 return MIN (nunroll
, 22 / mem_count
);
15230 return MIN (nunroll
, 16 / mem_count
);
15234 /* Restore the current options. This is a hook function and also called
15238 s390_function_specific_restore (struct gcc_options
*opts
,
15239 struct gcc_options */
* opts_set */
,
15240 struct cl_target_option
*ptr ATTRIBUTE_UNUSED
)
15242 opts
->x_s390_cost_pointer
= (long)processor_table
[opts
->x_s390_tune
].cost
;
15246 s390_default_align (struct gcc_options
*opts
)
15248 /* Set the default function alignment to 16 in order to get rid of
15249 some unwanted performance effects. */
15250 if (opts
->x_flag_align_functions
&& !opts
->x_str_align_functions
15251 && opts
->x_s390_tune
>= PROCESSOR_2964_Z13
)
15252 opts
->x_str_align_functions
= "16";
15256 s390_override_options_after_change (void)
15258 s390_default_align (&global_options
);
15262 s390_option_override_internal (struct gcc_options
*opts
,
15263 struct gcc_options
*opts_set
)
15265 /* Architecture mode defaults according to ABI. */
15266 if (!(opts_set
->x_target_flags
& MASK_ZARCH
))
15269 opts
->x_target_flags
|= MASK_ZARCH
;
15271 opts
->x_target_flags
&= ~MASK_ZARCH
;
15274 /* Set the march default in case it hasn't been specified on cmdline. */
15275 if (!opts_set
->x_s390_arch
)
15276 opts
->x_s390_arch
= PROCESSOR_2064_Z900
;
15278 opts
->x_s390_arch_flags
= processor_flags_table
[(int) opts
->x_s390_arch
];
15280 /* Determine processor to tune for. */
15281 if (!opts_set
->x_s390_tune
)
15282 opts
->x_s390_tune
= opts
->x_s390_arch
;
15284 opts
->x_s390_tune_flags
= processor_flags_table
[opts
->x_s390_tune
];
15286 /* Sanity checks. */
15287 if (opts
->x_s390_arch
== PROCESSOR_NATIVE
15288 || opts
->x_s390_tune
== PROCESSOR_NATIVE
)
15289 gcc_unreachable ();
15290 if (TARGET_64BIT
&& !TARGET_ZARCH_P (opts
->x_target_flags
))
15291 error ("64-bit ABI not supported in ESA/390 mode");
15293 if (opts
->x_s390_indirect_branch
== indirect_branch_thunk_inline
15294 || opts
->x_s390_indirect_branch_call
== indirect_branch_thunk_inline
15295 || opts
->x_s390_function_return
== indirect_branch_thunk_inline
15296 || opts
->x_s390_function_return_reg
== indirect_branch_thunk_inline
15297 || opts
->x_s390_function_return_mem
== indirect_branch_thunk_inline
)
15298 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15300 if (opts
->x_s390_indirect_branch
!= indirect_branch_keep
)
15302 if (!opts_set
->x_s390_indirect_branch_call
)
15303 opts
->x_s390_indirect_branch_call
= opts
->x_s390_indirect_branch
;
15305 if (!opts_set
->x_s390_indirect_branch_jump
)
15306 opts
->x_s390_indirect_branch_jump
= opts
->x_s390_indirect_branch
;
15309 if (opts
->x_s390_function_return
!= indirect_branch_keep
)
15311 if (!opts_set
->x_s390_function_return_reg
)
15312 opts
->x_s390_function_return_reg
= opts
->x_s390_function_return
;
15314 if (!opts_set
->x_s390_function_return_mem
)
15315 opts
->x_s390_function_return_mem
= opts
->x_s390_function_return
;
15318 /* Enable hardware transactions if available and not explicitly
15319 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15320 if (!TARGET_OPT_HTM_P (opts_set
->x_target_flags
))
15322 if (TARGET_CPU_HTM_P (opts
) && TARGET_ZARCH_P (opts
->x_target_flags
))
15323 opts
->x_target_flags
|= MASK_OPT_HTM
;
15325 opts
->x_target_flags
&= ~MASK_OPT_HTM
;
15328 if (TARGET_OPT_VX_P (opts_set
->x_target_flags
))
15330 if (TARGET_OPT_VX_P (opts
->x_target_flags
))
15332 if (!TARGET_CPU_VX_P (opts
))
15333 error ("hardware vector support not available on %s",
15334 processor_table
[(int)opts
->x_s390_arch
].name
);
15335 if (TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15336 error ("hardware vector support not available with "
15337 "%<-msoft-float%>");
15342 if (TARGET_CPU_VX_P (opts
))
15343 /* Enable vector support if available and not explicitly disabled
15344 by user. E.g. with -m31 -march=z13 -mzarch */
15345 opts
->x_target_flags
|= MASK_OPT_VX
;
15347 opts
->x_target_flags
&= ~MASK_OPT_VX
;
15350 /* Use hardware DFP if available and not explicitly disabled by
15351 user. E.g. with -m31 -march=z10 -mzarch */
15352 if (!TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15354 if (TARGET_DFP_P (opts
))
15355 opts
->x_target_flags
|= MASK_HARD_DFP
;
15357 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15360 if (TARGET_HARD_DFP_P (opts
->x_target_flags
) && !TARGET_DFP_P (opts
))
15362 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15364 if (!TARGET_CPU_DFP_P (opts
))
15365 error ("hardware decimal floating point instructions"
15366 " not available on %s",
15367 processor_table
[(int)opts
->x_s390_arch
].name
);
15368 if (!TARGET_ZARCH_P (opts
->x_target_flags
))
15369 error ("hardware decimal floating point instructions"
15370 " not available in ESA/390 mode");
15373 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15376 if (TARGET_SOFT_FLOAT_P (opts_set
->x_target_flags
)
15377 && TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15379 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
)
15380 && TARGET_HARD_DFP_P (opts
->x_target_flags
))
15381 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15382 "%<-msoft-float%>");
15384 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15387 if (TARGET_BACKCHAIN_P (opts
->x_target_flags
)
15388 && TARGET_PACKED_STACK_P (opts
->x_target_flags
)
15389 && TARGET_HARD_FLOAT_P (opts
->x_target_flags
))
15390 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15391 "supported in combination");
15393 if (opts
->x_s390_stack_size
)
15395 if (opts
->x_s390_stack_guard
>= opts
->x_s390_stack_size
)
15396 error ("stack size must be greater than the stack guard value");
15397 else if (opts
->x_s390_stack_size
> 1 << 16)
15398 error ("stack size must not be greater than 64k");
15400 else if (opts
->x_s390_stack_guard
)
15401 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15403 /* Our implementation of the stack probe requires the probe interval
15404 to be used as displacement in an address operand. The maximum
15405 probe interval currently is 64k. This would exceed short
15406 displacements. Trim that value down to 4k if that happens. This
15407 might result in too many probes being generated only on the
15408 oldest supported machine level z900. */
15409 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval
)))
15410 param_stack_clash_protection_probe_interval
= 12;
15412 #if TARGET_TPF != 0
15413 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_check
))
15414 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15416 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_target
))
15417 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15419 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_check
))
15420 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15422 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_target
))
15423 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15425 if (s390_tpf_trace_skip
)
15427 opts
->x_s390_tpf_trace_hook_prologue_target
= TPF_TRACE_PROLOGUE_SKIP_TARGET
;
15428 opts
->x_s390_tpf_trace_hook_epilogue_target
= TPF_TRACE_EPILOGUE_SKIP_TARGET
;
15432 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15433 if (!TARGET_LONG_DOUBLE_128_P (opts_set
->x_target_flags
))
15434 opts
->x_target_flags
|= MASK_LONG_DOUBLE_128
;
15437 if (opts
->x_s390_tune
>= PROCESSOR_2097_Z10
)
15439 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unrolled_insns
,
15441 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unroll_times
, 32);
15442 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peeled_insns
,
15444 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peel_times
,
15448 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_pending_list_length
,
15450 /* values for loop prefetching */
15451 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_line_size
, 256);
15452 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_size
, 128);
15453 /* s390 has more than 2 levels and the size is much larger. Since
15454 we are always running virtualized assume that we only get a small
15455 part of the caches above l1. */
15456 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l2_cache_size
, 1500);
15457 SET_OPTION_IF_UNSET (opts
, opts_set
,
15458 param_prefetch_min_insn_to_mem_ratio
, 2);
15459 SET_OPTION_IF_UNSET (opts
, opts_set
, param_simultaneous_prefetches
, 6);
15461 /* Use the alternative scheduling-pressure algorithm by default. */
15462 SET_OPTION_IF_UNSET (opts
, opts_set
, param_sched_pressure_algorithm
, 2);
15463 SET_OPTION_IF_UNSET (opts
, opts_set
, param_min_vect_loop_bound
, 2);
15465 /* Use aggressive inlining parameters. */
15466 if (opts
->x_s390_tune
>= PROCESSOR_2964_Z13
)
15468 SET_OPTION_IF_UNSET (opts
, opts_set
, param_inline_min_speedup
, 2);
15469 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_inline_insns_auto
, 80);
15472 /* Set the default alignment. */
15473 s390_default_align (opts
);
15475 /* Call target specific restore function to do post-init work. At the moment,
15476 this just sets opts->x_s390_cost_pointer. */
15477 s390_function_specific_restore (opts
, opts_set
, NULL
);
15479 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15480 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
15481 not the case when the code runs before the prolog. */
15482 if (opts
->x_flag_fentry
&& !TARGET_64BIT
)
15483 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15487 s390_option_override (void)
15490 cl_deferred_option
*opt
;
15491 vec
<cl_deferred_option
> *v
=
15492 (vec
<cl_deferred_option
> *) s390_deferred_options
;
15495 FOR_EACH_VEC_ELT (*v
, i
, opt
)
15497 switch (opt
->opt_index
)
15499 case OPT_mhotpatch_
:
15503 char *s
= strtok (ASTRDUP (opt
->arg
), ",");
15504 char *t
= strtok (NULL
, "\0");
15508 val1
= integral_argument (s
);
15509 val2
= integral_argument (t
);
15516 if (val1
== -1 || val2
== -1)
15518 /* argument is not a plain number */
15519 error ("arguments to %qs should be non-negative integers",
15523 else if (val1
> s390_hotpatch_hw_max
15524 || val2
> s390_hotpatch_hw_max
)
15526 error ("argument to %qs is too large (max. %d)",
15527 "-mhotpatch=n,m", s390_hotpatch_hw_max
);
15530 s390_hotpatch_hw_before_label
= val1
;
15531 s390_hotpatch_hw_after_label
= val2
;
15535 gcc_unreachable ();
15539 /* Set up function hooks. */
15540 init_machine_status
= s390_init_machine_status
;
15542 s390_option_override_internal (&global_options
, &global_options_set
);
15544 /* Save the initial options in case the user does function specific
15546 target_option_default_node
15547 = build_target_option_node (&global_options
, &global_options_set
);
15548 target_option_current_node
= target_option_default_node
;
15550 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15551 requires the arch flags to be evaluated already. Since prefetching
15552 is beneficial on s390, we enable it if available. */
15553 if (flag_prefetch_loop_arrays
< 0 && HAVE_prefetch
&& optimize
>= 3)
15554 flag_prefetch_loop_arrays
= 1;
15556 if (!s390_pic_data_is_text_relative
&& !flag_pic
)
15557 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15558 "%<-fpic%>/%<-fPIC%>");
15562 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15563 debuggers do not yet support DWARF 3/4. */
15564 if (!global_options_set
.x_dwarf_strict
)
15566 if (!global_options_set
.x_dwarf_version
)
15571 #if S390_USE_TARGET_ATTRIBUTE
15572 /* Inner function to process the attribute((target(...))), take an argument and
15573 set the current options from the argument. If we have a list, recursively go
15577 s390_valid_target_attribute_inner_p (tree args
,
15578 struct gcc_options
*opts
,
15579 struct gcc_options
*new_opts_set
,
15585 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15586 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15587 static const struct
15589 const char *string
;
15593 int only_as_pragma
;
15596 S390_ATTRIB ("arch=", OPT_march_
, 1),
15597 S390_ATTRIB ("tune=", OPT_mtune_
, 1),
15598 /* uinteger options */
15599 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_
, 1),
15600 S390_ATTRIB ("stack-size=", OPT_mstack_size_
, 1),
15601 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_
, 1),
15602 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_
, 1),
15604 S390_ATTRIB ("backchain", OPT_mbackchain
, 0),
15605 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp
, 0),
15606 S390_ATTRIB ("hard-float", OPT_mhard_float
, 0),
15607 S390_ATTRIB ("htm", OPT_mhtm
, 0),
15608 S390_ATTRIB ("vx", OPT_mvx
, 0),
15609 S390_ATTRIB ("packed-stack", OPT_mpacked_stack
, 0),
15610 S390_ATTRIB ("small-exec", OPT_msmall_exec
, 0),
15611 S390_ATTRIB ("soft-float", OPT_msoft_float
, 0),
15612 S390_ATTRIB ("mvcle", OPT_mmvcle
, 0),
15613 S390_PRAGMA ("zvector", OPT_mzvector
, 0),
15614 /* boolean options */
15615 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack
, 0),
15620 /* If this is a list, recurse to get the options. */
15621 if (TREE_CODE (args
) == TREE_LIST
)
15624 int num_pragma_values
;
15627 /* Note: attribs.c:decl_attributes prepends the values from
15628 current_target_pragma to the list of target attributes. To determine
15629 whether we're looking at a value of the attribute or the pragma we
15630 assume that the first [list_length (current_target_pragma)] values in
15631 the list are the values from the pragma. */
15632 num_pragma_values
= (!force_pragma
&& current_target_pragma
!= NULL
)
15633 ? list_length (current_target_pragma
) : 0;
15634 for (i
= 0; args
; args
= TREE_CHAIN (args
), i
++)
15638 is_pragma
= (force_pragma
|| i
< num_pragma_values
);
15639 if (TREE_VALUE (args
)
15640 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args
),
15641 opts
, new_opts_set
,
15650 else if (TREE_CODE (args
) != STRING_CST
)
15652 error ("attribute %<target%> argument not a string");
15656 /* Handle multiple arguments separated by commas. */
15657 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
15659 while (next_optstr
&& *next_optstr
!= '\0')
15661 char *p
= next_optstr
;
15663 char *comma
= strchr (next_optstr
, ',');
15664 size_t len
, opt_len
;
15670 enum cl_var_type var_type
;
15676 len
= comma
- next_optstr
;
15677 next_optstr
= comma
+ 1;
15682 next_optstr
= NULL
;
15685 /* Recognize no-xxx. */
15686 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
15695 /* Find the option. */
15698 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
15700 opt_len
= attrs
[i
].len
;
15701 if (ch
== attrs
[i
].string
[0]
15702 && ((attrs
[i
].has_arg
) ? len
> opt_len
: len
== opt_len
)
15703 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
15705 opt
= attrs
[i
].opt
;
15706 if (!opt_set_p
&& cl_options
[opt
].cl_reject_negative
)
15708 mask
= cl_options
[opt
].var_value
;
15709 var_type
= cl_options
[opt
].var_type
;
15715 /* Process the option. */
15718 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15721 else if (attrs
[i
].only_as_pragma
&& !force_pragma
)
15723 /* Value is not allowed for the target attribute. */
15724 error ("value %qs is not supported by attribute %<target%>",
15729 else if (var_type
== CLVC_BIT_SET
|| var_type
== CLVC_BIT_CLEAR
)
15731 if (var_type
== CLVC_BIT_CLEAR
)
15732 opt_set_p
= !opt_set_p
;
15735 opts
->x_target_flags
|= mask
;
15737 opts
->x_target_flags
&= ~mask
;
15738 new_opts_set
->x_target_flags
|= mask
;
15741 else if (cl_options
[opt
].var_type
== CLVC_BOOLEAN
)
15745 if (cl_options
[opt
].cl_uinteger
)
15747 /* Unsigned integer argument. Code based on the function
15748 decode_cmdline_option () in opts-common.c. */
15749 value
= integral_argument (p
+ opt_len
);
15752 value
= (opt_set_p
) ? 1 : 0;
15756 struct cl_decoded_option decoded
;
15758 /* Value range check; only implemented for numeric and boolean
15759 options at the moment. */
15760 generate_option (opt
, NULL
, value
, CL_TARGET
, &decoded
);
15761 s390_handle_option (opts
, new_opts_set
, &decoded
, input_location
);
15762 set_option (opts
, new_opts_set
, opt
, value
,
15763 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15768 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15773 else if (cl_options
[opt
].var_type
== CLVC_ENUM
)
15778 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
15780 set_option (opts
, new_opts_set
, opt
, value
,
15781 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15785 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15791 gcc_unreachable ();
15796 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15799 s390_valid_target_attribute_tree (tree args
,
15800 struct gcc_options
*opts
,
15801 const struct gcc_options
*opts_set
,
15804 tree t
= NULL_TREE
;
15805 struct gcc_options new_opts_set
;
15807 memset (&new_opts_set
, 0, sizeof (new_opts_set
));
15809 /* Process each of the options on the chain. */
15810 if (! s390_valid_target_attribute_inner_p (args
, opts
, &new_opts_set
,
15812 return error_mark_node
;
15814 /* If some option was set (even if it has not changed), rerun
15815 s390_option_override_internal, and then save the options away. */
15816 if (new_opts_set
.x_target_flags
15817 || new_opts_set
.x_s390_arch
15818 || new_opts_set
.x_s390_tune
15819 || new_opts_set
.x_s390_stack_guard
15820 || new_opts_set
.x_s390_stack_size
15821 || new_opts_set
.x_s390_branch_cost
15822 || new_opts_set
.x_s390_warn_framesize
15823 || new_opts_set
.x_s390_warn_dynamicstack_p
)
15825 const unsigned char *src
= (const unsigned char *)opts_set
;
15826 unsigned char *dest
= (unsigned char *)&new_opts_set
;
15829 /* Merge the original option flags into the new ones. */
15830 for (i
= 0; i
< sizeof(*opts_set
); i
++)
15833 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15834 s390_option_override_internal (opts
, &new_opts_set
);
15835 /* Save the current options unless we are validating options for
15837 t
= build_target_option_node (opts
, &new_opts_set
);
15842 /* Hook to validate attribute((target("string"))). */
15845 s390_valid_target_attribute_p (tree fndecl
,
15846 tree
ARG_UNUSED (name
),
15848 int ARG_UNUSED (flags
))
15850 struct gcc_options func_options
, func_options_set
;
15851 tree new_target
, new_optimize
;
15854 /* attribute((target("default"))) does nothing, beyond
15855 affecting multi-versioning. */
15856 if (TREE_VALUE (args
)
15857 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
15858 && TREE_CHAIN (args
) == NULL_TREE
15859 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
15863 = build_optimization_node (&global_options
, &global_options_set
);
15865 /* Get the optimization options of the current function. */
15866 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
15868 if (!func_optimize
)
15869 func_optimize
= old_optimize
;
15871 /* Init func_options. */
15872 memset (&func_options
, 0, sizeof (func_options
));
15873 init_options_struct (&func_options
, NULL
);
15874 lang_hooks
.init_options_struct (&func_options
);
15875 memset (&func_options_set
, 0, sizeof (func_options_set
));
15877 cl_optimization_restore (&func_options
, &func_options_set
,
15878 TREE_OPTIMIZATION (func_optimize
));
15880 /* Initialize func_options to the default before its target options can
15882 cl_target_option_restore (&func_options
, &func_options_set
,
15883 TREE_TARGET_OPTION (target_option_default_node
));
15885 new_target
= s390_valid_target_attribute_tree (args
, &func_options
,
15886 &global_options_set
,
15888 current_target_pragma
));
15889 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
15890 if (new_target
== error_mark_node
)
15892 else if (fndecl
&& new_target
)
15894 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
15895 if (old_optimize
!= new_optimize
)
15896 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
15901 /* Hook to determine if one function can safely inline another. */
15904 s390_can_inline_p (tree caller
, tree callee
)
15906 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
15907 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
15910 callee_tree
= target_option_default_node
;
15912 caller_tree
= target_option_default_node
;
15913 if (callee_tree
== caller_tree
)
15916 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
15917 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
15920 if ((caller_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
))
15921 != (callee_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
)))
15924 /* Don't inline functions to be compiled for a more recent arch into a
15925 function for an older arch. */
15926 else if (caller_opts
->x_s390_arch
< callee_opts
->x_s390_arch
)
15929 /* Inlining a hard float function into a soft float function is only
15930 allowed if the hard float function doesn't actually make use of
15933 We are called from FEs for multi-versioning call optimization, so
15934 beware of ipa_fn_summaries not available. */
15935 else if (((TARGET_SOFT_FLOAT_P (caller_opts
->x_target_flags
)
15936 && !TARGET_SOFT_FLOAT_P (callee_opts
->x_target_flags
))
15937 || (!TARGET_HARD_DFP_P (caller_opts
->x_target_flags
)
15938 && TARGET_HARD_DFP_P (callee_opts
->x_target_flags
)))
15939 && (! ipa_fn_summaries
15940 || ipa_fn_summaries
->get
15941 (cgraph_node::get (callee
))->fp_expressions
))
15948 /* Set VAL to correct enum value according to the indirect-branch or
15949 function-return attribute in ATTR. */
/* NOTE(review): line-mangled extraction; braces and possibly a final
   'else' diagnostic/unreachable branch were dropped -- verify upstream.
   Maps the attribute's string argument ("keep" / "thunk" /
   "thunk-inline" / "thunk-extern") onto the indirect_branch enum,
   storing the result through VAL.  */
15952 s390_indirect_branch_attrvalue (tree attr
, enum indirect_branch
*val
)
/* The attribute's single string argument.  */
15954 const char *str
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
15955 if (strcmp (str
, "keep") == 0)
15956 *val
= indirect_branch_keep
;
15957 else if (strcmp (str
, "thunk") == 0)
15958 *val
= indirect_branch_thunk
;
15959 else if (strcmp (str
, "thunk-inline") == 0)
15960 *val
= indirect_branch_thunk_inline
;
15961 else if (strcmp (str
, "thunk-extern") == 0)
15962 *val
= indirect_branch_thunk_extern
;
/* NOTE(review): an unrecognized string presumably cannot reach here
   (attribute arguments are validated elsewhere) -- TODO confirm whether
   a trailing 'else gcc_unreachable ();' was dropped.  */
15965 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15966 from either the cmdline or the function attributes in
15970 s390_indirect_branch_settings (tree fndecl
)
/* NOTE(review): line-mangled extraction; the declaration of 'attr', an
   early 'if (!fndecl) return;' guard, and the braces around the two
   two-statement attribute bodies appear to have been dropped -- verify
   against upstream.  */
15977 /* Initialize with the cmdline options and let the attributes
/* Seed cfun->machine from the command-line settings; the attribute
   lookups below override them per function.  */
15979 cfun
->machine
->indirect_branch_jump
= s390_indirect_branch_jump
;
15980 cfun
->machine
->indirect_branch_call
= s390_indirect_branch_call
;
15982 cfun
->machine
->function_return_reg
= s390_function_return_reg
;
15983 cfun
->machine
->function_return_mem
= s390_function_return_mem
;
/* "indirect_branch" sets both the jump and the call variant.  */
15985 if ((attr
= lookup_attribute ("indirect_branch",
15986 DECL_ATTRIBUTES (fndecl
))))
15988 s390_indirect_branch_attrvalue (attr
,
15989 &cfun
->machine
->indirect_branch_jump
);
15990 s390_indirect_branch_attrvalue (attr
,
15991 &cfun
->machine
->indirect_branch_call
);
/* More specific attributes override the combined one above.  */
15994 if ((attr
= lookup_attribute ("indirect_branch_jump",
15995 DECL_ATTRIBUTES (fndecl
))))
15996 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_jump
);
15998 if ((attr
= lookup_attribute ("indirect_branch_call",
15999 DECL_ATTRIBUTES (fndecl
))))
16000 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_call
);
/* "function_return" sets both the register and the memory variant.  */
16002 if ((attr
= lookup_attribute ("function_return",
16003 DECL_ATTRIBUTES (fndecl
))))
16005 s390_indirect_branch_attrvalue (attr
,
16006 &cfun
->machine
->function_return_reg
);
16007 s390_indirect_branch_attrvalue (attr
,
16008 &cfun
->machine
->function_return_mem
);
16011 if ((attr
= lookup_attribute ("function_return_reg",
16012 DECL_ATTRIBUTES (fndecl
))))
16013 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_reg
);
16015 if ((attr
= lookup_attribute ("function_return_mem",
16016 DECL_ATTRIBUTES (fndecl
))))
16017 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_mem
);
16020 #if S390_USE_TARGET_ATTRIBUTE
16021 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
/* Re-establishes global option state from the target-option node
   NEW_TREE, reusing cached target globals where available.  */
16025 s390_activate_target_options (tree new_tree
)
16027 cl_target_option_restore (&global_options
, &global_options_set
,
16028 TREE_TARGET_OPTION (new_tree
));
16029 if (TREE_TARGET_GLOBALS (new_tree
))
16030 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
16031 else if (new_tree
== target_option_default_node
)
16032 restore_target_globals (&default_target_globals
);
/* NOTE(review): an 'else' line (original 16033) appears to have been
   dropped here, so this cache-filling assignment looks unconditional in
   this extraction -- it is presumably the final else branch.  */
16034 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
/* Force s390_set_current_function to do a full re-activation next time.  */
16035 s390_previous_fndecl
= NULL_TREE
;
16039 /* Establish appropriate back-end context for processing the function
16040 FNDECL. The argument might be NULL to indicate processing at top
16041 level, outside of any function scope. */
/* NOTE(review): line-mangled extraction; braces, a 'tree old_tree'
   declaration, an early 'return', and 'else' lines were dropped
   (original line numbers jump) -- verify against upstream.  */
16043 s390_set_current_function (tree fndecl
)
16045 #if S390_USE_TARGET_ATTRIBUTE
16046 /* Only change the context if the function changes. This hook is called
16047 several times in the course of compiling a function, and we don't want to
16048 slow things down too much or call target_reinit when it isn't safe. */
16049 if (fndecl
== s390_previous_fndecl
)
/* Same function as last time: only refresh the per-function
   indirect-branch settings (an early return presumably follows;
   dropped).  */
16051 s390_indirect_branch_settings (fndecl
);
/* Determine which target-option node is currently active.  */
16056 if (s390_previous_fndecl
== NULL_TREE
)
16057 old_tree
= target_option_current_node
;
16058 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
))
16059 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
);
16061 old_tree
= target_option_default_node
;
/* Leaving function scope: fall back to the command-line options.  */
16063 if (fndecl
== NULL_TREE
)
16065 if (old_tree
!= target_option_current_node
)
16066 s390_activate_target_options (target_option_current_node
);
/* Entering FNDECL: activate its target options (or the defaults).  */
16070 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
16071 if (new_tree
== NULL_TREE
)
16072 new_tree
= target_option_default_node
;
16074 if (old_tree
!= new_tree
)
16075 s390_activate_target_options (new_tree
);
16076 s390_previous_fndecl
= fndecl
;
16078 s390_indirect_branch_settings (fndecl
);
16081 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16084 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
16085 unsigned int align ATTRIBUTE_UNUSED
,
16086 enum by_pieces_operation op ATTRIBUTE_UNUSED
,
16087 bool speed_p ATTRIBUTE_UNUSED
)
16089 return (size
== 1 || size
== 2
16090 || size
== 4 || (TARGET_ZARCH
&& size
== 8));
16093 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
/* NOTE(review): line-mangled extraction; the 'tree new_fpc' declaration
   (original line 16116), the mask/shift arguments of the two
   raise_old_except builders (~16146/16149), the final operand of
   *update (~16158), and the braces were dropped -- verify upstream.
   Builds GENERIC trees implementing feholdexcept/feclearexcept/
   feupdateenv semantics on the S/390 FPC register via the efpc/sfpc
   builtins, storing them through HOLD, CLEAR and UPDATE.  */
16096 s390_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
16098 tree sfpc
= s390_builtin_decls
[S390_BUILTIN_s390_sfpc
];
16099 tree efpc
= s390_builtin_decls
[S390_BUILTIN_s390_efpc
];
16100 tree call_efpc
= build_call_expr (efpc
, 0);
/* Temporary holding the saved FPC contents.  */
16101 tree fenv_var
= create_tmp_var_raw (unsigned_type_node
);
/* FPC register field layout: exception masks, status flags and DXC.  */
16103 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16104 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16105 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16106 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16107 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16108 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16110 /* Generates the equivalent of feholdexcept (&fenv_var)
16112 fenv_var = __builtin_s390_efpc ();
16113 __builtin_s390_sfpc (fenv_var & mask) */
16114 tree old_fpc
= build4 (TARGET_EXPR
, unsigned_type_node
, fenv_var
, call_efpc
,
16115 NULL_TREE
, NULL_TREE
);
/* NOTE(review): 'tree new_fpc' (original line 16116) was dropped; this
   '=' continues that declaration.  */
16117 = build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
,
16118 build_int_cst (unsigned_type_node
,
16119 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
16120 | FPC_EXCEPTION_MASK
)));
16121 tree set_new_fpc
= build_call_expr (sfpc
, 1, new_fpc
);
16122 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, old_fpc
, set_new_fpc
);
16124 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16126 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16127 new_fpc
= build2 (BIT_AND_EXPR
, unsigned_type_node
, call_efpc
,
16128 build_int_cst (unsigned_type_node
,
16129 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
)));
16130 *clear
= build_call_expr (sfpc
, 1, new_fpc
);
16132 /* Generates the equivalent of feupdateenv (fenv_var)
16134 old_fpc = __builtin_s390_efpc ();
16135 __builtin_s390_sfpc (fenv_var);
16136 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16138 old_fpc
= create_tmp_var_raw (unsigned_type_node
);
16139 tree store_old_fpc
= build4 (TARGET_EXPR
, void_type_node
, old_fpc
, call_efpc
,
16140 NULL_TREE
, NULL_TREE
);
16142 set_new_fpc
= build_call_expr (sfpc
, 1, fenv_var
);
/* NOTE(review): the mask argument (presumably FPC_FLAGS_MASK, per the
   comment above) was dropped after this build_int_cst.  */
16144 tree raise_old_except
= build2 (BIT_AND_EXPR
, unsigned_type_node
, old_fpc
,
16145 build_int_cst (unsigned_type_node
,
/* NOTE(review): shift amount (presumably FPC_FLAGS_SHIFT) dropped.  */
16147 raise_old_except
= build2 (RSHIFT_EXPR
, unsigned_type_node
, raise_old_except
,
16148 build_int_cst (unsigned_type_node
,
16150 tree atomic_feraiseexcept
16151 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
16152 raise_old_except
= build_call_expr (atomic_feraiseexcept
,
16153 1, raise_old_except
);
/* NOTE(review): the second operand of the outer COMPOUND_EXPR
   (presumably raise_old_except, at original line ~16158) was dropped.  */
16155 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
16156 build2 (COMPOUND_EXPR
, void_type_node
,
16157 store_old_fpc
, set_new_fpc
),
16160 #undef FPC_EXCEPTION_MASK
16161 #undef FPC_FLAGS_MASK
16162 #undef FPC_DXC_MASK
16163 #undef FPC_EXCEPTION_MASK_SHIFT
16164 #undef FPC_FLAGS_SHIFT
16165 #undef FPC_DXC_SHIFT
16168 /* Return the vector mode to be used for inner mode MODE when doing
/* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
   NOTE(review): the entire function body (original lines ~16172-16198)
   was dropped by the extraction; only the signature survives.  Restore
   from upstream before use.  */
16170 static machine_mode
16171 s390_preferred_simd_mode (scalar_mode mode
)
16199 /* Our hardware does not require vectors to be strictly aligned. */
/* Implements TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
   NOTE(review): lines at original 16205-16208 were dropped -- per the
   comment above, presumably a fast path returning true when the vector
   facility is active -- and the call's final argument line (likely
   'is_packed);') is missing.  Verify against upstream.  */
16201 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED
,
16202 const_tree type ATTRIBUTE_UNUSED
,
16203 int misalignment ATTRIBUTE_UNUSED
,
16204 bool is_packed ATTRIBUTE_UNUSED
)
16209 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
16213 /* The vector ABI requires vector types to be aligned on an 8 byte
16214 boundary (our stack alignment). However, we allow this to be
16215 overriden by the user, while this definitely breaks the ABI. */
16216 static HOST_WIDE_INT
16217 s390_vector_alignment (const_tree type
)
16219 tree size
= TYPE_SIZE (type
);
16221 if (!TARGET_VX_ABI
)
16222 return default_vector_alignment (type
);
16224 if (TYPE_USER_ALIGN (type
))
16225 return TYPE_ALIGN (type
);
16227 if (tree_fits_uhwi_p (size
)
16228 && tree_to_uhwi (size
) < BIGGEST_ALIGNMENT
)
16229 return tree_to_uhwi (size
);
16231 return BIGGEST_ALIGNMENT
;
16234 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
16235 LARL instruction. */
16237 static HOST_WIDE_INT
16238 s390_constant_alignment (const_tree
, HOST_WIDE_INT align
)
16240 return MAX (align
, 16);
#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  Emit the standard file prologue,
   then the .machine directive for the selected architecture level.

   NOTE(review): reconstructed from a line-mangled extraction; the
   dropped lines were the 'static void' line, the braces, and the
   closing '#endif' of this HAVE_AS_MACHINE_MACHINEMODE block, which is
   restored here so the conditional is balanced.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif
16253 /* Implement TARGET_ASM_FILE_END. */
/* NOTE(review): line-mangled extraction; braces, the '#endif' closing
   the HAVE_AS_GNU_ATTRIBUTE block, and the fprintf value-argument line
   (presumably 's390_vector_abi);', original ~16272) were dropped --
   verify against upstream.  Scans all public variables and functions
   for vector-ABI-relevant types, emits the corresponding
   .gnu_attribute, then the exec-stack / split-stack markers.  */
16255 s390_asm_file_end (void)
16257 #ifdef HAVE_AS_GNU_ATTRIBUTE
16258 varpool_node
*vnode
;
16259 cgraph_node
*cnode
;
/* Check every public variable's type for vector ABI implications.  */
16261 FOR_EACH_VARIABLE (vnode
)
16262 if (TREE_PUBLIC (vnode
->decl
))
16263 s390_check_type_for_vector_abi (TREE_TYPE (vnode
->decl
), false, false);
/* Likewise for every public function's type.  */
16265 FOR_EACH_FUNCTION (cnode
)
16266 if (TREE_PUBLIC (cnode
->decl
))
16267 s390_check_type_for_vector_abi (TREE_TYPE (cnode
->decl
), false, false);
16270 if (s390_vector_abi
!= 0)
16271 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
16274 file_end_indicate_exec_stack ();
16276 if (flag_split_stack
)
16277 file_end_indicate_split_stack ();
16280 /* Return true if TYPE is a vector bool type. */
16282 s390_vector_bool_type_p (const_tree type
)
16284 return TYPE_VECTOR_OPAQUE (type
);
16287 /* Return the diagnostic message string if the binary operation OP is
16288 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Implements TARGET_INVALID_BINARY_OP for the zvector language
   extension.  NOTE(review): line-mangled extraction -- the return-type
   line, the declarations of plusminus_p/muldiv_p/compare_p (gap at
   original 16293-16295), and the 'return NULL;' early exits were
   dropped.  Verify against upstream.  */
16290 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
16292 bool bool1_p
, bool2_p
;
16296 machine_mode mode1
, mode2
;
/* Checks only apply with -mzvector; otherwise presumably NULL (OK) is
   returned (dropped line).  */
16298 if (!TARGET_ZVECTOR
)
16301 if (!VECTOR_TYPE_P (type1
) || !VECTOR_TYPE_P (type2
))
16304 bool1_p
= s390_vector_bool_type_p (type1
);
16305 bool2_p
= s390_vector_bool_type_p (type2
);
16307 /* Mixing signed and unsigned types is forbidden for all
16309 if (!bool1_p
&& !bool2_p
16310 && TYPE_UNSIGNED (type1
) != TYPE_UNSIGNED (type2
))
16311 return N_("types differ in signedness");
/* Classify OP into the three restricted operator groups.  */
16313 plusminus_p
= (op
== PLUS_EXPR
|| op
== MINUS_EXPR
);
16314 muldiv_p
= (op
== MULT_EXPR
|| op
== RDIV_EXPR
|| op
== TRUNC_DIV_EXPR
16315 || op
== CEIL_DIV_EXPR
|| op
== FLOOR_DIV_EXPR
16316 || op
== ROUND_DIV_EXPR
);
16317 compare_p
= (op
== LT_EXPR
|| op
== LE_EXPR
|| op
== GT_EXPR
|| op
== GE_EXPR
16318 || op
== EQ_EXPR
|| op
== NE_EXPR
);
16320 if (bool1_p
&& bool2_p
&& (plusminus_p
|| muldiv_p
))
16321 return N_("binary operator does not support two vector bool operands");
16323 if (bool1_p
!= bool2_p
&& (muldiv_p
|| compare_p
))
16324 return N_("binary operator does not support vector bool operand");
16326 mode1
= TYPE_MODE (type1
);
16327 mode2
= TYPE_MODE (type2
);
/* bool +/- float mixes are rejected even though bool +/- int is OK.  */
16329 if (bool1_p
!= bool2_p
&& plusminus_p
16330 && (GET_MODE_CLASS (mode1
) == MODE_VECTOR_FLOAT
16331 || GET_MODE_CLASS (mode2
) == MODE_VECTOR_FLOAT
))
16332 return N_("binary operator does not support mixing vector "
16333 "bool with floating point vector operands");
16338 /* Implement TARGET_C_EXCESS_PRECISION.
16340 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16341 double on s390, causing operations on float_t to operate in a higher
16342 precision than is necessary. However, it is not the case that SFmode
16343 operations have implicit excess precision, and we generate more optimal
16344 code if we let the compiler know no implicit extra precision is added.
16346 That means when we are compiling with -fexcess-precision=fast, the value
16347 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16348 float_t (though they would be correct for -fexcess-precision=standard).
16350 A complete fix would modify glibc to remove the unnecessary typedef
16351 of float_t to double. */
/* NOTE(review): line-mangled extraction -- the 'switch (type)' line and
   the 'default:' label before gcc_unreachable appear to have been
   dropped.  Verify against upstream.  */
16353 static enum flt_eval_method
16354 s390_excess_precision (enum excess_precision_type type
)
16358 case EXCESS_PRECISION_TYPE_IMPLICIT
:
16359 case EXCESS_PRECISION_TYPE_FAST
:
16360 /* The fastest type to promote to will always be the native type,
16361 whether that occurs with implicit excess precision or
16363 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
16364 case EXCESS_PRECISION_TYPE_STANDARD
:
16365 /* Otherwise, when we are in a standards compliant mode, to
16366 ensure consistency with the implementation in glibc, report that
16367 float is evaluated to the range and precision of double. */
16368 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE
;
/* Presumably the switch's default case (dropped label).  */
16370 gcc_unreachable ();
/* Unreached fallback after the switch.  */
16372 return FLT_EVAL_METHOD_UNPREDICTABLE
;
16375 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16377 static unsigned HOST_WIDE_INT
16378 s390_asan_shadow_offset (void)
16380 return TARGET_64BIT
? HOST_WIDE_INT_1U
<< 52 : HOST_WIDE_INT_UC (0x20000000);
/* Use hidden comdat ("linkonce") thunk sections only when the assembler
   supports .hidden.

   NOTE(review): the extraction dropped the '#else' and '#endif' lines,
   leaving two unconditional, conflicting definitions; restored here so
   the conditional is balanced.  */
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif
16389 /* Output an indirect branch trampoline for target register REGNO. */
/* NOTE(review): line-mangled extraction -- braces, the declarations of
   'tree decl' and 'int i', the if/else lines selecting between the two
   sprintf calls and between the exrl and larl/ex sequences, and blank
   lines were all dropped (original line numbers jump).  Verify against
   upstream before relying on the control flow implied below.  */
16392 s390_output_indirect_thunk_function (unsigned int regno
, bool z10_p
)
16395 char thunk_label
[32];
/* Thunk symbol name: the EXRL variant on z10+, otherwise the EX
   variant parameterized by the scratch register (selection 'if' lines
   were dropped).  */
16399 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
, regno
);
16401 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
,
16402 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
/* Fabricate a void(void) FUNCTION_DECL for the thunk.  */
16404 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
16405 get_identifier (thunk_label
),
16406 build_function_type_list (void_type_node
, NULL_TREE
));
16407 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
16408 NULL_TREE
, void_type_node
);
16409 TREE_PUBLIC (decl
) = 1;
16410 TREE_STATIC (decl
) = 1;
16411 DECL_IGNORED_P (decl
) = 1;
/* Emit into a hidden comdat section when possible so duplicate thunks
   across TUs are merged by the linker.  */
16413 if (USE_HIDDEN_LINKONCE
)
16415 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
16417 targetm
.asm_out
.unique_section (decl
, 0);
16418 switch_to_section (get_named_section (decl
, NULL
, 0));
16420 targetm
.asm_out
.globalize_label (asm_out_file
, thunk_label
);
16421 fputs ("\t.hidden\t", asm_out_file
);
16422 assemble_name (asm_out_file
, thunk_label
);
16423 putc ('\n', asm_out_file
);
16424 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, thunk_label
, decl
);
/* Fallback (presumably the else branch; dropped): plain text section.  */
16428 switch_to_section (text_section
);
16429 ASM_OUTPUT_LABEL (asm_out_file
, thunk_label
);
/* Set up a minimal function context so final_start_function works.  */
16432 DECL_INITIAL (decl
) = make_node (BLOCK
);
16433 current_function_decl
= decl
;
16434 allocate_struct_function (decl
, false);
16435 init_function_start (decl
);
16436 cfun
->is_thunk
= true;
16437 first_function_block_is_cold
= false;
16438 final_start_function (emit_barrier (), asm_out_file
, 1);
16440 /* This makes CFI at least usable for indirect jumps.
16442 Stopping in the thunk: backtrace will point to the thunk target
16443 is if it was interrupted by a signal. For a call this means that
16444 the call chain will be: caller->callee->thunk */
16445 if (flag_asynchronous_unwind_tables
&& flag_dwarf2_cfi_asm
)
16447 fputs ("\t.cfi_signal_frame\n", asm_out_file
);
16448 fprintf (asm_out_file
, "\t.cfi_return_column %d\n", regno
);
16449 for (i
= 0; i
< FPR15_REGNUM
; i
++)
16450 fprintf (asm_out_file
, "\t.cfi_same_value %s\n", reg_names
[i
]);
/* z10+ variant: exrl of the target 'br' at label 1 (the 'if (z10_p)'
   guard lines were dropped).  */
16457 /* We generate a thunk for z10 compiled code although z10 is
16458 currently not enabled. Tell the assembler to accept the
16460 if (!TARGET_CPU_Z10
)
16462 fputs ("\t.machine push\n", asm_out_file
);
16463 fputs ("\t.machine z10\n", asm_out_file
);
16465 /* We use exrl even if -mzarch hasn't been specified on the
16466 command line so we have to tell the assembler to accept
16469 fputs ("\t.machinemode zarch\n", asm_out_file
);
16471 fputs ("\texrl\t0,1f\n", asm_out_file
);
16474 fputs ("\t.machinemode esa\n", asm_out_file
);
16476 if (!TARGET_CPU_Z10
)
16477 fputs ("\t.machine pop\n", asm_out_file
);
/* Pre-z10 variant: larl + ex through the scratch register.  */
16482 fprintf (asm_out_file
, "\tlarl\t%%r%d,1f\n",
16483 INDIRECT_BRANCH_THUNK_REGNUM
);
16486 fprintf (asm_out_file
, "\tex\t0,0(%%r%d)\n",
16487 INDIRECT_BRANCH_THUNK_REGNUM
);
/* Capture loop: stall speculation at label 0.  */
16491 fputs ("0:\tj\t0b\n", asm_out_file
);
16493 /* 1: br <regno> */
16494 fprintf (asm_out_file
, "1:\tbr\t%%r%d\n", regno
);
/* Tear the temporary function context back down.  */
16496 final_end_function ();
16497 init_insn_lengths ();
16498 free_after_compilation (cfun
);
16500 current_function_decl
= NULL
;
16503 /* Implement the asm.code_end target hook. */
/* NOTE(review): line-mangled extraction -- braces, the declarations of
   'i' and 'o', 'continue;' lines, and the section-flags argument of
   get_section were dropped (original line numbers jump).  Verify
   against upstream.  Emits all requested indirect-branch thunks
   (registers r1..r15, z10 and pre-z10 flavors) and, if enabled, the
   indirect-branch location tables.  */
16506 s390_code_end (void)
16510 for (i
= 1; i
< 16; i
++)
16512 if (indirect_branch_z10thunk_mask
& (1 << i
))
16513 s390_output_indirect_thunk_function (i
, true);
16515 if (indirect_branch_prez10thunk_mask
& (1 << i
))
16516 s390_output_indirect_thunk_function (i
, false);
16519 if (TARGET_INDIRECT_BRANCH_TABLE
)
/* One table section per -mindirect-branch option variant.  */
16524 for (o
= 0; o
< INDIRECT_BRANCH_NUM_OPTIONS
; o
++)
/* Skip variants for which no labels were generated (a 'continue;'
   presumably follows; dropped).  */
16526 if (indirect_branch_table_label_no
[o
] == 0)
16529 switch_to_section (get_section (indirect_branch_table_name
[o
],
/* Emit each recorded label as a self-relative 32-bit offset.  */
16532 for (i
= 0; i
< indirect_branch_table_label_no
[o
]; i
++)
16534 char label_start
[32];
16536 ASM_GENERATE_INTERNAL_LABEL (label_start
,
16537 indirect_branch_table_label
[o
], i
);
16539 fputs ("\t.long\t", asm_out_file
);
16540 assemble_name_raw (asm_out_file
, label_start
);
16541 fputs ("-.\n", asm_out_file
);
16543 switch_to_section (current_function_section ());
16548 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
/* NOTE(review): the return-type line, braces, and the raised threshold
   returned under TARGET_INDIRECT_BRANCH_NOBP_JUMP (original line
   ~16556; presumably a constant such as 20 -- TODO confirm upstream)
   were dropped by the extraction.  */
16551 s390_case_values_threshold (void)
16553 /* Disabling branch prediction for indirect jumps makes jump tables
16554 much more expensive. */
16555 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP
)
16558 return default_case_values_threshold ();
16561 /* Evaluate the insns between HEAD and TAIL and do back-end to install
16562 back-end specific dependencies.
16564 Establish an ANTI dependency between r11 and r15 restores from FPRs
16565 to prevent the instructions scheduler from reordering them since
16566 this would break CFI. No further handling in the sched_reorder
16567 hook is required since the r11 and r15 restore will never appear in
16568 the same ready list with that change. */
/* NOTE(review): line-mangled extraction -- the 'static void' line,
   braces, early 'return;'s, the '!set' test and 'continue;' inside the
   loop were dropped (original line numbers jump).  Verify upstream.  */
16570 s390_sched_dependencies_evaluation (rtx_insn
*head
, rtx_insn
*tail
)
/* Only relevant in the epilogue of frame-pointer functions (an early
   return presumably follows; dropped).  */
16572 if (!frame_pointer_needed
|| !epilogue_completed
)
/* Skip leading debug insns.  */
16575 while (head
!= tail
&& DEBUG_INSN_P (head
))
16576 head
= NEXT_INSN (head
);
16578 rtx_insn
*r15_restore
= NULL
, *r11_restore
= NULL
;
/* Walk backwards looking for the frame-related FPR->GPR restores of
   the hard frame pointer (r11) and stack pointer (r15).  */
16580 for (rtx_insn
*insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
16582 rtx set
= single_set (insn
);
/* NOTE(review): the first condition of this filter (presumably
   '!set', original ~16583) was dropped.  */
16584 || !RTX_FRAME_RELATED_P (insn
)
16586 || !REG_P (SET_DEST (set
))
16587 || !FP_REG_P (SET_SRC (set
)))
16590 if (REGNO (SET_DEST (set
)) == HARD_FRAME_POINTER_REGNUM
)
16591 r11_restore
= insn
;
16593 if (REGNO (SET_DEST (set
)) == STACK_POINTER_REGNUM
)
16594 r15_restore
= insn
;
/* Both restores must be present to need ordering (a 'return;'
   presumably follows; dropped).  */
16597 if (r11_restore
== NULL
|| r15_restore
== NULL
)
/* Force the r15 restore to stay before the r11 restore.  */
16599 add_dependence (r11_restore
, r15_restore
, REG_DEP_ANTI
);
16602 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16604 static unsigned HOST_WIDE_INT
16605 s390_shift_truncation_mask (machine_mode mode
)
16607 return mode
== DImode
|| mode
== SImode
? 63 : 0;
16610 /* Initialize GCC target structure. */
/* NOTE(review): line-mangled extraction.  Several '#endif' lines that
   close the conditional groups below (after original lines 16630,
   16753, 16761, 16869, 16883) appear to have been dropped, leaving the
   preprocessor conditionals unbalanced in this text -- restore from
   upstream.  The hook macros themselves are reproduced verbatim.  */
16612 #undef TARGET_ASM_ALIGNED_HI_OP
16613 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16614 #undef TARGET_ASM_ALIGNED_DI_OP
16615 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16616 #undef TARGET_ASM_INTEGER
16617 #define TARGET_ASM_INTEGER s390_assemble_integer
16619 #undef TARGET_ASM_OPEN_PAREN
16620 #define TARGET_ASM_OPEN_PAREN ""
16622 #undef TARGET_ASM_CLOSE_PAREN
16623 #define TARGET_ASM_CLOSE_PAREN ""
16625 #undef TARGET_OPTION_OVERRIDE
16626 #define TARGET_OPTION_OVERRIDE s390_option_override
/* NOTE(review): '#endif' closing this '#ifdef' appears dropped.  */
16628 #ifdef TARGET_THREAD_SSP_OFFSET
16629 #undef TARGET_STACK_PROTECT_GUARD
16630 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16633 #undef TARGET_ENCODE_SECTION_INFO
16634 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16636 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16637 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16640 #undef TARGET_HAVE_TLS
16641 #define TARGET_HAVE_TLS true
16643 #undef TARGET_CANNOT_FORCE_CONST_MEM
16644 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16646 #undef TARGET_DELEGITIMIZE_ADDRESS
16647 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16649 #undef TARGET_LEGITIMIZE_ADDRESS
16650 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16652 #undef TARGET_RETURN_IN_MEMORY
16653 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16655 #undef TARGET_INIT_BUILTINS
16656 #define TARGET_INIT_BUILTINS s390_init_builtins
16657 #undef TARGET_EXPAND_BUILTIN
16658 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16659 #undef TARGET_BUILTIN_DECL
16660 #define TARGET_BUILTIN_DECL s390_builtin_decl
16662 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16663 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16665 #undef TARGET_ASM_OUTPUT_MI_THUNK
16666 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16667 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16668 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16670 #undef TARGET_C_EXCESS_PRECISION
16671 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16673 #undef TARGET_SCHED_ADJUST_PRIORITY
16674 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16675 #undef TARGET_SCHED_ISSUE_RATE
16676 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16680 #undef TARGET_SCHED_VARIABLE_ISSUE
16681 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16682 #undef TARGET_SCHED_REORDER
16683 #define TARGET_SCHED_REORDER s390_sched_reorder
16684 #undef TARGET_SCHED_INIT
16685 #define TARGET_SCHED_INIT s390_sched_init
16687 #undef TARGET_CANNOT_COPY_INSN_P
16688 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16689 #undef TARGET_RTX_COSTS
16690 #define TARGET_RTX_COSTS s390_rtx_costs
16691 #undef TARGET_ADDRESS_COST
16692 #define TARGET_ADDRESS_COST s390_address_cost
16693 #undef TARGET_REGISTER_MOVE_COST
16694 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16695 #undef TARGET_MEMORY_MOVE_COST
16696 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16699 s390_builtin_vectorization_cost
16701 #undef TARGET_MACHINE_DEPENDENT_REORG
16702 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16704 #undef TARGET_VALID_POINTER_MODE
16705 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16707 #undef TARGET_BUILD_BUILTIN_VA_LIST
16708 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16709 #undef TARGET_EXPAND_BUILTIN_VA_START
16710 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16711 #undef TARGET_ASAN_SHADOW_OFFSET
16712 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16713 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16714 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16716 #undef TARGET_PROMOTE_FUNCTION_MODE
16717 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16718 #undef TARGET_PASS_BY_REFERENCE
16719 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16721 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16722 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16724 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16725 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16726 #undef TARGET_FUNCTION_ARG
16727 #define TARGET_FUNCTION_ARG s390_function_arg
16728 #undef TARGET_FUNCTION_ARG_ADVANCE
16729 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16730 #undef TARGET_FUNCTION_ARG_PADDING
16731 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16732 #undef TARGET_FUNCTION_VALUE
16733 #define TARGET_FUNCTION_VALUE s390_function_value
16734 #undef TARGET_LIBCALL_VALUE
16735 #define TARGET_LIBCALL_VALUE s390_libcall_value
16736 #undef TARGET_STRICT_ARGUMENT_NAMING
16737 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16739 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16740 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16742 #undef TARGET_FIXED_CONDITION_CODE_REGS
16743 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16745 #undef TARGET_CC_MODES_COMPATIBLE
16746 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16748 #undef TARGET_INVALID_WITHIN_DOLOOP
16749 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
/* NOTE(review): this pair is presumably wrapped in '#ifdef HAVE_AS_TLS'
   upstream; the '#ifdef'/'#endif' lines appear dropped here.  */
16752 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16753 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16756 #undef TARGET_DWARF_FRAME_REG_MODE
16757 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
/* NOTE(review): '#endif' closing this '#ifdef' appears dropped.  */
16759 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16760 #undef TARGET_MANGLE_TYPE
16761 #define TARGET_MANGLE_TYPE s390_mangle_type
16764 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16765 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16767 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16768 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16770 #undef TARGET_PREFERRED_RELOAD_CLASS
16771 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16773 #undef TARGET_SECONDARY_RELOAD
16774 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16775 #undef TARGET_SECONDARY_MEMORY_NEEDED
16776 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16777 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16778 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16780 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16781 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16783 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16784 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16786 #undef TARGET_LEGITIMATE_ADDRESS_P
16787 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16789 #undef TARGET_LEGITIMATE_CONSTANT_P
16790 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16792 #undef TARGET_LRA_P
16793 #define TARGET_LRA_P s390_lra_p
16795 #undef TARGET_CAN_ELIMINATE
16796 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16798 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16799 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16801 #undef TARGET_LOOP_UNROLL_ADJUST
16802 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16804 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16805 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16806 #undef TARGET_TRAMPOLINE_INIT
16807 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16810 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16811 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16813 #undef TARGET_UNWIND_WORD_MODE
16814 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16816 #undef TARGET_CANONICALIZE_COMPARISON
16817 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16819 #undef TARGET_HARD_REGNO_SCRATCH_OK
16820 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16822 #undef TARGET_HARD_REGNO_NREGS
16823 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16824 #undef TARGET_HARD_REGNO_MODE_OK
16825 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16826 #undef TARGET_MODES_TIEABLE_P
16827 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16829 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16830 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16831 s390_hard_regno_call_part_clobbered
16833 #undef TARGET_ATTRIBUTE_TABLE
16834 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16836 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16837 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
/* NOTE(review): 's300_set_up_by_prologue' looks like a typo for
   's390_...' but presumably matches the actual definition name
   elsewhere in this file -- do not rename here without renaming the
   definition.  */
16839 #undef TARGET_SET_UP_BY_PROLOGUE
16840 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16842 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16843 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16845 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16846 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16847 s390_use_by_pieces_infrastructure_p
16849 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16850 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16852 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16853 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16855 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16856 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16858 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16859 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16861 #undef TARGET_VECTOR_ALIGNMENT
16862 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16864 #undef TARGET_INVALID_BINARY_OP
16865 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
/* NOTE(review): '#endif' closing this '#ifdef' appears dropped.  */
16867 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16868 #undef TARGET_ASM_FILE_START
16869 #define TARGET_ASM_FILE_START s390_asm_file_start
16872 #undef TARGET_ASM_FILE_END
16873 #define TARGET_ASM_FILE_END s390_asm_file_end
16875 #undef TARGET_SET_CURRENT_FUNCTION
16876 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
/* NOTE(review): '#endif' closing this '#if' appears dropped.  */
16878 #if S390_USE_TARGET_ATTRIBUTE
16879 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16880 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16882 #undef TARGET_CAN_INLINE_P
16883 #define TARGET_CAN_INLINE_P s390_can_inline_p
16886 #undef TARGET_OPTION_RESTORE
16887 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16889 #undef TARGET_CAN_CHANGE_MODE_CLASS
16890 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16892 #undef TARGET_CONSTANT_ALIGNMENT
16893 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16895 #undef TARGET_ASM_CODE_END
16896 #define TARGET_ASM_CODE_END s390_code_end
16898 #undef TARGET_CASE_VALUES_THRESHOLD
16899 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16901 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16902 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16903 s390_sched_dependencies_evaluation
16905 #undef TARGET_SHIFT_TRUNCATION_MASK
16906 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16908 /* Use only short displacement, since long displacement is not available for
16909 the floating point instructions. */
16910 #undef TARGET_MAX_ANCHOR_OFFSET
16911 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
/* The single definition of the target hook vector for this back end.  */
16913 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* Garbage-collector roots generated from this file.  */
16915 #include "gt-s390.h"