1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2020 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
27 #include "coretypes.h"
30 #include "target-globals.h"
39 #include "stringpool.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
55 #include "conditions.h"
57 #include "insn-attr.h"
69 #include "cfgcleanup.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
77 #include "tree-pass.h"
82 #include "tm-constrs.h"
84 #include "symbol-summary.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
89 /* This file should be included last. */
90 #include "target-def.h"
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode
);
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl
;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction (TFmode multiply).  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of division in TFmode.  */
  const int ddbr;     /* cost of division in DFmode.  */
  const int debr;     /* cost of division in SFmode.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

/* s390_cost_pointer is a target variable holding the active cost
   table; this macro gives it its proper type.  */
#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
137 struct processor_costs z900_cost
=
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
169 struct processor_costs z990_cost
=
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
201 struct processor_costs z9_109_cost
=
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
233 struct processor_costs z10_cost
=
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1) , /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
265 struct processor_costs z196_cost
=
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1) , /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
297 struct processor_costs zEC12_cost
=
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1) , /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
328 const struct s390_processor processor_table
[] =
330 { "z900", "z900", PROCESSOR_2064_Z900
, &z900_cost
, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990
, &z990_cost
, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109
, &z9_109_cost
, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC
, &z9_109_cost
, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10
, &z10_cost
, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196
, &z196_cost
, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12
, &zEC12_cost
, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13
, &zEC12_cost
, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14
, &zEC12_cost
, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15
, &zEC12_cost
, 13 },
340 { "native", "", PROCESSOR_NATIVE
, NULL
, 0 }
343 extern int reload_completed
;
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
346 static rtx_insn
*last_scheduled_insn
;
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance
[MAX_SCHED_UNITS
][NUM_SIDES
];
352 /* Estimate of number of cycles a long-running insn occupies an
354 static int fxd_longrunning
[NUM_SIDES
];
355 static int fpd_longrunning
[NUM_SIDES
];
357 /* The maximum score added for an instruction whose unit hasn't been
358 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
359 give instruction mix scheduling more priority over instruction
361 #define MAX_SCHED_MIX_SCORE 2
363 /* The maximum distance up to which individual scores will be
364 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
365 Increase this with the OOO windows size of the machine. */
366 #define MAX_SCHED_MIX_DISTANCE 70
/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.
   NOTE(review): the struct s390_address definition itself appears to
   have been lost in extraction — restore it from upstream.  */

/* The max number of insns of backend generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  Current max is 9 coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

/* Few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

/* A register pair in MODE starting at REGNO is OK if the mode fits in
   one register or the pair starts on an even register number.  */
#define REGNO_PAIR_OK(REGNO, MODE)				\
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431 /* Masks per jump target register indicating which thunk need to be
433 static GTY(()) int indirect_branch_prez10thunk_mask
= 0;
434 static GTY(()) int indirect_branch_z10thunk_mask
= 0;
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
438 enum s390_indirect_branch_option
440 s390_opt_indirect_branch_jump
= 0,
441 s390_opt_indirect_branch_call
,
442 s390_opt_function_return_reg
,
443 s390_opt_function_return_mem
446 static GTY(()) int indirect_branch_table_label_no
[INDIRECT_BRANCH_NUM_OPTIONS
] = { 0 };
447 const char *indirect_branch_table_label
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
448 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name
[INDIRECT_BRANCH_NUM_OPTIONS
] = \
450 { ".s390_indirect_jump", ".s390_indirect_call",
451 ".s390_return_reg", ".s390_return_mem" };
454 s390_return_addr_from_memory ()
456 return cfun_gpr_save_slot(RETURN_REGNUM
) == SAVE_SLOT_STACK
;
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467 switch. The vector ABI affects only vector data types. There are
468 two aspects of the vector ABI relevant here:
470 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471 ABI and natural alignment with the old.
473 2. vector <= 16 bytes are passed in VRs or by value on the stack
474 with the new ABI but by reference on the stack with the old.
476 If ARG_P is true TYPE is used for a function argument or return
477 value. The ABI marker then is set for all vector data types. If
478 ARG_P is false only type 1 vectors are being checked. */
481 s390_check_type_for_vector_abi (const_tree type
, bool arg_p
, bool in_struct_p
)
483 static hash_set
<const_tree
> visited_types_hash
;
488 if (type
== NULL_TREE
|| TREE_CODE (type
) == ERROR_MARK
)
491 if (visited_types_hash
.contains (type
))
494 visited_types_hash
.add (type
);
496 if (VECTOR_TYPE_P (type
))
498 int type_size
= int_size_in_bytes (type
);
500 /* Outside arguments only the alignment is changing and this
501 only happens for vector types >= 16 bytes. */
502 if (!arg_p
&& type_size
< 16)
505 /* In arguments vector types > 16 are passed as before (GCC
506 never enforced the bigger alignment for arguments which was
507 required by the old vector ABI). However, it might still be
508 ABI relevant due to the changed alignment if it is a struct
510 if (arg_p
&& type_size
> 16 && !in_struct_p
)
513 s390_vector_abi
= TARGET_VX_ABI
? 2 : 1;
515 else if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == ARRAY_TYPE
)
517 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
518 natural alignment there will never be ABI dependent padding
519 in an array type. That's why we do not set in_struct_p to
521 s390_check_type_for_vector_abi (TREE_TYPE (type
), arg_p
, in_struct_p
);
523 else if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
527 /* Check the return type. */
528 s390_check_type_for_vector_abi (TREE_TYPE (type
), true, false);
530 for (arg_chain
= TYPE_ARG_TYPES (type
);
532 arg_chain
= TREE_CHAIN (arg_chain
))
533 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain
), true, false);
535 else if (RECORD_OR_UNION_TYPE_P (type
))
539 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
541 if (TREE_CODE (field
) != FIELD_DECL
)
544 s390_check_type_for_vector_abi (TREE_TYPE (field
), arg_p
, true);
550 /* System z builtins. */
552 #include "s390-builtins.h"
554 const unsigned int bflags_builtin
[S390_BUILTIN_MAX
+ 1] =
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
566 const unsigned int opflags_builtin
[S390_BUILTIN_MAX
+ 1] =
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
578 const unsigned int bflags_overloaded_builtin
[S390_OVERLOADED_BUILTIN_MAX
+ 1] =
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
591 bflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
604 opflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
616 tree s390_builtin_types
[BT_MAX
];
617 tree s390_builtin_fn_types
[BT_FN_MAX
];
618 tree s390_builtin_decls
[S390_BUILTIN_MAX
+
619 S390_OVERLOADED_BUILTIN_MAX
+
620 S390_OVERLOADED_BUILTIN_VAR_MAX
];
622 static enum insn_code
const code_for_builtin
[S390_BUILTIN_MAX
+ 1] = {
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
628 #define OB_DEF_VAR(...)
630 #include "s390-builtins.def"
635 s390_init_builtins (void)
637 /* These definitions are being used in s390-builtins.def. */
638 tree returns_twice_attr
= tree_cons (get_identifier ("returns_twice"),
640 tree noreturn_attr
= tree_cons (get_identifier ("noreturn"), NULL
, NULL
);
641 tree c_uint64_type_node
;
643 /* The uint64_type_node from tree.c is not compatible to the C99
644 uint64_t data type. What we want is c_uint64_type_node from
645 c-common.c. But since backend code is not supposed to interface
646 with the frontend we recreate it here. */
648 c_uint64_type_node
= long_unsigned_type_node
;
650 c_uint64_type_node
= long_long_unsigned_type_node
;
653 #define DEF_TYPE(INDEX, NODE, CONST_P) \
654 if (s390_builtin_types[INDEX] == NULL) \
655 s390_builtin_types[INDEX] = (!CONST_P) ? \
656 (NODE) : build_type_variant ((NODE), 1, 0);
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
660 if (s390_builtin_types[INDEX] == NULL) \
661 s390_builtin_types[INDEX] = \
662 build_pointer_type (s390_builtin_types[INDEX_BASE]);
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
666 if (s390_builtin_types[INDEX] == NULL) \
667 s390_builtin_types[INDEX] = \
668 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
672 if (s390_builtin_types[INDEX] == NULL) \
673 s390_builtin_types[INDEX] = \
674 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
678 if (s390_builtin_types[INDEX] == NULL) \
679 s390_builtin_types[INDEX] = \
680 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
683 #define DEF_FN_TYPE(INDEX, args...) \
684 if (s390_builtin_fn_types[INDEX] == NULL) \
685 s390_builtin_fn_types[INDEX] = \
686 build_function_type_list (args, NULL_TREE);
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
693 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
694 s390_builtin_decls[S390_BUILTIN_##NAME] = \
695 add_builtin_function ("__builtin_" #NAME, \
696 s390_builtin_fn_types[FNTYPE], \
697 S390_BUILTIN_##NAME, \
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
703 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
705 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719 builtin DECL. The operand flags from s390-builtins.def have to
720 passed as OP_FLAGS. */
722 s390_const_operand_ok (tree arg
, int argnum
, int op_flags
, tree decl
)
724 if (O_UIMM_P (op_flags
))
726 int bitwidths
[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727 int bitwidth
= bitwidths
[op_flags
- O_U1
];
729 if (!tree_fits_uhwi_p (arg
)
730 || tree_to_uhwi (arg
) > (HOST_WIDE_INT_1U
<< bitwidth
) - 1)
732 error ("constant argument %d for builtin %qF is out of range "
733 "(0..%wu)", argnum
, decl
,
734 (HOST_WIDE_INT_1U
<< bitwidth
) - 1);
739 if (O_SIMM_P (op_flags
))
741 int bitwidths
[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742 int bitwidth
= bitwidths
[op_flags
- O_S2
];
744 if (!tree_fits_shwi_p (arg
)
745 || tree_to_shwi (arg
) < -(HOST_WIDE_INT_1
<< (bitwidth
- 1))
746 || tree_to_shwi (arg
) > ((HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1))
748 error ("constant argument %d for builtin %qF is out of range "
749 "(%wd..%wd)", argnum
, decl
,
750 -(HOST_WIDE_INT_1
<< (bitwidth
- 1)),
751 (HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1);
758 /* Expand an expression EXP that calls a built-in function,
759 with result going to TARGET if that's convenient
760 (and in mode MODE if that's convenient).
761 SUBTARGET may be used as the target for computing one of EXP's operands.
762 IGNORE is nonzero if the value is to be ignored. */
765 s390_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
766 machine_mode mode ATTRIBUTE_UNUSED
,
767 int ignore ATTRIBUTE_UNUSED
)
771 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
772 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
773 enum insn_code icode
;
774 rtx op
[MAX_ARGS
], pat
;
778 call_expr_arg_iterator iter
;
779 unsigned int all_op_flags
= opflags_for_builtin (fcode
);
780 machine_mode last_vec_mode
= VOIDmode
;
782 if (TARGET_DEBUG_ARG
)
785 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 (int)fcode
, IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
787 bflags_for_builtin (fcode
));
790 if (S390_USE_TARGET_ATTRIBUTE
)
794 bflags
= bflags_for_builtin (fcode
);
795 if ((bflags
& B_HTM
) && !TARGET_HTM
)
797 error ("builtin %qF is not supported without %<-mhtm%> "
798 "(default with %<-march=zEC12%> and higher).", fndecl
);
801 if (((bflags
& B_VX
) || (bflags
& B_VXE
)) && !TARGET_VX
)
803 error ("builtin %qF requires %<-mvx%> "
804 "(default with %<-march=z13%> and higher).", fndecl
);
808 if ((bflags
& B_VXE
) && !TARGET_VXE
)
810 error ("Builtin %qF requires z14 or higher.", fndecl
);
814 if ((bflags
& B_VXE2
) && !TARGET_VXE2
)
816 error ("Builtin %qF requires z15 or higher.", fndecl
);
820 if (fcode
>= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821 && fcode
< S390_ALL_BUILTIN_MAX
)
825 else if (fcode
< S390_OVERLOADED_BUILTIN_OFFSET
)
827 icode
= code_for_builtin
[fcode
];
828 /* Set a flag in the machine specific cfun part in order to support
829 saving/restoring of FPRs. */
830 if (fcode
== S390_BUILTIN_tbegin
|| fcode
== S390_BUILTIN_tbegin_retry
)
831 cfun
->machine
->tbegin_p
= true;
833 else if (fcode
< S390_OVERLOADED_BUILTIN_VAR_OFFSET
)
835 error ("unresolved overloaded builtin");
839 internal_error ("bad builtin fcode");
842 internal_error ("bad builtin icode");
844 nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
848 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
850 || GET_MODE (target
) != tmode
851 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
852 target
= gen_reg_rtx (tmode
);
854 /* There are builtins (e.g. vec_promote) with no vector
855 arguments but an element selector. So we have to also look
856 at the vector return type when emitting the modulo
858 if (VECTOR_MODE_P (insn_data
[icode
].operand
[0].mode
))
859 last_vec_mode
= insn_data
[icode
].operand
[0].mode
;
863 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
866 const struct insn_operand_data
*insn_op
;
867 unsigned int op_flags
= all_op_flags
& ((1 << O_SHIFT
) - 1);
869 all_op_flags
= all_op_flags
>> O_SHIFT
;
871 if (arg
== error_mark_node
)
873 if (arity
>= MAX_ARGS
)
876 if (O_IMM_P (op_flags
)
877 && TREE_CODE (arg
) != INTEGER_CST
)
879 error ("constant value required for builtin %qF argument %d",
884 if (!s390_const_operand_ok (arg
, arity
+ 1, op_flags
, fndecl
))
887 insn_op
= &insn_data
[icode
].operand
[arity
+ nonvoid
];
888 op
[arity
] = expand_expr (arg
, NULL_RTX
, insn_op
->mode
, EXPAND_NORMAL
);
890 /* expand_expr truncates constants to the target mode only if it
891 is "convenient". However, our checks below rely on this
893 if (CONST_INT_P (op
[arity
])
894 && SCALAR_INT_MODE_P (insn_op
->mode
)
895 && GET_MODE (op
[arity
]) != insn_op
->mode
)
896 op
[arity
] = GEN_INT (trunc_int_for_mode (INTVAL (op
[arity
]),
899 /* Wrap the expanded RTX for pointer types into a MEM expr with
900 the proper mode. This allows us to use e.g. (match_operand
901 "memory_operand"..) in the insn patterns instead of (mem
902 (match_operand "address_operand)). This is helpful for
903 patterns not just accepting MEMs. */
904 if (POINTER_TYPE_P (TREE_TYPE (arg
))
905 && insn_op
->predicate
!= address_operand
)
906 op
[arity
] = gen_rtx_MEM (insn_op
->mode
, op
[arity
]);
908 /* Expand the module operation required on element selectors. */
909 if (op_flags
== O_ELEM
)
911 gcc_assert (last_vec_mode
!= VOIDmode
);
912 op
[arity
] = simplify_expand_binop (SImode
, code_to_optab (AND
),
914 GEN_INT (GET_MODE_NUNITS (last_vec_mode
) - 1),
915 NULL_RTX
, 1, OPTAB_DIRECT
);
918 /* Record the vector mode used for an element selector. This assumes:
919 1. There is no builtin with two different vector modes and an element selector
920 2. The element selector comes after the vector type it is referring to.
921 This currently the true for all the builtins but FIXME we
922 should better check for that. */
923 if (VECTOR_MODE_P (insn_op
->mode
))
924 last_vec_mode
= insn_op
->mode
;
926 if (insn_op
->predicate (op
[arity
], insn_op
->mode
))
932 /* A memory operand is rejected by the memory_operand predicate.
933 Try making the address legal by copying it into a register. */
934 if (MEM_P (op
[arity
])
935 && insn_op
->predicate
== memory_operand
936 && (GET_MODE (XEXP (op
[arity
], 0)) == Pmode
937 || GET_MODE (XEXP (op
[arity
], 0)) == VOIDmode
))
939 op
[arity
] = replace_equiv_address (op
[arity
],
940 copy_to_mode_reg (Pmode
,
941 XEXP (op
[arity
], 0)));
943 /* Some of the builtins require different modes/types than the
944 pattern in order to implement a specific API. Instead of
945 adding many expanders which do the mode change we do it here.
946 E.g. s390_vec_add_u128 required to have vector unsigned char
947 arguments is mapped to addti3. */
948 else if (insn_op
->mode
!= VOIDmode
949 && GET_MODE (op
[arity
]) != VOIDmode
950 && GET_MODE (op
[arity
]) != insn_op
->mode
951 && ((tmp_rtx
= simplify_gen_subreg (insn_op
->mode
, op
[arity
],
952 GET_MODE (op
[arity
]), 0))
958 /* The predicate rejects the operand although the mode is fine.
959 Copy the operand to register. */
960 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
)
961 && (GET_MODE (op
[arity
]) == insn_op
->mode
962 || GET_MODE (op
[arity
]) == VOIDmode
963 || (insn_op
->predicate
== address_operand
964 && GET_MODE (op
[arity
]) == Pmode
)))
966 /* An address_operand usually has VOIDmode in the expander
967 so we cannot use this. */
968 machine_mode target_mode
=
969 (insn_op
->predicate
== address_operand
970 ? (machine_mode
) Pmode
: insn_op
->mode
);
971 op
[arity
] = copy_to_mode_reg (target_mode
, op
[arity
]);
974 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
))
976 error ("invalid argument %d for builtin %qF", arity
+ 1, fndecl
);
985 pat
= GEN_FCN (icode
) (target
);
989 pat
= GEN_FCN (icode
) (target
, op
[0]);
991 pat
= GEN_FCN (icode
) (op
[0]);
995 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
997 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
1001 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
1003 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
1007 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
1009 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
1013 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
1015 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
1019 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
1021 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
/* Limit and current settings (in halfwords) for the hotpatch
   attribute / -mhotpatch option.  */
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042 an argument, the argument is valid. */
1045 s390_handle_hotpatch_attribute (tree
*node
, tree name
, tree args
,
1046 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1052 if (TREE_CODE (*node
) != FUNCTION_DECL
)
1054 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
1056 *no_add_attrs
= true;
1058 if (args
!= NULL
&& TREE_CHAIN (args
) != NULL
)
1060 expr
= TREE_VALUE (args
);
1061 expr2
= TREE_VALUE (TREE_CHAIN (args
));
1063 if (args
== NULL
|| TREE_CHAIN (args
) == NULL
)
1065 else if (TREE_CODE (expr
) != INTEGER_CST
1066 || !INTEGRAL_TYPE_P (TREE_TYPE (expr
))
1067 || wi::gtu_p (wi::to_wide (expr
), s390_hotpatch_hw_max
))
1069 else if (TREE_CODE (expr2
) != INTEGER_CST
1070 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2
))
1071 || wi::gtu_p (wi::to_wide (expr2
), s390_hotpatch_hw_max
))
1077 error ("requested %qE attribute is not a comma separated pair of"
1078 " non-negative integer constants or too large (max. %d)", name
,
1079 s390_hotpatch_hw_max
);
1080 *no_add_attrs
= true;
1086 /* Expand the s390_vector_bool type attribute. */
1089 s390_handle_vectorbool_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
1090 tree args ATTRIBUTE_UNUSED
,
1091 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1093 tree type
= *node
, result
= NULL_TREE
;
1096 while (POINTER_TYPE_P (type
)
1097 || TREE_CODE (type
) == FUNCTION_TYPE
1098 || TREE_CODE (type
) == METHOD_TYPE
1099 || TREE_CODE (type
) == ARRAY_TYPE
)
1100 type
= TREE_TYPE (type
);
1102 mode
= TYPE_MODE (type
);
1105 case E_DImode
: case E_V2DImode
:
1106 result
= s390_builtin_types
[BT_BV2DI
];
1108 case E_SImode
: case E_V4SImode
:
1109 result
= s390_builtin_types
[BT_BV4SI
];
1111 case E_HImode
: case E_V8HImode
:
1112 result
= s390_builtin_types
[BT_BV8HI
];
1114 case E_QImode
: case E_V16QImode
:
1115 result
= s390_builtin_types
[BT_BV16QI
];
1121 *no_add_attrs
= true; /* No need to hang on to the attribute. */
1124 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
1129 /* Check syntax of function decl attributes having a string type value. */
1132 s390_handle_string_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
1133 tree args ATTRIBUTE_UNUSED
,
1134 int flags ATTRIBUTE_UNUSED
,
1139 if (TREE_CODE (*node
) != FUNCTION_DECL
)
1141 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
1143 *no_add_attrs
= true;
1146 cst
= TREE_VALUE (args
);
1148 if (TREE_CODE (cst
) != STRING_CST
)
1150 warning (OPT_Wattributes
,
1151 "%qE attribute requires a string constant argument",
1153 *no_add_attrs
= true;
1156 if (is_attribute_p ("indirect_branch", name
)
1157 || is_attribute_p ("indirect_branch_call", name
)
1158 || is_attribute_p ("function_return", name
)
1159 || is_attribute_p ("function_return_reg", name
)
1160 || is_attribute_p ("function_return_mem", name
))
1162 if (strcmp (TREE_STRING_POINTER (cst
), "keep") != 0
1163 && strcmp (TREE_STRING_POINTER (cst
), "thunk") != 0
1164 && strcmp (TREE_STRING_POINTER (cst
), "thunk-extern") != 0)
1166 warning (OPT_Wattributes
,
1167 "argument to %qE attribute is not "
1168 "(keep|thunk|thunk-extern)", name
);
1169 *no_add_attrs
= true;
1173 if (is_attribute_p ("indirect_branch_jump", name
)
1174 && strcmp (TREE_STRING_POINTER (cst
), "keep") != 0
1175 && strcmp (TREE_STRING_POINTER (cst
), "thunk") != 0
1176 && strcmp (TREE_STRING_POINTER (cst
), "thunk-inline") != 0
1177 && strcmp (TREE_STRING_POINTER (cst
), "thunk-extern") != 0)
1179 warning (OPT_Wattributes
,
1180 "argument to %qE attribute is not "
1181 "(keep|thunk|thunk-inline|thunk-extern)", name
);
1182 *no_add_attrs
= true;
1188 static const struct attribute_spec s390_attribute_table
[] = {
1189 { "hotpatch", 2, 2, true, false, false, false,
1190 s390_handle_hotpatch_attribute
, NULL
},
1191 { "s390_vector_bool", 0, 0, false, true, false, true,
1192 s390_handle_vectorbool_attribute
, NULL
},
1193 { "indirect_branch", 1, 1, true, false, false, false,
1194 s390_handle_string_attribute
, NULL
},
1195 { "indirect_branch_jump", 1, 1, true, false, false, false,
1196 s390_handle_string_attribute
, NULL
},
1197 { "indirect_branch_call", 1, 1, true, false, false, false,
1198 s390_handle_string_attribute
, NULL
},
1199 { "function_return", 1, 1, true, false, false, false,
1200 s390_handle_string_attribute
, NULL
},
1201 { "function_return_reg", 1, 1, true, false, false, false,
1202 s390_handle_string_attribute
, NULL
},
1203 { "function_return_mem", 1, 1, true, false, false, false,
1204 s390_handle_string_attribute
, NULL
},
1207 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1210 /* Return the alignment for LABEL. We default to the -falign-labels
1211 value except for the literal pool base label. */
1213 s390_label_align (rtx_insn
*label
)
1215 rtx_insn
*prev_insn
= prev_active_insn (label
);
1218 if (prev_insn
== NULL_RTX
)
1221 set
= single_set (prev_insn
);
1223 if (set
== NULL_RTX
)
1226 src
= SET_SRC (set
);
1228 /* Don't align literal pool base labels. */
1229 if (GET_CODE (src
) == UNSPEC
1230 && XINT (src
, 1) == UNSPEC_MAIN_BASE
)
1234 return align_labels
.levels
[0].log
;
1237 static GTY(()) rtx got_symbol
;
1239 /* Return the GOT table symbol. The symbol will be created when the
1240 function is invoked for the first time. */
1243 s390_got_symbol (void)
1247 got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1248 SYMBOL_REF_FLAGS (got_symbol
) = SYMBOL_FLAG_LOCAL
;
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1257 return TARGET_64BIT
? DImode
: SImode
;
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1263 return TARGET_64BIT
? DImode
: SImode
;
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1269 return TARGET_64BIT
? DImode
: SImode
;
1272 /* Return true if the back end supports mode MODE. */
1274 s390_scalar_mode_supported_p (scalar_mode mode
)
1276 /* In contrast to the default implementation reject TImode constants on 31bit
1277 TARGET_ZARCH for ABI compliance. */
1278 if (!TARGET_64BIT
&& TARGET_ZARCH
&& mode
== TImode
)
1281 if (DECIMAL_FLOAT_MODE_P (mode
))
1282 return default_decimal_float_supported_p ();
1284 return default_scalar_mode_supported_p (mode
);
1287 /* Return true if the back end supports vector mode MODE. */
1289 s390_vector_mode_supported_p (machine_mode mode
)
1293 if (!VECTOR_MODE_P (mode
)
1295 || GET_MODE_SIZE (mode
) > 16)
1298 inner
= GET_MODE_INNER (mode
);
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1319 s390_set_has_landing_pad_p (bool value
)
1321 cfun
->machine
->has_landing_pad_p
= value
;
1324 /* If two condition code modes are compatible, return a condition code
1325 mode which is compatible with both. Otherwise, return
1329 s390_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
1337 if (m2
== CCUmode
|| m2
== CCTmode
|| m2
== CCZ1mode
1338 || m2
== CCSmode
|| m2
== CCSRmode
|| m2
== CCURmode
)
1359 /* Return true if SET either doesn't set the CC register, or else
1360 the source and destination have matching CC modes and that
1361 CC mode is at least as constrained as REQ_MODE. */
1364 s390_match_ccmode_set (rtx set
, machine_mode req_mode
)
1366 machine_mode set_mode
;
1368 gcc_assert (GET_CODE (set
) == SET
);
1370 /* These modes are supposed to be used only in CC consumer
1372 gcc_assert (req_mode
!= CCVIALLmode
&& req_mode
!= CCVIANYmode
1373 && req_mode
!= CCVFALLmode
&& req_mode
!= CCVFANYmode
);
1375 if (GET_CODE (SET_DEST (set
)) != REG
|| !CC_REGNO_P (REGNO (SET_DEST (set
))))
1378 set_mode
= GET_MODE (SET_DEST (set
));
1400 if (req_mode
!= set_mode
)
1405 if (req_mode
!= CCSmode
&& req_mode
!= CCUmode
&& req_mode
!= CCTmode
1406 && req_mode
!= CCSRmode
&& req_mode
!= CCURmode
1407 && req_mode
!= CCZ1mode
)
1413 if (req_mode
!= CCAmode
)
1421 return (GET_MODE (SET_SRC (set
)) == set_mode
);
1424 /* Return true if every SET in INSN that sets the CC register
1425 has source and destination with matching CC modes and that
1426 CC mode is at least as constrained as REQ_MODE.
1427 If REQ_MODE is VOIDmode, always return false. */
1430 s390_match_ccmode (rtx_insn
*insn
, machine_mode req_mode
)
1434 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1435 if (req_mode
== VOIDmode
)
1438 if (GET_CODE (PATTERN (insn
)) == SET
)
1439 return s390_match_ccmode_set (PATTERN (insn
), req_mode
);
1441 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
1442 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
1444 rtx set
= XVECEXP (PATTERN (insn
), 0, i
);
1445 if (GET_CODE (set
) == SET
)
1446 if (!s390_match_ccmode_set (set
, req_mode
))
1453 /* If a test-under-mask instruction can be used to implement
1454 (compare (and ... OP1) OP2), return the CC mode required
1455 to do that. Otherwise, return VOIDmode.
1456 MIXED is true if the instruction can distinguish between
1457 CC1 and CC2 for mixed selected bits (TMxx), it is false
1458 if the instruction cannot (TM). */
1461 s390_tm_ccmode (rtx op1
, rtx op2
, bool mixed
)
1465 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1466 if (GET_CODE (op1
) != CONST_INT
|| GET_CODE (op2
) != CONST_INT
)
1469 /* Selected bits all zero: CC0.
1470 e.g.: int a; if ((a & (16 + 128)) == 0) */
1471 if (INTVAL (op2
) == 0)
1474 /* Selected bits all one: CC3.
1475 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476 if (INTVAL (op2
) == INTVAL (op1
))
1479 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1481 if ((a & (16 + 128)) == 16) -> CCT1
1482 if ((a & (16 + 128)) == 128) -> CCT2 */
1485 bit1
= exact_log2 (INTVAL (op2
));
1486 bit0
= exact_log2 (INTVAL (op1
) ^ INTVAL (op2
));
1487 if (bit0
!= -1 && bit1
!= -1)
1488 return bit0
> bit1
? CCT1mode
: CCT2mode
;
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495 OP0 and OP1 of a COMPARE, return the mode to be used for the
1499 s390_select_ccmode (enum rtx_code code
, rtx op0
, rtx op1
)
1505 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1506 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1508 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1509 && CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1))))
1511 if ((GET_CODE (op0
) == PLUS
|| GET_CODE (op0
) == MINUS
1512 || GET_CODE (op1
) == NEG
)
1513 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1516 if (GET_CODE (op0
) == AND
)
1518 /* Check whether we can potentially do it via TM. */
1519 machine_mode ccmode
;
1520 ccmode
= s390_tm_ccmode (XEXP (op0
, 1), op1
, 1);
1521 if (ccmode
!= VOIDmode
)
1523 /* Relax CCTmode to CCZmode to allow fall-back to AND
1524 if that turns out to be beneficial. */
1525 return ccmode
== CCTmode
? CCZmode
: ccmode
;
1529 if (register_operand (op0
, HImode
)
1530 && GET_CODE (op1
) == CONST_INT
1531 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 65535))
1533 if (register_operand (op0
, QImode
)
1534 && GET_CODE (op1
) == CONST_INT
1535 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 255))
1544 /* The only overflow condition of NEG and ABS happens when
1545 -INT_MAX is used as parameter, which stays negative. So
1546 we have an overflow from a positive value to a negative.
1547 Using CCAP mode the resulting cc can be used for comparisons. */
1548 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1549 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1552 /* If constants are involved in an add instruction it is possible to use
1553 the resulting cc for comparisons with zero. Knowing the sign of the
1554 constant the overflow behavior gets predictable. e.g.:
1555 int a, b; if ((b = a + c) > 0)
1556 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1557 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1558 && (CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1)))
1559 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0
, 1)), 'O', "Os")
1560 /* Avoid INT32_MIN on 32 bit. */
1561 && (!TARGET_ZARCH
|| INTVAL (XEXP (op0
, 1)) != -0x7fffffff - 1))))
1563 if (INTVAL (XEXP((op0
), 1)) < 0)
1571 if (HONOR_NANS (op0
) || HONOR_NANS (op1
))
1582 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1583 && GET_CODE (op1
) != CONST_INT
)
1589 if (GET_CODE (op0
) == PLUS
1590 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1593 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1594 && GET_CODE (op1
) != CONST_INT
)
1600 if (GET_CODE (op0
) == MINUS
1601 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1604 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1605 && GET_CODE (op1
) != CONST_INT
)
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615 that we can implement more efficiently. */
1618 s390_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
1619 bool op0_preserve_value
)
1621 if (op0_preserve_value
)
1624 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1625 if ((*code
== EQ
|| *code
== NE
)
1626 && *op1
== const0_rtx
1627 && GET_CODE (*op0
) == ZERO_EXTRACT
1628 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1629 && GET_CODE (XEXP (*op0
, 2)) == CONST_INT
1630 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1632 rtx inner
= XEXP (*op0
, 0);
1633 HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (GET_MODE (inner
));
1634 HOST_WIDE_INT len
= INTVAL (XEXP (*op0
, 1));
1635 HOST_WIDE_INT pos
= INTVAL (XEXP (*op0
, 2));
1637 if (len
> 0 && len
< modesize
1638 && pos
>= 0 && pos
+ len
<= modesize
1639 && modesize
<= HOST_BITS_PER_WIDE_INT
)
1641 unsigned HOST_WIDE_INT block
;
1642 block
= (HOST_WIDE_INT_1U
<< len
) - 1;
1643 block
<<= modesize
- pos
- len
;
1645 *op0
= gen_rtx_AND (GET_MODE (inner
), inner
,
1646 gen_int_mode (block
, GET_MODE (inner
)));
1650 /* Narrow AND of memory against immediate to enable TM. */
1651 if ((*code
== EQ
|| *code
== NE
)
1652 && *op1
== const0_rtx
1653 && GET_CODE (*op0
) == AND
1654 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1655 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1657 rtx inner
= XEXP (*op0
, 0);
1658 rtx mask
= XEXP (*op0
, 1);
1660 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1661 if (GET_CODE (inner
) == SUBREG
1662 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner
)))
1663 && (GET_MODE_SIZE (GET_MODE (inner
))
1664 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
1666 & GET_MODE_MASK (GET_MODE (inner
))
1667 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner
))))
1669 inner
= SUBREG_REG (inner
);
1671 /* Do not change volatile MEMs. */
1672 if (MEM_P (inner
) && !MEM_VOLATILE_P (inner
))
1674 int part
= s390_single_part (XEXP (*op0
, 1),
1675 GET_MODE (inner
), QImode
, 0);
1678 mask
= gen_int_mode (s390_extract_part (mask
, QImode
, 0), QImode
);
1679 inner
= adjust_address_nv (inner
, QImode
, part
);
1680 *op0
= gen_rtx_AND (QImode
, inner
, mask
);
1685 /* Narrow comparisons against 0xffff to HImode if possible. */
1686 if ((*code
== EQ
|| *code
== NE
)
1687 && GET_CODE (*op1
) == CONST_INT
1688 && INTVAL (*op1
) == 0xffff
1689 && SCALAR_INT_MODE_P (GET_MODE (*op0
))
1690 && (nonzero_bits (*op0
, GET_MODE (*op0
))
1691 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693 *op0
= gen_lowpart (HImode
, *op0
);
1697 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1698 if (GET_CODE (*op0
) == UNSPEC
1699 && XINT (*op0
, 1) == UNSPEC_STRCMPCC_TO_INT
1700 && XVECLEN (*op0
, 0) == 1
1701 && GET_MODE (XVECEXP (*op0
, 0, 0)) == CCUmode
1702 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1703 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1704 && *op1
== const0_rtx
)
1706 enum rtx_code new_code
= UNKNOWN
;
1709 case EQ
: new_code
= EQ
; break;
1710 case NE
: new_code
= NE
; break;
1711 case LT
: new_code
= GTU
; break;
1712 case GT
: new_code
= LTU
; break;
1713 case LE
: new_code
= GEU
; break;
1714 case GE
: new_code
= LEU
; break;
1718 if (new_code
!= UNKNOWN
)
1720 *op0
= XVECEXP (*op0
, 0, 0);
1725 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1726 if (GET_CODE (*op0
) == UNSPEC
1727 && XINT (*op0
, 1) == UNSPEC_CC_TO_INT
1728 && XVECLEN (*op0
, 0) == 1
1729 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1730 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1731 && CONST_INT_P (*op1
))
1733 enum rtx_code new_code
= UNKNOWN
;
1734 switch (GET_MODE (XVECEXP (*op0
, 0, 0)))
1740 case EQ
: new_code
= EQ
; break;
1741 case NE
: new_code
= NE
; break;
1748 if (new_code
!= UNKNOWN
)
1750 /* For CCRAWmode put the required cc mask into the second
1752 if (GET_MODE (XVECEXP (*op0
, 0, 0)) == CCRAWmode
1753 && INTVAL (*op1
) >= 0 && INTVAL (*op1
) <= 3)
1754 *op1
= gen_rtx_CONST_INT (VOIDmode
, 1 << (3 - INTVAL (*op1
)));
1755 *op0
= XVECEXP (*op0
, 0, 0);
1760 /* Simplify cascaded EQ, NE with const0_rtx. */
1761 if ((*code
== NE
|| *code
== EQ
)
1762 && (GET_CODE (*op0
) == EQ
|| GET_CODE (*op0
) == NE
)
1763 && GET_MODE (*op0
) == SImode
1764 && GET_MODE (XEXP (*op0
, 0)) == CCZ1mode
1765 && REG_P (XEXP (*op0
, 0))
1766 && XEXP (*op0
, 1) == const0_rtx
1767 && *op1
== const0_rtx
)
1769 if ((*code
== EQ
&& GET_CODE (*op0
) == NE
)
1770 || (*code
== NE
&& GET_CODE (*op0
) == EQ
))
1774 *op0
= XEXP (*op0
, 0);
1777 /* Prefer register over memory as first operand. */
1778 if (MEM_P (*op0
) && REG_P (*op1
))
1780 rtx tem
= *op0
; *op0
= *op1
; *op1
= tem
;
1781 *code
= (int)swap_condition ((enum rtx_code
)*code
);
1784 /* A comparison result is compared against zero. Replace it with
1785 the (perhaps inverted) original comparison.
1786 This probably should be done by simplify_relational_operation. */
1787 if ((*code
== EQ
|| *code
== NE
)
1788 && *op1
== const0_rtx
1789 && COMPARISON_P (*op0
)
1790 && CC_REG_P (XEXP (*op0
, 0)))
1792 enum rtx_code new_code
;
1795 new_code
= reversed_comparison_code_parts (GET_CODE (*op0
),
1797 XEXP (*op0
, 1), NULL
);
1799 new_code
= GET_CODE (*op0
);
1801 if (new_code
!= UNKNOWN
)
1804 *op1
= XEXP (*op0
, 1);
1805 *op0
= XEXP (*op0
, 0);
1809 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1811 && (*code
== EQ
|| *code
== NE
)
1812 && (GET_MODE (*op0
) == DImode
|| GET_MODE (*op0
) == SImode
)
1813 && GET_CODE (*op0
) == NOT
)
1815 machine_mode mode
= GET_MODE (*op0
);
1816 *op0
= gen_rtx_XOR (mode
, XEXP (*op0
, 0), *op1
);
1817 *op0
= gen_rtx_NOT (mode
, *op0
);
1821 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1823 && (*code
== EQ
|| *code
== NE
)
1824 && (GET_CODE (*op0
) == AND
|| GET_CODE (*op0
) == IOR
)
1825 && (GET_MODE (*op0
) == DImode
|| GET_MODE (*op0
) == SImode
)
1826 && CONST_INT_P (*op1
)
1827 && *op1
== constm1_rtx
)
1829 machine_mode mode
= GET_MODE (*op0
);
1830 rtx op00
= gen_rtx_NOT (mode
, XEXP (*op0
, 0));
1831 rtx op01
= gen_rtx_NOT (mode
, XEXP (*op0
, 1));
1833 if (GET_CODE (*op0
) == AND
)
1834 *op0
= gen_rtx_IOR (mode
, op00
, op01
);
1836 *op0
= gen_rtx_AND (mode
, op00
, op01
);
1843 /* Emit a compare instruction suitable to implement the comparison
1844 OP0 CODE OP1. Return the correct condition RTL to be placed in
1845 the IF_THEN_ELSE of the conditional branch testing the result. */
1848 s390_emit_compare (enum rtx_code code
, rtx op0
, rtx op1
)
1850 machine_mode mode
= s390_select_ccmode (code
, op0
, op1
);
1853 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
1855 /* Do not output a redundant compare instruction if a
1856 compare_and_swap pattern already computed the result and the
1857 machine modes are compatible. */
1858 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0
), mode
)
1864 cc
= gen_rtx_REG (mode
, CC_REGNUM
);
1865 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (mode
, op0
, op1
)));
1868 return gen_rtx_fmt_ee (code
, VOIDmode
, cc
, const0_rtx
);
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872 MEM, whose address is a pseudo containing the original MEM's address. */
1875 s390_legitimize_cs_operand (rtx mem
)
1879 if (!contains_symbol_ref_p (mem
))
1881 tmp
= gen_reg_rtx (Pmode
);
1882 emit_move_insn (tmp
, copy_rtx (XEXP (mem
, 0)));
1883 return change_address (mem
, VOIDmode
, tmp
);
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1888 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889 conditional branch testing the result. */
1892 s390_emit_compare_and_swap (enum rtx_code code
, rtx old
, rtx mem
,
1893 rtx cmp
, rtx new_rtx
, machine_mode ccmode
)
1897 mem
= s390_legitimize_cs_operand (mem
);
1898 cc
= gen_rtx_REG (ccmode
, CC_REGNUM
);
1899 switch (GET_MODE (mem
))
1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old
, mem
, cmp
,
1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old
, mem
, cmp
,
1910 emit_insn (gen_atomic_compare_and_swapti_internal (old
, mem
, cmp
,
1918 return s390_emit_compare (code
, cc
, const0_rtx
);
1921 /* Emit a jump instruction to TARGET and return it. If COND is
1922 NULL_RTX, emit an unconditional jump, else a conditional jump under
1926 s390_emit_jump (rtx target
, rtx cond
)
1930 target
= gen_rtx_LABEL_REF (VOIDmode
, target
);
1932 target
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, target
, pc_rtx
);
1934 insn
= gen_rtx_SET (pc_rtx
, target
);
1935 return emit_jump_insn (insn
);
1938 /* Return branch condition mask to implement a branch
1939 specified by CODE. Return -1 for invalid comparisons. */
1942 s390_branch_condition_mask (rtx code
)
1944 const int CC0
= 1 << 3;
1945 const int CC1
= 1 << 2;
1946 const int CC2
= 1 << 1;
1947 const int CC3
= 1 << 0;
1949 gcc_assert (GET_CODE (XEXP (code
, 0)) == REG
);
1950 gcc_assert (REGNO (XEXP (code
, 0)) == CC_REGNUM
);
1951 gcc_assert (XEXP (code
, 1) == const0_rtx
1952 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
1953 && CONST_INT_P (XEXP (code
, 1))));
1956 switch (GET_MODE (XEXP (code
, 0)))
1960 switch (GET_CODE (code
))
1962 case EQ
: return CC0
;
1963 case NE
: return CC1
| CC2
| CC3
;
1969 switch (GET_CODE (code
))
1971 case EQ
: return CC1
;
1972 case NE
: return CC0
| CC2
| CC3
;
1978 switch (GET_CODE (code
))
1980 case EQ
: return CC2
;
1981 case NE
: return CC0
| CC1
| CC3
;
1987 switch (GET_CODE (code
))
1989 case EQ
: return CC3
;
1990 case NE
: return CC0
| CC1
| CC2
;
1996 switch (GET_CODE (code
))
1998 case EQ
: return CC0
| CC2
;
1999 case NE
: return CC1
| CC3
;
2005 switch (GET_CODE (code
))
2007 case LTU
: return CC2
| CC3
; /* carry */
2008 case GEU
: return CC0
| CC1
; /* no carry */
2014 switch (GET_CODE (code
))
2016 case GTU
: return CC0
| CC1
; /* borrow */
2017 case LEU
: return CC2
| CC3
; /* no borrow */
2023 switch (GET_CODE (code
))
2025 case EQ
: return CC0
| CC2
;
2026 case NE
: return CC1
| CC3
;
2027 case LTU
: return CC1
;
2028 case GTU
: return CC3
;
2029 case LEU
: return CC1
| CC2
;
2030 case GEU
: return CC2
| CC3
;
2035 switch (GET_CODE (code
))
2037 case EQ
: return CC0
;
2038 case NE
: return CC1
| CC2
| CC3
;
2039 case LTU
: return CC1
;
2040 case GTU
: return CC2
;
2041 case LEU
: return CC0
| CC1
;
2042 case GEU
: return CC0
| CC2
;
2048 switch (GET_CODE (code
))
2050 case EQ
: return CC0
;
2051 case NE
: return CC2
| CC1
| CC3
;
2052 case LTU
: return CC2
;
2053 case GTU
: return CC1
;
2054 case LEU
: return CC0
| CC2
;
2055 case GEU
: return CC0
| CC1
;
2061 switch (GET_CODE (code
))
2063 case EQ
: return CC0
;
2064 case NE
: return CC1
| CC2
| CC3
;
2065 case LT
: return CC1
| CC3
;
2066 case GT
: return CC2
;
2067 case LE
: return CC0
| CC1
| CC3
;
2068 case GE
: return CC0
| CC2
;
2074 switch (GET_CODE (code
))
2076 case EQ
: return CC0
;
2077 case NE
: return CC1
| CC2
| CC3
;
2078 case LT
: return CC1
;
2079 case GT
: return CC2
| CC3
;
2080 case LE
: return CC0
| CC1
;
2081 case GE
: return CC0
| CC2
| CC3
;
2087 switch (GET_CODE (code
))
2089 case EQ
: return CC0
| CC1
| CC2
;
2090 case NE
: return CC3
;
2097 switch (GET_CODE (code
))
2099 case EQ
: return CC0
;
2100 case NE
: return CC1
| CC2
| CC3
;
2101 case LT
: return CC1
;
2102 case GT
: return CC2
;
2103 case LE
: return CC0
| CC1
;
2104 case GE
: return CC0
| CC2
;
2105 case UNORDERED
: return CC3
;
2106 case ORDERED
: return CC0
| CC1
| CC2
;
2107 case UNEQ
: return CC0
| CC3
;
2108 case UNLT
: return CC1
| CC3
;
2109 case UNGT
: return CC2
| CC3
;
2110 case UNLE
: return CC0
| CC1
| CC3
;
2111 case UNGE
: return CC0
| CC2
| CC3
;
2112 case LTGT
: return CC1
| CC2
;
2118 switch (GET_CODE (code
))
2120 case EQ
: return CC0
;
2121 case NE
: return CC2
| CC1
| CC3
;
2122 case LT
: return CC2
;
2123 case GT
: return CC1
;
2124 case LE
: return CC0
| CC2
;
2125 case GE
: return CC0
| CC1
;
2126 case UNORDERED
: return CC3
;
2127 case ORDERED
: return CC0
| CC2
| CC1
;
2128 case UNEQ
: return CC0
| CC3
;
2129 case UNLT
: return CC2
| CC3
;
2130 case UNGT
: return CC1
| CC3
;
2131 case UNLE
: return CC0
| CC2
| CC3
;
2132 case UNGE
: return CC0
| CC1
| CC3
;
2133 case LTGT
: return CC2
| CC1
;
2138 /* Vector comparison modes. */
2139 /* CC2 will never be set. It however is part of the negated
2142 switch (GET_CODE (code
))
2147 case GE
: return CC0
;
2148 /* The inverted modes are in fact *any* modes. */
2152 case LT
: return CC3
| CC1
| CC2
;
2157 switch (GET_CODE (code
))
2162 case GE
: return CC0
| CC1
;
2163 /* The inverted modes are in fact *all* modes. */
2167 case LT
: return CC3
| CC2
;
2171 switch (GET_CODE (code
))
2175 case GE
: return CC0
;
2176 /* The inverted modes are in fact *any* modes. */
2179 case UNLT
: return CC3
| CC1
| CC2
;
2184 switch (GET_CODE (code
))
2188 case GE
: return CC0
| CC1
;
2189 /* The inverted modes are in fact *all* modes. */
2192 case UNLT
: return CC3
| CC2
;
2197 switch (GET_CODE (code
))
2200 return INTVAL (XEXP (code
, 1));
2202 return (INTVAL (XEXP (code
, 1))) ^ 0xf;
2213 /* Return branch condition mask to implement a compare and branch
2214 specified by CODE. Return -1 for invalid comparisons. */
2217 s390_compare_and_branch_condition_mask (rtx code
)
2219 const int CC0
= 1 << 3;
2220 const int CC1
= 1 << 2;
2221 const int CC2
= 1 << 1;
2223 switch (GET_CODE (code
))
2247 /* If INV is false, return assembler mnemonic string to implement
2248 a branch specified by CODE. If INV is true, return mnemonic
2249 for the corresponding inverted branch. */
2252 s390_branch_condition_mnemonic (rtx code
, int inv
)
2256 static const char *const mnemonic
[16] =
2258 NULL
, "o", "h", "nle",
2259 "l", "nhe", "lh", "ne",
2260 "e", "nlh", "he", "nl",
2261 "le", "nh", "no", NULL
2264 if (GET_CODE (XEXP (code
, 0)) == REG
2265 && REGNO (XEXP (code
, 0)) == CC_REGNUM
2266 && (XEXP (code
, 1) == const0_rtx
2267 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
2268 && CONST_INT_P (XEXP (code
, 1)))))
2269 mask
= s390_branch_condition_mask (code
);
2271 mask
= s390_compare_and_branch_condition_mask (code
);
2273 gcc_assert (mask
>= 0);
2278 gcc_assert (mask
>= 1 && mask
<= 14);
2280 return mnemonic
[mask
];
2283 /* Return the part of op which has a value different from def.
2284 The size of the part is determined by mode.
2285 Use this function only if you already know that op really
2286 contains such a part. */
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op
, machine_mode mode
, int def
)
2291 unsigned HOST_WIDE_INT value
= 0;
2292 int max_parts
= HOST_BITS_PER_WIDE_INT
/ GET_MODE_BITSIZE (mode
);
2293 int part_bits
= GET_MODE_BITSIZE (mode
);
2294 unsigned HOST_WIDE_INT part_mask
= (HOST_WIDE_INT_1U
<< part_bits
) - 1;
2297 for (i
= 0; i
< max_parts
; i
++)
2300 value
= UINTVAL (op
);
2302 value
>>= part_bits
;
2304 if ((value
& part_mask
) != (def
& part_mask
))
2305 return value
& part_mask
;
2311 /* If OP is an integer constant of mode MODE with exactly one
2312 part of mode PART_MODE unequal to DEF, return the number of that
2313 part. Otherwise, return -1. */
2316 s390_single_part (rtx op
,
2318 machine_mode part_mode
,
2321 unsigned HOST_WIDE_INT value
= 0;
2322 int n_parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (part_mode
);
2323 unsigned HOST_WIDE_INT part_mask
2324 = (HOST_WIDE_INT_1U
<< GET_MODE_BITSIZE (part_mode
)) - 1;
2327 if (GET_CODE (op
) != CONST_INT
)
2330 for (i
= 0; i
< n_parts
; i
++)
2333 value
= UINTVAL (op
);
2335 value
>>= GET_MODE_BITSIZE (part_mode
);
2337 if ((value
& part_mask
) != (def
& part_mask
))
2345 return part
== -1 ? -1 : n_parts
- 1 - part
;
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349 bits and no other bits are set in (the lower SIZE bits of) IN.
2351 PSTART and PEND can be used to obtain the start and end
2352 position (inclusive) of the bitfield relative to 64
2353 bits. *PSTART / *PEND gives the position of the first/last bit
2354 of the bitfield counting from the highest order bit starting
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in
, int size
,
2359 int *pstart
, int *pend
)
2363 int lowbit
= HOST_BITS_PER_WIDE_INT
- 1;
2364 int highbit
= HOST_BITS_PER_WIDE_INT
- size
;
2365 unsigned HOST_WIDE_INT bitmask
= HOST_WIDE_INT_1U
;
2367 gcc_assert (!!pstart
== !!pend
);
2368 for (start
= lowbit
; start
>= highbit
; bitmask
<<= 1, start
--)
2371 /* Look for the rightmost bit of a contiguous range of ones. */
2378 /* Look for the firt zero bit after the range of ones. */
2379 if (! (bitmask
& in
))
2383 /* We're one past the last one-bit. */
2387 /* No one bits found. */
2390 if (start
> highbit
)
2392 unsigned HOST_WIDE_INT mask
;
2394 /* Calculate a mask for all bits beyond the contiguous bits. */
2395 mask
= ((~HOST_WIDE_INT_0U
>> highbit
)
2396 & (~HOST_WIDE_INT_0U
<< (lowbit
- start
+ 1)));
2398 /* There are more bits set beyond the first range of one bits. */
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412 if ~IN contains a contiguous bitfield. In that case, *END is <
2415 If WRAP_P is true, a bitmask that wraps around is also tested.
2416 When a wraparoud occurs *START is greater than *END (in
2417 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418 part of the range. If WRAP_P is false, no wraparound is
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in
, bool wrap_p
,
2423 int size
, int *start
, int *end
)
2425 int bs
= HOST_BITS_PER_WIDE_INT
;
2428 gcc_assert (!!start
== !!end
);
2429 if ((in
& ((~HOST_WIDE_INT_0U
) >> (bs
- size
))) == 0)
2430 /* This cannot be expressed as a contiguous bitmask. Exit early because
2431 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2434 b
= s390_contiguous_bitmask_nowrap_p (in
, size
, start
, end
);
2439 b
= s390_contiguous_bitmask_nowrap_p (~in
, size
, start
, end
);
2445 gcc_assert (s
>= 1);
2446 *start
= ((e
+ 1) & (bs
- 1));
2447 *end
= ((s
- 1 + bs
) & (bs
- 1));
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454 its elements. START and END can be used to obtain the start and
2455 end position of the bitfield.
2457 START/STOP give the position of the first/last bit of the bitfield
2458 counting from the lowest order bit starting with zero. In order to
2459 use these values for S/390 instructions this has to be converted to
2460 "bits big endian" style. */
2463 s390_contiguous_bitmask_vector_p (rtx op
, int *start
, int *end
)
2465 unsigned HOST_WIDE_INT mask
;
2470 gcc_assert (!!start
== !!end
);
2471 if (!const_vec_duplicate_p (op
, &elt
)
2472 || !CONST_INT_P (elt
))
2475 size
= GET_MODE_UNIT_BITSIZE (GET_MODE (op
));
2477 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2481 mask
= UINTVAL (elt
);
2483 b
= s390_contiguous_bitmask_p (mask
, true, size
, start
, end
);
2488 *start
-= (HOST_BITS_PER_WIDE_INT
- size
);
2489 *end
-= (HOST_BITS_PER_WIDE_INT
- size
);
2497 /* Return true if C consists only of byte chunks being either 0 or
2498 0xff. If MASK is !=NULL a byte mask is generated which is
2499 appropriate for the vector generate byte mask instruction. */
2502 s390_bytemask_vector_p (rtx op
, unsigned *mask
)
2505 unsigned tmp_mask
= 0;
2506 int nunit
, unit_size
;
2508 if (!VECTOR_MODE_P (GET_MODE (op
))
2509 || GET_CODE (op
) != CONST_VECTOR
2510 || !CONST_INT_P (XVECEXP (op
, 0, 0)))
2513 nunit
= GET_MODE_NUNITS (GET_MODE (op
));
2514 unit_size
= GET_MODE_UNIT_SIZE (GET_MODE (op
));
2516 for (i
= 0; i
< nunit
; i
++)
2518 unsigned HOST_WIDE_INT c
;
2521 if (!CONST_INT_P (XVECEXP (op
, 0, i
)))
2524 c
= UINTVAL (XVECEXP (op
, 0, i
));
2525 for (j
= 0; j
< unit_size
; j
++)
2527 if ((c
& 0xff) != 0 && (c
& 0xff) != 0xff)
2529 tmp_mask
|= (c
& 1) << ((nunit
- 1 - i
) * unit_size
+ j
);
2530 c
= c
>> BITS_PER_UNIT
;
2540 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541 equivalent to a shift followed by the AND. In particular, CONTIG
2542 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2543 for ROTL indicate a rotate to the right. */
2546 s390_extzv_shift_ok (int bitsize
, int rotl
, unsigned HOST_WIDE_INT contig
)
2551 ok
= s390_contiguous_bitmask_nowrap_p (contig
, bitsize
, &start
, &end
);
2555 return (64 - end
>= rotl
);
2558 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2560 rotl
= -rotl
+ (64 - bitsize
);
2561 return (start
>= rotl
);
2565 /* Check whether we can (and want to) split a double-word
2566 move in mode MODE from SRC to DST into two single-word
2567 moves, moving the subword FIRST_SUBWORD first. */
2570 s390_split_ok_p (rtx dst
, rtx src
, machine_mode mode
, int first_subword
)
2572 /* Floating point and vector registers cannot be split. */
2573 if (FP_REG_P (src
) || FP_REG_P (dst
) || VECTOR_REG_P (src
) || VECTOR_REG_P (dst
))
2576 /* Non-offsettable memory references cannot be split. */
2577 if ((GET_CODE (src
) == MEM
&& !offsettable_memref_p (src
))
2578 || (GET_CODE (dst
) == MEM
&& !offsettable_memref_p (dst
)))
2581 /* Moving the first subword must not clobber a register
2582 needed to move the second subword. */
2583 if (register_operand (dst
, mode
))
2585 rtx subreg
= operand_subword (dst
, first_subword
, 0, mode
);
2586 if (reg_overlap_mentioned_p (subreg
, src
))
2593 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594 and [MEM2, MEM2 + SIZE] do overlap and false
2598 s390_overlap_p (rtx mem1
, rtx mem2
, HOST_WIDE_INT size
)
2600 rtx addr1
, addr2
, addr_delta
;
2601 HOST_WIDE_INT delta
;
2603 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2609 addr1
= XEXP (mem1
, 0);
2610 addr2
= XEXP (mem2
, 0);
2612 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2614 /* This overlapping check is used by peepholes merging memory block operations.
2615 Overlapping operations would otherwise be recognized by the S/390 hardware
2616 and would fall back to a slower implementation. Allowing overlapping
2617 operations would lead to slow code but not to wrong code. Therefore we are
2618 somewhat optimistic if we cannot prove that the memory blocks are
2620 That's why we return false here although this may accept operations on
2621 overlapping memory areas. */
2622 if (!addr_delta
|| GET_CODE (addr_delta
) != CONST_INT
)
2625 delta
= INTVAL (addr_delta
);
2628 || (delta
> 0 && delta
< size
)
2629 || (delta
< 0 && -delta
< size
))
2635 /* Check whether the address of memory reference MEM2 equals exactly
2636 the address of memory reference MEM1 plus DELTA. Return true if
2637 we can prove this to be the case, false otherwise. */
2640 s390_offset_p (rtx mem1
, rtx mem2
, rtx delta
)
2642 rtx addr1
, addr2
, addr_delta
;
2644 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2647 addr1
= XEXP (mem1
, 0);
2648 addr2
= XEXP (mem2
, 0);
2650 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2651 if (!addr_delta
|| !rtx_equal_p (addr_delta
, delta
))
2657 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2660 s390_expand_logical_operator (enum rtx_code code
, machine_mode mode
,
2663 machine_mode wmode
= mode
;
2664 rtx dst
= operands
[0];
2665 rtx src1
= operands
[1];
2666 rtx src2
= operands
[2];
2669 /* If we cannot handle the operation directly, use a temp register. */
2670 if (!s390_logical_operator_ok_p (operands
))
2671 dst
= gen_reg_rtx (mode
);
2673 /* QImode and HImode patterns make sense only if we have a destination
2674 in memory. Otherwise perform the operation in SImode. */
2675 if ((mode
== QImode
|| mode
== HImode
) && GET_CODE (dst
) != MEM
)
2678 /* Widen operands if required. */
2681 if (GET_CODE (dst
) == SUBREG
2682 && (tem
= simplify_subreg (wmode
, dst
, mode
, 0)) != 0)
2684 else if (REG_P (dst
))
2685 dst
= gen_rtx_SUBREG (wmode
, dst
, 0);
2687 dst
= gen_reg_rtx (wmode
);
2689 if (GET_CODE (src1
) == SUBREG
2690 && (tem
= simplify_subreg (wmode
, src1
, mode
, 0)) != 0)
2692 else if (GET_MODE (src1
) != VOIDmode
)
2693 src1
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src1
), 0);
2695 if (GET_CODE (src2
) == SUBREG
2696 && (tem
= simplify_subreg (wmode
, src2
, mode
, 0)) != 0)
2698 else if (GET_MODE (src2
) != VOIDmode
)
2699 src2
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src2
), 0);
2702 /* Emit the instruction. */
2703 op
= gen_rtx_SET (dst
, gen_rtx_fmt_ee (code
, wmode
, src1
, src2
));
2704 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
2705 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
2707 /* Fix up the destination if needed. */
2708 if (dst
!= operands
[0])
2709 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
2712 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2715 s390_logical_operator_ok_p (rtx
*operands
)
2717 /* If the destination operand is in memory, it needs to coincide
2718 with one of the source operands. After reload, it has to be
2719 the first source operand. */
2720 if (GET_CODE (operands
[0]) == MEM
)
2721 return rtx_equal_p (operands
[0], operands
[1])
2722 || (!reload_completed
&& rtx_equal_p (operands
[0], operands
[2]));
2727 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2728 operand IMMOP to switch from SS to SI type instructions. */
2731 s390_narrow_logical_operator (enum rtx_code code
, rtx
*memop
, rtx
*immop
)
2733 int def
= code
== AND
? -1 : 0;
2737 gcc_assert (GET_CODE (*memop
) == MEM
);
2738 gcc_assert (!MEM_VOLATILE_P (*memop
));
2740 mask
= s390_extract_part (*immop
, QImode
, def
);
2741 part
= s390_single_part (*immop
, GET_MODE (*memop
), QImode
, def
);
2742 gcc_assert (part
>= 0);
2744 *memop
= adjust_address (*memop
, QImode
, part
);
2745 *immop
= gen_int_mode (mask
, QImode
);
2749 /* How to allocate a 'struct machine_function'. */
2751 static struct machine_function
*
2752 s390_init_machine_status (void)
2754 return ggc_cleared_alloc
<machine_function
> ();
2757 /* Map for smallest class containing reg regno. */
2759 const enum reg_class regclass_map
[FIRST_PSEUDO_REGISTER
] =
2760 { GENERAL_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 0 */
2761 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 4 */
2762 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 8 */
2763 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 12 */
2764 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 16 */
2765 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 20 */
2766 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 24 */
2767 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 28 */
2768 ADDR_REGS
, CC_REGS
, ADDR_REGS
, ADDR_REGS
, /* 32 */
2769 ACCESS_REGS
, ACCESS_REGS
, VEC_REGS
, VEC_REGS
, /* 36 */
2770 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 40 */
2771 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 44 */
2772 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 48 */
2773 VEC_REGS
, VEC_REGS
/* 52 */
2776 /* Return attribute type of insn. */
2778 static enum attr_type
2779 s390_safe_attr_type (rtx_insn
*insn
)
2781 if (recog_memoized (insn
) >= 0)
2782 return get_attr_type (insn
);
2787 /* Return attribute relative_long of insn. */
2790 s390_safe_relative_long_p (rtx_insn
*insn
)
2792 if (recog_memoized (insn
) >= 0)
2793 return get_attr_relative_long (insn
) == RELATIVE_LONG_YES
;
2798 /* Return true if DISP is a valid short displacement. */
2801 s390_short_displacement (rtx disp
)
2803 /* No displacement is OK. */
2807 /* Without the long displacement facility we don't need to
2808 distingiush between long and short displacement. */
2809 if (!TARGET_LONG_DISPLACEMENT
)
2812 /* Integer displacement in range. */
2813 if (GET_CODE (disp
) == CONST_INT
)
2814 return INTVAL (disp
) >= 0 && INTVAL (disp
) < 4096;
2816 /* GOT offset is not OK, the GOT can be large. */
2817 if (GET_CODE (disp
) == CONST
2818 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
2819 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
2820 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTNTPOFF
))
2823 /* All other symbolic constants are literal pool references,
2824 which are OK as the literal pool must be small. */
2825 if (GET_CODE (disp
) == CONST
)
2831 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832 If successful, also determines the
2833 following characteristics of `ref': `is_ptr' - whether it can be an
2834 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2835 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2836 considered a literal pool pointer for purposes of avoiding two different
2837 literal pool pointers per insn during or after reload (`B' constraint). */
2839 s390_decompose_constant_pool_ref (rtx
*ref
, rtx
*disp
, bool *is_ptr
,
2840 bool *is_base_ptr
, bool *is_pool_ptr
)
2845 if (GET_CODE (*ref
) == UNSPEC
)
2846 switch (XINT (*ref
, 1))
2850 *disp
= gen_rtx_UNSPEC (Pmode
,
2851 gen_rtvec (1, XVECEXP (*ref
, 0, 0)),
2852 UNSPEC_LTREL_OFFSET
);
2856 *ref
= XVECEXP (*ref
, 0, 1);
2863 if (!REG_P (*ref
) || GET_MODE (*ref
) != Pmode
)
2866 if (REGNO (*ref
) == STACK_POINTER_REGNUM
2867 || REGNO (*ref
) == FRAME_POINTER_REGNUM
2868 || ((reload_completed
|| reload_in_progress
)
2869 && frame_pointer_needed
2870 && REGNO (*ref
) == HARD_FRAME_POINTER_REGNUM
)
2871 || REGNO (*ref
) == ARG_POINTER_REGNUM
2873 && REGNO (*ref
) == PIC_OFFSET_TABLE_REGNUM
))
2874 *is_ptr
= *is_base_ptr
= true;
2876 if ((reload_completed
|| reload_in_progress
)
2877 && *ref
== cfun
->machine
->base_reg
)
2878 *is_ptr
= *is_base_ptr
= *is_pool_ptr
= true;
2883 /* Decompose a RTL expression ADDR for a memory address into
2884 its components, returned in OUT.
2886 Returns false if ADDR is not a valid memory address, true
2887 otherwise. If OUT is NULL, don't return the components,
2888 but check for validity only.
2890 Note: Only addresses in canonical form are recognized.
2891 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892 canonical form so that they will be recognized. */
2895 s390_decompose_address (rtx addr
, struct s390_address
*out
)
2897 HOST_WIDE_INT offset
= 0;
2898 rtx base
= NULL_RTX
;
2899 rtx indx
= NULL_RTX
;
2900 rtx disp
= NULL_RTX
;
2902 bool pointer
= false;
2903 bool base_ptr
= false;
2904 bool indx_ptr
= false;
2905 bool literal_pool
= false;
2907 /* We may need to substitute the literal pool base register into the address
2908 below. However, at this point we do not know which register is going to
2909 be used as base, so we substitute the arg pointer register. This is going
2910 to be treated as holding a pointer below -- it shouldn't be used for any
2912 rtx fake_pool_base
= gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
);
2914 /* Decompose address into base + index + displacement. */
2916 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == UNSPEC
)
2919 else if (GET_CODE (addr
) == PLUS
)
2921 rtx op0
= XEXP (addr
, 0);
2922 rtx op1
= XEXP (addr
, 1);
2923 enum rtx_code code0
= GET_CODE (op0
);
2924 enum rtx_code code1
= GET_CODE (op1
);
2926 if (code0
== REG
|| code0
== UNSPEC
)
2928 if (code1
== REG
|| code1
== UNSPEC
)
2930 indx
= op0
; /* index + base */
2936 base
= op0
; /* base + displacement */
2941 else if (code0
== PLUS
)
2943 indx
= XEXP (op0
, 0); /* index + base + disp */
2944 base
= XEXP (op0
, 1);
2955 disp
= addr
; /* displacement */
2957 /* Extract integer part of displacement. */
2961 if (GET_CODE (disp
) == CONST_INT
)
2963 offset
= INTVAL (disp
);
2966 else if (GET_CODE (disp
) == CONST
2967 && GET_CODE (XEXP (disp
, 0)) == PLUS
2968 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
2970 offset
= INTVAL (XEXP (XEXP (disp
, 0), 1));
2971 disp
= XEXP (XEXP (disp
, 0), 0);
2975 /* Strip off CONST here to avoid special case tests later. */
2976 if (disp
&& GET_CODE (disp
) == CONST
)
2977 disp
= XEXP (disp
, 0);
2979 /* We can convert literal pool addresses to
2980 displacements by basing them off the base register. */
2981 if (disp
&& GET_CODE (disp
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (disp
))
2986 base
= fake_pool_base
, literal_pool
= true;
2988 /* Mark up the displacement. */
2989 disp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, disp
),
2990 UNSPEC_LTREL_OFFSET
);
2993 /* Validate base register. */
2994 if (!s390_decompose_constant_pool_ref (&base
, &disp
, &pointer
, &base_ptr
,
2998 /* Validate index register. */
2999 if (!s390_decompose_constant_pool_ref (&indx
, &disp
, &pointer
, &indx_ptr
,
3003 /* Prefer to use pointer as base, not index. */
3004 if (base
&& indx
&& !base_ptr
3005 && (indx_ptr
|| (!REG_POINTER (base
) && REG_POINTER (indx
))))
3012 /* Validate displacement. */
3015 /* If virtual registers are involved, the displacement will change later
3016 anyway as the virtual registers get eliminated. This could make a
3017 valid displacement invalid, but it is more likely to make an invalid
3018 displacement valid, because we sometimes access the register save area
3019 via negative offsets to one of those registers.
3020 Thus we don't check the displacement for validity here. If after
3021 elimination the displacement turns out to be invalid after all,
3022 this is fixed up by reload in any case. */
3023 /* LRA maintains always displacements up to date and we need to
3024 know the displacement is right during all LRA not only at the
3025 final elimination. */
3027 || (base
!= arg_pointer_rtx
3028 && indx
!= arg_pointer_rtx
3029 && base
!= return_address_pointer_rtx
3030 && indx
!= return_address_pointer_rtx
3031 && base
!= frame_pointer_rtx
3032 && indx
!= frame_pointer_rtx
3033 && base
!= virtual_stack_vars_rtx
3034 && indx
!= virtual_stack_vars_rtx
))
3035 if (!DISP_IN_RANGE (offset
))
3040 /* All the special cases are pointers. */
3043 /* In the small-PIC case, the linker converts @GOT
3044 and @GOTNTPOFF offsets to possible displacements. */
3045 if (GET_CODE (disp
) == UNSPEC
3046 && (XINT (disp
, 1) == UNSPEC_GOT
3047 || XINT (disp
, 1) == UNSPEC_GOTNTPOFF
)
3053 /* Accept pool label offsets. */
3054 else if (GET_CODE (disp
) == UNSPEC
3055 && XINT (disp
, 1) == UNSPEC_POOL_OFFSET
)
3058 /* Accept literal pool references. */
3059 else if (GET_CODE (disp
) == UNSPEC
3060 && XINT (disp
, 1) == UNSPEC_LTREL_OFFSET
)
3062 /* In case CSE pulled a non literal pool reference out of
3063 the pool we have to reject the address. This is
3064 especially important when loading the GOT pointer on non
3065 zarch CPUs. In this case the literal pool contains an lt
3066 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067 will most likely exceed the displacement. */
3068 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
3069 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp
, 0, 0)))
3072 orig_disp
= gen_rtx_CONST (Pmode
, disp
);
3075 /* If we have an offset, make sure it does not
3076 exceed the size of the constant pool entry.
3077 Otherwise we might generate an out-of-range
3078 displacement for the base register form. */
3079 rtx sym
= XVECEXP (disp
, 0, 0);
3080 if (offset
>= GET_MODE_SIZE (get_pool_mode (sym
)))
3083 orig_disp
= plus_constant (Pmode
, orig_disp
, offset
);
3098 out
->disp
= orig_disp
;
3099 out
->pointer
= pointer
;
3100 out
->literal_pool
= literal_pool
;
3106 /* Decompose a RTL expression OP for an address style operand into its
3107 components, and return the base register in BASE and the offset in
3108 OFFSET. While OP looks like an address it is never supposed to be
3111 Return true if OP is a valid address operand, false if not. */
3114 s390_decompose_addrstyle_without_index (rtx op
, rtx
*base
,
3115 HOST_WIDE_INT
*offset
)
3119 /* We can have an integer constant, an address register,
3120 or a sum of the two. */
3121 if (CONST_SCALAR_INT_P (op
))
3126 if (op
&& GET_CODE (op
) == PLUS
&& CONST_SCALAR_INT_P (XEXP (op
, 1)))
3131 while (op
&& GET_CODE (op
) == SUBREG
)
3132 op
= SUBREG_REG (op
);
3134 if (op
&& GET_CODE (op
) != REG
)
3139 if (off
== NULL_RTX
)
3141 else if (CONST_INT_P (off
))
3142 *offset
= INTVAL (off
);
3143 else if (CONST_WIDE_INT_P (off
))
3144 /* The offset will anyway be cut down to 12 bits so take just
3145 the lowest order chunk of the wide int. */
3146 *offset
= CONST_WIDE_INT_ELT (off
, 0);
3156 /* Check that OP is a valid shift count operand.
3157 It should be of the following structure:
3158 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159 where subreg, and and plus are optional.
3161 If IMPLICIT_MASK is > 0 and OP contains and
3163 it is checked whether IMPLICIT_MASK and the immediate match.
3164 Otherwise, no checking is performed.
3167 s390_valid_shift_count (rtx op
, HOST_WIDE_INT implicit_mask
)
3170 while (GET_CODE (op
) == SUBREG
&& subreg_lowpart_p (op
))
3173 /* Check for an and with proper constant. */
3174 if (GET_CODE (op
) == AND
)
3176 rtx op1
= XEXP (op
, 0);
3177 rtx imm
= XEXP (op
, 1);
3179 if (GET_CODE (op1
) == SUBREG
&& subreg_lowpart_p (op1
))
3180 op1
= XEXP (op1
, 0);
3182 if (!(register_operand (op1
, GET_MODE (op1
)) || GET_CODE (op1
) == PLUS
))
3185 if (!immediate_operand (imm
, GET_MODE (imm
)))
3188 HOST_WIDE_INT val
= INTVAL (imm
);
3189 if (implicit_mask
> 0
3190 && (val
& implicit_mask
) != implicit_mask
)
3196 /* Check the rest. */
3197 return s390_decompose_addrstyle_without_index (op
, NULL
, NULL
);
3200 /* Return true if CODE is a valid address without index. */
3203 s390_legitimate_address_without_index_p (rtx op
)
3205 struct s390_address addr
;
3207 if (!s390_decompose_address (XEXP (op
, 0), &addr
))
3216 /* Return TRUE if ADDR is an operand valid for a load/store relative
3217 instruction. Be aware that the alignment of the operand needs to
3218 be checked separately.
3219 Valid addresses are single references or a sum of a reference and a
3220 constant integer. Return these parts in SYMREF and ADDEND. You can
3221 pass NULL in REF and/or ADDEND if you are not interested in these
3225 s390_loadrelative_operand_p (rtx addr
, rtx
*symref
, HOST_WIDE_INT
*addend
)
3227 HOST_WIDE_INT tmpaddend
= 0;
3229 if (GET_CODE (addr
) == CONST
)
3230 addr
= XEXP (addr
, 0);
3232 if (GET_CODE (addr
) == PLUS
)
3234 if (!CONST_INT_P (XEXP (addr
, 1)))
3237 tmpaddend
= INTVAL (XEXP (addr
, 1));
3238 addr
= XEXP (addr
, 0);
3241 if (GET_CODE (addr
) == SYMBOL_REF
3242 || (GET_CODE (addr
) == UNSPEC
3243 && (XINT (addr
, 1) == UNSPEC_GOTENT
3244 || XINT (addr
, 1) == UNSPEC_PLT
)))
3249 *addend
= tmpaddend
;
3256 /* Return true if the address in OP is valid for constraint letter C
3257 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3258 pool MEMs should be accepted. Only the Q, R, S, T constraint
3259 letters are allowed for C. */
3262 s390_check_qrst_address (char c
, rtx op
, bool lit_pool_ok
)
3265 struct s390_address addr
;
3266 bool decomposed
= false;
3268 if (!address_operand (op
, GET_MODE (op
)))
3271 /* This check makes sure that no symbolic address (except literal
3272 pool references) are accepted by the R or T constraints. */
3273 if (s390_loadrelative_operand_p (op
, &symref
, NULL
)
3275 || !SYMBOL_REF_P (symref
)
3276 || !CONSTANT_POOL_ADDRESS_P (symref
)))
3279 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3282 if (!s390_decompose_address (op
, &addr
))
3284 if (addr
.literal_pool
)
3289 /* With reload, we sometimes get intermediate address forms that are
3290 actually invalid as-is, but we need to accept them in the most
3291 generic cases below ('R' or 'T'), since reload will in fact fix
3292 them up. LRA behaves differently here; we never see such forms,
3293 but on the other hand, we need to strictly reject every invalid
3294 address form. After both reload and LRA invalid address forms
3295 must be rejected, because nothing will fix them up later. Perform
3296 this check right up front. */
3297 if (lra_in_progress
|| reload_completed
)
3299 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3306 case 'Q': /* no index short displacement */
3307 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3311 if (!s390_short_displacement (addr
.disp
))
3315 case 'R': /* with index short displacement */
3316 if (TARGET_LONG_DISPLACEMENT
)
3318 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3320 if (!s390_short_displacement (addr
.disp
))
3323 /* Any invalid address here will be fixed up by reload,
3324 so accept it for the most generic constraint. */
3327 case 'S': /* no index long displacement */
3328 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3334 case 'T': /* with index long displacement */
3335 /* Any invalid address here will be fixed up by reload,
3336 so accept it for the most generic constraint. */
3346 /* Evaluates constraint strings described by the regular expression
3347 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348 the constraint given in STR, or 0 else. */
3351 s390_mem_constraint (const char *str
, rtx op
)
3358 /* Check for offsettable variants of memory constraints. */
3359 if (!MEM_P (op
) || MEM_VOLATILE_P (op
))
3361 if ((reload_completed
|| reload_in_progress
)
3362 ? !offsettable_memref_p (op
) : !offsettable_nonstrict_memref_p (op
))
3364 return s390_check_qrst_address (str
[1], XEXP (op
, 0), true);
3366 /* Check for non-literal-pool variants of memory constraints. */
3369 return s390_check_qrst_address (str
[1], XEXP (op
, 0), false);
3374 if (GET_CODE (op
) != MEM
)
3376 return s390_check_qrst_address (c
, XEXP (op
, 0), true);
3378 /* Simply check for the basic form of a shift count. Reload will
3379 take care of making sure we have a proper base register. */
3380 if (!s390_decompose_addrstyle_without_index (op
, NULL
, NULL
))
3384 return s390_check_qrst_address (str
[1], op
, true);
3392 /* Evaluates constraint strings starting with letter O. Input
3393 parameter C is the second letter following the "O" in the constraint
3394 string. Returns 1 if VALUE meets the respective constraint and 0
3398 s390_O_constraint_str (const char c
, HOST_WIDE_INT value
)
3406 return trunc_int_for_mode (value
, SImode
) == value
;
3410 || s390_single_part (GEN_INT (value
), DImode
, SImode
, 0) == 1;
3413 return s390_single_part (GEN_INT (value
- 1), DImode
, SImode
, -1) == 1;
3421 /* Evaluates constraint strings starting with letter N. Parameter STR
3422 contains the letters following letter "N" in the constraint string.
3423 Returns true if VALUE matches the constraint. */
3426 s390_N_constraint_str (const char *str
, HOST_WIDE_INT value
)
3428 machine_mode mode
, part_mode
;
3430 int part
, part_goal
;
3436 part_goal
= str
[0] - '0';
3480 if (GET_MODE_SIZE (mode
) <= GET_MODE_SIZE (part_mode
))
3483 part
= s390_single_part (GEN_INT (value
), mode
, part_mode
, def
);
3486 if (part_goal
!= -1 && part_goal
!= part
)
3493 /* Returns true if the input parameter VALUE is a float zero. */
3496 s390_float_const_zero_p (rtx value
)
3498 return (GET_MODE_CLASS (GET_MODE (value
)) == MODE_FLOAT
3499 && value
== CONST0_RTX (GET_MODE (value
)));
3502 /* Implement TARGET_REGISTER_MOVE_COST. */
3505 s390_register_move_cost (machine_mode mode
,
3506 reg_class_t from
, reg_class_t to
)
3508 /* On s390, copy between fprs and gprs is expensive. */
3510 /* It becomes somewhat faster having ldgr/lgdr. */
3511 if (TARGET_Z10
&& GET_MODE_SIZE (mode
) == 8)
3513 /* ldgr is single cycle. */
3514 if (reg_classes_intersect_p (from
, GENERAL_REGS
)
3515 && reg_classes_intersect_p (to
, FP_REGS
))
3517 /* lgdr needs 3 cycles. */
3518 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
3519 && reg_classes_intersect_p (from
, FP_REGS
))
3523 /* Otherwise copying is done via memory. */
3524 if ((reg_classes_intersect_p (from
, GENERAL_REGS
)
3525 && reg_classes_intersect_p (to
, FP_REGS
))
3526 || (reg_classes_intersect_p (from
, FP_REGS
)
3527 && reg_classes_intersect_p (to
, GENERAL_REGS
)))
3530 /* We usually do not want to copy via CC. */
3531 if (reg_classes_intersect_p (from
, CC_REGS
)
3532 || reg_classes_intersect_p (to
, CC_REGS
))
3538 /* Implement TARGET_MEMORY_MOVE_COST. */
3541 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
3542 reg_class_t rclass ATTRIBUTE_UNUSED
,
3543 bool in ATTRIBUTE_UNUSED
)
3548 /* Compute a (partial) cost for rtx X. Return true if the complete
3549 cost has been computed, and false if subexpressions should be
3550 scanned. In either case, *TOTAL contains the cost result. The
3551 initial value of *TOTAL is the default value computed by
3552 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3553 code of the superexpression of x. */
3556 s390_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
3557 int opno ATTRIBUTE_UNUSED
,
3558 int *total
, bool speed ATTRIBUTE_UNUSED
)
3560 int code
= GET_CODE (x
);
3568 case CONST_WIDE_INT
:
3575 /* Without this a conditional move instruction would be
3576 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577 comparison operator). That's a bit pessimistic. */
3579 if (!TARGET_Z196
|| GET_CODE (SET_SRC (x
)) != IF_THEN_ELSE
)
3582 rtx cond
= XEXP (SET_SRC (x
), 0);
3584 if (!CC_REG_P (XEXP (cond
, 0)) || !CONST_INT_P (XEXP (cond
, 1)))
3587 /* It is going to be a load/store on condition. Make it
3588 slightly more expensive than a normal load. */
3589 *total
= COSTS_N_INSNS (1) + 1;
3591 rtx dst
= SET_DEST (x
);
3592 rtx then
= XEXP (SET_SRC (x
), 1);
3593 rtx els
= XEXP (SET_SRC (x
), 2);
3595 /* It is a real IF-THEN-ELSE. An additional move will be
3596 needed to implement that. */
3599 && !rtx_equal_p (dst
, then
)
3600 && !rtx_equal_p (dst
, els
))
3601 *total
+= COSTS_N_INSNS (1) / 2;
3603 /* A minor penalty for constants we cannot directly handle. */
3604 if ((CONST_INT_P (then
) || CONST_INT_P (els
))
3605 && (!TARGET_Z13
|| MEM_P (dst
)
3606 || (CONST_INT_P (then
) && !satisfies_constraint_K (then
))
3607 || (CONST_INT_P (els
) && !satisfies_constraint_K (els
))))
3608 *total
+= COSTS_N_INSNS (1) / 2;
3610 /* A store on condition can only handle register src operands. */
3611 if (MEM_P (dst
) && (!REG_P (then
) || !REG_P (els
)))
3612 *total
+= COSTS_N_INSNS (1) / 2;
3620 && (mode
== SImode
|| mode
== DImode
)
3621 && GET_CODE (XEXP (x
, 0)) == NOT
3622 && GET_CODE (XEXP (x
, 1)) == NOT
)
3624 *total
= COSTS_N_INSNS (1);
3625 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3627 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3633 if (GET_CODE (XEXP (x
, 0)) == AND
3634 && GET_CODE (XEXP (x
, 1)) == ASHIFT
3635 && REG_P (XEXP (XEXP (x
, 0), 0))
3636 && REG_P (XEXP (XEXP (x
, 1), 0))
3637 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3638 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3639 && (UINTVAL (XEXP (XEXP (x
, 0), 1)) ==
3640 (HOST_WIDE_INT_1U
<< UINTVAL (XEXP (XEXP (x
, 1), 1))) - 1))
3642 *total
= COSTS_N_INSNS (2);
3646 /* ~AND on a 128 bit mode. This can be done using a vector
3649 && GET_CODE (XEXP (x
, 0)) == NOT
3650 && GET_CODE (XEXP (x
, 1)) == NOT
3651 && REG_P (XEXP (XEXP (x
, 0), 0))
3652 && REG_P (XEXP (XEXP (x
, 1), 0))
3653 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x
, 0), 0))) == 16
3654 && s390_hard_regno_mode_ok (VR0_REGNUM
,
3655 GET_MODE (XEXP (XEXP (x
, 0), 0))))
3657 *total
= COSTS_N_INSNS (1);
3661 *total
= COSTS_N_INSNS (1);
3667 && (mode
== SImode
|| mode
== DImode
)
3668 && GET_CODE (XEXP (x
, 0)) == NOT
3669 && GET_CODE (XEXP (x
, 1)) == NOT
)
3671 *total
= COSTS_N_INSNS (1);
3672 if (!REG_P (XEXP (XEXP (x
, 0), 0)))
3674 if (!REG_P (XEXP (XEXP (x
, 1), 0)))
3689 *total
= COSTS_N_INSNS (1);
3697 rtx left
= XEXP (x
, 0);
3698 rtx right
= XEXP (x
, 1);
3699 if (GET_CODE (right
) == CONST_INT
3700 && CONST_OK_FOR_K (INTVAL (right
)))
3701 *total
= s390_cost
->mhi
;
3702 else if (GET_CODE (left
) == SIGN_EXTEND
)
3703 *total
= s390_cost
->mh
;
3705 *total
= s390_cost
->ms
; /* msr, ms, msy */
3710 rtx left
= XEXP (x
, 0);
3711 rtx right
= XEXP (x
, 1);
3714 if (GET_CODE (right
) == CONST_INT
3715 && CONST_OK_FOR_K (INTVAL (right
)))
3716 *total
= s390_cost
->mghi
;
3717 else if (GET_CODE (left
) == SIGN_EXTEND
)
3718 *total
= s390_cost
->msgf
;
3720 *total
= s390_cost
->msg
; /* msgr, msg */
3722 else /* TARGET_31BIT */
3724 if (GET_CODE (left
) == SIGN_EXTEND
3725 && GET_CODE (right
) == SIGN_EXTEND
)
3726 /* mulsidi case: mr, m */
3727 *total
= s390_cost
->m
;
3728 else if (GET_CODE (left
) == ZERO_EXTEND
3729 && GET_CODE (right
) == ZERO_EXTEND
)
3730 /* umulsidi case: ml, mlr */
3731 *total
= s390_cost
->ml
;
3733 /* Complex calculation is required. */
3734 *total
= COSTS_N_INSNS (40);
3740 *total
= s390_cost
->mult_df
;
3743 *total
= s390_cost
->mxbr
;
3754 *total
= s390_cost
->madbr
;
3757 *total
= s390_cost
->maebr
;
3762 /* Negate in the third argument is free: FMSUB. */
3763 if (GET_CODE (XEXP (x
, 2)) == NEG
)
3765 *total
+= (rtx_cost (XEXP (x
, 0), mode
, FMA
, 0, speed
)
3766 + rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
)
3767 + rtx_cost (XEXP (XEXP (x
, 2), 0), mode
, FMA
, 2, speed
));
3774 if (mode
== TImode
) /* 128 bit division */
3775 *total
= s390_cost
->dlgr
;
3776 else if (mode
== DImode
)
3778 rtx right
= XEXP (x
, 1);
3779 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3780 *total
= s390_cost
->dlr
;
3781 else /* 64 by 64 bit division */
3782 *total
= s390_cost
->dlgr
;
3784 else if (mode
== SImode
) /* 32 bit division */
3785 *total
= s390_cost
->dlr
;
3792 rtx right
= XEXP (x
, 1);
3793 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3795 *total
= s390_cost
->dsgfr
;
3797 *total
= s390_cost
->dr
;
3798 else /* 64 by 64 bit division */
3799 *total
= s390_cost
->dsgr
;
3801 else if (mode
== SImode
) /* 32 bit division */
3802 *total
= s390_cost
->dlr
;
3803 else if (mode
== SFmode
)
3805 *total
= s390_cost
->debr
;
3807 else if (mode
== DFmode
)
3809 *total
= s390_cost
->ddbr
;
3811 else if (mode
== TFmode
)
3813 *total
= s390_cost
->dxbr
;
3819 *total
= s390_cost
->sqebr
;
3820 else if (mode
== DFmode
)
3821 *total
= s390_cost
->sqdbr
;
3823 *total
= s390_cost
->sqxbr
;
3828 if (outer_code
== MULT
|| outer_code
== DIV
|| outer_code
== MOD
3829 || outer_code
== PLUS
|| outer_code
== MINUS
3830 || outer_code
== COMPARE
)
3835 *total
= COSTS_N_INSNS (1);
3837 /* nxrk, nxgrk ~(a^b)==0 */
3839 && GET_CODE (XEXP (x
, 0)) == NOT
3840 && XEXP (x
, 1) == const0_rtx
3841 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == XOR
3842 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
3845 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
3847 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
3852 /* nnrk, nngrk, nork, nogrk */
3854 && (GET_CODE (XEXP (x
, 0)) == AND
|| GET_CODE (XEXP (x
, 0)) == IOR
)
3855 && XEXP (x
, 1) == const0_rtx
3856 && (GET_MODE (XEXP (x
, 0)) == SImode
|| GET_MODE (XEXP (x
, 0)) == DImode
)
3857 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == NOT
3858 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == NOT
3861 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 0), 0)))
3863 if (!REG_P (XEXP (XEXP (XEXP (x
, 0), 1), 0)))
3868 if (GET_CODE (XEXP (x
, 0)) == AND
3869 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3870 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
3872 rtx op0
= XEXP (XEXP (x
, 0), 0);
3873 rtx op1
= XEXP (XEXP (x
, 0), 1);
3874 rtx op2
= XEXP (x
, 1);
3876 if (memory_operand (op0
, GET_MODE (op0
))
3877 && s390_tm_ccmode (op1
, op2
, 0) != VOIDmode
)
3879 if (register_operand (op0
, GET_MODE (op0
))
3880 && s390_tm_ccmode (op1
, op2
, 1) != VOIDmode
)
3890 /* Return the cost of an address rtx ADDR. */
3893 s390_address_cost (rtx addr
, machine_mode mode ATTRIBUTE_UNUSED
,
3894 addr_space_t as ATTRIBUTE_UNUSED
,
3895 bool speed ATTRIBUTE_UNUSED
)
3897 struct s390_address ad
;
3898 if (!s390_decompose_address (addr
, &ad
))
3901 return ad
.indx
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3904 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3906 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
3908 int misalign ATTRIBUTE_UNUSED
)
3910 switch (type_of_cost
)
3918 case vector_gather_load
:
3919 case vector_scatter_store
:
3922 case cond_branch_not_taken
:
3924 case vec_promote_demote
:
3925 case unaligned_load
:
3926 case unaligned_store
:
3929 case cond_branch_taken
:
3933 return TYPE_VECTOR_SUBPARTS (vectype
) - 1;
3940 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941 otherwise return 0. */
3944 tls_symbolic_operand (rtx op
)
3946 if (GET_CODE (op
) != SYMBOL_REF
)
3948 return SYMBOL_REF_TLS_MODEL (op
);
3951 /* Split DImode access register reference REG (on 64-bit) into its constituent
3952 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3953 gen_highpart cannot be used as they assume all registers are word-sized,
3954 while our access registers have only half that size. */
3957 s390_split_access_reg (rtx reg
, rtx
*lo
, rtx
*hi
)
3959 gcc_assert (TARGET_64BIT
);
3960 gcc_assert (ACCESS_REG_P (reg
));
3961 gcc_assert (GET_MODE (reg
) == DImode
);
3962 gcc_assert (!(REGNO (reg
) & 1));
3964 *lo
= gen_rtx_REG (SImode
, REGNO (reg
) + 1);
3965 *hi
= gen_rtx_REG (SImode
, REGNO (reg
));
3968 /* Return true if OP contains a symbol reference */
3971 symbolic_reference_mentioned_p (rtx op
)
3976 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3979 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3980 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3986 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3987 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3991 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3998 /* Return true if OP contains a reference to a thread-local symbol. */
4001 tls_symbolic_reference_mentioned_p (rtx op
)
4006 if (GET_CODE (op
) == SYMBOL_REF
)
4007 return tls_symbolic_operand (op
);
4009 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4010 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4016 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4017 if (tls_symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4021 else if (fmt
[i
] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op
, i
)))
4029 /* Return true if OP is a legitimate general operand when
4030 generating PIC code. It is given that flag_pic is on
4031 and that OP satisfies CONSTANT_P. */
4034 legitimate_pic_operand_p (rtx op
)
4036 /* Accept all non-symbolic constants. */
4037 if (!SYMBOLIC_CONST (op
))
4040 /* Accept addresses that can be expressed relative to (pc). */
4041 if (larl_operand (op
, VOIDmode
))
4044 /* Reject everything else; must be handled
4045 via emit_symbolic_move. */
4049 /* Returns true if the constant value OP is a legitimate general operand.
4050 It is given that OP satisfies CONSTANT_P. */
4053 s390_legitimate_constant_p (machine_mode mode
, rtx op
)
4055 if (TARGET_VX
&& VECTOR_MODE_P (mode
) && GET_CODE (op
) == CONST_VECTOR
)
4057 if (GET_MODE_SIZE (mode
) != 16)
4060 if (!satisfies_constraint_j00 (op
)
4061 && !satisfies_constraint_jm1 (op
)
4062 && !satisfies_constraint_jKK (op
)
4063 && !satisfies_constraint_jxx (op
)
4064 && !satisfies_constraint_jyy (op
))
4068 /* Accept all non-symbolic constants. */
4069 if (!SYMBOLIC_CONST (op
))
4072 /* Accept immediate LARL operands. */
4073 if (larl_operand (op
, mode
))
4076 /* Thread-local symbols are never legal constants. This is
4077 so that emit_call knows that computing such addresses
4078 might require a function call. */
4079 if (TLS_SYMBOLIC_CONST (op
))
4082 /* In the PIC case, symbolic constants must *not* be
4083 forced into the literal pool. We accept them here,
4084 so that they will be handled by emit_symbolic_move. */
4088 /* All remaining non-PIC symbolic constants are
4089 forced into the literal pool. */
4093 /* Determine if it's legal to put X into the constant pool. This
4094 is not possible if X contains the address of a symbol that is
4095 not constant (TLS) or not known at final link time (PIC). */
4098 s390_cannot_force_const_mem (machine_mode mode
, rtx x
)
4100 switch (GET_CODE (x
))
4104 case CONST_WIDE_INT
:
4106 /* Accept all non-symbolic constants. */
4110 /* Labels are OK iff we are non-PIC. */
4111 return flag_pic
!= 0;
4114 /* 'Naked' TLS symbol references are never OK,
4115 non-TLS symbols are OK iff we are non-PIC. */
4116 if (tls_symbolic_operand (x
))
4119 return flag_pic
!= 0;
4122 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0));
4125 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0))
4126 || s390_cannot_force_const_mem (mode
, XEXP (x
, 1));
4129 switch (XINT (x
, 1))
4131 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4132 case UNSPEC_LTREL_OFFSET
:
4140 case UNSPEC_GOTNTPOFF
:
4141 case UNSPEC_INDNTPOFF
:
4144 /* If the literal pool shares the code section, be put
4145 execute template placeholders into the pool as well. */
4157 /* Returns true if the constant value OP is a legitimate general
4158 operand during and after reload. The difference to
4159 legitimate_constant_p is that this function will not accept
4160 a constant that would need to be forced to the literal pool
4161 before it can be used as operand.
4162 This function accepts all constants which can be loaded directly
4166 legitimate_reload_constant_p (rtx op
)
4168 /* Accept la(y) operands. */
4169 if (GET_CODE (op
) == CONST_INT
4170 && DISP_IN_RANGE (INTVAL (op
)))
4173 /* Accept l(g)hi/l(g)fi operands. */
4174 if (GET_CODE (op
) == CONST_INT
4175 && (CONST_OK_FOR_K (INTVAL (op
)) || CONST_OK_FOR_Os (INTVAL (op
))))
4178 /* Accept lliXX operands. */
4180 && GET_CODE (op
) == CONST_INT
4181 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4182 && s390_single_part (op
, word_mode
, HImode
, 0) >= 0)
4186 && GET_CODE (op
) == CONST_INT
4187 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
4188 && s390_single_part (op
, word_mode
, SImode
, 0) >= 0)
4191 /* Accept larl operands. */
4192 if (larl_operand (op
, VOIDmode
))
4195 /* Accept floating-point zero operands that fit into a single GPR. */
4196 if (GET_CODE (op
) == CONST_DOUBLE
4197 && s390_float_const_zero_p (op
)
4198 && GET_MODE_SIZE (GET_MODE (op
)) <= UNITS_PER_WORD
)
4201 /* Accept double-word operands that can be split. */
4202 if (GET_CODE (op
) == CONST_WIDE_INT
4203 || (GET_CODE (op
) == CONST_INT
4204 && trunc_int_for_mode (INTVAL (op
), word_mode
) != INTVAL (op
)))
4206 machine_mode dword_mode
= word_mode
== SImode
? DImode
: TImode
;
4207 rtx hi
= operand_subword (op
, 0, 0, dword_mode
);
4208 rtx lo
= operand_subword (op
, 1, 0, dword_mode
);
4209 return legitimate_reload_constant_p (hi
)
4210 && legitimate_reload_constant_p (lo
);
4213 /* Everything else cannot be handled without reload. */
4217 /* Returns true if the constant value OP is a legitimate fp operand
4218 during and after reload.
4219 This function accepts all constants which can be loaded directly
4223 legitimate_reload_fp_constant_p (rtx op
)
4225 /* Accept floating-point zero operands if the load zero instruction
4226 can be used. Prior to z196 the load fp zero instruction caused a
4227 performance penalty if the result is used as BFP number. */
4229 && GET_CODE (op
) == CONST_DOUBLE
4230 && s390_float_const_zero_p (op
))
4236 /* Returns true if the constant value OP is a legitimate vector operand
4237 during and after reload.
4238 This function accepts all constants which can be loaded directly
4242 legitimate_reload_vector_constant_p (rtx op
)
4244 if (TARGET_VX
&& GET_MODE_SIZE (GET_MODE (op
)) == 16
4245 && (satisfies_constraint_j00 (op
)
4246 || satisfies_constraint_jm1 (op
)
4247 || satisfies_constraint_jKK (op
)
4248 || satisfies_constraint_jxx (op
)
4249 || satisfies_constraint_jyy (op
)))
4255 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256 return the class of reg to actually use. */
4259 s390_preferred_reload_class (rtx op
, reg_class_t rclass
)
4261 switch (GET_CODE (op
))
4263 /* Constants we cannot reload into general registers
4264 must be forced into the literal pool. */
4268 case CONST_WIDE_INT
:
4269 if (reg_class_subset_p (GENERAL_REGS
, rclass
)
4270 && legitimate_reload_constant_p (op
))
4271 return GENERAL_REGS
;
4272 else if (reg_class_subset_p (ADDR_REGS
, rclass
)
4273 && legitimate_reload_constant_p (op
))
4275 else if (reg_class_subset_p (FP_REGS
, rclass
)
4276 && legitimate_reload_fp_constant_p (op
))
4278 else if (reg_class_subset_p (VEC_REGS
, rclass
)
4279 && legitimate_reload_vector_constant_p (op
))
4284 /* If a symbolic constant or a PLUS is reloaded,
4285 it is most likely being used as an address, so
4286 prefer ADDR_REGS. If 'class' is not a superset
4287 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4289 /* Symrefs cannot be pushed into the literal pool with -fPIC
4290 so we *MUST NOT* return NO_REGS for these cases
4291 (s390_cannot_force_const_mem will return true).
4293 On the other hand we MUST return NO_REGS for symrefs with
4294 invalid addend which might have been pushed to the literal
4295 pool (no -fPIC). Usually we would expect them to be
4296 handled via secondary reload but this does not happen if
4297 they are used as literal pool slot replacement in reload
4298 inheritance (see emit_input_reload_insns). */
4299 if (GET_CODE (XEXP (op
, 0)) == PLUS
4300 && GET_CODE (XEXP (XEXP(op
, 0), 0)) == SYMBOL_REF
4301 && GET_CODE (XEXP (XEXP(op
, 0), 1)) == CONST_INT
)
4303 if (flag_pic
&& reg_class_subset_p (ADDR_REGS
, rclass
))
4311 if (!legitimate_reload_constant_p (op
))
4315 /* load address will be used. */
4316 if (reg_class_subset_p (ADDR_REGS
, rclass
))
4328 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4333 s390_check_symref_alignment (rtx addr
, HOST_WIDE_INT alignment
)
4335 HOST_WIDE_INT addend
;
4338 /* The "required alignment" might be 0 (e.g. for certain structs
4339 accessed via BLKmode). Early abort in this case, as well as when
4340 an alignment > 8 is required. */
4341 if (alignment
< 2 || alignment
> 8)
4344 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4347 if (addend
& (alignment
- 1))
4350 if (GET_CODE (symref
) == SYMBOL_REF
)
4352 /* s390_encode_section_info is not called for anchors, since they don't
4353 have corresponding VAR_DECLs. Therefore, we cannot rely on
4354 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4355 if (SYMBOL_REF_ANCHOR_P (symref
))
4357 HOST_WIDE_INT block_offset
= SYMBOL_REF_BLOCK_OFFSET (symref
);
4358 unsigned int block_alignment
= (SYMBOL_REF_BLOCK (symref
)->alignment
4361 gcc_assert (block_offset
>= 0);
4362 return ((block_offset
& (alignment
- 1)) == 0
4363 && block_alignment
>= alignment
);
4366 /* We have load-relative instructions for 2-byte, 4-byte, and
4367 8-byte alignment so allow only these. */
4370 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref
);
4371 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref
);
4372 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref
);
4373 default: return false;
4377 if (GET_CODE (symref
) == UNSPEC
4378 && alignment
<= UNITS_PER_LONG
)
4384 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4385 operand SCRATCH is used to reload the even part of the address and
4389 s390_reload_larl_operand (rtx reg
, rtx addr
, rtx scratch
)
4391 HOST_WIDE_INT addend
;
4394 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4398 /* Easy case. The addend is even so larl will do fine. */
4399 emit_move_insn (reg
, addr
);
4402 /* We can leave the scratch register untouched if the target
4403 register is a valid base register. */
4404 if (REGNO (reg
) < FIRST_PSEUDO_REGISTER
4405 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
)
4408 gcc_assert (REGNO (scratch
) < FIRST_PSEUDO_REGISTER
);
4409 gcc_assert (REGNO_REG_CLASS (REGNO (scratch
)) == ADDR_REGS
);
4412 emit_move_insn (scratch
,
4413 gen_rtx_CONST (Pmode
,
4414 gen_rtx_PLUS (Pmode
, symref
,
4415 GEN_INT (addend
- 1))));
4417 emit_move_insn (scratch
, symref
);
4419 /* Increment the address using la in order to avoid clobbering cc. */
4420 s390_load_address (reg
, gen_rtx_PLUS (Pmode
, scratch
, const1_rtx
));
4424 /* Generate what is necessary to move between REG and MEM using
4425 SCRATCH. The direction is given by TOMEM. */
4428 s390_reload_symref_address (rtx reg
, rtx mem
, rtx scratch
, bool tomem
)
4430 /* Reload might have pulled a constant out of the literal pool.
4431 Force it back in. */
4432 if (CONST_INT_P (mem
) || GET_CODE (mem
) == CONST_DOUBLE
4433 || GET_CODE (mem
) == CONST_WIDE_INT
4434 || GET_CODE (mem
) == CONST_VECTOR
4435 || GET_CODE (mem
) == CONST
)
4436 mem
= force_const_mem (GET_MODE (reg
), mem
);
4438 gcc_assert (MEM_P (mem
));
4440 /* For a load from memory we can leave the scratch register
4441 untouched if the target register is a valid base register. */
4443 && REGNO (reg
) < FIRST_PSEUDO_REGISTER
4444 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
4445 && GET_MODE (reg
) == GET_MODE (scratch
))
4448 /* Load address into scratch register. Since we can't have a
4449 secondary reload for a secondary reload we have to cover the case
4450 where larl would need a secondary reload here as well. */
4451 s390_reload_larl_operand (scratch
, XEXP (mem
, 0), scratch
);
4453 /* Now we can use a standard load/store to do the move. */
4455 emit_move_insn (replace_equiv_address (mem
, scratch
), reg
);
4457 emit_move_insn (reg
, replace_equiv_address (mem
, scratch
));
4460 /* Inform reload about cases where moving X with a mode MODE to a register in
4461 RCLASS requires an extra scratch or immediate register. Return the class
4462 needed for the immediate register. */
4465 s390_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
4466 machine_mode mode
, secondary_reload_info
*sri
)
4468 enum reg_class rclass
= (enum reg_class
) rclass_i
;
4470 /* Intermediate register needed. */
4471 if (reg_classes_intersect_p (CC_REGS
, rclass
))
4472 return GENERAL_REGS
;
4476 /* The vst/vl vector move instructions allow only for short
4479 && GET_CODE (XEXP (x
, 0)) == PLUS
4480 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4481 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x
, 0), 1)))
4482 && reg_class_subset_p (rclass
, VEC_REGS
)
4483 && (!reg_class_subset_p (rclass
, FP_REGS
)
4484 || (GET_MODE_SIZE (mode
) > 8
4485 && s390_class_max_nregs (FP_REGS
, mode
) == 1)))
4488 sri
->icode
= (TARGET_64BIT
?
4489 CODE_FOR_reloaddi_la_in
:
4490 CODE_FOR_reloadsi_la_in
);
4492 sri
->icode
= (TARGET_64BIT
?
4493 CODE_FOR_reloaddi_la_out
:
4494 CODE_FOR_reloadsi_la_out
);
4500 HOST_WIDE_INT offset
;
4503 /* On z10 several optimizer steps may generate larl operands with
4506 && s390_loadrelative_operand_p (x
, &symref
, &offset
)
4508 && !SYMBOL_FLAG_NOTALIGN2_P (symref
)
4509 && (offset
& 1) == 1)
4510 sri
->icode
= ((mode
== DImode
) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511 : CODE_FOR_reloadsi_larl_odd_addend_z10
);
4513 /* Handle all the (mem (symref)) accesses we cannot use the z10
4514 instructions for. */
4516 && s390_loadrelative_operand_p (XEXP (x
, 0), NULL
, NULL
)
4518 || !reg_class_subset_p (rclass
, GENERAL_REGS
)
4519 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
4520 || !s390_check_symref_alignment (XEXP (x
, 0),
4521 GET_MODE_SIZE (mode
))))
4523 #define __SECONDARY_RELOAD_CASE(M,m) \
4526 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4527 CODE_FOR_reload##m##di_tomem_z10; \
4529 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4530 CODE_FOR_reload##m##si_tomem_z10; \
4533 switch (GET_MODE (x
))
4535 __SECONDARY_RELOAD_CASE (QI
, qi
);
4536 __SECONDARY_RELOAD_CASE (HI
, hi
);
4537 __SECONDARY_RELOAD_CASE (SI
, si
);
4538 __SECONDARY_RELOAD_CASE (DI
, di
);
4539 __SECONDARY_RELOAD_CASE (TI
, ti
);
4540 __SECONDARY_RELOAD_CASE (SF
, sf
);
4541 __SECONDARY_RELOAD_CASE (DF
, df
);
4542 __SECONDARY_RELOAD_CASE (TF
, tf
);
4543 __SECONDARY_RELOAD_CASE (SD
, sd
);
4544 __SECONDARY_RELOAD_CASE (DD
, dd
);
4545 __SECONDARY_RELOAD_CASE (TD
, td
);
4546 __SECONDARY_RELOAD_CASE (V1QI
, v1qi
);
4547 __SECONDARY_RELOAD_CASE (V2QI
, v2qi
);
4548 __SECONDARY_RELOAD_CASE (V4QI
, v4qi
);
4549 __SECONDARY_RELOAD_CASE (V8QI
, v8qi
);
4550 __SECONDARY_RELOAD_CASE (V16QI
, v16qi
);
4551 __SECONDARY_RELOAD_CASE (V1HI
, v1hi
);
4552 __SECONDARY_RELOAD_CASE (V2HI
, v2hi
);
4553 __SECONDARY_RELOAD_CASE (V4HI
, v4hi
);
4554 __SECONDARY_RELOAD_CASE (V8HI
, v8hi
);
4555 __SECONDARY_RELOAD_CASE (V1SI
, v1si
);
4556 __SECONDARY_RELOAD_CASE (V2SI
, v2si
);
4557 __SECONDARY_RELOAD_CASE (V4SI
, v4si
);
4558 __SECONDARY_RELOAD_CASE (V1DI
, v1di
);
4559 __SECONDARY_RELOAD_CASE (V2DI
, v2di
);
4560 __SECONDARY_RELOAD_CASE (V1TI
, v1ti
);
4561 __SECONDARY_RELOAD_CASE (V1SF
, v1sf
);
4562 __SECONDARY_RELOAD_CASE (V2SF
, v2sf
);
4563 __SECONDARY_RELOAD_CASE (V4SF
, v4sf
);
4564 __SECONDARY_RELOAD_CASE (V1DF
, v1df
);
4565 __SECONDARY_RELOAD_CASE (V2DF
, v2df
);
4566 __SECONDARY_RELOAD_CASE (V1TF
, v1tf
);
4570 #undef __SECONDARY_RELOAD_CASE
4574 /* We need a scratch register when loading a PLUS expression which
4575 is not a legitimate operand of the LOAD ADDRESS instruction. */
4576 /* LRA can deal with transformation of plus op very well -- so we
4577 don't need to prompt LRA in this case. */
4578 if (! lra_in_progress
&& in_p
&& s390_plus_operand (x
, mode
))
4579 sri
->icode
= (TARGET_64BIT
?
4580 CODE_FOR_reloaddi_plus
: CODE_FOR_reloadsi_plus
);
4582 /* Performing a multiword move from or to memory we have to make sure the
4583 second chunk in memory is addressable without causing a displacement
4584 overflow. If that would be the case we calculate the address in
4585 a scratch register. */
4587 && GET_CODE (XEXP (x
, 0)) == PLUS
4588 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4589 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x
, 0), 1))
4590 + GET_MODE_SIZE (mode
) - 1))
4592 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4593 in a s_operand address since we may fallback to lm/stm. So we only
4594 have to care about overflows in the b+i+d case. */
4595 if ((reg_classes_intersect_p (GENERAL_REGS
, rclass
)
4596 && s390_class_max_nregs (GENERAL_REGS
, mode
) > 1
4597 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == PLUS
)
4598 /* For FP_REGS no lm/stm is available so this check is triggered
4599 for displacement overflows in b+i+d and b+d like addresses. */
4600 || (reg_classes_intersect_p (FP_REGS
, rclass
)
4601 && s390_class_max_nregs (FP_REGS
, mode
) > 1))
4604 sri
->icode
= (TARGET_64BIT
?
4605 CODE_FOR_reloaddi_la_in
:
4606 CODE_FOR_reloadsi_la_in
);
4608 sri
->icode
= (TARGET_64BIT
?
4609 CODE_FOR_reloaddi_la_out
:
4610 CODE_FOR_reloadsi_la_out
);
4614 /* A scratch address register is needed when a symbolic constant is
4615 copied to r0 compiling with -fPIC. In other cases the target
4616 register might be used as temporary (see legitimize_pic_address). */
4617 if (in_p
&& SYMBOLIC_CONST (x
) && flag_pic
== 2 && rclass
!= ADDR_REGS
)
4618 sri
->icode
= (TARGET_64BIT
?
4619 CODE_FOR_reloaddi_PIC_addr
:
4620 CODE_FOR_reloadsi_PIC_addr
);
4622 /* Either scratch or no register needed. */
4626 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4628 We need secondary memory to move data between GPRs and FPRs.
4630 - With DFP the ldgr lgdr instructions are available. Due to the
4631 different alignment we cannot use them for SFmode. For 31 bit a
4632 64 bit value in GPR would be a register pair so here we still
4633 need to go via memory.
4635 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4636 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637 in full VRs so as before also on z13 we do these moves via
4640 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4643 s390_secondary_memory_needed (machine_mode mode
,
4644 reg_class_t class1
, reg_class_t class2
)
4646 return (((reg_classes_intersect_p (class1
, VEC_REGS
)
4647 && reg_classes_intersect_p (class2
, GENERAL_REGS
))
4648 || (reg_classes_intersect_p (class1
, GENERAL_REGS
)
4649 && reg_classes_intersect_p (class2
, VEC_REGS
)))
4650 && (TARGET_TPF
|| !TARGET_DFP
|| !TARGET_64BIT
4651 || GET_MODE_SIZE (mode
) != 8)
4652 && (!TARGET_VX
|| (SCALAR_FLOAT_MODE_P (mode
)
4653 && GET_MODE_SIZE (mode
) > 8)));
4656 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4658 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4659 because the movsi and movsf patterns don't handle r/f moves. */
4662 s390_secondary_memory_needed_mode (machine_mode mode
)
4664 if (GET_MODE_BITSIZE (mode
) < 32)
4665 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
4669 /* Generate code to load SRC, which is PLUS that is not a
4670 legitimate operand for the LA instruction, into TARGET.
4671 SCRATCH may be used as scratch register. */
4674 s390_expand_plus_operand (rtx target
, rtx src
,
4678 struct s390_address ad
;
4680 /* src must be a PLUS; get its two operands. */
4681 gcc_assert (GET_CODE (src
) == PLUS
);
4682 gcc_assert (GET_MODE (src
) == Pmode
);
4684 /* Check if any of the two operands is already scheduled
4685 for replacement by reload. This can happen e.g. when
4686 float registers occur in an address. */
4687 sum1
= find_replacement (&XEXP (src
, 0));
4688 sum2
= find_replacement (&XEXP (src
, 1));
4689 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4691 /* If the address is already strictly valid, there's nothing to do. */
4692 if (!s390_decompose_address (src
, &ad
)
4693 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4694 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
4696 /* Otherwise, one of the operands cannot be an address register;
4697 we reload its value into the scratch register. */
4698 if (true_regnum (sum1
) < 1 || true_regnum (sum1
) > 15)
4700 emit_move_insn (scratch
, sum1
);
4703 if (true_regnum (sum2
) < 1 || true_regnum (sum2
) > 15)
4705 emit_move_insn (scratch
, sum2
);
4709 /* According to the way these invalid addresses are generated
4710 in reload.c, it should never happen (at least on s390) that
4711 *neither* of the PLUS components, after find_replacements
4712 was applied, is an address register. */
4713 if (sum1
== scratch
&& sum2
== scratch
)
4719 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4722 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4723 is only ever performed on addresses, so we can mark the
4724 sum as legitimate for LA in any case. */
4725 s390_load_address (target
, src
);
4729 /* Return true if ADDR is a valid memory address.
4730 STRICT specifies whether strict register checking applies. */
4733 s390_legitimate_address_p (machine_mode mode
, rtx addr
, bool strict
)
4735 struct s390_address ad
;
4738 && larl_operand (addr
, VOIDmode
)
4739 && (mode
== VOIDmode
4740 || s390_check_symref_alignment (addr
, GET_MODE_SIZE (mode
))))
4743 if (!s390_decompose_address (addr
, &ad
))
4746 /* The vector memory instructions only support short displacements.
4747 Reject invalid displacements early to prevent plenty of lay
4748 instructions to be generated later which then cannot be merged
4751 && VECTOR_MODE_P (mode
)
4752 && ad
.disp
!= NULL_RTX
4753 && CONST_INT_P (ad
.disp
)
4754 && !SHORT_DISP_IN_RANGE (INTVAL (ad
.disp
)))
4759 if (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4762 if (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
)))
4768 && !(REGNO (ad
.base
) >= FIRST_PSEUDO_REGISTER
4769 || REGNO_REG_CLASS (REGNO (ad
.base
)) == ADDR_REGS
))
4773 && !(REGNO (ad
.indx
) >= FIRST_PSEUDO_REGISTER
4774 || REGNO_REG_CLASS (REGNO (ad
.indx
)) == ADDR_REGS
))
4780 /* Return true if OP is a valid operand for the LA instruction.
4781 In 31-bit, we need to prove that the result is used as an
4782 address, as LA performs only a 31-bit addition. */
4785 legitimate_la_operand_p (rtx op
)
4787 struct s390_address addr
;
4788 if (!s390_decompose_address (op
, &addr
))
4791 return (TARGET_64BIT
|| addr
.pointer
);
4794 /* Return true if it is valid *and* preferable to use LA to
4795 compute the sum of OP1 and OP2. */
4798 preferred_la_operand_p (rtx op1
, rtx op2
)
4800 struct s390_address addr
;
4802 if (op2
!= const0_rtx
)
4803 op1
= gen_rtx_PLUS (Pmode
, op1
, op2
);
4805 if (!s390_decompose_address (op1
, &addr
))
4807 if (addr
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (addr
.base
)))
4809 if (addr
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (addr
.indx
)))
4812 /* Avoid LA instructions with index (and base) register on z196 or
4813 later; it is preferable to use regular add instructions when
4814 possible. Starting with zEC12 the la with index register is
4815 "uncracked" again but still slower than a regular add. */
4816 if (addr
.indx
&& s390_tune
>= PROCESSOR_2817_Z196
)
4819 if (!TARGET_64BIT
&& !addr
.pointer
)
4825 if ((addr
.base
&& REG_P (addr
.base
) && REG_POINTER (addr
.base
))
4826 || (addr
.indx
&& REG_P (addr
.indx
) && REG_POINTER (addr
.indx
)))
4832 /* Emit a forced load-address operation to load SRC into DST.
4833 This will use the LOAD ADDRESS instruction even in situations
4834 where legitimate_la_operand_p (SRC) returns false. */
4837 s390_load_address (rtx dst
, rtx src
)
4840 emit_move_insn (dst
, src
);
4842 emit_insn (gen_force_la_31 (dst
, src
));
4845 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4848 s390_rel_address_ok_p (rtx symbol_ref
)
4852 if (symbol_ref
== s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref
))
4855 decl
= SYMBOL_REF_DECL (symbol_ref
);
4857 if (!flag_pic
|| SYMBOL_REF_LOCAL_P (symbol_ref
))
4858 return (s390_pic_data_is_text_relative
4860 && TREE_CODE (decl
) == FUNCTION_DECL
));
4865 /* Return a legitimate reference for ORIG (an address) using the
4866 register REG. If REG is 0, a new pseudo is generated.
4868 There are two types of references that must be handled:
4870 1. Global data references must load the address from the GOT, via
4871 the PIC reg. An insn is emitted to do this load, and the reg is
4874 2. Static data references, constant pool addresses, and code labels
4875 compute the address as an offset from the GOT, whose base is in
4876 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4877 differentiate them from global data objects. The returned
4878 address is the PIC reg + an unspec constant.
4880 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881 reg also appears in the address. */
4884 legitimize_pic_address (rtx orig
, rtx reg
)
4887 rtx addend
= const0_rtx
;
4890 gcc_assert (!TLS_SYMBOLIC_CONST (addr
));
4892 if (GET_CODE (addr
) == CONST
)
4893 addr
= XEXP (addr
, 0);
4895 if (GET_CODE (addr
) == PLUS
)
4897 addend
= XEXP (addr
, 1);
4898 addr
= XEXP (addr
, 0);
4901 if ((GET_CODE (addr
) == LABEL_REF
4902 || (SYMBOL_REF_P (addr
) && s390_rel_address_ok_p (addr
))
4903 || (GET_CODE (addr
) == UNSPEC
&&
4904 (XINT (addr
, 1) == UNSPEC_GOTENT
4905 || XINT (addr
, 1) == UNSPEC_PLT
)))
4906 && GET_CODE (addend
) == CONST_INT
)
4908 /* This can be locally addressed. */
4910 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4911 rtx const_addr
= (GET_CODE (addr
) == UNSPEC
?
4912 gen_rtx_CONST (Pmode
, addr
) : addr
);
4914 if (larl_operand (const_addr
, VOIDmode
)
4915 && INTVAL (addend
) < HOST_WIDE_INT_1
<< 31
4916 && INTVAL (addend
) >= -(HOST_WIDE_INT_1
<< 31))
4918 if (INTVAL (addend
) & 1)
4920 /* LARL can't handle odd offsets, so emit a pair of LARL
4922 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4924 if (!DISP_IN_RANGE (INTVAL (addend
)))
4926 HOST_WIDE_INT even
= INTVAL (addend
) - 1;
4927 addr
= gen_rtx_PLUS (Pmode
, addr
, GEN_INT (even
));
4928 addr
= gen_rtx_CONST (Pmode
, addr
);
4929 addend
= const1_rtx
;
4932 emit_move_insn (temp
, addr
);
4933 new_rtx
= gen_rtx_PLUS (Pmode
, temp
, addend
);
4937 s390_load_address (reg
, new_rtx
);
4943 /* If the offset is even, we can just use LARL. This
4944 will happen automatically. */
4949 /* No larl - Access local symbols relative to the GOT. */
4951 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4953 if (reload_in_progress
|| reload_completed
)
4954 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4956 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
4957 if (addend
!= const0_rtx
)
4958 addr
= gen_rtx_PLUS (Pmode
, addr
, addend
);
4959 addr
= gen_rtx_CONST (Pmode
, addr
);
4960 addr
= force_const_mem (Pmode
, addr
);
4961 emit_move_insn (temp
, addr
);
4963 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
4966 s390_load_address (reg
, new_rtx
);
4971 else if (GET_CODE (addr
) == SYMBOL_REF
&& addend
== const0_rtx
)
4973 /* A non-local symbol reference without addend.
4975 The symbol ref is wrapped into an UNSPEC to make sure the
4976 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977 This will tell the linker to put the symbol into the GOT.
4979 Additionally the code dereferencing the GOT slot is emitted here.
4981 An addend to the symref needs to be added afterwards.
4982 legitimize_pic_address calls itself recursively to handle
4983 that case. So no need to do it here. */
4986 reg
= gen_reg_rtx (Pmode
);
4990 /* Use load relative if possible.
4991 lgrl <target>, sym@GOTENT */
4992 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
4993 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
4994 new_rtx
= gen_const_mem (GET_MODE (reg
), new_rtx
);
4996 emit_move_insn (reg
, new_rtx
);
4999 else if (flag_pic
== 1)
5001 /* Assume GOT offset is a valid displacement operand (< 4k
5002 or < 512k with z990). This is handled the same way in
5003 both 31- and 64-bit code (@GOT).
5004 lg <target>, sym@GOT(r12) */
5006 if (reload_in_progress
|| reload_completed
)
5007 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5009 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5010 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5011 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5012 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5013 emit_move_insn (reg
, new_rtx
);
5018 /* If the GOT offset might be >= 4k, we determine the position
5019 of the GOT entry via a PC-relative LARL (@GOTENT).
5020 larl temp, sym@GOTENT
5021 lg <target>, 0(temp) */
5023 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
5025 gcc_assert (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
5026 || REGNO_REG_CLASS (REGNO (temp
)) == ADDR_REGS
);
5028 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
5029 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5030 emit_move_insn (temp
, new_rtx
);
5031 new_rtx
= gen_const_mem (Pmode
, temp
);
5032 emit_move_insn (reg
, new_rtx
);
5037 else if (GET_CODE (addr
) == UNSPEC
&& GET_CODE (addend
) == CONST_INT
)
5039 gcc_assert (XVECLEN (addr
, 0) == 1);
5040 switch (XINT (addr
, 1))
5042 /* These address symbols (or PLT slots) relative to the GOT
5043 (not GOT slots!). In general this will exceed the
5044 displacement range so these value belong into the literal
5048 new_rtx
= force_const_mem (Pmode
, orig
);
5051 /* For -fPIC the GOT size might exceed the displacement
5052 range so make sure the value is in the literal pool. */
5055 new_rtx
= force_const_mem (Pmode
, orig
);
5058 /* For @GOTENT larl is used. This is handled like local
5064 /* For @PLT larl is used. This is handled like local
5070 /* Everything else cannot happen. */
5075 else if (addend
!= const0_rtx
)
5077 /* Otherwise, compute the sum. */
5079 rtx base
= legitimize_pic_address (addr
, reg
);
5080 new_rtx
= legitimize_pic_address (addend
,
5081 base
== reg
? NULL_RTX
: reg
);
5082 if (GET_CODE (new_rtx
) == CONST_INT
)
5083 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
5086 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
5088 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
5089 new_rtx
= XEXP (new_rtx
, 1);
5091 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
5094 if (GET_CODE (new_rtx
) == CONST
)
5095 new_rtx
= XEXP (new_rtx
, 0);
5096 new_rtx
= force_operand (new_rtx
, 0);
5102 /* Load the thread pointer into a register. */
5105 s390_get_thread_pointer (void)
5107 rtx tp
= gen_reg_rtx (Pmode
);
5109 emit_insn (gen_get_thread_pointer (Pmode
, tp
));
5111 mark_reg_pointer (tp
, BITS_PER_WORD
);
5116 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5117 in s390_tls_symbol which always refers to __tls_get_offset.
5118 The returned offset is written to RESULT_REG and an USE rtx is
5119 generated for TLS_CALL. */
5121 static GTY(()) rtx s390_tls_symbol
;
5124 s390_emit_tls_call_insn (rtx result_reg
, rtx tls_call
)
5129 emit_insn (s390_load_got ());
5131 if (!s390_tls_symbol
)
5132 s390_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tls_get_offset");
5134 insn
= s390_emit_call (s390_tls_symbol
, tls_call
, result_reg
,
5135 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
5137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), result_reg
);
5138 RTL_CONST_CALL_P (insn
) = 1;
5141 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5142 this (thread-local) address. REG may be used as temporary. */
5145 legitimize_tls_address (rtx addr
, rtx reg
)
5147 rtx new_rtx
, tls_call
, temp
, base
, r2
;
5150 if (GET_CODE (addr
) == SYMBOL_REF
)
5151 switch (tls_symbolic_operand (addr
))
5153 case TLS_MODEL_GLOBAL_DYNAMIC
:
5155 r2
= gen_rtx_REG (Pmode
, 2);
5156 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_TLSGD
);
5157 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5158 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5159 emit_move_insn (r2
, new_rtx
);
5160 s390_emit_tls_call_insn (r2
, tls_call
);
5161 insn
= get_insns ();
5164 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5165 temp
= gen_reg_rtx (Pmode
);
5166 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5168 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5171 s390_load_address (reg
, new_rtx
);
5176 case TLS_MODEL_LOCAL_DYNAMIC
:
5178 r2
= gen_rtx_REG (Pmode
, 2);
5179 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM
);
5180 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5181 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5182 emit_move_insn (r2
, new_rtx
);
5183 s390_emit_tls_call_insn (r2
, tls_call
);
5184 insn
= get_insns ();
5187 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM_NTPOFF
);
5188 temp
= gen_reg_rtx (Pmode
);
5189 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5191 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5192 base
= gen_reg_rtx (Pmode
);
5193 s390_load_address (base
, new_rtx
);
5195 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_DTPOFF
);
5196 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5197 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5198 temp
= gen_reg_rtx (Pmode
);
5199 emit_move_insn (temp
, new_rtx
);
5201 new_rtx
= gen_rtx_PLUS (Pmode
, base
, temp
);
5204 s390_load_address (reg
, new_rtx
);
5209 case TLS_MODEL_INITIAL_EXEC
:
5212 /* Assume GOT offset < 4k. This is handled the same way
5213 in both 31- and 64-bit code. */
5215 if (reload_in_progress
|| reload_completed
)
5216 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5218 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTNTPOFF
);
5219 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5220 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5221 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5222 temp
= gen_reg_rtx (Pmode
);
5223 emit_move_insn (temp
, new_rtx
);
5227 /* If the GOT offset might be >= 4k, we determine the position
5228 of the GOT entry via a PC-relative LARL. */
5230 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_INDNTPOFF
);
5231 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5232 temp
= gen_reg_rtx (Pmode
);
5233 emit_move_insn (temp
, new_rtx
);
5235 new_rtx
= gen_const_mem (Pmode
, temp
);
5236 temp
= gen_reg_rtx (Pmode
);
5237 emit_move_insn (temp
, new_rtx
);
5240 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5243 s390_load_address (reg
, new_rtx
);
5248 case TLS_MODEL_LOCAL_EXEC
:
5249 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5250 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5251 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5252 temp
= gen_reg_rtx (Pmode
);
5253 emit_move_insn (temp
, new_rtx
);
5255 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5258 s390_load_address (reg
, new_rtx
);
5267 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == UNSPEC
)
5269 switch (XINT (XEXP (addr
, 0), 1))
5271 case UNSPEC_INDNTPOFF
:
5280 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
5281 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
5283 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5284 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5285 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5287 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5288 new_rtx
= plus_constant (Pmode
, new_rtx
,
5289 INTVAL (XEXP (XEXP (addr
, 0), 1)));
5290 new_rtx
= force_operand (new_rtx
, 0);
5294 gcc_unreachable (); /* for now ... */
5299 /* Emit insns making the address in operands[1] valid for a standard
5300 move to operands[0]. operands[1] is replaced by an address which
5301 should be used instead of the former RTX to emit the move
5305 emit_symbolic_move (rtx
*operands
)
5307 rtx temp
= !can_create_pseudo_p () ? operands
[0] : gen_reg_rtx (Pmode
);
5309 if (GET_CODE (operands
[0]) == MEM
)
5310 operands
[1] = force_reg (Pmode
, operands
[1]);
5311 else if (TLS_SYMBOLIC_CONST (operands
[1]))
5312 operands
[1] = legitimize_tls_address (operands
[1], temp
);
5314 operands
[1] = legitimize_pic_address (operands
[1], temp
);
5317 /* Try machine-dependent ways of modifying an illegitimate address X
5318 to be legitimate. If we find one, return the new, valid address.
5320 OLDX is the address as it was before break_out_memory_refs was called.
5321 In some cases it is useful to look at this to decide what needs to be done.
5323 MODE is the mode of the operand pointed to by X.
5325 When -fpic is used, special handling is needed for symbolic references.
5326 See comments by legitimize_pic_address for details. */
5329 s390_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
5330 machine_mode mode ATTRIBUTE_UNUSED
)
5332 rtx constant_term
= const0_rtx
;
5334 if (TLS_SYMBOLIC_CONST (x
))
5336 x
= legitimize_tls_address (x
, 0);
5338 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5341 else if (GET_CODE (x
) == PLUS
5342 && (TLS_SYMBOLIC_CONST (XEXP (x
, 0))
5343 || TLS_SYMBOLIC_CONST (XEXP (x
, 1))))
5349 if (SYMBOLIC_CONST (x
)
5350 || (GET_CODE (x
) == PLUS
5351 && (SYMBOLIC_CONST (XEXP (x
, 0))
5352 || SYMBOLIC_CONST (XEXP (x
, 1)))))
5353 x
= legitimize_pic_address (x
, 0);
5355 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5359 x
= eliminate_constant_term (x
, &constant_term
);
5361 /* Optimize loading of large displacements by splitting them
5362 into the multiple of 4K and the rest; this allows the
5363 former to be CSE'd if possible.
5365 Don't do this if the displacement is added to a register
5366 pointing into the stack frame, as the offsets will
5367 change later anyway. */
5369 if (GET_CODE (constant_term
) == CONST_INT
5370 && !TARGET_LONG_DISPLACEMENT
5371 && !DISP_IN_RANGE (INTVAL (constant_term
))
5372 && !(REG_P (x
) && REGNO_PTR_FRAME_P (REGNO (x
))))
5374 HOST_WIDE_INT lower
= INTVAL (constant_term
) & 0xfff;
5375 HOST_WIDE_INT upper
= INTVAL (constant_term
) ^ lower
;
5377 rtx temp
= gen_reg_rtx (Pmode
);
5378 rtx val
= force_operand (GEN_INT (upper
), temp
);
5380 emit_move_insn (temp
, val
);
5382 x
= gen_rtx_PLUS (Pmode
, x
, temp
);
5383 constant_term
= GEN_INT (lower
);
5386 if (GET_CODE (x
) == PLUS
)
5388 if (GET_CODE (XEXP (x
, 0)) == REG
)
5390 rtx temp
= gen_reg_rtx (Pmode
);
5391 rtx val
= force_operand (XEXP (x
, 1), temp
);
5393 emit_move_insn (temp
, val
);
5395 x
= gen_rtx_PLUS (Pmode
, XEXP (x
, 0), temp
);
5398 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5400 rtx temp
= gen_reg_rtx (Pmode
);
5401 rtx val
= force_operand (XEXP (x
, 0), temp
);
5403 emit_move_insn (temp
, val
);
5405 x
= gen_rtx_PLUS (Pmode
, temp
, XEXP (x
, 1));
5409 if (constant_term
!= const0_rtx
)
5410 x
= gen_rtx_PLUS (Pmode
, x
, constant_term
);
5415 /* Try a machine-dependent way of reloading an illegitimate address AD
5416 operand. If we find one, push the reload and return the new address.
5418 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5419 and TYPE is the reload type of the current reload. */
5422 legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
5423 int opnum
, int type
)
5425 if (!optimize
|| TARGET_LONG_DISPLACEMENT
)
5428 if (GET_CODE (ad
) == PLUS
)
5430 rtx tem
= simplify_binary_operation (PLUS
, Pmode
,
5431 XEXP (ad
, 0), XEXP (ad
, 1));
5436 if (GET_CODE (ad
) == PLUS
5437 && GET_CODE (XEXP (ad
, 0)) == REG
5438 && GET_CODE (XEXP (ad
, 1)) == CONST_INT
5439 && !DISP_IN_RANGE (INTVAL (XEXP (ad
, 1))))
5441 HOST_WIDE_INT lower
= INTVAL (XEXP (ad
, 1)) & 0xfff;
5442 HOST_WIDE_INT upper
= INTVAL (XEXP (ad
, 1)) ^ lower
;
5443 rtx cst
, tem
, new_rtx
;
5445 cst
= GEN_INT (upper
);
5446 if (!legitimate_reload_constant_p (cst
))
5447 cst
= force_const_mem (Pmode
, cst
);
5449 tem
= gen_rtx_PLUS (Pmode
, XEXP (ad
, 0), cst
);
5450 new_rtx
= gen_rtx_PLUS (Pmode
, tem
, GEN_INT (lower
));
5452 push_reload (XEXP (tem
, 1), 0, &XEXP (tem
, 1), 0,
5453 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
5454 opnum
, (enum reload_type
) type
);
5461 /* Emit code to move LEN bytes from DST to SRC. */
5464 s390_expand_cpymem (rtx dst
, rtx src
, rtx len
)
5466 /* When tuning for z10 or higher we rely on the Glibc functions to
5467 do the right thing. Only for constant lengths below 64k we will
5468 generate inline code. */
5469 if (s390_tune
>= PROCESSOR_2097_Z10
5470 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5473 /* Expand memcpy for constant length operands without a loop if it
5474 is shorter that way.
5476 With a constant length argument a
5477 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5478 if (GET_CODE (len
) == CONST_INT
5479 && INTVAL (len
) >= 0
5480 && INTVAL (len
) <= 256 * 6
5481 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5485 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5487 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5488 rtx newsrc
= adjust_address (src
, BLKmode
, o
);
5489 emit_insn (gen_cpymem_short (newdst
, newsrc
,
5490 GEN_INT (l
> 256 ? 255 : l
- 1)));
5494 else if (TARGET_MVCLE
)
5496 emit_insn (gen_cpymem_long (dst
, src
, convert_to_mode (Pmode
, len
, 1)));
5501 rtx dst_addr
, src_addr
, count
, blocks
, temp
;
5502 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5503 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5504 rtx_code_label
*end_label
= gen_label_rtx ();
5507 mode
= GET_MODE (len
);
5508 if (mode
== VOIDmode
)
5511 dst_addr
= gen_reg_rtx (Pmode
);
5512 src_addr
= gen_reg_rtx (Pmode
);
5513 count
= gen_reg_rtx (mode
);
5514 blocks
= gen_reg_rtx (mode
);
5516 convert_move (count
, len
, 1);
5517 emit_cmp_and_jump_insns (count
, const0_rtx
,
5518 EQ
, NULL_RTX
, mode
, 1, end_label
);
5520 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5521 emit_move_insn (src_addr
, force_operand (XEXP (src
, 0), NULL_RTX
));
5522 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5523 src
= change_address (src
, VOIDmode
, src_addr
);
5525 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5528 emit_move_insn (count
, temp
);
5530 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5533 emit_move_insn (blocks
, temp
);
5535 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5536 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5538 emit_label (loop_start_label
);
5541 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 768))
5545 /* Issue a read prefetch for the +3 cache line. */
5546 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (768)),
5547 const0_rtx
, const0_rtx
);
5548 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5549 emit_insn (prefetch
);
5551 /* Issue a write prefetch for the +3 cache line. */
5552 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (768)),
5553 const1_rtx
, const0_rtx
);
5554 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5555 emit_insn (prefetch
);
5558 emit_insn (gen_cpymem_short (dst
, src
, GEN_INT (255)));
5559 s390_load_address (dst_addr
,
5560 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5561 s390_load_address (src_addr
,
5562 gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (256)));
5564 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5567 emit_move_insn (blocks
, temp
);
5569 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5570 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5572 emit_jump (loop_start_label
);
5573 emit_label (loop_end_label
);
5575 emit_insn (gen_cpymem_short (dst
, src
,
5576 convert_to_mode (Pmode
, count
, 1)));
5577 emit_label (end_label
);
5582 /* Emit code to set LEN bytes at DST to VAL.
5583 Make use of clrmem if VAL is zero. */
5586 s390_expand_setmem (rtx dst
, rtx len
, rtx val
)
5588 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) <= 0)
5591 gcc_assert (GET_CODE (val
) == CONST_INT
|| GET_MODE (val
) == QImode
);
5593 /* Expand setmem/clrmem for a constant length operand without a
5594 loop if it will be shorter that way.
5595 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5596 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5598 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5599 if (GET_CODE (len
) == CONST_INT
5600 && ((val
== const0_rtx
5601 && (INTVAL (len
) <= 256 * 4
5602 || (INTVAL (len
) <= 256 * 5 && TARGET_SETMEM_PFD(val
,len
))))
5603 || (val
!= const0_rtx
&& INTVAL (len
) <= 257 * 4))
5604 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5608 if (val
== const0_rtx
)
5609 /* clrmem: emit 256 byte blockwise XCs. */
5610 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5612 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5613 emit_insn (gen_clrmem_short (newdst
,
5614 GEN_INT (l
> 256 ? 255 : l
- 1)));
5617 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5618 setting first byte to val and using a 256 byte mvc with one
5619 byte overlap to propagate the byte. */
5620 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 257, o
+= 257)
5622 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5623 emit_move_insn (adjust_address (dst
, QImode
, o
), val
);
5626 rtx newdstp1
= adjust_address (dst
, BLKmode
, o
+ 1);
5627 emit_insn (gen_cpymem_short (newdstp1
, newdst
,
5628 GEN_INT (l
> 257 ? 255 : l
- 2)));
5633 else if (TARGET_MVCLE
)
5635 val
= force_not_mem (convert_modes (Pmode
, QImode
, val
, 1));
5637 emit_insn (gen_setmem_long_di (dst
, convert_to_mode (Pmode
, len
, 1),
5640 emit_insn (gen_setmem_long_si (dst
, convert_to_mode (Pmode
, len
, 1),
5646 rtx dst_addr
, count
, blocks
, temp
, dstp1
= NULL_RTX
;
5647 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5648 rtx_code_label
*onebyte_end_label
= gen_label_rtx ();
5649 rtx_code_label
*zerobyte_end_label
= gen_label_rtx ();
5650 rtx_code_label
*restbyte_end_label
= gen_label_rtx ();
5653 mode
= GET_MODE (len
);
5654 if (mode
== VOIDmode
)
5657 dst_addr
= gen_reg_rtx (Pmode
);
5658 count
= gen_reg_rtx (mode
);
5659 blocks
= gen_reg_rtx (mode
);
5661 convert_move (count
, len
, 1);
5662 emit_cmp_and_jump_insns (count
, const0_rtx
,
5663 EQ
, NULL_RTX
, mode
, 1, zerobyte_end_label
,
5664 profile_probability::very_unlikely ());
5666 /* We need to make a copy of the target address since memset is
5667 supposed to return it unmodified. We have to make it here
5668 already since the new reg is used at onebyte_end_label. */
5669 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5670 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5672 if (val
!= const0_rtx
)
5674 /* When using the overlapping mvc the original target
5675 address is only accessed as single byte entity (even by
5676 the mvc reading this value). */
5677 set_mem_size (dst
, 1);
5678 dstp1
= adjust_address (dst
, VOIDmode
, 1);
5679 emit_cmp_and_jump_insns (count
,
5680 const1_rtx
, EQ
, NULL_RTX
, mode
, 1,
5682 profile_probability::very_unlikely ());
5685 /* There is one unconditional (mvi+mvc)/xc after the loop
5686 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5687 or one (xc) here leaves this number of bytes to be handled by
5689 temp
= expand_binop (mode
, add_optab
, count
,
5690 val
== const0_rtx
? constm1_rtx
: GEN_INT (-2),
5691 count
, 1, OPTAB_DIRECT
);
5693 emit_move_insn (count
, temp
);
5695 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5698 emit_move_insn (blocks
, temp
);
5700 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5701 EQ
, NULL_RTX
, mode
, 1, restbyte_end_label
);
5703 emit_jump (loop_start_label
);
5705 if (val
!= const0_rtx
)
5707 /* The 1 byte != 0 special case. Not handled efficiently
5708 since we require two jumps for that. However, this
5709 should be very rare. */
5710 emit_label (onebyte_end_label
);
5711 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5712 emit_jump (zerobyte_end_label
);
5715 emit_label (loop_start_label
);
5717 if (TARGET_SETMEM_PFD (val
, len
))
5719 /* Issue a write prefetch. */
5720 rtx distance
= GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE
);
5721 rtx prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, distance
),
5722 const1_rtx
, const0_rtx
);
5723 emit_insn (prefetch
);
5724 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5727 if (val
== const0_rtx
)
5728 emit_insn (gen_clrmem_short (dst
, GEN_INT (255)));
5731 /* Set the first byte in the block to the value and use an
5732 overlapping mvc for the block. */
5733 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5734 emit_insn (gen_cpymem_short (dstp1
, dst
, GEN_INT (254)));
5736 s390_load_address (dst_addr
,
5737 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5739 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5742 emit_move_insn (blocks
, temp
);
5744 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5745 NE
, NULL_RTX
, mode
, 1, loop_start_label
);
5747 emit_label (restbyte_end_label
);
5749 if (val
== const0_rtx
)
5750 emit_insn (gen_clrmem_short (dst
, convert_to_mode (Pmode
, count
, 1)));
5753 /* Set the first byte in the block to the value and use an
5754 overlapping mvc for the block. */
5755 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5756 /* execute only uses the lowest 8 bits of count that's
5757 exactly what we need here. */
5758 emit_insn (gen_cpymem_short (dstp1
, dst
,
5759 convert_to_mode (Pmode
, count
, 1)));
5762 emit_label (zerobyte_end_label
);
5766 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5767 and return the result in TARGET. */
5770 s390_expand_cmpmem (rtx target
, rtx op0
, rtx op1
, rtx len
)
5772 rtx ccreg
= gen_rtx_REG (CCUmode
, CC_REGNUM
);
5775 /* When tuning for z10 or higher we rely on the Glibc functions to
5776 do the right thing. Only for constant lengths below 64k we will
5777 generate inline code. */
5778 if (s390_tune
>= PROCESSOR_2097_Z10
5779 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5782 /* As the result of CMPINT is inverted compared to what we need,
5783 we have to swap the operands. */
5784 tmp
= op0
; op0
= op1
; op1
= tmp
;
5786 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) >= 0 && INTVAL (len
) <= 256)
5788 if (INTVAL (len
) > 0)
5790 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (INTVAL (len
) - 1)));
5791 emit_insn (gen_cmpint (target
, ccreg
));
5794 emit_move_insn (target
, const0_rtx
);
5796 else if (TARGET_MVCLE
)
5798 emit_insn (gen_cmpmem_long (op0
, op1
, convert_to_mode (Pmode
, len
, 1)));
5799 emit_insn (gen_cmpint (target
, ccreg
));
5803 rtx addr0
, addr1
, count
, blocks
, temp
;
5804 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5805 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5806 rtx_code_label
*end_label
= gen_label_rtx ();
5809 mode
= GET_MODE (len
);
5810 if (mode
== VOIDmode
)
5813 addr0
= gen_reg_rtx (Pmode
);
5814 addr1
= gen_reg_rtx (Pmode
);
5815 count
= gen_reg_rtx (mode
);
5816 blocks
= gen_reg_rtx (mode
);
5818 convert_move (count
, len
, 1);
5819 emit_cmp_and_jump_insns (count
, const0_rtx
,
5820 EQ
, NULL_RTX
, mode
, 1, end_label
);
5822 emit_move_insn (addr0
, force_operand (XEXP (op0
, 0), NULL_RTX
));
5823 emit_move_insn (addr1
, force_operand (XEXP (op1
, 0), NULL_RTX
));
5824 op0
= change_address (op0
, VOIDmode
, addr0
);
5825 op1
= change_address (op1
, VOIDmode
, addr1
);
5827 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5830 emit_move_insn (count
, temp
);
5832 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5835 emit_move_insn (blocks
, temp
);
5837 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5838 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5840 emit_label (loop_start_label
);
5843 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 512))
5847 /* Issue a read prefetch for the +2 cache line of operand 1. */
5848 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (512)),
5849 const0_rtx
, const0_rtx
);
5850 emit_insn (prefetch
);
5851 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5853 /* Issue a read prefetch for the +2 cache line of operand 2. */
5854 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (512)),
5855 const0_rtx
, const0_rtx
);
5856 emit_insn (prefetch
);
5857 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5860 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (255)));
5861 temp
= gen_rtx_NE (VOIDmode
, ccreg
, const0_rtx
);
5862 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5863 gen_rtx_LABEL_REF (VOIDmode
, end_label
), pc_rtx
);
5864 temp
= gen_rtx_SET (pc_rtx
, temp
);
5865 emit_jump_insn (temp
);
5867 s390_load_address (addr0
,
5868 gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (256)));
5869 s390_load_address (addr1
,
5870 gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (256)));
5872 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5875 emit_move_insn (blocks
, temp
);
5877 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5878 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5880 emit_jump (loop_start_label
);
5881 emit_label (loop_end_label
);
5883 emit_insn (gen_cmpmem_short (op0
, op1
,
5884 convert_to_mode (Pmode
, count
, 1)));
5885 emit_label (end_label
);
5887 emit_insn (gen_cmpint (target
, ccreg
));
5892 /* Emit a conditional jump to LABEL for condition code mask MASK using
5893 comparsion operator COMPARISON. Return the emitted jump insn. */
5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask
, enum rtx_code comparison
, rtx label
)
5900 gcc_assert (comparison
== EQ
|| comparison
== NE
);
5901 gcc_assert (mask
> 0 && mask
< 15);
5903 temp
= gen_rtx_fmt_ee (comparison
, VOIDmode
,
5904 gen_rtx_REG (CCRAWmode
, CC_REGNUM
), GEN_INT (mask
));
5905 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5906 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
5907 temp
= gen_rtx_SET (pc_rtx
, temp
);
5908 return emit_jump_insn (temp
);
5911 /* Emit the instructions to implement strlen of STRING and store the
5912 result in TARGET. The string has the known ALIGNMENT. This
5913 version uses vector instructions and is therefore not appropriate
5914 for targets prior to z13. */
5917 s390_expand_vec_strlen (rtx target
, rtx string
, rtx alignment
)
5919 rtx highest_index_to_load_reg
= gen_reg_rtx (Pmode
);
5920 rtx str_reg
= gen_reg_rtx (V16QImode
);
5921 rtx str_addr_base_reg
= gen_reg_rtx (Pmode
);
5922 rtx str_idx_reg
= gen_reg_rtx (Pmode
);
5923 rtx result_reg
= gen_reg_rtx (V16QImode
);
5924 rtx is_aligned_label
= gen_label_rtx ();
5925 rtx into_loop_label
= NULL_RTX
;
5926 rtx loop_start_label
= gen_label_rtx ();
5928 rtx len
= gen_reg_rtx (QImode
);
5931 s390_load_address (str_addr_base_reg
, XEXP (string
, 0));
5932 emit_move_insn (str_idx_reg
, const0_rtx
);
5934 if (INTVAL (alignment
) < 16)
5936 /* Check whether the address happens to be aligned properly so
5937 jump directly to the aligned loop. */
5938 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode
,
5939 str_addr_base_reg
, GEN_INT (15)),
5940 const0_rtx
, EQ
, NULL_RTX
,
5941 Pmode
, 1, is_aligned_label
);
5943 temp
= gen_reg_rtx (Pmode
);
5944 temp
= expand_binop (Pmode
, and_optab
, str_addr_base_reg
,
5945 GEN_INT (15), temp
, 1, OPTAB_DIRECT
);
5946 gcc_assert (REG_P (temp
));
5947 highest_index_to_load_reg
=
5948 expand_binop (Pmode
, sub_optab
, GEN_INT (15), temp
,
5949 highest_index_to_load_reg
, 1, OPTAB_DIRECT
);
5950 gcc_assert (REG_P (highest_index_to_load_reg
));
5951 emit_insn (gen_vllv16qi (str_reg
,
5952 convert_to_mode (SImode
, highest_index_to_load_reg
, 1),
5953 gen_rtx_MEM (BLKmode
, str_addr_base_reg
)));
5955 into_loop_label
= gen_label_rtx ();
5956 s390_emit_jump (into_loop_label
, NULL_RTX
);
5960 emit_label (is_aligned_label
);
5961 LABEL_NUSES (is_aligned_label
) = INTVAL (alignment
) < 16 ? 2 : 1;
5963 /* Reaching this point we are only performing 16 bytes aligned
5965 emit_move_insn (highest_index_to_load_reg
, GEN_INT (15));
5967 emit_label (loop_start_label
);
5968 LABEL_NUSES (loop_start_label
) = 1;
5970 /* Load 16 bytes of the string into VR. */
5971 emit_move_insn (str_reg
,
5972 gen_rtx_MEM (V16QImode
,
5973 gen_rtx_PLUS (Pmode
, str_idx_reg
,
5974 str_addr_base_reg
)));
5975 if (into_loop_label
!= NULL_RTX
)
5977 emit_label (into_loop_label
);
5978 LABEL_NUSES (into_loop_label
) = 1;
5981 /* Increment string index by 16 bytes. */
5982 expand_binop (Pmode
, add_optab
, str_idx_reg
, GEN_INT (16),
5983 str_idx_reg
, 1, OPTAB_DIRECT
);
5985 emit_insn (gen_vec_vfenesv16qi (result_reg
, str_reg
, str_reg
,
5986 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
5988 add_int_reg_note (s390_emit_ccraw_jump (8, NE
, loop_start_label
),
5990 profile_probability::very_likely ().to_reg_br_prob_note ());
5991 emit_insn (gen_vec_extractv16qiqi (len
, result_reg
, GEN_INT (7)));
5993 /* If the string pointer wasn't aligned we have loaded less then 16
5994 bytes and the remaining bytes got filled with zeros (by vll).
5995 Now we have to check whether the resulting index lies within the
5996 bytes actually part of the string. */
5998 cond
= s390_emit_compare (GT
, convert_to_mode (Pmode
, len
, 1),
5999 highest_index_to_load_reg
);
6000 s390_load_address (highest_index_to_load_reg
,
6001 gen_rtx_PLUS (Pmode
, highest_index_to_load_reg
,
6004 emit_insn (gen_movdicc (str_idx_reg
, cond
,
6005 highest_index_to_load_reg
, str_idx_reg
));
6007 emit_insn (gen_movsicc (str_idx_reg
, cond
,
6008 highest_index_to_load_reg
, str_idx_reg
));
6010 add_reg_br_prob_note (s390_emit_jump (is_aligned_label
, cond
),
6011 profile_probability::very_unlikely ());
6013 expand_binop (Pmode
, add_optab
, str_idx_reg
,
6014 GEN_INT (-16), str_idx_reg
, 1, OPTAB_DIRECT
);
6015 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6017 temp
= expand_binop (Pmode
, add_optab
, str_idx_reg
,
6018 convert_to_mode (Pmode
, len
, 1),
6019 target
, 1, OPTAB_DIRECT
);
6021 emit_move_insn (target
, temp
);
6025 s390_expand_vec_movstr (rtx result
, rtx dst
, rtx src
)
6027 rtx temp
= gen_reg_rtx (Pmode
);
6028 rtx src_addr
= XEXP (src
, 0);
6029 rtx dst_addr
= XEXP (dst
, 0);
6030 rtx src_addr_reg
= gen_reg_rtx (Pmode
);
6031 rtx dst_addr_reg
= gen_reg_rtx (Pmode
);
6032 rtx offset
= gen_reg_rtx (Pmode
);
6033 rtx vsrc
= gen_reg_rtx (V16QImode
);
6034 rtx vpos
= gen_reg_rtx (V16QImode
);
6035 rtx loadlen
= gen_reg_rtx (SImode
);
6036 rtx gpos_qi
= gen_reg_rtx(QImode
);
6037 rtx gpos
= gen_reg_rtx (SImode
);
6038 rtx done_label
= gen_label_rtx ();
6039 rtx loop_label
= gen_label_rtx ();
6040 rtx exit_label
= gen_label_rtx ();
6041 rtx full_label
= gen_label_rtx ();
6043 /* Perform a quick check for string ending on the first up to 16
6044 bytes and exit early if successful. */
6046 emit_insn (gen_vlbb (vsrc
, src
, GEN_INT (6)));
6047 emit_insn (gen_lcbb (loadlen
, src_addr
, GEN_INT (6)));
6048 emit_insn (gen_vfenezv16qi (vpos
, vsrc
, vsrc
));
6049 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6050 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6051 /* gpos is the byte index if a zero was found and 16 otherwise.
6052 So if it is lower than the loaded bytes we have a hit. */
6053 emit_cmp_and_jump_insns (gpos
, loadlen
, GE
, NULL_RTX
, SImode
, 1,
6055 emit_insn (gen_vstlv16qi (vsrc
, gpos
, dst
));
6057 force_expand_binop (Pmode
, add_optab
, dst_addr
, gpos
, result
,
6059 emit_jump (exit_label
);
6062 emit_label (full_label
);
6063 LABEL_NUSES (full_label
) = 1;
6065 /* Calculate `offset' so that src + offset points to the last byte
6066 before 16 byte alignment. */
6068 /* temp = src_addr & 0xf */
6069 force_expand_binop (Pmode
, and_optab
, src_addr
, GEN_INT (15), temp
,
6072 /* offset = 0xf - temp */
6073 emit_move_insn (offset
, GEN_INT (15));
6074 force_expand_binop (Pmode
, sub_optab
, offset
, temp
, offset
,
6077 /* Store `offset' bytes in the dstination string. The quick check
6078 has loaded at least `offset' bytes into vsrc. */
6080 emit_insn (gen_vstlv16qi (vsrc
, gen_lowpart (SImode
, offset
), dst
));
6082 /* Advance to the next byte to be loaded. */
6083 force_expand_binop (Pmode
, add_optab
, offset
, const1_rtx
, offset
,
6086 /* Make sure the addresses are single regs which can be used as a
6088 emit_move_insn (src_addr_reg
, src_addr
);
6089 emit_move_insn (dst_addr_reg
, dst_addr
);
6093 emit_label (loop_label
);
6094 LABEL_NUSES (loop_label
) = 1;
6096 emit_move_insn (vsrc
,
6097 gen_rtx_MEM (V16QImode
,
6098 gen_rtx_PLUS (Pmode
, src_addr_reg
, offset
)));
6100 emit_insn (gen_vec_vfenesv16qi (vpos
, vsrc
, vsrc
,
6101 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
6102 add_int_reg_note (s390_emit_ccraw_jump (8, EQ
, done_label
),
6103 REG_BR_PROB
, profile_probability::very_unlikely ()
6104 .to_reg_br_prob_note ());
6106 emit_move_insn (gen_rtx_MEM (V16QImode
,
6107 gen_rtx_PLUS (Pmode
, dst_addr_reg
, offset
)),
6110 force_expand_binop (Pmode
, add_optab
, offset
, GEN_INT (16),
6111 offset
, 1, OPTAB_DIRECT
);
6113 emit_jump (loop_label
);
6118 /* We are done. Add the offset of the zero character to the dst_addr
6119 pointer to get the result. */
6121 emit_label (done_label
);
6122 LABEL_NUSES (done_label
) = 1;
6124 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, offset
, dst_addr_reg
,
6127 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
6128 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
6130 emit_insn (gen_vstlv16qi (vsrc
, gpos
, gen_rtx_MEM (BLKmode
, dst_addr_reg
)));
6132 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, gpos
, result
,
6137 emit_label (exit_label
);
6138 LABEL_NUSES (exit_label
) = 1;
6142 /* Expand conditional increment or decrement using alc/slb instructions.
6143 Should generate code setting DST to either SRC or SRC + INCREMENT,
6144 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145 Returns true if successful, false otherwise.
6147 That makes it possible to implement some if-constructs without jumps e.g.:
6148 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149 unsigned int a, b, c;
6150 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6151 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6152 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6153 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6155 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6157 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6159 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6162 s390_expand_addcc (enum rtx_code cmp_code
, rtx cmp_op0
, rtx cmp_op1
,
6163 rtx dst
, rtx src
, rtx increment
)
6165 machine_mode cmp_mode
;
6166 machine_mode cc_mode
;
6172 if ((GET_MODE (cmp_op0
) == SImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6173 && (GET_MODE (cmp_op1
) == SImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6175 else if ((GET_MODE (cmp_op0
) == DImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6176 && (GET_MODE (cmp_op1
) == DImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6181 /* Try ADD LOGICAL WITH CARRY. */
6182 if (increment
== const1_rtx
)
6184 /* Determine CC mode to use. */
6185 if (cmp_code
== EQ
|| cmp_code
== NE
)
6187 if (cmp_op1
!= const0_rtx
)
6189 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6190 NULL_RTX
, 0, OPTAB_WIDEN
);
6191 cmp_op1
= const0_rtx
;
6194 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6197 if (cmp_code
== LTU
|| cmp_code
== LEU
)
6202 cmp_code
= swap_condition (cmp_code
);
6219 /* Emit comparison instruction pattern. */
6220 if (!register_operand (cmp_op0
, cmp_mode
))
6221 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6223 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6224 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6225 /* We use insn_invalid_p here to add clobbers if required. */
6226 ret
= insn_invalid_p (emit_insn (insn
), false);
6229 /* Emit ALC instruction pattern. */
6230 op_res
= gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6231 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6234 if (src
!= const0_rtx
)
6236 if (!register_operand (src
, GET_MODE (dst
)))
6237 src
= force_reg (GET_MODE (dst
), src
);
6239 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, src
);
6240 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, const0_rtx
);
6243 p
= rtvec_alloc (2);
6245 gen_rtx_SET (dst
, op_res
);
6247 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6248 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6253 /* Try SUBTRACT LOGICAL WITH BORROW. */
6254 if (increment
== constm1_rtx
)
6256 /* Determine CC mode to use. */
6257 if (cmp_code
== EQ
|| cmp_code
== NE
)
6259 if (cmp_op1
!= const0_rtx
)
6261 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6262 NULL_RTX
, 0, OPTAB_WIDEN
);
6263 cmp_op1
= const0_rtx
;
6266 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6269 if (cmp_code
== GTU
|| cmp_code
== GEU
)
6274 cmp_code
= swap_condition (cmp_code
);
6291 /* Emit comparison instruction pattern. */
6292 if (!register_operand (cmp_op0
, cmp_mode
))
6293 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6295 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6296 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6297 /* We use insn_invalid_p here to add clobbers if required. */
6298 ret
= insn_invalid_p (emit_insn (insn
), false);
6301 /* Emit SLB instruction pattern. */
6302 if (!register_operand (src
, GET_MODE (dst
)))
6303 src
= force_reg (GET_MODE (dst
), src
);
6305 op_res
= gen_rtx_MINUS (GET_MODE (dst
),
6306 gen_rtx_MINUS (GET_MODE (dst
), src
, const0_rtx
),
6307 gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6308 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6310 p
= rtvec_alloc (2);
6312 gen_rtx_SET (dst
, op_res
);
6314 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6315 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6323 /* Expand code for the insv template. Return true if successful. */
/* NOTE(review): this chunk is a lossy extraction -- the embedded original
   line numbers jump (e.g. 6336-6337, 6340, 6345 are missing), so several
   statements of this function are not visible here.  Code is left
   byte-identical; comments only.  Visible strategy: try, in order,
   INSERT IMMEDIATE (IILL et al), STORE CHARACTERS UNDER MASK (STCM),
   INSERT CHARACTERS UNDER MASK (ICM), then RISBG on z10.  */
6326 s390_expand_insv (rtx dest
, rtx op1
, rtx op2
, rtx src
)
/* op1 = bit width of the field, op2 = bit position; both CONST_INTs.  */
6328 int bitsize
= INTVAL (op1
);
6329 int bitpos
= INTVAL (op2
);
6330 machine_mode mode
= GET_MODE (dest
);
6332 int smode_bsize
, mode_bsize
;
/* Reject fields that do not fit entirely inside DEST's mode.  */
6335 if (bitsize
+ bitpos
> GET_MODE_BITSIZE (mode
))
6338 /* Generate INSERT IMMEDIATE (IILL et al). */
6339 /* (set (ze (reg)) (const_int)). */
6341 && register_operand (dest
, word_mode
)
6342 && (bitpos
% 16) == 0
6343 && (bitsize
% 16) == 0
6344 && const_int_operand (src
, VOIDmode
))
6346 HOST_WIDE_INT val
= INTVAL (src
);
6347 int regpos
= bitpos
+ bitsize
;
/* Walk the field from its high end towards BITPOS, emitting one
   insert-immediate per 16-bit (or 32-bit with EXTIMM) slice.  */
6349 while (regpos
> bitpos
)
6351 machine_mode putmode
;
6354 if (TARGET_EXTIMM
&& (regpos
% 32 == 0) && (regpos
>= bitpos
+ 32))
6359 putsize
= GET_MODE_BITSIZE (putmode
);
6361 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6364 gen_int_mode (val
, putmode
));
6367 gcc_assert (regpos
== bitpos
);
6371 smode
= smallest_int_mode_for_size (bitsize
);
6372 smode_bsize
= GET_MODE_BITSIZE (smode
);
6373 mode_bsize
= GET_MODE_BITSIZE (mode
);
6375 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6377 && (bitsize
% BITS_PER_UNIT
) == 0
6379 && (register_operand (src
, word_mode
)
6380 || const_int_operand (src
, VOIDmode
)))
6382 /* Emit standard pattern if possible. */
6383 if (smode_bsize
== bitsize
)
6385 emit_move_insn (adjust_address (dest
, smode
, 0),
6386 gen_lowpart (smode
, src
));
6390 /* (set (ze (mem)) (const_int)). */
6391 else if (const_int_operand (src
, VOIDmode
))
/* Constant into memory: spill SRC to the constant pool and copy the
   trailing SIZE bytes with a block move.  */
6393 int size
= bitsize
/ BITS_PER_UNIT
;
6394 rtx src_mem
= adjust_address (force_const_mem (word_mode
, src
),
6396 UNITS_PER_WORD
- size
);
6398 dest
= adjust_address (dest
, BLKmode
, 0);
6399 set_mem_size (dest
, size
);
6400 s390_expand_cpymem (dest
, src_mem
, GEN_INT (size
));
6404 /* (set (ze (mem)) (reg)). */
6405 else if (register_operand (src
, word_mode
))
6408 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
, op1
,
6412 /* Emit st,stcmh sequence. */
6413 int stcmh_width
= bitsize
- 32;
6414 int size
= stcmh_width
/ BITS_PER_UNIT
;
6416 emit_move_insn (adjust_address (dest
, SImode
, size
),
6417 gen_lowpart (SImode
, src
));
6418 set_mem_size (dest
, size
);
6419 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6420 GEN_INT (stcmh_width
),
6422 gen_rtx_LSHIFTRT (word_mode
, src
, GEN_INT (32)));
6428 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6429 if ((bitpos
% BITS_PER_UNIT
) == 0
6430 && (bitsize
% BITS_PER_UNIT
) == 0
/* Field must not straddle a 32-bit word boundary (ICM mask limit).  */
6431 && (bitpos
& 32) == ((bitpos
+ bitsize
- 1) & 32)
6433 && (mode
== DImode
|| mode
== SImode
)
6434 && register_operand (dest
, mode
))
6436 /* Emit a strict_low_part pattern if possible. */
6437 if (smode_bsize
== bitsize
&& bitpos
== mode_bsize
- smode_bsize
)
6439 op
= gen_rtx_STRICT_LOW_PART (VOIDmode
, gen_lowpart (smode
, dest
));
6440 op
= gen_rtx_SET (op
, gen_lowpart (smode
, src
));
6441 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6442 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
)));
6446 /* ??? There are more powerful versions of ICM that are not
6447 completely represented in the md file. */
6450 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6451 if (TARGET_Z10
&& (mode
== DImode
|| mode
== SImode
))
6453 machine_mode mode_s
= GET_MODE (src
);
6455 if (CONSTANT_P (src
))
6457 /* For constant zero values the representation with AND
6458 appears to be folded in more situations than the (set
6459 (zero_extract) ...).
6460 We only do this when the start and end of the bitfield
6461 remain in the same SImode chunk. That way nihf or nilf
6463 The AND patterns might still generate a risbg for this. */
6464 if (src
== const0_rtx
&& bitpos
/ 32 == (bitpos
+ bitsize
- 1) / 32)
6467 src
= force_reg (mode
, src
);
6469 else if (mode_s
!= mode
)
/* SRC is narrower/wider than DEST: reinterpret its low part in MODE.  */
6471 gcc_assert (GET_MODE_BITSIZE (mode_s
) >= bitsize
);
6472 src
= force_reg (mode_s
, src
);
6473 src
= gen_lowpart (mode
, src
);
6476 op
= gen_rtx_ZERO_EXTRACT (mode
, dest
, op1
, op2
),
6477 op
= gen_rtx_SET (op
, src
);
6481 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6482 op
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
));
6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493 register that holds VAL of mode MODE shifted by COUNT bits. */
/* NOTE(review): lossy extraction -- braces/return-type lines elided here;
   code kept byte-identical.  Masks VAL down to MODE's value mask, then
   left-shifts by COUNT, all in SImode; both steps may allocate a fresh
   pseudo via expand_simple_binop.  */
6496 s390_expand_mask_and_shift (rtx val
, machine_mode mode
, rtx count
)
6498 val
= expand_simple_binop (SImode
, AND
, val
, GEN_INT (GET_MODE_MASK (mode
)),
6499 NULL_RTX
, 1, OPTAB_DIRECT
);
6500 return expand_simple_binop (SImode
, ASHIFT
, val
, count
,
6501 NULL_RTX
, 1, OPTAB_DIRECT
);
6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505 the result in TARGET. */
/* NOTE(review): lossy extraction -- the switch heads, several case labels
   and closing braces are elided (numbering gaps).  Visible logic:
   canonicalize COND onto the codes the hardware supports (EQ/GT/GTU for
   int, dedicated ungt/unge/uneq/ltgt/ordered patterns for float), using
   SWAP_P to exchange the operands and NEG_P to emit a final NOT.  */
6508 s390_expand_vec_compare (rtx target
, enum rtx_code cond
,
6509 rtx cmp_op1
, rtx cmp_op2
)
6511 machine_mode mode
= GET_MODE (target
);
6512 bool neg_p
= false, swap_p
= false;
6515 if (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_VECTOR_FLOAT
)
6519 /* NE a != b -> !(a == b) */
6520 case NE
: cond
= EQ
; neg_p
= true; break;
6522 emit_insn (gen_vec_cmpungt (target
, cmp_op1
, cmp_op2
));
6525 emit_insn (gen_vec_cmpunge (target
, cmp_op1
, cmp_op2
));
6527 case LE
: cond
= GE
; swap_p
= true; break;
6528 /* UNLE: (a u<= b) -> (b u>= a). */
6530 emit_insn (gen_vec_cmpunge (target
, cmp_op2
, cmp_op1
));
6532 /* LT: a < b -> b > a */
6533 case LT
: cond
= GT
; swap_p
= true; break;
6534 /* UNLT: (a u< b) -> (b u> a). */
6536 emit_insn (gen_vec_cmpungt (target
, cmp_op2
, cmp_op1
));
6539 emit_insn (gen_vec_cmpuneq (target
, cmp_op1
, cmp_op2
));
6542 emit_insn (gen_vec_cmpltgt (target
, cmp_op1
, cmp_op2
));
6545 emit_insn (gen_vec_cmpordered (target
, cmp_op1
, cmp_op2
));
6548 emit_insn (gen_vec_cmpunordered (target
, cmp_op1
, cmp_op2
));
/* Integer vector comparisons: reduce everything to EQ/GT/GTU.  */
6557 /* NE: a != b -> !(a == b) */
6558 case NE
: cond
= EQ
; neg_p
= true; break;
6559 /* GE: a >= b -> !(b > a) */
6560 case GE
: cond
= GT
; neg_p
= true; swap_p
= true; break;
6561 /* GEU: a >= b -> !(b > a) */
6562 case GEU
: cond
= GTU
; neg_p
= true; swap_p
= true; break;
6563 /* LE: a <= b -> !(a > b) */
6564 case LE
: cond
= GT
; neg_p
= true; break;
6565 /* LEU: a <= b -> !(a > b) */
6566 case LEU
: cond
= GTU
; neg_p
= true; break;
6567 /* LT: a < b -> b > a */
6568 case LT
: cond
= GT
; swap_p
= true; break;
6569 /* LTU: a < b -> b > a */
6570 case LTU
: cond
= GTU
; swap_p
= true; break;
/* Apply the operand swap requested above.  */
6577 tmp
= cmp_op1
; cmp_op1
= cmp_op2
; cmp_op2
= tmp
;
6580 emit_insn (gen_rtx_SET (target
, gen_rtx_fmt_ee (cond
,
6582 cmp_op1
, cmp_op2
)));
/* NEG_P: complement the element-wise all-ones/all-zeros mask.  */
6584 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (mode
, target
)));
6587 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589 elements in CMP1 and CMP2 fulfill the comparison.
6590 This function is only used to emit patterns for the vx builtins and
6591 therefore only handles comparison codes required by the
/* NOTE(review): lossy extraction -- switch heads, some case labels and
   braces elided (numbering gaps).  Visible flow: pick a CC-producing
   vector-compare mode (CCVEQ/CCVIH/CCVIHU for int, CCVF* for float),
   possibly swapping operands; then emit the compare clobbering a scratch,
   and materialize 1/0 into TARGET via a load-on-condition style
   IF_THEN_ELSE on the CC consumer mode.  */
6594 s390_expand_vec_compare_cc (rtx target
, enum rtx_code code
,
6595 rtx cmp1
, rtx cmp2
, bool all_p
)
6597 machine_mode cc_producer_mode
, cc_consumer_mode
, scratch_mode
;
6598 rtx tmp_reg
= gen_reg_rtx (SImode
);
6599 bool swap_p
= false;
6601 if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_INT
)
6607 cc_producer_mode
= CCVEQmode
;
6611 code
= swap_condition (code
);
6616 cc_producer_mode
= CCVIHmode
;
6620 code
= swap_condition (code
);
6625 cc_producer_mode
= CCVIHUmode
;
6631 scratch_mode
= GET_MODE (cmp1
);
6632 /* These codes represent inverted CC interpretations. Inverting
6633 an ALL CC mode results in an ANY CC mode and the other way
6634 around. Invert the all_p flag here to compensate for
6636 if (code
== NE
|| code
== LE
|| code
== LEU
)
6639 cc_consumer_mode
= all_p
? CCVIALLmode
: CCVIANYmode
;
6641 else if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_FLOAT
)
6647 case EQ
: cc_producer_mode
= CCVEQmode
; break;
6648 case NE
: cc_producer_mode
= CCVEQmode
; inv_p
= true; break;
6649 case GT
: cc_producer_mode
= CCVFHmode
; break;
6650 case GE
: cc_producer_mode
= CCVFHEmode
; break;
6651 case UNLE
: cc_producer_mode
= CCVFHmode
; inv_p
= true; break;
6652 case UNLT
: cc_producer_mode
= CCVFHEmode
; inv_p
= true; break;
6653 case LT
: cc_producer_mode
= CCVFHmode
; code
= GT
; swap_p
= true; break;
6654 case LE
: cc_producer_mode
= CCVFHEmode
; code
= GE
; swap_p
= true; break;
6655 default: gcc_unreachable ();
6657 scratch_mode
= related_int_vector_mode (GET_MODE (cmp1
)).require ();
6662 cc_consumer_mode
= all_p
? CCVFALLmode
: CCVFANYmode
;
/* Vector compare setting CC; the vector result itself is discarded
   into a scratch register.  */
6674 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6675 gen_rtvec (2, gen_rtx_SET (
6676 gen_rtx_REG (cc_producer_mode
, CC_REGNUM
),
6677 gen_rtx_COMPARE (cc_producer_mode
, cmp1
, cmp2
)),
6678 gen_rtx_CLOBBER (VOIDmode
,
6679 gen_rtx_SCRATCH (scratch_mode
)))));
6680 emit_move_insn (target
, const0_rtx
);
6681 emit_move_insn (tmp_reg
, const1_rtx
);
6683 emit_move_insn (target
,
6684 gen_rtx_IF_THEN_ELSE (SImode
,
6685 gen_rtx_fmt_ee (code
, VOIDmode
,
6686 gen_rtx_REG (cc_consumer_mode
, CC_REGNUM
),
6691 /* Invert the comparison CODE applied to a CC mode. This is only safe
6692 if we know whether there result was created by a floating point
6693 compare or not. For the CCV modes this is encoded as part of the
/* NOTE(review): lossy extraction -- the fallback branch after line 6703
   is not visible here; code kept byte-identical.  */
6696 s390_reverse_condition (machine_mode mode
, enum rtx_code code
)
6698 /* Reversal of FP compares takes care -- an ordered compare
6699 becomes an unordered compare and vice versa. */
6700 if (mode
== CCVFALLmode
|| mode
== CCVFANYmode
|| mode
== CCSFPSmode
)
6701 return reverse_condition_maybe_unordered (code
);
6702 else if (mode
== CCVIALLmode
|| mode
== CCVIANYmode
)
6703 return reverse_condition (code
);
6708 /* Generate a vector comparison expression loading either elements of
6709 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
/* NOTE(review): lossy extraction -- several lines (braces, `return`s,
   some arguments) are elided per the numbering gaps; code kept
   byte-identical.  Visible flow: first try the shift shortcut for
   x<0 ? {1,-1} : 0 forms, else do a full vector compare into an integer
   mask and either use it directly (-1/0 selects) or feed a vsel.  */
6713 s390_expand_vcond (rtx target
, rtx then
, rtx els
,
6714 enum rtx_code cond
, rtx cmp_op1
, rtx cmp_op2
)
6717 machine_mode result_mode
;
6720 machine_mode target_mode
= GET_MODE (target
);
6721 machine_mode cmp_mode
= GET_MODE (cmp_op1
);
6722 rtx op
= (cond
== LT
) ? els
: then
;
6724 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6726 for short and byte (x >> 15 and x >> 7 respectively). */
6727 if ((cond
== LT
|| cond
== GE
)
6728 && target_mode
== cmp_mode
6729 && cmp_op2
== CONST0_RTX (cmp_mode
)
6730 && op
== CONST0_RTX (target_mode
)
6731 && s390_vector_mode_supported_p (target_mode
)
6732 && GET_MODE_CLASS (target_mode
) == MODE_VECTOR_INT
)
6734 rtx negop
= (cond
== LT
) ? then
: els
;
6736 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (target_mode
)) - 1;
6738 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739 if (negop
== CONST1_RTX (target_mode
))
6741 rtx res
= expand_simple_binop (cmp_mode
, LSHIFTRT
, cmp_op1
,
6742 GEN_INT (shift
), target
,
6745 emit_move_insn (target
, res
);
6749 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750 else if (all_ones_operand (negop
, target_mode
))
6752 rtx res
= expand_simple_binop (cmp_mode
, ASHIFTRT
, cmp_op1
,
6753 GEN_INT (shift
), target
,
6756 emit_move_insn (target
, res
);
6761 /* We always use an integral type vector to hold the comparison
6763 result_mode
= related_int_vector_mode (cmp_mode
).require ();
6764 result_target
= gen_reg_rtx (result_mode
);
6766 /* We allow vector immediates as comparison operands that
6767 can be handled by the optimization above but not by the
6768 following code. Hence, force them into registers here. */
6769 if (!REG_P (cmp_op1
))
6770 cmp_op1
= force_reg (GET_MODE (cmp_op1
), cmp_op1
);
6772 if (!REG_P (cmp_op2
))
6773 cmp_op2
= force_reg (GET_MODE (cmp_op2
), cmp_op2
);
6775 s390_expand_vec_compare (result_target
, cond
,
6778 /* If the results are supposed to be either -1 or 0 we are done
6779 since this is what our compare instructions generate anyway. */
6780 if (all_ones_operand (then
, GET_MODE (then
))
6781 && const0_operand (els
, GET_MODE (els
)))
6783 emit_move_insn (target
, gen_rtx_SUBREG (target_mode
,
6788 /* Otherwise we will do a vsel afterwards. */
6789 /* This gets triggered e.g.
6790 with gcc.c-torture/compile/pr53410-1.c */
6792 then
= force_reg (target_mode
, then
);
6795 els
= force_reg (target_mode
, els
);
6797 tmp
= gen_rtx_fmt_ee (EQ
, VOIDmode
,
6799 CONST0_RTX (result_mode
));
6801 /* We compared the result against zero above so we have to swap then
6803 tmp
= gen_rtx_IF_THEN_ELSE (target_mode
, tmp
, els
, then
);
6805 gcc_assert (target_mode
== GET_MODE (then
));
6806 emit_insn (gen_rtx_SET (target
, tmp
));
6809 /* Emit the RTX necessary to initialize the vector TARGET with values
/* NOTE(review): lossy extraction -- conditions/braces are elided per the
   numbering gaps; code kept byte-identical.  Visible strategy, in order:
   gen-mask/byte-mask for uniform constants, vector replicate for uniform
   values, vector load pair for 2x8-byte, load-logical-element-and-zero
   (VXE), then element-by-element fallback after zeroing TARGET.  */
6812 s390_expand_vec_init (rtx target
, rtx vals
)
6814 machine_mode mode
= GET_MODE (target
);
6815 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6816 int n_elts
= GET_MODE_NUNITS (mode
);
6817 bool all_same
= true, all_regs
= true, all_const_int
= true;
/* Scan the initializer to classify its elements.  */
6821 for (i
= 0; i
< n_elts
; ++i
)
6823 x
= XVECEXP (vals
, 0, i
);
6825 if (!CONST_INT_P (x
))
6826 all_const_int
= false;
6828 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6835 /* Use vector gen mask or vector gen byte mask if possible. */
6836 if (all_same
&& all_const_int
6837 && (XVECEXP (vals
, 0, 0) == const0_rtx
6838 || s390_contiguous_bitmask_vector_p (XVECEXP (vals
, 0, 0),
6840 || s390_bytemask_vector_p (XVECEXP (vals
, 0, 0), NULL
)))
6842 emit_insn (gen_rtx_SET (target
,
6843 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0))));
6847 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6850 rtx elem
= XVECEXP (vals
, 0, 0);
6852 /* vec_splats accepts general_operand as source. */
6853 if (!general_operand (elem
, GET_MODE (elem
)))
6854 elem
= force_reg (inner_mode
, elem
);
6856 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, elem
)));
6863 && GET_MODE_SIZE (inner_mode
) == 8)
6865 /* Use vector load pair. */
6866 emit_insn (gen_rtx_SET (target
,
6867 gen_rtx_VEC_CONCAT (mode
,
6868 XVECEXP (vals
, 0, 0),
6869 XVECEXP (vals
, 0, 1))));
6873 /* Use vector load logical element and zero. */
6874 if (TARGET_VXE
&& (mode
== V4SImode
|| mode
== V4SFmode
))
6878 x
= XVECEXP (vals
, 0, 0);
6879 if (memory_operand (x
, inner_mode
))
/* All remaining elements must be literal zero for vllez.  */
6881 for (i
= 1; i
< n_elts
; ++i
)
6882 found
= found
&& XVECEXP (vals
, 0, i
) == const0_rtx
;
6886 machine_mode half_mode
= (inner_mode
== SFmode
? V2SFmode
: V2SImode
);
6888 emit_insn (gen_rtx_SET (target
,
6889 gen_rtx_VEC_CONCAT (mode
,
6890 gen_rtx_VEC_CONCAT (half_mode
,
6893 gen_rtx_VEC_CONCAT (half_mode
,
6901 /* We are about to set the vector elements one by one. Zero out the
6902 full register first in order to help the data flow framework to
6903 detect it as full VR set. */
6904 emit_insn (gen_rtx_SET (target
, CONST0_RTX (mode
)))
;
6906 /* Unfortunately the vec_init expander is not allowed to fail. So
6907 we have to implement the fallback ourselves. */
6908 for (i
= 0; i
< n_elts
; i
++)
6910 rtx elem
= XVECEXP (vals
, 0, i
);
6911 if (!general_operand (elem
, GET_MODE (elem
)))
6912 elem
= force_reg (inner_mode
, elem
);
6914 emit_insn (gen_rtx_SET (target
,
6915 gen_rtx_UNSPEC (mode
,
6917 GEN_INT (i
), target
),
6922 /* Structure to hold the initial parameters for a compare_and_swap operation
6923 in HImode and QImode. */
/* NOTE(review): filled in by init_alignment_context below; consumed by
   s390_expand_cs_hqi, s390_two_part_insv and s390_expand_atomic.  */
6925 struct alignment_context
6927 rtx memsi
; /* SI aligned memory location. */
6928 rtx shift
; /* Bit offset with regard to lsb. */
6929 rtx modemask
; /* Mask of the HQImode shifted by SHIFT bits. */
6930 rtx modemaski
; /* ~modemask */
6931 bool aligned
; /* True if memory is aligned, false else. */
6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6935 structure AC for transparent simplifying, if the memory alignment is known
6936 to be at least 32bit. MEM is the memory location for the actual operation
6937 and MODE its mode. */
/* NOTE(review): lossy extraction -- some lines (braces, the MODE
   parameter's declaration tail, trailing arguments) are elided per the
   numbering gaps; code kept byte-identical.  */
6940 init_alignment_context (struct alignment_context
*ac
, rtx mem
,
/* shift = byte distance of MODE's low end from the SImode word start,
   assuming big-endian layout; refined below for unknown alignment.  */
6943 ac
->shift
= GEN_INT (GET_MODE_SIZE (SImode
) - GET_MODE_SIZE (mode
));
6944 ac
->aligned
= (MEM_ALIGN (mem
) >= GET_MODE_BITSIZE (SImode
));
6947 ac
->memsi
= adjust_address (mem
, SImode
, 0); /* Memory is aligned. */
6950 /* Alignment is unknown. */
6951 rtx byteoffset
, addr
, align
;
6953 /* Force the address into a register. */
6954 addr
= force_reg (Pmode
, XEXP (mem
, 0));
6956 /* Align it to SImode. */
6957 align
= expand_simple_binop (Pmode
, AND
, addr
,
6958 GEN_INT (-GET_MODE_SIZE (SImode
)),
6959 NULL_RTX
, 1, OPTAB_DIRECT
);
6961 ac
->memsi
= gen_rtx_MEM (SImode
, align
);
6962 MEM_VOLATILE_P (ac
->memsi
) = MEM_VOLATILE_P (mem
);
/* The widened access may touch bytes outside MEM; give it the memory
   barrier alias set so it conflicts with everything.  */
6963 set_mem_alias_set (ac
->memsi
, ALIAS_SET_MEMORY_BARRIER
);
6964 set_mem_align (ac
->memsi
, GET_MODE_BITSIZE (SImode
));
6966 /* Calculate shiftcount. */
6967 byteoffset
= expand_simple_binop (Pmode
, AND
, addr
,
6968 GEN_INT (GET_MODE_SIZE (SImode
) - 1),
6969 NULL_RTX
, 1, OPTAB_DIRECT
);
6970 /* As we already have some offset, evaluate the remaining distance. */
6971 ac
->shift
= expand_simple_binop (SImode
, MINUS
, ac
->shift
, byteoffset
,
6972 NULL_RTX
, 1, OPTAB_DIRECT
);
6975 /* Shift is the byte count, but we need the bitcount. */
6976 ac
->shift
= expand_simple_binop (SImode
, ASHIFT
, ac
->shift
, GEN_INT (3),
6977 NULL_RTX
, 1, OPTAB_DIRECT
);
6979 /* Calculate masks. */
6980 ac
->modemask
= expand_simple_binop (SImode
, ASHIFT
,
6981 GEN_INT (GET_MODE_MASK (mode
)),
6982 ac
->shift
, NULL_RTX
, 1, OPTAB_DIRECT
);
6983 ac
->modemaski
= expand_simple_unop (SImode
, NOT
, ac
->modemask
,
6987 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6988 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6989 perform the merge in SEQ2. */
/* NOTE(review): lossy extraction -- the start_sequence()/end_sequence()
   calls and early return are elided per the numbering gaps; code kept
   byte-identical.  SEQ1/SEQ2 receive the captured insn sequences.  */
6992 s390_two_part_insv (struct alignment_context
*ac
, rtx
*seq1
, rtx
*seq2
,
6993 machine_mode mode
, rtx val
, rtx ins
)
7000 tmp
= copy_to_mode_reg (SImode
, val
);
7001 if (s390_expand_insv (tmp
, GEN_INT (GET_MODE_BITSIZE (mode
)),
7005 *seq2
= get_insns ();
7012 /* Failed to use insv. Generate a two part shift and mask. */
7014 tmp
= s390_expand_mask_and_shift (ins
, mode
, ac
->shift
);
7015 *seq1
= get_insns ();
7019 tmp
= expand_simple_binop (SImode
, IOR
, tmp
, val
, NULL_RTX
, 1, OPTAB_DIRECT
);
7020 *seq2
= get_insns ();
7026 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7027 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028 value to set if CMP == MEM. */
/* NOTE(review): lossy extraction -- the emit_insn calls replaying
   seq0..seq3, the is_weak branch and some labels are elided per the
   numbering gaps; code kept byte-identical.  Simulates a narrow CS with
   a full-word CS loop on the containing SImode word (AC describes the
   alignment).  BTARGET receives the 1/0 success flag, VTARGET the old
   field value.  */
7031 s390_expand_cs_hqi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7032 rtx cmp
, rtx new_rtx
, bool is_weak
)
7034 struct alignment_context ac
;
7035 rtx cmpv
, newv
, val
, cc
, seq0
, seq1
, seq2
, seq3
;
7036 rtx res
= gen_reg_rtx (SImode
);
7037 rtx_code_label
*csloop
= NULL
, *csend
= NULL
;
7039 gcc_assert (MEM_P (mem
));
7041 init_alignment_context (&ac
, mem
, mode
);
7043 /* Load full word. Subsequent loads are performed by CS. */
7044 val
= expand_simple_binop (SImode
, AND
, ac
.memsi
, ac
.modemaski
,
7045 NULL_RTX
, 1, OPTAB_DIRECT
);
7047 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7048 possible, we try to use insv to make this happen efficiently. If
7049 that fails we'll generate code both inside and outside the loop. */
7050 cmpv
= s390_two_part_insv (&ac
, &seq0
, &seq2
, mode
, val
, cmp
);
7051 newv
= s390_two_part_insv (&ac
, &seq1
, &seq3
, mode
, val
, new_rtx
);
7058 /* Start CS loop. */
7061 /* Begin assuming success. */
7062 emit_move_insn (btarget
, const1_rtx
);
7064 csloop
= gen_label_rtx ();
7065 csend
= gen_label_rtx ();
7066 emit_label (csloop
);
7069 /* val = "<mem>00..0<mem>"
7070 * cmp = "00..0<cmp>00..0"
7071 * new = "00..0<new>00..0"
7077 cc
= s390_emit_compare_and_swap (EQ
, res
, ac
.memsi
, cmpv
, newv
, CCZ1mode
);
7079 emit_insn (gen_cstorecc4 (btarget
, cc
, XEXP (cc
, 0), XEXP (cc
, 1)));
7084 /* Jump to end if we're done (likely?). */
7085 s390_emit_jump (csend
, cc
);
7087 /* Check for changes outside mode, and loop internal if so.
7088 Arrange the moves so that the compare is adjacent to the
7089 branch so that we can generate CRJ. */
7090 tmp
= copy_to_reg (val
);
7091 force_expand_binop (SImode
, and_optab
, res
, ac
.modemaski
, val
,
7093 cc
= s390_emit_compare (NE
, val
, tmp
);
7094 s390_emit_jump (csloop
, cc
);
7097 emit_move_insn (btarget
, const0_rtx
);
7101 /* Return the correct part of the bitfield. */
7102 convert_move (vtarget
, expand_simple_binop (SImode
, LSHIFTRT
, res
, ac
.shift
,
7103 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7106 /* Variant of s390_expand_cs for SI, DI and TI modes. */
/* NOTE(review): lossy extraction -- conditions (e.g. the TARGET_Z196
   guard at 7126-7127), braces and the do_const_opt/else split around
   7176-7190 are elided per the numbering gaps; code kept byte-identical.
   BTARGET gets the success flag, VTARGET the old memory value.  */
7108 s390_expand_cs_tdsi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7109 rtx cmp
, rtx new_rtx
, bool is_weak
)
7111 rtx output
= vtarget
;
7112 rtx_code_label
*skip_cs_label
= NULL
;
7113 bool do_const_opt
= false;
7115 if (!register_operand (output
, mode
))
7116 output
= gen_reg_rtx (mode
);
7118 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7119 with the constant first and skip the compare_and_swap because its very
7120 expensive and likely to fail anyway.
7121 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7122 cause spurious in that case.
7123 Note 2: It may be useful to do this also for non-constant INPUT.
7124 Note 3: Currently only targets with "load on condition" are supported
7125 (z196 and newer). */
7128 && (mode
== SImode
|| mode
== DImode
))
7129 do_const_opt
= (is_weak
&& CONST_INT_P (cmp
));
7133 rtx cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7135 skip_cs_label
= gen_label_rtx ();
7136 emit_move_insn (btarget
, const0_rtx
);
7137 if (CONST_INT_P (cmp
) && INTVAL (cmp
) == 0)
7139 rtvec lt
= rtvec_alloc (2);
7141 /* Load-and-test + conditional jump. */
7143 = gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, mem
, cmp
));
7144 RTVEC_ELT (lt
, 1) = gen_rtx_SET (output
, mem
);
7145 emit_insn (gen_rtx_PARALLEL (VOIDmode
, lt
));
7149 emit_move_insn (output
, mem
);
7150 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, output
, cmp
)));
7152 s390_emit_jump (skip_cs_label
, gen_rtx_NE (VOIDmode
, cc
, const0_rtx
));
7153 add_reg_br_prob_note (get_last_insn (),
7154 profile_probability::very_unlikely ());
7155 /* If the jump is not taken, OUTPUT is the expected value. */
7157 /* Reload newval to a register manually, *after* the compare and jump
7158 above. Otherwise Reload might place it before the jump. */
7161 cmp
= force_reg (mode
, cmp
);
7162 new_rtx
= force_reg (mode
, new_rtx
);
7163 s390_emit_compare_and_swap (EQ
, output
, mem
, cmp
, new_rtx
,
7164 (do_const_opt
) ? CCZmode
: CCZ1mode
);
7165 if (skip_cs_label
!= NULL
)
7166 emit_label (skip_cs_label
);
7168 /* We deliberately accept non-register operands in the predicate
7169 to ensure the write back to the output operand happens *before*
7170 the store-flags code below. This makes it easier for combine
7171 to merge the store-flags code with a potential test-and-branch
7172 pattern following (immediately!) afterwards. */
7173 if (output
!= vtarget
)
7174 emit_move_insn (vtarget
, output
);
7180 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181 btarget has already been initialized with 0 above. */
7182 cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7183 cond
= gen_rtx_EQ (VOIDmode
, cc
, const0_rtx
);
7184 ite
= gen_rtx_IF_THEN_ELSE (SImode
, cond
, const1_rtx
, btarget
);
7185 emit_insn (gen_rtx_SET (btarget
, ite
));
7191 cc
= gen_rtx_REG (CCZ1mode
, CC_REGNUM
);
7192 cond
= gen_rtx_EQ (SImode
, cc
, const0_rtx
);
7193 emit_insn (gen_cstorecc4 (btarget
, cond
, cc
, const0_rtx
));
7197 /* Expand an atomic compare and swap operation. MEM is the memory location,
7198 CMP the old value to compare MEM with and NEW_RTX the value to set if
/* NOTE(review): dispatcher -- the switch on MODE is elided by the lossy
   extraction, but the two visible calls show TI/DI/SI going to
   s390_expand_cs_tdsi and HI/QI to s390_expand_cs_hqi.  Code kept
   byte-identical.  */
7202 s390_expand_cs (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7203 rtx cmp
, rtx new_rtx
, bool is_weak
)
7210 s390_expand_cs_tdsi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7214 s390_expand_cs_hqi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7221 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
/* NOTE(review): lossy extraction -- the guard at 7231 (likely a z196 /
   load-and-* facility check), a mode test around 7236-7238, and the
   final compare-and-swap arguments are elided; code kept byte-identical.
   Fast path: exchanging in 0 becomes an atomic fetch-and-AND-0.  */
7226 s390_expand_atomic_exchange_tdsi (rtx output
, rtx mem
, rtx input
)
7228 machine_mode mode
= GET_MODE (mem
);
7229 rtx_code_label
*csloop
;
7232 && (mode
== DImode
|| mode
== SImode
)
7233 && CONST_INT_P (input
) && INTVAL (input
) == 0)
7235 emit_move_insn (output
, const0_rtx
);
7237 emit_insn (gen_atomic_fetch_anddi (output
, mem
, const0_rtx
, input
));
7239 emit_insn (gen_atomic_fetch_andsi (output
, mem
, const0_rtx
, input
));
/* General case: CS loop retrying until the stored old value matches.  */
7243 input
= force_reg (mode
, input
);
7244 emit_move_insn (output
, mem
);
7245 csloop
= gen_label_rtx ();
7246 emit_label (csloop
);
7247 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, output
, mem
, output
,
7251 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7252 and VAL the value to play with. If AFTER is true then store the value
7253 MEM holds after the operation, if AFTER is false then store the value MEM
7254 holds before the operation. If TARGET is zero then discard that value, else
7255 store it to TARGET. */
/* NOTE(review): lossy extraction -- the switch heads over CODE (around
   7292-7295 and 7311-7315), several case labels and the tail of the CS
   call are elided per the numbering gaps; code kept byte-identical.
   Simulates a sub-word atomic RMW (SET/PLUS/MINUS/IOR/XOR/AND/NAND --
   NAND encoded as MULT) with a full-word CS loop; AC describes the
   containing aligned SImode word.  */
7258 s390_expand_atomic (machine_mode mode
, enum rtx_code code
,
7259 rtx target
, rtx mem
, rtx val
, bool after
)
7261 struct alignment_context ac
;
7263 rtx new_rtx
= gen_reg_rtx (SImode
);
7264 rtx orig
= gen_reg_rtx (SImode
);
7265 rtx_code_label
*csloop
= gen_label_rtx ();
7267 gcc_assert (!target
|| register_operand (target
, VOIDmode
));
7268 gcc_assert (MEM_P (mem
));
7270 init_alignment_context (&ac
, mem
, mode
);
7272 /* Shift val to the correct bit positions.
7273 Preserve "icm", but prevent "ex icm". */
7274 if (!(ac
.aligned
&& code
== SET
&& MEM_P (val
)))
7275 val
= s390_expand_mask_and_shift (val
, mode
, ac
.shift
);
7277 /* Further preparation insns. */
7278 if (code
== PLUS
|| code
== MINUS
)
7279 emit_move_insn (orig
, val
);
7280 else if (code
== MULT
|| code
== AND
) /* val = "11..1<val>11..1" */
7281 val
= expand_simple_binop (SImode
, XOR
, val
, ac
.modemaski
,
7282 NULL_RTX
, 1, OPTAB_DIRECT
);
7284 /* Load full word. Subsequent loads are performed by CS. */
7285 cmp
= force_reg (SImode
, ac
.memsi
);
7287 /* Start CS loop. */
7288 emit_label (csloop
);
7289 emit_move_insn (new_rtx
, cmp
);
7291 /* Patch new with val at correct position. */
/* PLUS/MINUS must be recomputed from ORIG each iteration, then the
   result re-masked into the field.  */
7296 val
= expand_simple_binop (SImode
, code
, new_rtx
, orig
,
7297 NULL_RTX
, 1, OPTAB_DIRECT
);
7298 val
= expand_simple_binop (SImode
, AND
, val
, ac
.modemask
,
7299 NULL_RTX
, 1, OPTAB_DIRECT
);
7302 if (ac
.aligned
&& MEM_P (val
))
7303 store_bit_field (new_rtx
, GET_MODE_BITSIZE (mode
), 0,
7304 0, 0, SImode
, val
, false);
7307 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, ac
.modemaski
,
7308 NULL_RTX
, 1, OPTAB_DIRECT
);
7309 new_rtx
= expand_simple_binop (SImode
, IOR
, new_rtx
, val
,
7310 NULL_RTX
, 1, OPTAB_DIRECT
);
7316 new_rtx
= expand_simple_binop (SImode
, code
, new_rtx
, val
,
7317 NULL_RTX
, 1, OPTAB_DIRECT
);
7319 case MULT
: /* NAND */
7320 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, val
,
7321 NULL_RTX
, 1, OPTAB_DIRECT
);
7322 new_rtx
= expand_simple_binop (SImode
, XOR
, new_rtx
, ac
.modemask
,
7323 NULL_RTX
, 1, OPTAB_DIRECT
);
/* Retry while the word changed under us.  */
7329 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, cmp
,
7330 ac
.memsi
, cmp
, new_rtx
,
7333 /* Return the correct part of the bitfield. */
7335 convert_move (target
, expand_simple_binop (SImode
, LSHIFTRT
,
7336 after
? new_rtx
: cmp
, ac
.shift
,
7337 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7340 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341 We need to emit DTP-relative relocations. */
7343 static void s390_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
/* NOTE(review): lossy extraction -- the switch on SIZE (4 vs 8 bytes,
   presumably with a gcc_unreachable default) is elided; code kept
   byte-identical.  Emits ".long"/".quad <sym>@DTPOFF".  */
7346 s390_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7351 fputs ("\t.long\t", file
);
7354 fputs ("\t.quad\t", file
);
7359 output_addr_const (file
, x
);
7360 fputs ("@DTPOFF", file
);
7363 /* Return the proper mode for REGNO being represented in the dwarf
/* NOTE(review): lossy extraction -- the bodies of both visible ifs and
   the final return are elided; code kept byte-identical.  */
7366 s390_dwarf_frame_reg_mode (int regno
)
7368 machine_mode save_mode
= default_dwarf_frame_reg_mode (regno
);
7370 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7371 if (GENERAL_REGNO_P (regno
))
7374 /* The rightmost 64 bits of vector registers are call-clobbered. */
7375 if (GET_MODE_SIZE (save_mode
) > 8)
7381 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382 /* Implement TARGET_MANGLE_TYPE. */
/* NOTE(review): lossy extraction -- the returns for the early-out and
   the long-double case, plus the closing #endif, are elided; code kept
   byte-identical.  Gives the vector-bool builtin types and 128-bit long
   double their s390-specific C++ mangled names.  */
7385 s390_mangle_type (const_tree type
)
7387 type
= TYPE_MAIN_VARIANT (type
);
7389 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
7390 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
7393 if (type
== s390_builtin_types
[BT_BV16QI
]) return "U6__boolc";
7394 if (type
== s390_builtin_types
[BT_BV8HI
]) return "U6__bools";
7395 if (type
== s390_builtin_types
[BT_BV4SI
]) return "U6__booli";
7396 if (type
== s390_builtin_types
[BT_BV2DI
]) return "U6__booll";
7398 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
7399 && TARGET_LONG_DOUBLE_128
)
7402 /* For all other types, use normal C++ mangling. */
7407 /* In the name of slightly smaller debug output, and to cater to
7408 general assembler lossage, recognize various UNSPEC sequences
7409 and turn them back into a direct symbol reference. */
/* NOTE(review): lossy extraction -- the initial x/y declarations, the
   early returns and some intermediate assignments are elided per the
   numbering gaps; code kept byte-identical.  Strips
   UNSPEC_GOTOFF/PLTOFF/GOT/GOTENT/PLT wrappers to recover the bare
   SYMBOL_REF for debug output.  */
7412 s390_delegitimize_address (rtx orig_x
)
7416 orig_x
= delegitimize_mem_from_attrs (orig_x
);
7419 /* Extract the symbol ref from:
7420 (plus:SI (reg:SI 12 %r12)
7421 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422 UNSPEC_GOTOFF/PLTOFF)))
7424 (plus:SI (reg:SI 12 %r12)
7425 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426 UNSPEC_GOTOFF/PLTOFF)
7427 (const_int 4 [0x4])))) */
7428 if (GET_CODE (x
) == PLUS
7429 && REG_P (XEXP (x
, 0))
7430 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
7431 && GET_CODE (XEXP (x
, 1)) == CONST
)
7433 HOST_WIDE_INT offset
= 0;
7435 /* The const operand. */
7436 y
= XEXP (XEXP (x
, 1), 0);
7438 if (GET_CODE (y
) == PLUS
7439 && GET_CODE (XEXP (y
, 1)) == CONST_INT
)
7441 offset
= INTVAL (XEXP (y
, 1));
7445 if (GET_CODE (y
) == UNSPEC
7446 && (XINT (y
, 1) == UNSPEC_GOTOFF
7447 || XINT (y
, 1) == UNSPEC_PLTOFF
))
7448 return plus_constant (Pmode
, XVECEXP (y
, 0, 0), offset
);
7451 if (GET_CODE (x
) != MEM
)
7455 if (GET_CODE (x
) == PLUS
7456 && GET_CODE (XEXP (x
, 1)) == CONST
7457 && GET_CODE (XEXP (x
, 0)) == REG
7458 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7460 y
= XEXP (XEXP (x
, 1), 0);
7461 if (GET_CODE (y
) == UNSPEC
7462 && XINT (y
, 1) == UNSPEC_GOT
)
7463 y
= XVECEXP (y
, 0, 0);
7467 else if (GET_CODE (x
) == CONST
)
7469 /* Extract the symbol ref from:
7470 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471 UNSPEC_PLT/GOTENT))) */
7474 if (GET_CODE (y
) == UNSPEC
7475 && (XINT (y
, 1) == UNSPEC_GOTENT
7476 || XINT (y
, 1) == UNSPEC_PLT
))
7477 y
= XVECEXP (y
, 0, 0);
7484 if (GET_MODE (orig_x
) != Pmode
)
7486 if (GET_MODE (orig_x
) == BLKmode
)
7488 y
= lowpart_subreg (GET_MODE (orig_x
), y
, Pmode
);
7495 /* Output operand OP to stdio stream FILE.
7496 OP is an address (register + offset) which is not used to address data;
7497 instead the rightmost bits are interpreted as the value. */
/* NOTE(review): lossy extraction -- the `base`/error-path lines and the
   guards around the two fprintf calls are elided; code kept
   byte-identical.  Prints "offset(basereg)" in address style.  */
7500 print_addrstyle_operand (FILE *file
, rtx op
)
7502 HOST_WIDE_INT offset
;
7505 /* Extract base register and offset. */
7506 if (!s390_decompose_addrstyle_without_index (op
, &base
, &offset
))
7512 gcc_assert (GET_CODE (base
) == REG
);
7513 gcc_assert (REGNO (base
) < FIRST_PSEUDO_REGISTER
);
7514 gcc_assert (REGNO_REG_CLASS (REGNO (base
)) == ADDR_REGS
);
7517 /* Offsets are constricted to twelve bits. */
7518 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
& ((1 << 12) - 1));
7520 fprintf (file
, "(%s)", reg_names
[REGNO (base
)]);
7523 /* Print the shift count operand OP to FILE.
7524 OP is an address-style operand in a form which
7525 s390_valid_shift_count permits. Subregs and no-op
7526 and-masking of the operand are stripped. */
7529 print_shift_count_operand (FILE *file
, rtx op
)
7531 /* No checking of the and mask required here. */
7532 if (!s390_valid_shift_count (op
, 0))
7535 while (op
&& GET_CODE (op
) == SUBREG
)
7536 op
= SUBREG_REG (op
);
7538 if (GET_CODE (op
) == AND
)
7541 print_addrstyle_operand (file
, op
);
7544 /* Assigns the number of NOP halfwords to be emitted before and after the
7545 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7546 If hotpatching is disabled for the function, the values are set to zero.
7550 s390_function_num_hotpatch_hw (tree decl
,
7556 attr
= lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl
));
7558 /* Handle the arguments of the hotpatch attribute. The values
7559 specified via attribute might override the cmdline argument
7563 tree args
= TREE_VALUE (attr
);
7565 *hw_before
= TREE_INT_CST_LOW (TREE_VALUE (args
));
7566 *hw_after
= TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args
)));
7570 /* Use the values specified by the cmdline arguments. */
7571 *hw_before
= s390_hotpatch_hw_before_label
;
7572 *hw_after
= s390_hotpatch_hw_after_label
;
7576 /* Write the current .machine and .machinemode specification to the assembler
7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7581 s390_asm_output_machine_for_arch (FILE *asm_out_file
)
7583 fprintf (asm_out_file
, "\t.machinemode %s\n",
7584 (TARGET_ZARCH
) ? "zarch" : "esa");
7585 fprintf (asm_out_file
, "\t.machine \"%s",
7586 processor_table
[s390_arch
].binutils_name
);
7587 if (S390_USE_ARCHITECTURE_MODIFIERS
)
7591 cpu_flags
= processor_flags_table
[(int) s390_arch
];
7592 if (TARGET_HTM
&& !(cpu_flags
& PF_TX
))
7593 fprintf (asm_out_file
, "+htm");
7594 else if (!TARGET_HTM
&& (cpu_flags
& PF_TX
))
7595 fprintf (asm_out_file
, "+nohtm");
7596 if (TARGET_VX
&& !(cpu_flags
& PF_VX
))
7597 fprintf (asm_out_file
, "+vx");
7598 else if (!TARGET_VX
&& (cpu_flags
& PF_VX
))
7599 fprintf (asm_out_file
, "+novx");
7601 fprintf (asm_out_file
, "\"\n");
7604 /* Write an extra function header before the very start of the function. */
7607 s390_asm_output_function_prefix (FILE *asm_out_file
,
7608 const char *fnname ATTRIBUTE_UNUSED
)
7610 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
) == NULL
)
7612 /* Since only the function specific options are saved but not the indications
7613 which options are set, it's too much work here to figure out which options
7614 have actually changed. Thus, generate .machine and .machinemode whenever a
7615 function has the target attribute or pragma. */
7616 fprintf (asm_out_file
, "\t.machinemode push\n");
7617 fprintf (asm_out_file
, "\t.machine push\n");
7618 s390_asm_output_machine_for_arch (asm_out_file
);
7621 /* Write an extra function footer after the very end of the function. */
7624 s390_asm_declare_function_size (FILE *asm_out_file
,
7625 const char *fnname
, tree decl
)
7627 if (!flag_inhibit_size_directive
)
7628 ASM_OUTPUT_MEASURED_SIZE (asm_out_file
, fnname
);
7629 if (DECL_FUNCTION_SPECIFIC_TARGET (decl
) == NULL
)
7631 fprintf (asm_out_file
, "\t.machine pop\n");
7632 fprintf (asm_out_file
, "\t.machinemode pop\n");
7636 /* Write the extra assembler code needed to declare a function properly. */
7639 s390_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
7642 int hw_before
, hw_after
;
7644 s390_function_num_hotpatch_hw (decl
, &hw_before
, &hw_after
);
7647 unsigned int function_alignment
;
7650 /* Add a trampoline code area before the function label and initialize it
7651 with two-byte nop instructions. This area can be overwritten with code
7652 that jumps to a patched version of the function. */
7653 asm_fprintf (asm_out_file
, "\tnopr\t%%r0"
7654 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7656 for (i
= 1; i
< hw_before
; i
++)
7657 fputs ("\tnopr\t%r0\n", asm_out_file
);
7659 /* Note: The function label must be aligned so that (a) the bytes of the
7660 following nop do not cross a cacheline boundary, and (b) a jump address
7661 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7662 stored directly before the label without crossing a cacheline
7663 boundary. All this is necessary to make sure the trampoline code can
7664 be changed atomically.
7665 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7666 if there are NOPs before the function label, the alignment is placed
7667 before them. So it is necessary to duplicate the alignment after the
7669 function_alignment
= MAX (8, DECL_ALIGN (decl
) / BITS_PER_UNIT
);
7670 if (! DECL_USER_ALIGN (decl
))
7672 = MAX (function_alignment
,
7673 (unsigned int) align_functions
.levels
[0].get_value ());
7674 fputs ("\t# alignment for hotpatch\n", asm_out_file
);
7675 ASM_OUTPUT_ALIGN (asm_out_file
, align_functions
.levels
[0].log
);
7678 if (S390_USE_TARGET_ATTRIBUTE
&& TARGET_DEBUG_ARG
)
7680 asm_fprintf (asm_out_file
, "\t# fn:%s ar%d\n", fname
, s390_arch
);
7681 asm_fprintf (asm_out_file
, "\t# fn:%s tu%d\n", fname
, s390_tune
);
7682 asm_fprintf (asm_out_file
, "\t# fn:%s sg%d\n", fname
, s390_stack_guard
);
7683 asm_fprintf (asm_out_file
, "\t# fn:%s ss%d\n", fname
, s390_stack_size
);
7684 asm_fprintf (asm_out_file
, "\t# fn:%s bc%d\n", fname
, s390_branch_cost
);
7685 asm_fprintf (asm_out_file
, "\t# fn:%s wf%d\n", fname
,
7686 s390_warn_framesize
);
7687 asm_fprintf (asm_out_file
, "\t# fn:%s ba%d\n", fname
, TARGET_BACKCHAIN
);
7688 asm_fprintf (asm_out_file
, "\t# fn:%s hd%d\n", fname
, TARGET_HARD_DFP
);
7689 asm_fprintf (asm_out_file
, "\t# fn:%s hf%d\n", fname
, !TARGET_SOFT_FLOAT
);
7690 asm_fprintf (asm_out_file
, "\t# fn:%s ht%d\n", fname
, TARGET_OPT_HTM
);
7691 asm_fprintf (asm_out_file
, "\t# fn:%s vx%d\n", fname
, TARGET_OPT_VX
);
7692 asm_fprintf (asm_out_file
, "\t# fn:%s ps%d\n", fname
,
7693 TARGET_PACKED_STACK
);
7694 asm_fprintf (asm_out_file
, "\t# fn:%s se%d\n", fname
, TARGET_SMALL_EXEC
);
7695 asm_fprintf (asm_out_file
, "\t# fn:%s mv%d\n", fname
, TARGET_MVCLE
);
7696 asm_fprintf (asm_out_file
, "\t# fn:%s zv%d\n", fname
, TARGET_ZVECTOR
);
7697 asm_fprintf (asm_out_file
, "\t# fn:%s wd%d\n", fname
,
7698 s390_warn_dynamicstack_p
);
7700 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
7702 asm_fprintf (asm_out_file
,
7703 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7707 /* Output machine-dependent UNSPECs occurring in address constant X
7708 in assembler syntax to stdio stream FILE. Returns true if the
7709 constant X could be recognized, false otherwise. */
7712 s390_output_addr_const_extra (FILE *file
, rtx x
)
7714 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 1)
7715 switch (XINT (x
, 1))
7718 output_addr_const (file
, XVECEXP (x
, 0, 0));
7719 fprintf (file
, "@GOTENT");
7722 output_addr_const (file
, XVECEXP (x
, 0, 0));
7723 fprintf (file
, "@GOT");
7726 output_addr_const (file
, XVECEXP (x
, 0, 0));
7727 fprintf (file
, "@GOTOFF");
7730 output_addr_const (file
, XVECEXP (x
, 0, 0));
7731 fprintf (file
, "@PLT");
7734 output_addr_const (file
, XVECEXP (x
, 0, 0));
7735 fprintf (file
, "@PLTOFF");
7738 output_addr_const (file
, XVECEXP (x
, 0, 0));
7739 fprintf (file
, "@TLSGD");
7742 assemble_name (file
, get_some_local_dynamic_name ());
7743 fprintf (file
, "@TLSLDM");
7746 output_addr_const (file
, XVECEXP (x
, 0, 0));
7747 fprintf (file
, "@DTPOFF");
7750 output_addr_const (file
, XVECEXP (x
, 0, 0));
7751 fprintf (file
, "@NTPOFF");
7753 case UNSPEC_GOTNTPOFF
:
7754 output_addr_const (file
, XVECEXP (x
, 0, 0));
7755 fprintf (file
, "@GOTNTPOFF");
7757 case UNSPEC_INDNTPOFF
:
7758 output_addr_const (file
, XVECEXP (x
, 0, 0));
7759 fprintf (file
, "@INDNTPOFF");
7763 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 2)
7764 switch (XINT (x
, 1))
7766 case UNSPEC_POOL_OFFSET
:
7767 x
= gen_rtx_MINUS (GET_MODE (x
), XVECEXP (x
, 0, 0), XVECEXP (x
, 0, 1));
7768 output_addr_const (file
, x
);
7774 /* Output address operand ADDR in assembler syntax to
7775 stdio stream FILE. */
7778 print_operand_address (FILE *file
, rtx addr
)
7780 struct s390_address ad
;
7781 memset (&ad
, 0, sizeof (s390_address
));
7783 if (s390_loadrelative_operand_p (addr
, NULL
, NULL
))
7787 output_operand_lossage ("symbolic memory references are "
7788 "only supported on z10 or later");
7791 output_addr_const (file
, addr
);
7795 if (!s390_decompose_address (addr
, &ad
)
7796 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7797 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
7798 output_operand_lossage ("cannot decompose address");
7801 output_addr_const (file
, ad
.disp
);
7803 fprintf (file
, "0");
7805 if (ad
.base
&& ad
.indx
)
7806 fprintf (file
, "(%s,%s)", reg_names
[REGNO (ad
.indx
)],
7807 reg_names
[REGNO (ad
.base
)]);
7809 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
7812 /* Output operand X in assembler syntax to stdio stream FILE.
7813 CODE specified the format flag. The following format flags
7816 'A': On z14 or higher: If operand is a mem print the alignment
7817 hint usable with vl/vst prefixed by a comma.
7818 'C': print opcode suffix for branch condition.
7819 'D': print opcode suffix for inverse branch condition.
7820 'E': print opcode suffix for branch on index instruction.
7821 'G': print the size of the operand in bytes.
7822 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7823 'M': print the second word of a TImode operand.
7824 'N': print the second word of a DImode operand.
7825 'O': print only the displacement of a memory reference or address.
7826 'R': print only the base register of a memory reference or address.
7827 'S': print S-type memory reference (base+displacement).
7828 'Y': print address style operand without index (e.g. shift count or setmem
7831 'b': print integer X as if it's an unsigned byte.
7832 'c': print integer X as if it's an signed byte.
7833 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7834 'f': "end" contiguous bitmask X in SImode.
7835 'h': print integer X as if it's a signed halfword.
7836 'i': print the first nonzero HImode part of X.
7837 'j': print the first HImode part unequal to -1 of X.
7838 'k': print the first nonzero SImode part of X.
7839 'm': print the first SImode part unequal to -1 of X.
7840 'o': print integer X as if it's an unsigned 32bit word.
7841 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844 'x': print integer X as if it's an unsigned halfword.
7845 'v': print register number as vector register (v1 instead of f1).
7849 print_operand (FILE *file
, rtx x
, int code
)
7856 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS
&& MEM_P (x
))
7858 if (MEM_ALIGN (x
) >= 128)
7859 fprintf (file
, ",4");
7860 else if (MEM_ALIGN (x
) == 64)
7861 fprintf (file
, ",3");
7865 fprintf (file
, s390_branch_condition_mnemonic (x
, FALSE
));
7869 fprintf (file
, s390_branch_condition_mnemonic (x
, TRUE
));
7873 if (GET_CODE (x
) == LE
)
7874 fprintf (file
, "l");
7875 else if (GET_CODE (x
) == GT
)
7876 fprintf (file
, "h");
7878 output_operand_lossage ("invalid comparison operator "
7879 "for 'E' output modifier");
7883 if (GET_CODE (x
) == SYMBOL_REF
)
7885 fprintf (file
, "%s", ":tls_load:");
7886 output_addr_const (file
, x
);
7888 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
7890 fprintf (file
, "%s", ":tls_gdcall:");
7891 output_addr_const (file
, XVECEXP (x
, 0, 0));
7893 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSLDM
)
7895 fprintf (file
, "%s", ":tls_ldcall:");
7896 const char *name
= get_some_local_dynamic_name ();
7898 assemble_name (file
, name
);
7901 output_operand_lossage ("invalid reference for 'J' output modifier");
7905 fprintf (file
, "%u", GET_MODE_SIZE (GET_MODE (x
)));
7910 struct s390_address ad
;
7913 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7916 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7919 output_operand_lossage ("invalid address for 'O' output modifier");
7924 output_addr_const (file
, ad
.disp
);
7926 fprintf (file
, "0");
7932 struct s390_address ad
;
7935 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7938 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7941 output_operand_lossage ("invalid address for 'R' output modifier");
7946 fprintf (file
, "%s", reg_names
[REGNO (ad
.base
)]);
7948 fprintf (file
, "0");
7954 struct s390_address ad
;
7959 output_operand_lossage ("memory reference expected for "
7960 "'S' output modifier");
7963 ret
= s390_decompose_address (XEXP (x
, 0), &ad
);
7966 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7969 output_operand_lossage ("invalid address for 'S' output modifier");
7974 output_addr_const (file
, ad
.disp
);
7976 fprintf (file
, "0");
7979 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
7984 if (GET_CODE (x
) == REG
)
7985 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
7986 else if (GET_CODE (x
) == MEM
)
7987 x
= change_address (x
, VOIDmode
,
7988 plus_constant (Pmode
, XEXP (x
, 0), 4));
7990 output_operand_lossage ("register or memory expression expected "
7991 "for 'N' output modifier");
7995 if (GET_CODE (x
) == REG
)
7996 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
7997 else if (GET_CODE (x
) == MEM
)
7998 x
= change_address (x
, VOIDmode
,
7999 plus_constant (Pmode
, XEXP (x
, 0), 8));
8001 output_operand_lossage ("register or memory expression expected "
8002 "for 'M' output modifier");
8006 print_shift_count_operand (file
, x
);
8010 switch (GET_CODE (x
))
8013 /* Print FP regs as fx instead of vx when they are accessed
8014 through non-vector mode. */
8016 || VECTOR_NOFP_REG_P (x
)
8017 || (FP_REG_P (x
) && VECTOR_MODE_P (GET_MODE (x
)))
8018 || (VECTOR_REG_P (x
)
8019 && (GET_MODE_SIZE (GET_MODE (x
)) /
8020 s390_class_max_nregs (FP_REGS
, GET_MODE (x
))) > 8))
8021 fprintf (file
, "%%v%s", reg_names
[REGNO (x
)] + 2);
8023 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
8027 output_address (GET_MODE (x
), XEXP (x
, 0));
8034 output_addr_const (file
, x
);
8047 ival
= ((ival
& 0xff) ^ 0x80) - 0x80;
8053 ival
= ((ival
& 0xffff) ^ 0x8000) - 0x8000;
8056 ival
= s390_extract_part (x
, HImode
, 0);
8059 ival
= s390_extract_part (x
, HImode
, -1);
8062 ival
= s390_extract_part (x
, SImode
, 0);
8065 ival
= s390_extract_part (x
, SImode
, -1);
8077 len
= (code
== 's' || code
== 'e' ? 64 : 32);
8078 ok
= s390_contiguous_bitmask_p (ival
, true, len
, &start
, &end
);
8080 if (code
== 's' || code
== 't')
8087 output_operand_lossage ("invalid constant for output modifier '%c'", code
);
8089 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8092 case CONST_WIDE_INT
:
8094 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8095 CONST_WIDE_INT_ELT (x
, 0) & 0xff);
8096 else if (code
== 'x')
8097 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8098 CONST_WIDE_INT_ELT (x
, 0) & 0xffff);
8099 else if (code
== 'h')
8100 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8101 ((CONST_WIDE_INT_ELT (x
, 0) & 0xffff) ^ 0x8000) - 0x8000);
8105 output_operand_lossage ("invalid constant - try using "
8106 "an output modifier");
8108 output_operand_lossage ("invalid constant for output modifier '%c'",
8116 gcc_assert (const_vec_duplicate_p (x
));
8117 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
8118 ((INTVAL (XVECEXP (x
, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8126 ok
= s390_contiguous_bitmask_vector_p (x
, &start
, &end
);
8128 ival
= (code
== 's') ? start
: end
;
8129 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
8135 bool ok
= s390_bytemask_vector_p (x
, &mask
);
8137 fprintf (file
, "%u", mask
);
8142 output_operand_lossage ("invalid constant vector for output "
8143 "modifier '%c'", code
);
8149 output_operand_lossage ("invalid expression - try using "
8150 "an output modifier");
8152 output_operand_lossage ("invalid expression for output "
8153 "modifier '%c'", code
);
8158 /* Target hook for assembling integer objects. We need to define it
8159 here to work a round a bug in some versions of GAS, which couldn't
8160 handle values smaller than INT_MIN when printed in decimal. */
8163 s390_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
8165 if (size
== 8 && aligned_p
8166 && GET_CODE (x
) == CONST_INT
&& INTVAL (x
) < INT_MIN
)
8168 fprintf (asm_out_file
, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX
"\n",
8172 return default_assemble_integer (x
, size
, aligned_p
);
8175 /* Returns true if register REGNO is used for forming
8176 a memory address in expression X. */
8179 reg_used_in_mem_p (int regno
, rtx x
)
8181 enum rtx_code code
= GET_CODE (x
);
8187 if (refers_to_regno_p (regno
, XEXP (x
, 0)))
8190 else if (code
== SET
8191 && GET_CODE (SET_DEST (x
)) == PC
)
8193 if (refers_to_regno_p (regno
, SET_SRC (x
)))
8197 fmt
= GET_RTX_FORMAT (code
);
8198 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8201 && reg_used_in_mem_p (regno
, XEXP (x
, i
)))
8204 else if (fmt
[i
] == 'E')
8205 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8206 if (reg_used_in_mem_p (regno
, XVECEXP (x
, i
, j
)))
8212 /* Returns true if expression DEP_RTX sets an address register
8213 used by instruction INSN to address memory. */
8216 addr_generation_dependency_p (rtx dep_rtx
, rtx_insn
*insn
)
8220 if (NONJUMP_INSN_P (dep_rtx
))
8221 dep_rtx
= PATTERN (dep_rtx
);
8223 if (GET_CODE (dep_rtx
) == SET
)
8225 target
= SET_DEST (dep_rtx
);
8226 if (GET_CODE (target
) == STRICT_LOW_PART
)
8227 target
= XEXP (target
, 0);
8228 while (GET_CODE (target
) == SUBREG
)
8229 target
= SUBREG_REG (target
);
8231 if (GET_CODE (target
) == REG
)
8233 int regno
= REGNO (target
);
8235 if (s390_safe_attr_type (insn
) == TYPE_LA
)
8237 pat
= PATTERN (insn
);
8238 if (GET_CODE (pat
) == PARALLEL
)
8240 gcc_assert (XVECLEN (pat
, 0) == 2);
8241 pat
= XVECEXP (pat
, 0, 0);
8243 gcc_assert (GET_CODE (pat
) == SET
);
8244 return refers_to_regno_p (regno
, SET_SRC (pat
));
8246 else if (get_attr_atype (insn
) == ATYPE_AGEN
)
8247 return reg_used_in_mem_p (regno
, PATTERN (insn
));
8253 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8256 s390_agen_dep_p (rtx_insn
*dep_insn
, rtx_insn
*insn
)
8258 rtx dep_rtx
= PATTERN (dep_insn
);
8261 if (GET_CODE (dep_rtx
) == SET
8262 && addr_generation_dependency_p (dep_rtx
, insn
))
8264 else if (GET_CODE (dep_rtx
) == PARALLEL
)
8266 for (i
= 0; i
< XVECLEN (dep_rtx
, 0); i
++)
8268 if (addr_generation_dependency_p (XVECEXP (dep_rtx
, 0, i
), insn
))
8276 /* A C statement (sans semicolon) to update the integer scheduling priority
8277 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8278 reduce the priority to execute INSN later. Do not define this macro if
8279 you do not need to adjust the scheduling priorities of insns.
8281 A STD instruction should be scheduled earlier,
8282 in order to use the bypass. */
8284 s390_adjust_priority (rtx_insn
*insn
, int priority
)
8286 if (! INSN_P (insn
))
8289 if (s390_tune
<= PROCESSOR_2064_Z900
)
8292 switch (s390_safe_attr_type (insn
))
8296 priority
= priority
<< 3;
8300 priority
= priority
<< 1;
8309 /* The number of instructions that can be issued per cycle. */
8312 s390_issue_rate (void)
8316 case PROCESSOR_2084_Z990
:
8317 case PROCESSOR_2094_Z9_109
:
8318 case PROCESSOR_2094_Z9_EC
:
8319 case PROCESSOR_2817_Z196
:
8321 case PROCESSOR_2097_Z10
:
8323 case PROCESSOR_2064_Z900
:
8324 /* Starting with EC12 we use the sched_reorder hook to take care
8325 of instruction dispatch constraints. The algorithm only
8326 picks the best instruction and assumes only a single
8327 instruction gets issued per cycle. */
8328 case PROCESSOR_2827_ZEC12
:
8329 case PROCESSOR_2964_Z13
:
8330 case PROCESSOR_3906_Z14
:
/* Scheduler lookahead depth.  NOTE(review): the body of this function was
   lost in extraction; the value below matches the upstream GCC s390 back
   end — confirm against the original file.  */
static int
s390_first_cycle_multipass_dfa_lookahead (void)
{
  return 4;
}
8343 annotate_constant_pool_refs_1 (rtx
*x
)
8348 gcc_assert (GET_CODE (*x
) != SYMBOL_REF
8349 || !CONSTANT_POOL_ADDRESS_P (*x
));
8351 /* Literal pool references can only occur inside a MEM ... */
8352 if (GET_CODE (*x
) == MEM
)
8354 rtx memref
= XEXP (*x
, 0);
8356 if (GET_CODE (memref
) == SYMBOL_REF
8357 && CONSTANT_POOL_ADDRESS_P (memref
))
8359 rtx base
= cfun
->machine
->base_reg
;
8360 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, memref
, base
),
8363 *x
= replace_equiv_address (*x
, addr
);
8367 if (GET_CODE (memref
) == CONST
8368 && GET_CODE (XEXP (memref
, 0)) == PLUS
8369 && GET_CODE (XEXP (XEXP (memref
, 0), 1)) == CONST_INT
8370 && GET_CODE (XEXP (XEXP (memref
, 0), 0)) == SYMBOL_REF
8371 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref
, 0), 0)))
8373 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (memref
, 0), 1));
8374 rtx sym
= XEXP (XEXP (memref
, 0), 0);
8375 rtx base
= cfun
->machine
->base_reg
;
8376 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8379 *x
= replace_equiv_address (*x
, plus_constant (Pmode
, addr
, off
));
8384 /* ... or a load-address type pattern. */
8385 if (GET_CODE (*x
) == SET
)
8387 rtx addrref
= SET_SRC (*x
);
8389 if (GET_CODE (addrref
) == SYMBOL_REF
8390 && CONSTANT_POOL_ADDRESS_P (addrref
))
8392 rtx base
= cfun
->machine
->base_reg
;
8393 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addrref
, base
),
8396 SET_SRC (*x
) = addr
;
8400 if (GET_CODE (addrref
) == CONST
8401 && GET_CODE (XEXP (addrref
, 0)) == PLUS
8402 && GET_CODE (XEXP (XEXP (addrref
, 0), 1)) == CONST_INT
8403 && GET_CODE (XEXP (XEXP (addrref
, 0), 0)) == SYMBOL_REF
8404 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref
, 0), 0)))
8406 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (addrref
, 0), 1));
8407 rtx sym
= XEXP (XEXP (addrref
, 0), 0);
8408 rtx base
= cfun
->machine
->base_reg
;
8409 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8412 SET_SRC (*x
) = plus_constant (Pmode
, addr
, off
);
8417 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8418 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8422 annotate_constant_pool_refs_1 (&XEXP (*x
, i
));
8424 else if (fmt
[i
] == 'E')
8426 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8427 annotate_constant_pool_refs_1 (&XVECEXP (*x
, i
, j
));
8432 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433 Fix up MEMs as required.
8434 Skip insns which support relative addressing, because they do not use a base
8438 annotate_constant_pool_refs (rtx_insn
*insn
)
8440 if (s390_safe_relative_long_p (insn
))
8442 annotate_constant_pool_refs_1 (&PATTERN (insn
));
8446 find_constant_pool_ref_1 (rtx x
, rtx
*ref
)
8451 /* Likewise POOL_ENTRY insns. */
8452 if (GET_CODE (x
) == UNSPEC_VOLATILE
8453 && XINT (x
, 1) == UNSPECV_POOL_ENTRY
)
8456 gcc_assert (GET_CODE (x
) != SYMBOL_REF
8457 || !CONSTANT_POOL_ADDRESS_P (x
));
8459 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_LTREF
)
8461 rtx sym
= XVECEXP (x
, 0, 0);
8462 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
8463 && CONSTANT_POOL_ADDRESS_P (sym
));
8465 if (*ref
== NULL_RTX
)
8468 gcc_assert (*ref
== sym
);
8473 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8474 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8478 find_constant_pool_ref_1 (XEXP (x
, i
), ref
);
8480 else if (fmt
[i
] == 'E')
8482 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8483 find_constant_pool_ref_1 (XVECEXP (x
, i
, j
), ref
);
8488 /* Find an annotated literal pool symbol referenced in INSN,
8489 and store it at REF. Will abort if INSN contains references to
8490 more than one such pool symbol; multiple references to the same
8491 symbol are allowed, however.
8493 The rtx pointed to by REF must be initialized to NULL_RTX
8494 by the caller before calling this routine.
8496 Skip insns which support relative addressing, because they do not use a base
8500 find_constant_pool_ref (rtx_insn
*insn
, rtx
*ref
)
8502 if (s390_safe_relative_long_p (insn
))
8504 find_constant_pool_ref_1 (PATTERN (insn
), ref
);
8508 replace_constant_pool_ref_1 (rtx
*x
, rtx ref
, rtx offset
)
8513 gcc_assert (*x
!= ref
);
8515 if (GET_CODE (*x
) == UNSPEC
8516 && XINT (*x
, 1) == UNSPEC_LTREF
8517 && XVECEXP (*x
, 0, 0) == ref
)
8519 *x
= gen_rtx_PLUS (Pmode
, XVECEXP (*x
, 0, 1), offset
);
8523 if (GET_CODE (*x
) == PLUS
8524 && GET_CODE (XEXP (*x
, 1)) == CONST_INT
8525 && GET_CODE (XEXP (*x
, 0)) == UNSPEC
8526 && XINT (XEXP (*x
, 0), 1) == UNSPEC_LTREF
8527 && XVECEXP (XEXP (*x
, 0), 0, 0) == ref
)
8529 rtx addr
= gen_rtx_PLUS (Pmode
, XVECEXP (XEXP (*x
, 0), 0, 1), offset
);
8530 *x
= plus_constant (Pmode
, addr
, INTVAL (XEXP (*x
, 1)));
8534 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8535 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8539 replace_constant_pool_ref_1 (&XEXP (*x
, i
), ref
, offset
);
8541 else if (fmt
[i
] == 'E')
8543 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8544 replace_constant_pool_ref_1 (&XVECEXP (*x
, i
, j
), ref
, offset
);
8549 /* Replace every reference to the annotated literal pool
8550 symbol REF in INSN by its base plus OFFSET.
8551 Skip insns which support relative addressing, because they do not use a base
8555 replace_constant_pool_ref (rtx_insn
*insn
, rtx ref
, rtx offset
)
8557 if (s390_safe_relative_long_p (insn
))
8559 replace_constant_pool_ref_1 (&PATTERN (insn
), ref
, offset
);
8562 /* We keep a list of constants which we have to add to internal
8563 constant tables in the middle of large functions. */
8565 #define NR_C_MODES 32
8566 machine_mode constant_modes
[NR_C_MODES
] =
8568 TFmode
, TImode
, TDmode
,
8569 V16QImode
, V8HImode
, V4SImode
, V2DImode
, V1TImode
,
8570 V4SFmode
, V2DFmode
, V1TFmode
,
8571 DFmode
, DImode
, DDmode
,
8572 V8QImode
, V4HImode
, V2SImode
, V1DImode
, V2SFmode
, V1DFmode
,
8573 SFmode
, SImode
, SDmode
,
8574 V4QImode
, V2HImode
, V1SImode
, V1SFmode
,
8583 struct constant
*next
;
8585 rtx_code_label
*label
;
8588 struct constant_pool
8590 struct constant_pool
*next
;
8591 rtx_insn
*first_insn
;
8592 rtx_insn
*pool_insn
;
8594 rtx_insn
*emit_pool_after
;
8596 struct constant
*constants
[NR_C_MODES
];
8597 struct constant
*execute
;
8598 rtx_code_label
*label
;
8602 /* Allocate new constant_pool structure. */
8604 static struct constant_pool
*
8605 s390_alloc_pool (void)
8607 struct constant_pool
*pool
;
8610 pool
= (struct constant_pool
*) xmalloc (sizeof *pool
);
8612 for (i
= 0; i
< NR_C_MODES
; i
++)
8613 pool
->constants
[i
] = NULL
;
8615 pool
->execute
= NULL
;
8616 pool
->label
= gen_label_rtx ();
8617 pool
->first_insn
= NULL
;
8618 pool
->pool_insn
= NULL
;
8619 pool
->insns
= BITMAP_ALLOC (NULL
);
8621 pool
->emit_pool_after
= NULL
;
8626 /* Create new constant pool covering instructions starting at INSN
8627 and chain it to the end of POOL_LIST. */
8629 static struct constant_pool
*
8630 s390_start_pool (struct constant_pool
**pool_list
, rtx_insn
*insn
)
8632 struct constant_pool
*pool
, **prev
;
8634 pool
= s390_alloc_pool ();
8635 pool
->first_insn
= insn
;
8637 for (prev
= pool_list
; *prev
; prev
= &(*prev
)->next
)
8644 /* End range of instructions covered by POOL at INSN and emit
8645 placeholder insn representing the pool. */
8648 s390_end_pool (struct constant_pool
*pool
, rtx_insn
*insn
)
8650 rtx pool_size
= GEN_INT (pool
->size
+ 8 /* alignment slop */);
8653 insn
= get_last_insn ();
8655 pool
->pool_insn
= emit_insn_after (gen_pool (pool_size
), insn
);
8656 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
8659 /* Add INSN to the list of insns covered by POOL. */
8662 s390_add_pool_insn (struct constant_pool
*pool
, rtx insn
)
8664 bitmap_set_bit (pool
->insns
, INSN_UID (insn
));
8667 /* Return pool out of POOL_LIST that covers INSN. */
8669 static struct constant_pool
*
8670 s390_find_pool (struct constant_pool
*pool_list
, rtx insn
)
8672 struct constant_pool
*pool
;
8674 for (pool
= pool_list
; pool
; pool
= pool
->next
)
8675 if (bitmap_bit_p (pool
->insns
, INSN_UID (insn
)))
8681 /* Add constant VAL of mode MODE to the constant pool POOL. */
8684 s390_add_constant (struct constant_pool
*pool
, rtx val
, machine_mode mode
)
8689 for (i
= 0; i
< NR_C_MODES
; i
++)
8690 if (constant_modes
[i
] == mode
)
8692 gcc_assert (i
!= NR_C_MODES
);
8694 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8695 if (rtx_equal_p (val
, c
->value
))
8700 c
= (struct constant
*) xmalloc (sizeof *c
);
8702 c
->label
= gen_label_rtx ();
8703 c
->next
= pool
->constants
[i
];
8704 pool
->constants
[i
] = c
;
8705 pool
->size
+= GET_MODE_SIZE (mode
);
8709 /* Return an rtx that represents the offset of X from the start of
8713 s390_pool_offset (struct constant_pool
*pool
, rtx x
)
8717 label
= gen_rtx_LABEL_REF (GET_MODE (x
), pool
->label
);
8718 x
= gen_rtx_UNSPEC (GET_MODE (x
), gen_rtvec (2, x
, label
),
8719 UNSPEC_POOL_OFFSET
);
8720 return gen_rtx_CONST (GET_MODE (x
), x
);
8723 /* Find constant VAL of mode MODE in the constant pool POOL.
8724 Return an RTX describing the distance from the start of
8725 the pool to the location of the new constant. */
8728 s390_find_constant (struct constant_pool
*pool
, rtx val
,
8734 for (i
= 0; i
< NR_C_MODES
; i
++)
8735 if (constant_modes
[i
] == mode
)
8737 gcc_assert (i
!= NR_C_MODES
);
8739 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8740 if (rtx_equal_p (val
, c
->value
))
8745 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
8748 /* Check whether INSN is an execute. Return the label_ref to its
8749 execute target template if so, NULL_RTX otherwise. */
8752 s390_execute_label (rtx insn
)
8755 && GET_CODE (PATTERN (insn
)) == PARALLEL
8756 && GET_CODE (XVECEXP (PATTERN (insn
), 0, 0)) == UNSPEC
8757 && (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
8758 || XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE_JUMP
))
8760 if (XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
)
8761 return XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 2);
8764 gcc_assert (JUMP_P (insn
));
8765 /* For jump insns as execute target:
8766 - There is one operand less in the parallel (the
8767 modification register of the execute is always 0).
8768 - The execute target label is wrapped into an
8769 if_then_else in order to hide it from jump analysis. */
8770 return XEXP (XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 0), 0);
8777 /* Find execute target for INSN in the constant pool POOL.
8778 Return an RTX describing the distance from the start of
8779 the pool to the location of the execute target. */
8782 s390_find_execute (struct constant_pool
*pool
, rtx insn
)
8786 for (c
= pool
->execute
; c
!= NULL
; c
= c
->next
)
8787 if (INSN_UID (insn
) == INSN_UID (c
->value
))
8792 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
8795 /* For an execute INSN, extract the execute target template. */
8798 s390_execute_target (rtx insn
)
8800 rtx pattern
= PATTERN (insn
);
8801 gcc_assert (s390_execute_label (insn
));
8803 if (XVECLEN (pattern
, 0) == 2)
8805 pattern
= copy_rtx (XVECEXP (pattern
, 0, 1));
8809 rtvec vec
= rtvec_alloc (XVECLEN (pattern
, 0) - 1);
8812 for (i
= 0; i
< XVECLEN (pattern
, 0) - 1; i
++)
8813 RTVEC_ELT (vec
, i
) = copy_rtx (XVECEXP (pattern
, 0, i
+ 1));
8815 pattern
= gen_rtx_PARALLEL (VOIDmode
, vec
);
8821 /* Indicate that INSN cannot be duplicated. This is the case for
8822 execute insns that carry a unique label. */
8825 s390_cannot_copy_insn_p (rtx_insn
*insn
)
8827 rtx label
= s390_execute_label (insn
);
8828 return label
&& label
!= const0_rtx
;
8831 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8832 do not emit the pool base label. */
8835 s390_dump_pool (struct constant_pool
*pool
, bool remote_label
)
8838 rtx_insn
*insn
= pool
->pool_insn
;
8841 /* Switch to rodata section. */
8842 insn
= emit_insn_after (gen_pool_section_start (), insn
);
8843 INSN_ADDRESSES_NEW (insn
, -1);
8845 /* Ensure minimum pool alignment. */
8846 insn
= emit_insn_after (gen_pool_align (GEN_INT (8)), insn
);
8847 INSN_ADDRESSES_NEW (insn
, -1);
8849 /* Emit pool base label. */
8852 insn
= emit_label_after (pool
->label
, insn
);
8853 INSN_ADDRESSES_NEW (insn
, -1);
8856 /* Dump constants in descending alignment requirement order,
8857 ensuring proper alignment for every constant. */
8858 for (i
= 0; i
< NR_C_MODES
; i
++)
8859 for (c
= pool
->constants
[i
]; c
; c
= c
->next
)
8861 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8862 rtx value
= copy_rtx (c
->value
);
8863 if (GET_CODE (value
) == CONST
8864 && GET_CODE (XEXP (value
, 0)) == UNSPEC
8865 && XINT (XEXP (value
, 0), 1) == UNSPEC_LTREL_OFFSET
8866 && XVECLEN (XEXP (value
, 0), 0) == 1)
8867 value
= s390_pool_offset (pool
, XVECEXP (XEXP (value
, 0), 0, 0));
8869 insn
= emit_label_after (c
->label
, insn
);
8870 INSN_ADDRESSES_NEW (insn
, -1);
8872 value
= gen_rtx_UNSPEC_VOLATILE (constant_modes
[i
],
8873 gen_rtvec (1, value
),
8874 UNSPECV_POOL_ENTRY
);
8875 insn
= emit_insn_after (value
, insn
);
8876 INSN_ADDRESSES_NEW (insn
, -1);
8879 /* Ensure minimum alignment for instructions. */
8880 insn
= emit_insn_after (gen_pool_align (GEN_INT (2)), insn
);
8881 INSN_ADDRESSES_NEW (insn
, -1);
8883 /* Output in-pool execute template insns. */
8884 for (c
= pool
->execute
; c
; c
= c
->next
)
8886 insn
= emit_label_after (c
->label
, insn
);
8887 INSN_ADDRESSES_NEW (insn
, -1);
8889 insn
= emit_insn_after (s390_execute_target (c
->value
), insn
);
8890 INSN_ADDRESSES_NEW (insn
, -1);
8893 /* Switch back to previous section. */
8894 insn
= emit_insn_after (gen_pool_section_end (), insn
);
8895 INSN_ADDRESSES_NEW (insn
, -1);
8897 insn
= emit_barrier_after (insn
);
8898 INSN_ADDRESSES_NEW (insn
, -1);
8900 /* Remove placeholder insn. */
8901 remove_insn (pool
->pool_insn
);
8904 /* Free all memory used by POOL. */
8907 s390_free_pool (struct constant_pool
*pool
)
8909 struct constant
*c
, *next
;
8912 for (i
= 0; i
< NR_C_MODES
; i
++)
8913 for (c
= pool
->constants
[i
]; c
; c
= next
)
8919 for (c
= pool
->execute
; c
; c
= next
)
8925 BITMAP_FREE (pool
->insns
);
8930 /* Collect main literal pool. Return NULL on overflow. */
8932 static struct constant_pool
*
8933 s390_mainpool_start (void)
8935 struct constant_pool
*pool
;
8938 pool
= s390_alloc_pool ();
8940 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8942 if (NONJUMP_INSN_P (insn
)
8943 && GET_CODE (PATTERN (insn
)) == SET
8944 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC_VOLATILE
8945 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPECV_MAIN_POOL
)
8947 /* There might be two main_pool instructions if base_reg
8948 is call-clobbered; one for shrink-wrapped code and one
8949 for the rest. We want to keep the first. */
8950 if (pool
->pool_insn
)
8952 insn
= PREV_INSN (insn
);
8953 delete_insn (NEXT_INSN (insn
));
8956 pool
->pool_insn
= insn
;
8959 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
8961 rtx pool_ref
= NULL_RTX
;
8962 find_constant_pool_ref (insn
, &pool_ref
);
8965 rtx constant
= get_pool_constant (pool_ref
);
8966 machine_mode mode
= get_pool_mode (pool_ref
);
8967 s390_add_constant (pool
, constant
, mode
);
8971 /* If hot/cold partitioning is enabled we have to make sure that
8972 the literal pool is emitted in the same section where the
8973 initialization of the literal pool base pointer takes place.
8974 emit_pool_after is only used in the non-overflow case on non
8975 Z cpus where we can emit the literal pool at the end of the
8976 function body within the text section. */
8978 && NOTE_KIND (insn
) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979 && !pool
->emit_pool_after
)
8980 pool
->emit_pool_after
= PREV_INSN (insn
);
8983 gcc_assert (pool
->pool_insn
|| pool
->size
== 0);
8985 if (pool
->size
>= 4096)
8987 /* We're going to chunkify the pool, so remove the main
8988 pool placeholder insn. */
8989 remove_insn (pool
->pool_insn
);
8991 s390_free_pool (pool
);
8995 /* If the functions ends with the section where the literal pool
8996 should be emitted set the marker to its end. */
8997 if (pool
&& !pool
->emit_pool_after
)
8998 pool
->emit_pool_after
= get_last_insn ();
9003 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9004 Modify the current function to output the pool constants as well as
9005 the pool register setup instruction. */
9008 s390_mainpool_finish (struct constant_pool
*pool
)
9010 rtx base_reg
= cfun
->machine
->base_reg
;
9014 /* If the pool is empty, we're done. */
9015 if (pool
->size
== 0)
9017 /* We don't actually need a base register after all. */
9018 cfun
->machine
->base_reg
= NULL_RTX
;
9020 if (pool
->pool_insn
)
9021 remove_insn (pool
->pool_insn
);
9022 s390_free_pool (pool
);
9026 /* We need correct insn addresses. */
9027 shorten_branches (get_insns ());
9029 /* Use a LARL to load the pool register. The pool is
9030 located in the .rodata section, so we emit it after the function. */
9031 set
= gen_main_base_64 (base_reg
, pool
->label
);
9032 insn
= emit_insn_after (set
, pool
->pool_insn
);
9033 INSN_ADDRESSES_NEW (insn
, -1);
9034 remove_insn (pool
->pool_insn
);
9036 insn
= get_last_insn ();
9037 pool
->pool_insn
= emit_insn_after (gen_pool (const0_rtx
), insn
);
9038 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9040 s390_dump_pool (pool
, 0);
9042 /* Replace all literal pool references. */
9044 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9046 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9048 rtx addr
, pool_ref
= NULL_RTX
;
9049 find_constant_pool_ref (insn
, &pool_ref
);
9052 if (s390_execute_label (insn
))
9053 addr
= s390_find_execute (pool
, insn
);
9055 addr
= s390_find_constant (pool
, get_pool_constant (pool_ref
),
9056 get_pool_mode (pool_ref
));
9058 replace_constant_pool_ref (insn
, pool_ref
, addr
);
9059 INSN_CODE (insn
) = -1;
9065 /* Free the pool. */
9066 s390_free_pool (pool
);
9069 /* Chunkify the literal pool. */
9071 #define S390_POOL_CHUNK_MIN 0xc00
9072 #define S390_POOL_CHUNK_MAX 0xe00
9074 static struct constant_pool
*
9075 s390_chunkify_start (void)
9077 struct constant_pool
*curr_pool
= NULL
, *pool_list
= NULL
;
9081 /* We need correct insn addresses. */
9083 shorten_branches (get_insns ());
9085 /* Scan all insns and move literals to pool chunks. */
9087 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9089 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9091 rtx pool_ref
= NULL_RTX
;
9092 find_constant_pool_ref (insn
, &pool_ref
);
9095 rtx constant
= get_pool_constant (pool_ref
);
9096 machine_mode mode
= get_pool_mode (pool_ref
);
9099 curr_pool
= s390_start_pool (&pool_list
, insn
);
9101 s390_add_constant (curr_pool
, constant
, mode
);
9102 s390_add_pool_insn (curr_pool
, insn
);
9106 if (JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
) || LABEL_P (insn
))
9109 s390_add_pool_insn (curr_pool
, insn
);
9112 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_VAR_LOCATION
)
9116 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn
)
9117 || INSN_ADDRESSES (INSN_UID (insn
)) == -1)
9120 if (curr_pool
->size
< S390_POOL_CHUNK_MAX
)
9123 s390_end_pool (curr_pool
, NULL
);
9128 s390_end_pool (curr_pool
, NULL
);
9130 /* Find all labels that are branched into
9131 from an insn belonging to a different chunk. */
9133 far_labels
= BITMAP_ALLOC (NULL
);
9135 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9137 rtx_jump_table_data
*table
;
9139 /* Labels marked with LABEL_PRESERVE_P can be target
9140 of non-local jumps, so we have to mark them.
9141 The same holds for named labels.
9143 Don't do that, however, if it is the label before
9147 && (LABEL_PRESERVE_P (insn
) || LABEL_NAME (insn
)))
9149 rtx_insn
*vec_insn
= NEXT_INSN (insn
);
9150 if (! vec_insn
|| ! JUMP_TABLE_DATA_P (vec_insn
))
9151 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (insn
));
9153 /* Check potential targets in a table jump (casesi_jump). */
9154 else if (tablejump_p (insn
, NULL
, &table
))
9156 rtx vec_pat
= PATTERN (table
);
9157 int i
, diff_p
= GET_CODE (vec_pat
) == ADDR_DIFF_VEC
;
9159 for (i
= 0; i
< XVECLEN (vec_pat
, diff_p
); i
++)
9161 rtx label
= XEXP (XVECEXP (vec_pat
, diff_p
, i
), 0);
9163 if (s390_find_pool (pool_list
, label
)
9164 != s390_find_pool (pool_list
, insn
))
9165 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9168 /* If we have a direct jump (conditional or unconditional),
9169 check all potential targets. */
9170 else if (JUMP_P (insn
))
9172 rtx pat
= PATTERN (insn
);
9174 if (GET_CODE (pat
) == PARALLEL
)
9175 pat
= XVECEXP (pat
, 0, 0);
9177 if (GET_CODE (pat
) == SET
)
9179 rtx label
= JUMP_LABEL (insn
);
9180 if (label
&& !ANY_RETURN_P (label
))
9182 if (s390_find_pool (pool_list
, label
)
9183 != s390_find_pool (pool_list
, insn
))
9184 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9190 /* Insert base register reload insns before every pool. */
9192 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9194 rtx new_insn
= gen_reload_base_64 (cfun
->machine
->base_reg
,
9196 rtx_insn
*insn
= curr_pool
->first_insn
;
9197 INSN_ADDRESSES_NEW (emit_insn_before (new_insn
, insn
), -1);
9200 /* Insert base register reload insns at every far label. */
9202 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9204 && bitmap_bit_p (far_labels
, CODE_LABEL_NUMBER (insn
)))
9206 struct constant_pool
*pool
= s390_find_pool (pool_list
, insn
);
9209 rtx new_insn
= gen_reload_base_64 (cfun
->machine
->base_reg
,
9211 INSN_ADDRESSES_NEW (emit_insn_after (new_insn
, insn
), -1);
9216 BITMAP_FREE (far_labels
);
9219 /* Recompute insn addresses. */
9221 init_insn_lengths ();
9222 shorten_branches (get_insns ());
9227 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228 After we have decided to use this list, finish implementing
9229 all changes to the current function as required. */
9232 s390_chunkify_finish (struct constant_pool
*pool_list
)
9234 struct constant_pool
*curr_pool
= NULL
;
9238 /* Replace all literal pool references. */
9240 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9242 curr_pool
= s390_find_pool (pool_list
, insn
);
9246 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9248 rtx addr
, pool_ref
= NULL_RTX
;
9249 find_constant_pool_ref (insn
, &pool_ref
);
9252 if (s390_execute_label (insn
))
9253 addr
= s390_find_execute (curr_pool
, insn
);
9255 addr
= s390_find_constant (curr_pool
,
9256 get_pool_constant (pool_ref
),
9257 get_pool_mode (pool_ref
));
9259 replace_constant_pool_ref (insn
, pool_ref
, addr
);
9260 INSN_CODE (insn
) = -1;
9265 /* Dump out all literal pools. */
9267 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9268 s390_dump_pool (curr_pool
, 0);
9270 /* Free pool list. */
9274 struct constant_pool
*next
= pool_list
->next
;
9275 s390_free_pool (pool_list
);
9280 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9283 s390_output_pool_entry (rtx exp
, machine_mode mode
, unsigned int align
)
9285 switch (GET_MODE_CLASS (mode
))
9288 case MODE_DECIMAL_FLOAT
:
9289 gcc_assert (GET_CODE (exp
) == CONST_DOUBLE
);
9291 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp
),
9292 as_a
<scalar_float_mode
> (mode
), align
);
9296 assemble_integer (exp
, GET_MODE_SIZE (mode
), align
, 1);
9297 mark_symbol_refs_as_used (exp
);
9300 case MODE_VECTOR_INT
:
9301 case MODE_VECTOR_FLOAT
:
9304 machine_mode inner_mode
;
9305 gcc_assert (GET_CODE (exp
) == CONST_VECTOR
);
9307 inner_mode
= GET_MODE_INNER (GET_MODE (exp
));
9308 for (i
= 0; i
< XVECLEN (exp
, 0); i
++)
9309 s390_output_pool_entry (XVECEXP (exp
, 0, i
),
9313 : GET_MODE_BITSIZE (inner_mode
));
9323 /* Return an RTL expression representing the value of the return address
9324 for the frame COUNT steps up from the current frame. FRAME is the
9325 frame pointer of that frame. */
9328 s390_return_addr_rtx (int count
, rtx frame ATTRIBUTE_UNUSED
)
9333 /* Without backchain, we fail for all but the current frame. */
9335 if (!TARGET_BACKCHAIN
&& count
> 0)
9338 /* For the current frame, we need to make sure the initial
9339 value of RETURN_REGNUM is actually saved. */
9342 return get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
);
9344 if (TARGET_PACKED_STACK
)
9345 offset
= -2 * UNITS_PER_LONG
;
9347 offset
= RETURN_REGNUM
* UNITS_PER_LONG
;
9349 addr
= plus_constant (Pmode
, frame
, offset
);
9350 addr
= memory_address (Pmode
, addr
);
9351 return gen_rtx_MEM (Pmode
, addr
);
9354 /* Return an RTL expression representing the back chain stored in
9355 the current stack frame. */
9358 s390_back_chain_rtx (void)
9362 gcc_assert (TARGET_BACKCHAIN
);
9364 if (TARGET_PACKED_STACK
)
9365 chain
= plus_constant (Pmode
, stack_pointer_rtx
,
9366 STACK_POINTER_OFFSET
- UNITS_PER_LONG
);
9368 chain
= stack_pointer_rtx
;
9370 chain
= gen_rtx_MEM (Pmode
, chain
);
/* Find first call clobbered register unused in a function.
   This could be used as base register in a leaf function
   or for holding the return address before epilogue.  */

static int
find_unused_clobbered_reg (void)
{
  int i;

  /* r0..r5 are the call-clobbered GPRs; return the first one that is
     never live, or 0 if all are in use.  */
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return 0;
}
9389 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9390 clobbered hard regs in SETREG. */
9393 s390_reg_clobbered_rtx (rtx setreg
, const_rtx set_insn ATTRIBUTE_UNUSED
, void *data
)
9395 char *regs_ever_clobbered
= (char *)data
;
9396 unsigned int i
, regno
;
9397 machine_mode mode
= GET_MODE (setreg
);
9399 if (GET_CODE (setreg
) == SUBREG
)
9401 rtx inner
= SUBREG_REG (setreg
);
9402 if (!GENERAL_REG_P (inner
) && !FP_REG_P (inner
))
9404 regno
= subreg_regno (setreg
);
9406 else if (GENERAL_REG_P (setreg
) || FP_REG_P (setreg
))
9407 regno
= REGNO (setreg
);
9412 i
< end_hard_regno (mode
, regno
);
9414 regs_ever_clobbered
[i
] = 1;
9417 /* Walks through all basic blocks of the current function looking
9418 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9419 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9420 each of those regs. */
9423 s390_regs_ever_clobbered (char regs_ever_clobbered
[])
9429 memset (regs_ever_clobbered
, 0, 32);
9431 /* For non-leaf functions we have to consider all call clobbered regs to be
9435 for (i
= 0; i
< 32; i
++)
9436 regs_ever_clobbered
[i
] = call_used_regs
[i
];
9439 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9440 this work is done by liveness analysis (mark_regs_live_at_end).
9441 Special care is needed for functions containing landing pads. Landing pads
9442 may use the eh registers, but the code which sets these registers is not
9443 contained in that function. Hence s390_regs_ever_clobbered is not able to
9444 deal with this automatically. */
9445 if (crtl
->calls_eh_return
|| cfun
->machine
->has_landing_pad_p
)
9446 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; i
++)
9447 if (crtl
->calls_eh_return
9448 || (cfun
->machine
->has_landing_pad_p
9449 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i
))))
9450 regs_ever_clobbered
[EH_RETURN_DATA_REGNO (i
)] = 1;
9452 /* For nonlocal gotos all call-saved registers have to be saved.
9453 This flag is also set for the unwinding code in libgcc.
9454 See expand_builtin_unwind_init. For regs_ever_live this is done by
9456 if (crtl
->saves_all_registers
)
9457 for (i
= 0; i
< 32; i
++)
9458 if (!call_used_regs
[i
])
9459 regs_ever_clobbered
[i
] = 1;
9461 FOR_EACH_BB_FN (cur_bb
, cfun
)
9463 FOR_BB_INSNS (cur_bb
, cur_insn
)
9467 if (!INSN_P (cur_insn
))
9470 pat
= PATTERN (cur_insn
);
9472 /* Ignore GPR restore insns. */
9473 if (epilogue_completed
&& RTX_FRAME_RELATED_P (cur_insn
))
9475 if (GET_CODE (pat
) == SET
9476 && GENERAL_REG_P (SET_DEST (pat
)))
9479 if (GET_MODE (SET_SRC (pat
)) == DImode
9480 && FP_REG_P (SET_SRC (pat
)))
9484 if (GET_CODE (SET_SRC (pat
)) == MEM
)
9489 if (GET_CODE (pat
) == PARALLEL
9490 && load_multiple_operation (pat
, VOIDmode
))
9494 note_stores (cur_insn
,
9495 s390_reg_clobbered_rtx
,
9496 regs_ever_clobbered
);
9501 /* Determine the frame area which actually has to be accessed
9502 in the function epilogue. The values are stored at the
9503 given pointers AREA_BOTTOM (address of the lowest used stack
9504 address) and AREA_TOP (address of the first item which does
9505 not belong to the stack frame). */
9508 s390_frame_area (int *area_bottom
, int *area_top
)
9515 if (cfun_frame_layout
.first_restore_gpr
!= -1)
9517 b
= (cfun_frame_layout
.gprs_offset
9518 + cfun_frame_layout
.first_restore_gpr
* UNITS_PER_LONG
);
9519 t
= b
+ (cfun_frame_layout
.last_restore_gpr
9520 - cfun_frame_layout
.first_restore_gpr
+ 1) * UNITS_PER_LONG
;
9523 if (TARGET_64BIT
&& cfun_save_high_fprs_p
)
9525 b
= MIN (b
, cfun_frame_layout
.f8_offset
);
9526 t
= MAX (t
, (cfun_frame_layout
.f8_offset
9527 + cfun_frame_layout
.high_fprs
* 8));
9532 if (cfun_fpr_save_p (FPR4_REGNUM
))
9534 b
= MIN (b
, cfun_frame_layout
.f4_offset
);
9535 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 8);
9537 if (cfun_fpr_save_p (FPR6_REGNUM
))
9539 b
= MIN (b
, cfun_frame_layout
.f4_offset
+ 8);
9540 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 16);
9546 /* Update gpr_save_slots in the frame layout trying to make use of
9547 FPRs as GPR save slots.
9548 This is a helper routine of s390_register_info. */
9551 s390_register_info_gprtofpr ()
9553 int save_reg_slot
= FPR0_REGNUM
;
9556 if (TARGET_TPF
|| !TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
9559 /* builtin_eh_return needs to be able to modify the return address
9560 on the stack. It could also adjust the FPR save slot instead but
9561 is it worth the trouble?! */
9562 if (crtl
->calls_eh_return
)
9565 for (i
= 15; i
>= 6; i
--)
9567 if (cfun_gpr_save_slot (i
) == SAVE_SLOT_NONE
)
9570 /* Advance to the next FP register which can be used as a
9572 while ((!call_used_regs
[save_reg_slot
]
9573 || df_regs_ever_live_p (save_reg_slot
)
9574 || cfun_fpr_save_p (save_reg_slot
))
9575 && FP_REGNO_P (save_reg_slot
))
9577 if (!FP_REGNO_P (save_reg_slot
))
9579 /* We only want to use ldgr/lgdr if we can get rid of
9580 stm/lm entirely. So undo the gpr slot allocation in
9581 case we ran out of FPR save slots. */
9582 for (j
= 6; j
<= 15; j
++)
9583 if (FP_REGNO_P (cfun_gpr_save_slot (j
)))
9584 cfun_gpr_save_slot (j
) = SAVE_SLOT_STACK
;
9587 cfun_gpr_save_slot (i
) = save_reg_slot
++;
9591 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9593 This is a helper routine for s390_register_info. */
9596 s390_register_info_stdarg_fpr ()
9602 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9603 f0-f4 for 64 bit. */
9605 || !TARGET_HARD_FLOAT
9606 || !cfun
->va_list_fpr_size
9607 || crtl
->args
.info
.fprs
>= FP_ARG_NUM_REG
)
9610 min_fpr
= crtl
->args
.info
.fprs
;
9611 max_fpr
= min_fpr
+ cfun
->va_list_fpr_size
- 1;
9612 if (max_fpr
>= FP_ARG_NUM_REG
)
9613 max_fpr
= FP_ARG_NUM_REG
- 1;
9615 /* FPR argument regs start at f0. */
9616 min_fpr
+= FPR0_REGNUM
;
9617 max_fpr
+= FPR0_REGNUM
;
9619 for (i
= min_fpr
; i
<= max_fpr
; i
++)
9620 cfun_set_fpr_save (i
);
9623 /* Reserve the GPR save slots for GPRs which need to be saved due to
9625 This is a helper routine for s390_register_info. */
9628 s390_register_info_stdarg_gpr ()
9635 || !cfun
->va_list_gpr_size
9636 || crtl
->args
.info
.gprs
>= GP_ARG_NUM_REG
)
9639 min_gpr
= crtl
->args
.info
.gprs
;
9640 max_gpr
= min_gpr
+ cfun
->va_list_gpr_size
- 1;
9641 if (max_gpr
>= GP_ARG_NUM_REG
)
9642 max_gpr
= GP_ARG_NUM_REG
- 1;
9644 /* GPR argument regs start at r2. */
9645 min_gpr
+= GPR2_REGNUM
;
9646 max_gpr
+= GPR2_REGNUM
;
9648 /* If r6 was supposed to be saved into an FPR and now needs to go to
9649 the stack for vararg we have to adjust the restore range to make
9650 sure that the restore is done from stack as well. */
9651 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM
))
9652 && min_gpr
<= GPR6_REGNUM
9653 && max_gpr
>= GPR6_REGNUM
)
9655 if (cfun_frame_layout
.first_restore_gpr
== -1
9656 || cfun_frame_layout
.first_restore_gpr
> GPR6_REGNUM
)
9657 cfun_frame_layout
.first_restore_gpr
= GPR6_REGNUM
;
9658 if (cfun_frame_layout
.last_restore_gpr
== -1
9659 || cfun_frame_layout
.last_restore_gpr
< GPR6_REGNUM
)
9660 cfun_frame_layout
.last_restore_gpr
= GPR6_REGNUM
;
9663 if (cfun_frame_layout
.first_save_gpr
== -1
9664 || cfun_frame_layout
.first_save_gpr
> min_gpr
)
9665 cfun_frame_layout
.first_save_gpr
= min_gpr
;
9667 if (cfun_frame_layout
.last_save_gpr
== -1
9668 || cfun_frame_layout
.last_save_gpr
< max_gpr
)
9669 cfun_frame_layout
.last_save_gpr
= max_gpr
;
9671 for (i
= min_gpr
; i
<= max_gpr
; i
++)
9672 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
9675 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676 prologue and epilogue. */
9679 s390_register_info_set_ranges ()
9683 /* Find the first and the last save slot supposed to use the stack
9684 to set the restore range.
9685 Vararg regs might be marked as save to stack but only the
9686 call-saved regs really need restoring (i.e. r6). This code
9687 assumes that the vararg regs have not yet been recorded in
9688 cfun_gpr_save_slot. */
9689 for (i
= 0; i
< 16 && cfun_gpr_save_slot (i
) != SAVE_SLOT_STACK
; i
++);
9690 for (j
= 15; j
> i
&& cfun_gpr_save_slot (j
) != SAVE_SLOT_STACK
; j
--);
9691 cfun_frame_layout
.first_restore_gpr
= (i
== 16) ? -1 : i
;
9692 cfun_frame_layout
.last_restore_gpr
= (i
== 16) ? -1 : j
;
9693 cfun_frame_layout
.first_save_gpr
= (i
== 16) ? -1 : i
;
9694 cfun_frame_layout
.last_save_gpr
= (i
== 16) ? -1 : j
;
9697 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698 for registers which need to be saved in function prologue.
9699 This function can be used until the insns emitted for save/restore
9700 of the regs are visible in the RTL stream. */
9703 s390_register_info ()
9706 char clobbered_regs
[32];
9708 gcc_assert (!epilogue_completed
);
9710 if (reload_completed
)
9711 /* After reload we rely on our own routine to determine which
9712 registers need saving. */
9713 s390_regs_ever_clobbered (clobbered_regs
);
9715 /* During reload we use regs_ever_live as a base since reload
9716 does changes in there which we otherwise would not be aware
9718 for (i
= 0; i
< 32; i
++)
9719 clobbered_regs
[i
] = df_regs_ever_live_p (i
);
9721 for (i
= 0; i
< 32; i
++)
9722 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_regs
[i
];
9724 /* Mark the call-saved FPRs which need to be saved.
9725 This needs to be done before checking the special GPRs since the
9726 stack pointer usage depends on whether high FPRs have to be saved
9728 cfun_frame_layout
.fpr_bitmap
= 0;
9729 cfun_frame_layout
.high_fprs
= 0;
9730 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
9731 if (clobbered_regs
[i
] && !call_used_regs
[i
])
9733 cfun_set_fpr_save (i
);
9734 if (i
>= FPR8_REGNUM
)
9735 cfun_frame_layout
.high_fprs
++;
9738 /* Register 12 is used for GOT address, but also as temp in prologue
9739 for split-stack stdarg functions (unless r14 is available). */
9741 |= ((flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
9742 || (flag_split_stack
&& cfun
->stdarg
9743 && (crtl
->is_leaf
|| TARGET_TPF_PROFILING
9744 || has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
))));
9746 clobbered_regs
[BASE_REGNUM
]
9747 |= (cfun
->machine
->base_reg
9748 && REGNO (cfun
->machine
->base_reg
) == BASE_REGNUM
);
9750 clobbered_regs
[HARD_FRAME_POINTER_REGNUM
]
9751 |= !!frame_pointer_needed
;
9753 /* On pre z900 machines this might take until machine dependent
9755 save_return_addr_p will only be set on non-zarch machines so
9756 there is no risk that r14 goes into an FPR instead of a stack
9758 clobbered_regs
[RETURN_REGNUM
]
9760 || TARGET_TPF_PROFILING
9761 || cfun_frame_layout
.save_return_addr_p
9762 || crtl
->calls_eh_return
);
9764 clobbered_regs
[STACK_POINTER_REGNUM
]
9766 || TARGET_TPF_PROFILING
9767 || cfun_save_high_fprs_p
9768 || get_frame_size () > 0
9769 || (reload_completed
&& cfun_frame_layout
.frame_size
> 0)
9770 || cfun
->calls_alloca
);
9772 memset (cfun_frame_layout
.gpr_save_slots
, SAVE_SLOT_NONE
, 16);
9774 for (i
= 6; i
< 16; i
++)
9775 if (clobbered_regs
[i
])
9776 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
9778 s390_register_info_stdarg_fpr ();
9779 s390_register_info_gprtofpr ();
9780 s390_register_info_set_ranges ();
9781 /* stdarg functions might need to save GPRs 2 to 6. This might
9782 override the GPR->FPR save decision made by
9783 s390_register_info_gprtofpr for r6 since vararg regs must go to
9785 s390_register_info_stdarg_gpr ();
9788 /* Return true if REGNO is a global register, but not one
9789 of the special ones that need to be saved/restored in anyway. */
9792 global_not_special_regno_p (int regno
)
9794 return (global_regs
[regno
]
9795 /* These registers are special and need to be
9796 restored in any case. */
9797 && !(regno
== STACK_POINTER_REGNUM
9798 || regno
== RETURN_REGNUM
9799 || regno
== BASE_REGNUM
9800 || (flag_pic
&& regno
== (int)PIC_OFFSET_TABLE_REGNUM
)));
9803 /* This function is called by s390_optimize_prologue in order to get
9804 rid of unnecessary GPR save/restore instructions. The register info
9805 for the GPRs is re-computed and the ranges are re-calculated. */
9808 s390_optimize_register_info ()
9810 char clobbered_regs
[32];
9813 gcc_assert (epilogue_completed
);
9815 s390_regs_ever_clobbered (clobbered_regs
);
9817 /* Global registers do not need to be saved and restored unless it
9818 is one of our special regs. (r12, r13, r14, or r15). */
9819 for (i
= 0; i
< 32; i
++)
9820 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_not_special_regno_p (i
);
9822 /* There is still special treatment needed for cases invisible to
9823 s390_regs_ever_clobbered. */
9824 clobbered_regs
[RETURN_REGNUM
]
9825 |= (TARGET_TPF_PROFILING
9826 /* When expanding builtin_return_addr in ESA mode we do not
9827 know whether r14 will later be needed as scratch reg when
9828 doing branch splitting. So the builtin always accesses the
9829 r14 save slot and we need to stick to the save/restore
9830 decision for r14 even if it turns out that it didn't get
9832 || cfun_frame_layout
.save_return_addr_p
9833 || crtl
->calls_eh_return
);
9835 memset (cfun_frame_layout
.gpr_save_slots
, SAVE_SLOT_NONE
, 6);
9837 for (i
= 6; i
< 16; i
++)
9838 if (!clobbered_regs
[i
])
9839 cfun_gpr_save_slot (i
) = SAVE_SLOT_NONE
;
9841 s390_register_info_set_ranges ();
9842 s390_register_info_stdarg_gpr ();
9845 /* Fill cfun->machine with info about frame of current function. */
9848 s390_frame_info (void)
9850 HOST_WIDE_INT lowest_offset
;
9852 cfun_frame_layout
.first_save_gpr_slot
= cfun_frame_layout
.first_save_gpr
;
9853 cfun_frame_layout
.last_save_gpr_slot
= cfun_frame_layout
.last_save_gpr
;
9855 /* The va_arg builtin uses a constant distance of 16 *
9856 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857 pointer. So even if we are going to save the stack pointer in an
9858 FPR we need the stack space in order to keep the offsets
9860 if (cfun
->stdarg
&& cfun_save_arg_fprs_p
)
9862 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
9864 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
9865 cfun_frame_layout
.first_save_gpr_slot
= STACK_POINTER_REGNUM
;
9868 cfun_frame_layout
.frame_size
= get_frame_size ();
9869 if (!TARGET_64BIT
&& cfun_frame_layout
.frame_size
> 0x7fff0000)
9870 fatal_error (input_location
,
9871 "total size of local variables exceeds architecture limit");
9873 if (!TARGET_PACKED_STACK
)
9875 /* Fixed stack layout. */
9876 cfun_frame_layout
.backchain_offset
= 0;
9877 cfun_frame_layout
.f0_offset
= 16 * UNITS_PER_LONG
;
9878 cfun_frame_layout
.f4_offset
= cfun_frame_layout
.f0_offset
+ 2 * 8;
9879 cfun_frame_layout
.f8_offset
= -cfun_frame_layout
.high_fprs
* 8;
9880 cfun_frame_layout
.gprs_offset
= (cfun_frame_layout
.first_save_gpr_slot
9883 else if (TARGET_BACKCHAIN
)
9885 /* Kernel stack layout - packed stack, backchain, no float */
9886 gcc_assert (TARGET_SOFT_FLOAT
);
9887 cfun_frame_layout
.backchain_offset
= (STACK_POINTER_OFFSET
9890 /* The distance between the backchain and the return address
9891 save slot must not change. So we always need a slot for the
9892 stack pointer which resides in between. */
9893 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
9895 cfun_frame_layout
.gprs_offset
9896 = cfun_frame_layout
.backchain_offset
- cfun_gprs_save_area_size
;
9898 /* FPRs will not be saved. Nevertheless pick sane values to
9899 keep area calculations valid. */
9900 cfun_frame_layout
.f0_offset
=
9901 cfun_frame_layout
.f4_offset
=
9902 cfun_frame_layout
.f8_offset
= cfun_frame_layout
.gprs_offset
;
9908 /* Packed stack layout without backchain. */
9910 /* With stdarg FPRs need their dedicated slots. */
9911 num_fprs
= (TARGET_64BIT
&& cfun
->stdarg
? 2
9912 : (cfun_fpr_save_p (FPR4_REGNUM
) +
9913 cfun_fpr_save_p (FPR6_REGNUM
)));
9914 cfun_frame_layout
.f4_offset
= STACK_POINTER_OFFSET
- 8 * num_fprs
;
9916 num_fprs
= (cfun
->stdarg
? 2
9917 : (cfun_fpr_save_p (FPR0_REGNUM
)
9918 + cfun_fpr_save_p (FPR2_REGNUM
)));
9919 cfun_frame_layout
.f0_offset
= cfun_frame_layout
.f4_offset
- 8 * num_fprs
;
9921 cfun_frame_layout
.gprs_offset
9922 = cfun_frame_layout
.f0_offset
- cfun_gprs_save_area_size
;
9924 cfun_frame_layout
.f8_offset
= (cfun_frame_layout
.gprs_offset
9925 - cfun_frame_layout
.high_fprs
* 8);
9928 if (cfun_save_high_fprs_p
)
9929 cfun_frame_layout
.frame_size
+= cfun_frame_layout
.high_fprs
* 8;
9932 cfun_frame_layout
.frame_size
+= crtl
->outgoing_args_size
;
9934 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935 sized area at the bottom of the stack. This is required also for
9936 leaf functions. When GCC generates a local stack reference it
9937 will always add STACK_POINTER_OFFSET to all these references. */
9939 && !TARGET_TPF_PROFILING
9940 && cfun_frame_layout
.frame_size
== 0
9941 && !cfun
->calls_alloca
)
9944 /* Calculate the number of bytes we have used in our own register
9945 save area. With the packed stack layout we can re-use the
9946 remaining bytes for normal stack elements. */
9948 if (TARGET_PACKED_STACK
)
9949 lowest_offset
= MIN (MIN (cfun_frame_layout
.f0_offset
,
9950 cfun_frame_layout
.f4_offset
),
9951 cfun_frame_layout
.gprs_offset
);
9955 if (TARGET_BACKCHAIN
)
9956 lowest_offset
= MIN (lowest_offset
, cfun_frame_layout
.backchain_offset
);
9958 cfun_frame_layout
.frame_size
+= STACK_POINTER_OFFSET
- lowest_offset
;
9960 /* If under 31 bit an odd number of gprs has to be saved we have to
9961 adjust the frame size to sustain 8 byte alignment of stack
9963 cfun_frame_layout
.frame_size
= ((cfun_frame_layout
.frame_size
+
9964 STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
9965 & ~(STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
9968 /* Generate frame layout. Fills in register and frame data for the current
9969 function in cfun->machine. This routine can be called multiple times;
9970 it will re-do the complete frame layout every time. */
9973 s390_init_frame_layout (void)
9975 HOST_WIDE_INT frame_size
;
9978 /* After LRA the frame layout is supposed to be read-only and should
9979 not be re-computed. */
9980 if (reload_completed
)
9985 frame_size
= cfun_frame_layout
.frame_size
;
9987 /* Try to predict whether we'll need the base register. */
9988 base_used
= crtl
->uses_const_pool
9989 || (!DISP_IN_RANGE (frame_size
)
9990 && !CONST_OK_FOR_K (frame_size
));
9992 /* Decide which register to use as literal pool base. In small
9993 leaf functions, try to use an unused call-clobbered register
9994 as base register to avoid save/restore overhead. */
9996 cfun
->machine
->base_reg
= NULL_RTX
;
10002 /* Prefer r5 (most likely to be free). */
10003 for (br
= 5; br
>= 2 && df_regs_ever_live_p (br
); br
--)
10005 cfun
->machine
->base_reg
=
10006 gen_rtx_REG (Pmode
, (br
>= 2) ? br
: BASE_REGNUM
);
10009 s390_register_info ();
10010 s390_frame_info ();
10012 while (frame_size
!= cfun_frame_layout
.frame_size
);
10015 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10016 the TX is nonescaping. A transaction is considered escaping if
10017 there is at least one path from tbegin returning CC0 to the
10018 function exit block without an tend.
10020 The check so far has some limitations:
10021 - only single tbegin/tend BBs are supported
10022 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10023 - when CC is copied to a GPR and the CC0 check is done with the GPR
10024 this is not supported
/* NOTE(review): this text is extraction-garbled; identifiers below are split
   across lines and several short statements (returns/braces) appear to have
   been lost.  Comments added only; no token changed.  */
10028 s390_optimize_nonescaping_tx (void)
/* CC0 is bit 3 of the 4-bit condition-code mask used by the compare insn.  */
10030 const unsigned int CC0
= 1 << 3;
10031 basic_block tbegin_bb
= NULL
;
10032 basic_block tend_bb
= NULL
;
10035 bool result
= true;
10037 rtx_insn
*tbegin_insn
= NULL
;
/* Bail out early when the current function contains no tbegin at all.  */
10039 if (!cfun
->machine
->tbegin_p
)
10042 for (bb_index
= 0; bb_index
< n_basic_blocks_for_fn (cfun
); bb_index
++)
10044 bb
= BASIC_BLOCK_FOR_FN (cfun
, bb_index
);
10049 FOR_BB_INSNS (bb
, insn
)
10051 rtx ite
, cc
, pat
, target
;
10052 unsigned HOST_WIDE_INT mask
;
10054 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
10057 pat
= PATTERN (insn
);
/* A tbegin with clobbers is a PARALLEL; look at its first element.  */
10059 if (GET_CODE (pat
) == PARALLEL
)
10060 pat
= XVECEXP (pat
, 0, 0);
10062 if (GET_CODE (pat
) != SET
10063 || GET_CODE (SET_SRC (pat
)) != UNSPEC_VOLATILE
)
10066 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TBEGIN
)
10070 tbegin_insn
= insn
;
10072 /* Just return if the tbegin doesn't have clobbers. */
10073 if (GET_CODE (PATTERN (insn
)) != PARALLEL
)
10076 if (tbegin_bb
!= NULL
)
10079 /* Find the next conditional jump. */
10080 for (tmp
= NEXT_INSN (insn
);
10082 tmp
= NEXT_INSN (tmp
))
/* Any intervening CC set would defeat the CC0-path analysis.  */
10084 if (reg_set_p (gen_rtx_REG (CCmode
, CC_REGNUM
), tmp
))
10089 ite
= SET_SRC (PATTERN (tmp
));
10090 if (GET_CODE (ite
) != IF_THEN_ELSE
)
10093 cc
= XEXP (XEXP (ite
, 0), 0);
10094 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
))
10095 || GET_MODE (cc
) != CCRAWmode
10096 || GET_CODE (XEXP (XEXP (ite
, 0), 1)) != CONST_INT
)
10099 if (bb
->succs
->length () != 2)
/* The jump must test exactly the CC0 bit or its complement.  */
10102 mask
= INTVAL (XEXP (XEXP (ite
, 0), 1));
10103 if (GET_CODE (XEXP (ite
, 0)) == NE
)
10107 target
= XEXP (ite
, 1);
10108 else if (mask
== (CC0
^ 0xf))
10109 target
= XEXP (ite
, 2);
/* Classify the two successor edges: e1 becomes the fallthru edge.  */
10117 ei
= ei_start (bb
->succs
);
10118 e1
= ei_safe_edge (ei
);
10120 e2
= ei_safe_edge (ei
);
10122 if (e2
->flags
& EDGE_FALLTHRU
)
10125 e1
= ei_safe_edge (ei
);
10128 if (!(e1
->flags
& EDGE_FALLTHRU
))
/* tbegin_bb is the block reached when tbegin returned CC0.  */
10131 tbegin_bb
= (target
== pc_rtx
) ? e1
->dest
: e2
->dest
;
10133 if (tmp
== BB_END (bb
))
10138 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TEND
)
10140 if (tend_bb
!= NULL
)
10147 /* Either we successfully remove the FPR clobbers here or we are not
10148 able to do anything for this TX. Both cases don't qualify for
10150 cfun
->machine
->tbegin_p
= false;
10152 if (tbegin_bb
== NULL
|| tend_bb
== NULL
)
/* Nonescaping iff every path from the CC0 block reaches the tend block.  */
10155 calculate_dominance_info (CDI_POST_DOMINATORS
);
10156 result
= dominated_by_p (CDI_POST_DOMINATORS
, tbegin_bb
, tend_bb
);
10157 free_dominance_info (CDI_POST_DOMINATORS
);
/* Rebuild the tbegin PARALLEL keeping only the first two elements,
   thereby dropping the FPR clobbers.  */
10162 PATTERN (tbegin_insn
) = gen_rtx_PARALLEL (VOIDmode
,
10164 XVECEXP (PATTERN (tbegin_insn
), 0, 0),
10165 XVECEXP (PATTERN (tbegin_insn
), 0, 1)));
10166 INSN_CODE (tbegin_insn
) = -1;
10167 df_insn_rescan (tbegin_insn
);
10172 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10173 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10175 static unsigned int
10176 s390_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
10178 return s390_class_max_nregs (REGNO_REG_CLASS (regno
), mode
);
10181 /* Implement TARGET_HARD_REGNO_MODE_OK.
10183 Integer modes <= word size fit into any GPR.
10184 Integer modes > word size fit into successive GPRs, starting with
10185 an even-numbered register.
10186 SImode and DImode fit into FPRs as well.
10188 Floating point modes <= word size fit into any FPR or GPR.
10189 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190 into any FPR, or an even-odd GPR pair.
10191 TFmode fits only into an even-odd FPR pair.
10193 Complex floating point modes fit either into two FPRs, or into
10194 successive GPRs (again starting with an even number).
10195 TCmode fits only into two successive even-odd FPR pairs.
10197 Condition code modes fit only into the CC register. */
10200 s390_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10202 if (!TARGET_VX
&& VECTOR_NOFP_REGNO_P (regno
))
10205 switch (REGNO_REG_CLASS (regno
))
10208 return ((GET_MODE_CLASS (mode
) == MODE_INT
10209 && s390_class_max_nregs (VEC_REGS
, mode
) == 1)
10211 || (TARGET_VXE
&& mode
== SFmode
)
10212 || s390_vector_mode_supported_p (mode
));
10216 && ((GET_MODE_CLASS (mode
) == MODE_INT
10217 && s390_class_max_nregs (FP_REGS
, mode
) == 1)
10219 || s390_vector_mode_supported_p (mode
)))
10222 if (REGNO_PAIR_OK (regno
, mode
))
10224 if (mode
== SImode
|| mode
== DImode
)
10227 if (FLOAT_MODE_P (mode
) && GET_MODE_CLASS (mode
) != MODE_VECTOR_FLOAT
)
10232 if (FRAME_REGNO_P (regno
) && mode
== Pmode
)
10237 if (REGNO_PAIR_OK (regno
, mode
))
10240 || (mode
!= TFmode
&& mode
!= TCmode
&& mode
!= TDmode
))
10245 if (GET_MODE_CLASS (mode
) == MODE_CC
)
10249 if (REGNO_PAIR_OK (regno
, mode
))
10251 if (mode
== SImode
|| mode
== Pmode
)
10262 /* Implement TARGET_MODES_TIEABLE_P. */
10265 s390_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10267 return ((mode1
== SFmode
|| mode1
== DFmode
)
10268 == (mode2
== SFmode
|| mode2
== DFmode
));
10271 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10274 s390_hard_regno_rename_ok (unsigned int old_reg
, unsigned int new_reg
)
10276 /* Once we've decided upon a register to use as base register, it must
10277 no longer be used for any other purpose. */
10278 if (cfun
->machine
->base_reg
)
10279 if (REGNO (cfun
->machine
->base_reg
) == old_reg
10280 || REGNO (cfun
->machine
->base_reg
) == new_reg
)
10283 /* Prevent regrename from using call-saved regs which haven't
10284 actually been saved. This is necessary since regrename assumes
10285 the backend save/restore decisions are based on
10286 df_regs_ever_live. Since we have our own routine we have to tell
10287 regrename manually about it. */
10288 if (GENERAL_REGNO_P (new_reg
)
10289 && !call_used_regs
[new_reg
]
10290 && cfun_gpr_save_slot (new_reg
) == SAVE_SLOT_NONE
)
10296 /* Return nonzero if register REGNO can be used as a scratch register
10300 s390_hard_regno_scratch_ok (unsigned int regno
)
10302 /* See s390_hard_regno_rename_ok. */
10303 if (GENERAL_REGNO_P (regno
)
10304 && !call_used_regs
[regno
]
10305 && cfun_gpr_save_slot (regno
) == SAVE_SLOT_NONE
)
10311 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10312 code that runs in z/Architecture mode, but conforms to the 31-bit
10313 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314 bytes are saved across calls, however. */
10317 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
10322 && GET_MODE_SIZE (mode
) > 4
10323 && ((regno
>= 6 && regno
<= 15) || regno
== 32))
10327 && GET_MODE_SIZE (mode
) > 8
10328 && (((TARGET_64BIT
&& regno
>= 24 && regno
<= 31))
10329 || (!TARGET_64BIT
&& (regno
== 18 || regno
== 19))))
10335 /* Maximum number of registers to represent a value of mode MODE
10336 in a register of class RCLASS. */
10339 s390_class_max_nregs (enum reg_class rclass
, machine_mode mode
)
10342 bool reg_pair_required_p
= false;
10348 reg_size
= TARGET_VX
? 16 : 8;
10350 /* TF and TD modes would fit into a VR but we put them into a
10351 register pair since we do not have 128bit FP instructions on
10354 && SCALAR_FLOAT_MODE_P (mode
)
10355 && GET_MODE_SIZE (mode
) >= 16)
10356 reg_pair_required_p
= true;
10358 /* Even if complex types would fit into a single FPR/VR we force
10359 them into a register pair to deal with the parts more easily.
10360 (FIXME: What about complex ints?) */
10361 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
10362 reg_pair_required_p
= true;
10368 reg_size
= UNITS_PER_WORD
;
10372 if (reg_pair_required_p
)
10373 return 2 * ((GET_MODE_SIZE (mode
) / 2 + reg_size
- 1) / reg_size
);
10375 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
10378 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10381 s390_can_change_mode_class (machine_mode from_mode
,
10382 machine_mode to_mode
,
10383 reg_class_t rclass
)
10385 machine_mode small_mode
;
10386 machine_mode big_mode
;
10388 /* V1TF and TF have different representations in vector
10390 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10391 && ((from_mode
== V1TFmode
&& to_mode
== TFmode
)
10392 || (from_mode
== TFmode
&& to_mode
== V1TFmode
)))
10395 if (GET_MODE_SIZE (from_mode
) == GET_MODE_SIZE (to_mode
))
10398 if (GET_MODE_SIZE (from_mode
) < GET_MODE_SIZE (to_mode
))
10400 small_mode
= from_mode
;
10401 big_mode
= to_mode
;
10405 small_mode
= to_mode
;
10406 big_mode
= from_mode
;
10409 /* Values residing in VRs are little-endian style. All modes are
10410 placed left-aligned in an VR. This means that we cannot allow
10411 switching between modes with differing sizes. Also if the vector
10412 facility is available we still place TFmode values in VR register
10413 pairs, since the only instructions we have operating on TFmodes
10414 only deal with register pairs. Therefore we have to allow DFmode
10415 subregs of TFmodes to enable the TFmode splitters. */
10416 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10417 && (GET_MODE_SIZE (small_mode
) < 8
10418 || s390_class_max_nregs (VEC_REGS
, big_mode
) == 1))
10421 /* Likewise for access registers, since they have only half the
10422 word size on 64-bit. */
10423 if (reg_classes_intersect_p (ACCESS_REGS
, rclass
))
10429 /* Return true if we use LRA instead of reload pass. */
10433 return s390_lra_flag
;
10436 /* Return true if register FROM can be eliminated via register TO. */
10439 s390_can_eliminate (const int from
, const int to
)
10441 /* We have not marked the base register as fixed.
10442 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443 If a function requires the base register, we say here that this
10444 elimination cannot be performed. This will cause reload to free
10445 up the base register (as if it were fixed). On the other hand,
10446 if the current function does *not* require the base register, we
10447 say here the elimination succeeds, which in turn allows reload
10448 to allocate the base register for any other purpose. */
10449 if (from
== BASE_REGNUM
&& to
== BASE_REGNUM
)
10451 s390_init_frame_layout ();
10452 return cfun
->machine
->base_reg
== NULL_RTX
;
10455 /* Everything else must point into the stack frame. */
10456 gcc_assert (to
== STACK_POINTER_REGNUM
10457 || to
== HARD_FRAME_POINTER_REGNUM
);
10459 gcc_assert (from
== FRAME_POINTER_REGNUM
10460 || from
== ARG_POINTER_REGNUM
10461 || from
== RETURN_ADDRESS_POINTER_REGNUM
);
10463 /* Make sure we actually saved the return address. */
10464 if (from
== RETURN_ADDRESS_POINTER_REGNUM
)
10465 if (!crtl
->calls_eh_return
10467 && !cfun_frame_layout
.save_return_addr_p
)
10473 /* Return offset between register FROM and TO initially after prolog. */
10476 s390_initial_elimination_offset (int from
, int to
)
10478 HOST_WIDE_INT offset
;
10480 /* ??? Why are we called for non-eliminable pairs? */
10481 if (!s390_can_eliminate (from
, to
))
10486 case FRAME_POINTER_REGNUM
:
10487 offset
= (get_frame_size()
10488 + STACK_POINTER_OFFSET
10489 + crtl
->outgoing_args_size
);
10492 case ARG_POINTER_REGNUM
:
10493 s390_init_frame_layout ();
10494 offset
= cfun_frame_layout
.frame_size
+ STACK_POINTER_OFFSET
;
10497 case RETURN_ADDRESS_POINTER_REGNUM
:
10498 s390_init_frame_layout ();
10500 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
10502 /* If it turns out that for stdarg nothing went into the reg
10503 save area we also do not need the return address
10505 if (cfun
->stdarg
&& !cfun_save_arg_fprs_p
)
10508 gcc_unreachable ();
10511 /* In order to make the following work it is not necessary for
10512 r14 to have a save slot. It is sufficient if one other GPR
10513 got one. Since the GPRs are always stored without gaps we
10514 are able to calculate where the r14 save slot would
10516 offset
= (cfun_frame_layout
.frame_size
+ cfun_frame_layout
.gprs_offset
+
10517 (RETURN_REGNUM
- cfun_frame_layout
.first_save_gpr_slot
) *
10526 gcc_unreachable ();
10532 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10533 to register BASE. Return generated insn. */
10536 save_fpr (rtx base
, int offset
, int regnum
)
10539 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10541 if (regnum
>= 16 && regnum
<= (16 + FP_ARG_NUM_REG
))
10542 set_mem_alias_set (addr
, get_varargs_alias_set ());
10544 set_mem_alias_set (addr
, get_frame_alias_set ());
10546 return emit_move_insn (addr
, gen_rtx_REG (DFmode
, regnum
));
10549 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550 to register BASE. Return generated insn. */
10553 restore_fpr (rtx base
, int offset
, int regnum
)
10556 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10557 set_mem_alias_set (addr
, get_frame_alias_set ());
10559 return emit_move_insn (gen_rtx_REG (DFmode
, regnum
), addr
);
10562 /* Generate insn to save registers FIRST to LAST into
10563 the register save area located at offset OFFSET
10564 relative to register BASE. */
10567 save_gprs (rtx base
, int offset
, int first
, int last
)
10569 rtx addr
, insn
, note
;
10572 addr
= plus_constant (Pmode
, base
, offset
);
10573 addr
= gen_rtx_MEM (Pmode
, addr
);
10575 set_mem_alias_set (addr
, get_frame_alias_set ());
10577 /* Special-case single register. */
10581 insn
= gen_movdi (addr
, gen_rtx_REG (Pmode
, first
));
10583 insn
= gen_movsi (addr
, gen_rtx_REG (Pmode
, first
));
10585 if (!global_not_special_regno_p (first
))
10586 RTX_FRAME_RELATED_P (insn
) = 1;
10591 insn
= gen_store_multiple (addr
,
10592 gen_rtx_REG (Pmode
, first
),
10593 GEN_INT (last
- first
+ 1));
10595 if (first
<= 6 && cfun
->stdarg
)
10596 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
10598 rtx mem
= XEXP (XVECEXP (PATTERN (insn
), 0, i
), 0);
10600 if (first
+ i
<= 6)
10601 set_mem_alias_set (mem
, get_varargs_alias_set ());
10604 /* We need to set the FRAME_RELATED flag on all SETs
10605 inside the store-multiple pattern.
10607 However, we must not emit DWARF records for registers 2..5
10608 if they are stored for use by variable arguments ...
10610 ??? Unfortunately, it is not enough to simply not the
10611 FRAME_RELATED flags for those SETs, because the first SET
10612 of the PARALLEL is always treated as if it had the flag
10613 set, even if it does not. Therefore we emit a new pattern
10614 without those registers as REG_FRAME_RELATED_EXPR note. */
10616 if (first
>= 6 && !global_not_special_regno_p (first
))
10618 rtx pat
= PATTERN (insn
);
10620 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
10621 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
10622 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat
,
10624 RTX_FRAME_RELATED_P (XVECEXP (pat
, 0, i
)) = 1;
10626 RTX_FRAME_RELATED_P (insn
) = 1;
10628 else if (last
>= 6)
10632 for (start
= first
>= 6 ? first
: 6; start
<= last
; start
++)
10633 if (!global_not_special_regno_p (start
))
10639 addr
= plus_constant (Pmode
, base
,
10640 offset
+ (start
- first
) * UNITS_PER_LONG
);
10645 note
= gen_movdi (gen_rtx_MEM (Pmode
, addr
),
10646 gen_rtx_REG (Pmode
, start
));
10648 note
= gen_movsi (gen_rtx_MEM (Pmode
, addr
),
10649 gen_rtx_REG (Pmode
, start
));
10650 note
= PATTERN (note
);
10652 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10653 RTX_FRAME_RELATED_P (insn
) = 1;
10658 note
= gen_store_multiple (gen_rtx_MEM (Pmode
, addr
),
10659 gen_rtx_REG (Pmode
, start
),
10660 GEN_INT (last
- start
+ 1));
10661 note
= PATTERN (note
);
10663 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10665 for (i
= 0; i
< XVECLEN (note
, 0); i
++)
10666 if (GET_CODE (XVECEXP (note
, 0, i
)) == SET
10667 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note
,
10669 RTX_FRAME_RELATED_P (XVECEXP (note
, 0, i
)) = 1;
10671 RTX_FRAME_RELATED_P (insn
) = 1;
10677 /* Generate insn to restore registers FIRST to LAST from
10678 the register save area located at offset OFFSET
10679 relative to register BASE. */
10682 restore_gprs (rtx base
, int offset
, int first
, int last
)
10686 addr
= plus_constant (Pmode
, base
, offset
);
10687 addr
= gen_rtx_MEM (Pmode
, addr
);
10688 set_mem_alias_set (addr
, get_frame_alias_set ());
10690 /* Special-case single register. */
10694 insn
= gen_movdi (gen_rtx_REG (Pmode
, first
), addr
);
10696 insn
= gen_movsi (gen_rtx_REG (Pmode
, first
), addr
);
10698 RTX_FRAME_RELATED_P (insn
) = 1;
10702 insn
= gen_load_multiple (gen_rtx_REG (Pmode
, first
),
10704 GEN_INT (last
- first
+ 1));
10705 RTX_FRAME_RELATED_P (insn
) = 1;
10709 /* Return insn sequence to load the GOT register. */
10712 s390_load_got (void)
10716 /* We cannot use pic_offset_table_rtx here since we use this
10717 function also for non-pic if __tls_get_offset is called and in
10718 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10720 rtx got_rtx
= gen_rtx_REG (Pmode
, 12);
10724 emit_move_insn (got_rtx
, s390_got_symbol ());
10726 insns
= get_insns ();
10731 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732 and the change to the stack pointer. */
10735 s390_emit_stack_tie (void)
10737 rtx mem
= gen_frame_mem (BLKmode
,
10738 gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
));
10740 emit_insn (gen_stack_tie (mem
));
10743 /* Copy GPRS into FPR save slots. */
10746 s390_save_gprs_to_fprs (void)
10750 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
10753 for (i
= 6; i
< 16; i
++)
10755 if (FP_REGNO_P (cfun_gpr_save_slot (i
)))
10758 emit_move_insn (gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
)),
10759 gen_rtx_REG (DImode
, i
));
10760 RTX_FRAME_RELATED_P (insn
) = 1;
10761 /* This prevents dwarf2cfi from interpreting the set. Doing
10762 so it might emit def_cfa_register infos setting an FPR as
10764 add_reg_note (insn
, REG_CFA_REGISTER
, copy_rtx (PATTERN (insn
)));
10769 /* Restore GPRs from FPR save slots. */
10772 s390_restore_gprs_from_fprs (void)
10776 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
10779 /* Restore the GPRs starting with the stack pointer. That way the
10780 stack pointer already has its original value when it comes to
10781 restoring the hard frame pointer. So we can set the cfa reg back
10782 to the stack pointer. */
10783 for (i
= STACK_POINTER_REGNUM
; i
>= 6; i
--)
10787 if (!FP_REGNO_P (cfun_gpr_save_slot (i
)))
10790 rtx fpr
= gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
));
10792 if (i
== STACK_POINTER_REGNUM
)
10793 insn
= emit_insn (gen_stack_restore_from_fpr (fpr
));
10795 insn
= emit_move_insn (gen_rtx_REG (DImode
, i
), fpr
);
10797 df_set_regs_ever_live (i
, true);
10798 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, i
));
10800 /* If either the stack pointer or the frame pointer get restored
10801 set the CFA value to its value at function start. Doing this
10802 for the frame pointer results in .cfi_def_cfa_register 15
10803 what is ok since if the stack pointer got modified it has
10804 been restored already. */
10805 if (i
== STACK_POINTER_REGNUM
|| i
== HARD_FRAME_POINTER_REGNUM
)
10806 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10807 plus_constant (Pmode
, stack_pointer_rtx
,
10808 STACK_POINTER_OFFSET
));
10809 RTX_FRAME_RELATED_P (insn
) = 1;
10814 /* A pass run immediately before shrink-wrapping and prologue and epilogue
/* NOTE(review): extraction-garbled text; the anonymous-namespace opening that
   normally precedes pass_data appears to have been lost.  Comments only.  */
10819 const pass_data pass_data_s390_early_mach
=
10821 RTL_PASS
, /* type */
10822 "early_mach", /* name */
10823 OPTGROUP_NONE
, /* optinfo_flags */
10824 TV_MACH_DEP
, /* tv_id */
10825 0, /* properties_required */
10826 0, /* properties_provided */
10827 0, /* properties_destroyed */
10828 0, /* todo_flags_start */
10829 ( TODO_df_verify
| TODO_df_finish
), /* todo_flags_finish */
/* Thin rtl_opt_pass wrapper; all work happens in execute() below.  */
10832 class pass_s390_early_mach
: public rtl_opt_pass
10835 pass_s390_early_mach (gcc::context
*ctxt
)
10836 : rtl_opt_pass (pass_data_s390_early_mach
, ctxt
)
10839 /* opt_pass methods: */
10840 virtual unsigned int execute (function
*);
10842 }; // class pass_s390_early_mach
10845 pass_s390_early_mach::execute (function
*fun
)
10849 /* Try to get rid of the FPR clobbers. */
10850 s390_optimize_nonescaping_tx ();
10852 /* Re-compute register info. */
10853 s390_register_info ();
10855 /* If we're using a base register, ensure that it is always valid for
10856 the first non-prologue instruction. */
10857 if (fun
->machine
->base_reg
)
10858 emit_insn_at_entry (gen_main_pool (fun
->machine
->base_reg
));
10860 /* Annotate all constant pool references to let the scheduler know
10861 they implicitly use the base register. */
10862 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
10865 annotate_constant_pool_refs (insn
);
10866 df_insn_rescan (insn
);
10871 } // anon namespace
10874 make_pass_s390_early_mach (gcc::context
*ctxt
)
10876 return new pass_s390_early_mach (ctxt
);
10879 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10880 - push too big immediates to the literal pool and annotate the refs
10881 - emit frame related notes for stack pointer changes. */
10884 s390_prologue_plus_offset (rtx target
, rtx reg
, rtx offset
, bool frame_related_p
)
10887 rtx orig_offset
= offset
;
10889 gcc_assert (REG_P (target
));
10890 gcc_assert (REG_P (reg
));
10891 gcc_assert (CONST_INT_P (offset
));
10893 if (offset
== const0_rtx
) /* lr/lgr */
10895 insn
= emit_move_insn (target
, reg
);
10897 else if (DISP_IN_RANGE (INTVAL (offset
))) /* la */
10899 insn
= emit_move_insn (target
, gen_rtx_PLUS (Pmode
, reg
,
10904 if (!satisfies_constraint_K (offset
) /* ahi/aghi */
10906 || (!satisfies_constraint_Op (offset
) /* alfi/algfi */
10907 && !satisfies_constraint_On (offset
)))) /* slfi/slgfi */
10908 offset
= force_const_mem (Pmode
, offset
);
10912 insn
= emit_move_insn (target
, reg
);
10913 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
10916 insn
= emit_insn (gen_add2_insn (target
, offset
));
10918 if (!CONST_INT_P (offset
))
10920 annotate_constant_pool_refs (insn
);
10922 if (frame_related_p
)
10923 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10924 gen_rtx_SET (target
,
10925 gen_rtx_PLUS (Pmode
, target
,
10930 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
10932 /* If this is a stack adjustment and we are generating a stack clash
10933 prologue, then add a REG_STACK_CHECK note to signal that this insn
10934 should be left alone. */
10935 if (flag_stack_clash_protection
&& target
== stack_pointer_rtx
)
10936 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
10941 /* Emit a compare instruction with a volatile memory access as stack
10942 probe. It does not waste store tags and does not clobber any
10943 registers apart from the condition code. */
10945 s390_emit_stack_probe (rtx addr
)
10947 rtx mem
= gen_rtx_MEM (Pmode
, addr
);
10948 MEM_VOLATILE_P (mem
) = 1;
10949 emit_insn (gen_probe_stack (mem
));
10952 /* Use a runtime loop if we have to emit more probes than this. */
10953 #define MIN_UNROLL_PROBES 3
10955 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10956 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
10957 probe relative to the stack pointer.
10959 Note that SIZE is negative.
10961 The return value is true if TEMP_REG has been clobbered. */
10963 allocate_stack_space (rtx size
, HOST_WIDE_INT last_probe_offset
,
10966 bool temp_reg_clobbered_p
= false;
10967 HOST_WIDE_INT probe_interval
10968 = 1 << param_stack_clash_protection_probe_interval
;
10969 HOST_WIDE_INT guard_size
10970 = 1 << param_stack_clash_protection_guard_size
;
10972 if (flag_stack_clash_protection
)
10974 if (last_probe_offset
+ -INTVAL (size
) < guard_size
)
10975 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
10978 rtx offset
= GEN_INT (probe_interval
- UNITS_PER_LONG
);
10979 HOST_WIDE_INT rounded_size
= -INTVAL (size
) & -probe_interval
;
10980 HOST_WIDE_INT num_probes
= rounded_size
/ probe_interval
;
10981 HOST_WIDE_INT residual
= -INTVAL (size
) - rounded_size
;
10983 if (num_probes
< MIN_UNROLL_PROBES
)
10985 /* Emit unrolled probe statements. */
10987 for (unsigned int i
= 0; i
< num_probes
; i
++)
10989 s390_prologue_plus_offset (stack_pointer_rtx
,
10991 GEN_INT (-probe_interval
), true);
10992 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
10996 if (num_probes
> 0)
10997 last_probe_offset
= INTVAL (offset
);
10998 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
11002 /* Emit a loop probing the pages. */
11004 rtx_code_label
*loop_start_label
= gen_label_rtx ();
11006 /* From now on temp_reg will be the CFA register. */
11007 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11008 GEN_INT (-rounded_size
), true);
11009 emit_label (loop_start_label
);
11011 s390_prologue_plus_offset (stack_pointer_rtx
,
11013 GEN_INT (-probe_interval
), false);
11014 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11017 emit_cmp_and_jump_insns (stack_pointer_rtx
, temp_reg
,
11019 Pmode
, 1, loop_start_label
);
11021 /* Without this make_edges ICEes. */
11022 JUMP_LABEL (get_last_insn ()) = loop_start_label
;
11023 LABEL_NUSES (loop_start_label
) = 1;
11025 /* That's going to be a NOP since stack pointer and
11026 temp_reg are supposed to be the same here. We just
11027 emit it to set the CFA reg back to r15. */
11028 s390_prologue_plus_offset (stack_pointer_rtx
, temp_reg
,
11030 temp_reg_clobbered_p
= true;
11031 last_probe_offset
= INTVAL (offset
);
11032 dump_stack_clash_frame_info (PROBE_LOOP
, residual
!= 0);
11035 /* Handle any residual allocation request. */
11036 s390_prologue_plus_offset (stack_pointer_rtx
,
11038 GEN_INT (-residual
), true);
11039 last_probe_offset
+= residual
;
11040 if (last_probe_offset
>= probe_interval
)
11041 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11044 - UNITS_PER_LONG
)));
11046 return temp_reg_clobbered_p
;
11050 /* Subtract frame size from stack pointer. */
11051 s390_prologue_plus_offset (stack_pointer_rtx
,
11055 return temp_reg_clobbered_p
;
11058 /* Expand the prologue into a bunch of separate insns. */
11061 s390_emit_prologue (void)
11069 /* Choose best register to use for temp use within prologue.
11070 TPF with profiling must avoid the register 14 - the tracing function
11071 needs the original contents of r14 to be preserved. */
11073 if (!has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
)
11075 && !TARGET_TPF_PROFILING
)
11076 temp_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11077 else if (flag_split_stack
&& cfun
->stdarg
)
11078 temp_reg
= gen_rtx_REG (Pmode
, 12);
11080 temp_reg
= gen_rtx_REG (Pmode
, 1);
11082 /* When probing for stack-clash mitigation, we have to track the distance
11083 between the stack pointer and closest known reference.
11085 Most of the time we have to make a worst case assumption. The
11086 only exception is when TARGET_BACKCHAIN is active, in which case
11087 we know *sp (offset 0) was written. */
11088 HOST_WIDE_INT probe_interval
11089 = 1 << param_stack_clash_protection_probe_interval
;
11090 HOST_WIDE_INT last_probe_offset
11091 = (TARGET_BACKCHAIN
11092 ? (TARGET_PACKED_STACK
? STACK_POINTER_OFFSET
- UNITS_PER_LONG
: 0)
11093 : probe_interval
- (STACK_BOUNDARY
/ UNITS_PER_WORD
));
11095 s390_save_gprs_to_fprs ();
11097 /* Save call saved gprs. */
11098 if (cfun_frame_layout
.first_save_gpr
!= -1)
11100 insn
= save_gprs (stack_pointer_rtx
,
11101 cfun_frame_layout
.gprs_offset
+
11102 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11103 - cfun_frame_layout
.first_save_gpr_slot
),
11104 cfun_frame_layout
.first_save_gpr
,
11105 cfun_frame_layout
.last_save_gpr
);
11107 /* This is not 100% correct. If we have more than one register saved,
11108 then LAST_PROBE_OFFSET can move even closer to sp. */
11110 = (cfun_frame_layout
.gprs_offset
+
11111 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11112 - cfun_frame_layout
.first_save_gpr_slot
));
11117 /* Dummy insn to mark literal pool slot. */
11119 if (cfun
->machine
->base_reg
)
11120 emit_insn (gen_main_pool (cfun
->machine
->base_reg
));
11122 offset
= cfun_frame_layout
.f0_offset
;
11124 /* Save f0 and f2. */
11125 for (i
= FPR0_REGNUM
; i
<= FPR0_REGNUM
+ 1; i
++)
11127 if (cfun_fpr_save_p (i
))
11129 save_fpr (stack_pointer_rtx
, offset
, i
);
11130 if (offset
< last_probe_offset
)
11131 last_probe_offset
= offset
;
11134 else if (!TARGET_PACKED_STACK
|| cfun
->stdarg
)
11138 /* Save f4 and f6. */
11139 offset
= cfun_frame_layout
.f4_offset
;
11140 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11142 if (cfun_fpr_save_p (i
))
11144 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11145 if (offset
< last_probe_offset
)
11146 last_probe_offset
= offset
;
11149 /* If f4 and f6 are call clobbered they are saved due to
11150 stdargs and therefore are not frame related. */
11151 if (!call_used_regs
[i
])
11152 RTX_FRAME_RELATED_P (insn
) = 1;
11154 else if (!TARGET_PACKED_STACK
|| call_used_regs
[i
])
11158 if (TARGET_PACKED_STACK
11159 && cfun_save_high_fprs_p
11160 && cfun_frame_layout
.f8_offset
+ cfun_frame_layout
.high_fprs
* 8 > 0)
11162 offset
= (cfun_frame_layout
.f8_offset
11163 + (cfun_frame_layout
.high_fprs
- 1) * 8);
11165 for (i
= FPR15_REGNUM
; i
>= FPR8_REGNUM
&& offset
>= 0; i
--)
11166 if (cfun_fpr_save_p (i
))
11168 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11169 if (offset
< last_probe_offset
)
11170 last_probe_offset
= offset
;
11172 RTX_FRAME_RELATED_P (insn
) = 1;
11175 if (offset
>= cfun_frame_layout
.f8_offset
)
11179 if (!TARGET_PACKED_STACK
)
11180 next_fpr
= cfun_save_high_fprs_p
? FPR15_REGNUM
: 0;
11182 if (flag_stack_usage_info
)
11183 current_function_static_stack_size
= cfun_frame_layout
.frame_size
;
11185 /* Decrement stack pointer. */
11187 if (cfun_frame_layout
.frame_size
> 0)
11189 rtx frame_off
= GEN_INT (-cfun_frame_layout
.frame_size
);
11190 rtx_insn
*stack_pointer_backup_loc
;
11191 bool temp_reg_clobbered_p
;
11193 if (s390_stack_size
)
11195 HOST_WIDE_INT stack_guard
;
11197 if (s390_stack_guard
)
11198 stack_guard
= s390_stack_guard
;
11201 /* If no value for stack guard is provided the smallest power of 2
11202 larger than the current frame size is chosen. */
11204 while (stack_guard
< cfun_frame_layout
.frame_size
)
11208 if (cfun_frame_layout
.frame_size
>= s390_stack_size
)
11210 warning (0, "frame size of function %qs is %wd"
11211 " bytes exceeding user provided stack limit of "
11213 "An unconditional trap is added.",
11214 current_function_name(), cfun_frame_layout
.frame_size
,
11216 emit_insn (gen_trap ());
11221 /* stack_guard has to be smaller than s390_stack_size.
11222 Otherwise we would emit an AND with zero which would
11223 not match the test under mask pattern. */
11224 if (stack_guard
>= s390_stack_size
)
11226 warning (0, "frame size of function %qs is %wd"
11227 " bytes which is more than half the stack size. "
11228 "The dynamic check would not be reliable. "
11229 "No check emitted for this function.",
11230 current_function_name(),
11231 cfun_frame_layout
.frame_size
);
11235 HOST_WIDE_INT stack_check_mask
= ((s390_stack_size
- 1)
11236 & ~(stack_guard
- 1));
11238 rtx t
= gen_rtx_AND (Pmode
, stack_pointer_rtx
,
11239 GEN_INT (stack_check_mask
));
11241 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode
,
11243 t
, const0_rtx
, const0_rtx
));
11245 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode
,
11247 t
, const0_rtx
, const0_rtx
));
11252 if (s390_warn_framesize
> 0
11253 && cfun_frame_layout
.frame_size
>= s390_warn_framesize
)
11254 warning (0, "frame size of %qs is %wd bytes",
11255 current_function_name (), cfun_frame_layout
.frame_size
);
11257 if (s390_warn_dynamicstack_p
&& cfun
->calls_alloca
)
11258 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11260 /* Save the location where we could backup the incoming stack
11262 stack_pointer_backup_loc
= get_last_insn ();
11264 temp_reg_clobbered_p
= allocate_stack_space (frame_off
, last_probe_offset
,
11267 if (TARGET_BACKCHAIN
|| next_fpr
)
11269 if (temp_reg_clobbered_p
)
11271 /* allocate_stack_space had to make use of temp_reg and
11272 we need it to hold a backup of the incoming stack
11273 pointer. Calculate back that value from the current
11275 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11276 GEN_INT (cfun_frame_layout
.frame_size
),
11281 /* allocate_stack_space didn't actually required
11282 temp_reg. Insert the stack pointer backup insn
11283 before the stack pointer decrement code - knowing now
11284 that the value will survive. */
11285 emit_insn_after (gen_move_insn (temp_reg
, stack_pointer_rtx
),
11286 stack_pointer_backup_loc
);
11290 /* Set backchain. */
11292 if (TARGET_BACKCHAIN
)
11294 if (cfun_frame_layout
.backchain_offset
)
11295 addr
= gen_rtx_MEM (Pmode
,
11296 plus_constant (Pmode
, stack_pointer_rtx
,
11297 cfun_frame_layout
.backchain_offset
));
11299 addr
= gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
11300 set_mem_alias_set (addr
, get_frame_alias_set ());
11301 insn
= emit_insn (gen_move_insn (addr
, temp_reg
));
11304 /* If we support non-call exceptions (e.g. for Java),
11305 we need to make sure the backchain pointer is set up
11306 before any possibly trapping memory access. */
11307 if (TARGET_BACKCHAIN
&& cfun
->can_throw_non_call_exceptions
)
11309 addr
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
11310 emit_clobber (addr
);
11313 else if (flag_stack_clash_protection
)
11314 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
11316 /* Save fprs 8 - 15 (64 bit ABI). */
11318 if (cfun_save_high_fprs_p
&& next_fpr
)
11320 /* If the stack might be accessed through a different register
11321 we have to make sure that the stack pointer decrement is not
11322 moved below the use of the stack slots. */
11323 s390_emit_stack_tie ();
11325 insn
= emit_insn (gen_add2_insn (temp_reg
,
11326 GEN_INT (cfun_frame_layout
.f8_offset
)));
11330 for (i
= FPR8_REGNUM
; i
<= next_fpr
; i
++)
11331 if (cfun_fpr_save_p (i
))
11333 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
,
11334 cfun_frame_layout
.frame_size
11335 + cfun_frame_layout
.f8_offset
11338 insn
= save_fpr (temp_reg
, offset
, i
);
11340 RTX_FRAME_RELATED_P (insn
) = 1;
11341 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
11342 gen_rtx_SET (gen_rtx_MEM (DFmode
, addr
),
11343 gen_rtx_REG (DFmode
, i
)));
11347 /* Set frame pointer, if needed. */
11349 if (frame_pointer_needed
)
11351 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
11352 RTX_FRAME_RELATED_P (insn
) = 1;
11355 /* Set up got pointer, if needed. */
11357 if (flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
11359 rtx_insn
*insns
= s390_load_got ();
11361 for (rtx_insn
*insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
11362 annotate_constant_pool_refs (insn
);
11367 #if TARGET_TPF != 0
11368 if (TARGET_TPF_PROFILING
)
11370 /* Generate a BAS instruction to serve as a function entry
11371 intercept to facilitate the use of tracing algorithms located
11372 at the branch target. */
11373 emit_insn (gen_prologue_tpf (
11374 GEN_INT (s390_tpf_trace_hook_prologue_check
),
11375 GEN_INT (s390_tpf_trace_hook_prologue_target
)));
11377 /* Emit a blockage here so that all code lies between the
11378 profiling mechanisms. */
11379 emit_insn (gen_blockage ());
/* NOTE(review): mangled extraction -- logical lines are split across physical
   lines, stale original line numbers are embedded in the text, and some
   original lines (return type, braces, parts of conditions) are missing.
   Code text is left byte-identical; verify against the canonical
   gcc/config/s390/s390.c before relying on this block.  */
11384 /* Expand the epilogue into a bunch of separate insns. */
11387 s390_emit_epilogue (bool sibcall
)
11389 rtx frame_pointer
, return_reg
= NULL_RTX
, cfa_restores
= NULL_RTX
;
11390 int area_bottom
, area_top
, offset
= 0;
11394 #if TARGET_TPF != 0
11395 if (TARGET_TPF_PROFILING
)
11397 /* Generate a BAS instruction to serve as a function entry
11398 intercept to facilitate the use of tracing algorithms located
11399 at the branch target. */
11401 /* Emit a blockage here so that all code lies between the
11402 profiling mechanisms. */
11403 emit_insn (gen_blockage ());
11405 emit_insn (gen_epilogue_tpf (
11406 GEN_INT (s390_tpf_trace_hook_epilogue_check
),
11407 GEN_INT (s390_tpf_trace_hook_epilogue_target
)));
11411 /* Check whether to use frame or stack pointer for restore. */
11413 frame_pointer
= (frame_pointer_needed
11414 ? hard_frame_pointer_rtx
: stack_pointer_rtx
);
11416 s390_frame_area (&area_bottom
, &area_top
);
11418 /* Check whether we can access the register save area.
11419 If not, increment the frame pointer as required. */
11421 if (area_top
<= area_bottom
)
11423 /* Nothing to restore. */
11425 else if (DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_bottom
)
11426 && DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_top
- 1))
11428 /* Area is in range. */
11429 offset
= cfun_frame_layout
.frame_size
;
11434 rtx frame_off
, cfa
;
11436 offset
= area_bottom
< 0 ? -area_bottom
: 0;
11437 frame_off
= GEN_INT (cfun_frame_layout
.frame_size
- offset
);
11439 cfa
= gen_rtx_SET (frame_pointer
,
11440 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11441 if (DISP_IN_RANGE (INTVAL (frame_off
)))
11445 set
= gen_rtx_SET (frame_pointer
,
11446 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11447 insn
= emit_insn (set
);
11451 if (!CONST_OK_FOR_K (INTVAL (frame_off
)))
11452 frame_off
= force_const_mem (Pmode
, frame_off
);
11454 insn
= emit_insn (gen_add2_insn (frame_pointer
, frame_off
));
11455 annotate_constant_pool_refs (insn
);
11457 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, cfa
);
11458 RTX_FRAME_RELATED_P (insn
) = 1;
11461 /* Restore call saved fprs. */
11465 if (cfun_save_high_fprs_p
)
11467 next_offset
= cfun_frame_layout
.f8_offset
;
11468 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
11470 if (cfun_fpr_save_p (i
))
11472 restore_fpr (frame_pointer
,
11473 offset
+ next_offset
, i
);
11475 = alloc_reg_note (REG_CFA_RESTORE
,
11476 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11485 next_offset
= cfun_frame_layout
.f4_offset
;
11487 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11489 if (cfun_fpr_save_p (i
))
11491 restore_fpr (frame_pointer
,
11492 offset
+ next_offset
, i
);
11494 = alloc_reg_note (REG_CFA_RESTORE
,
11495 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11498 else if (!TARGET_PACKED_STACK
)
11504 /* Restore call saved gprs. */
11506 if (cfun_frame_layout
.first_restore_gpr
!= -1)
11511 /* Check for global register and save them
11512 to stack location from where they get restored. */
11514 for (i
= cfun_frame_layout
.first_restore_gpr
;
11515 i
<= cfun_frame_layout
.last_restore_gpr
;
11518 if (global_not_special_regno_p (i
))
11520 addr
= plus_constant (Pmode
, frame_pointer
,
11521 offset
+ cfun_frame_layout
.gprs_offset
11522 + (i
- cfun_frame_layout
.first_save_gpr_slot
)
11524 addr
= gen_rtx_MEM (Pmode
, addr
);
11525 set_mem_alias_set (addr
, get_frame_alias_set ());
11526 emit_move_insn (addr
, gen_rtx_REG (Pmode
, i
));
11530 = alloc_reg_note (REG_CFA_RESTORE
,
11531 gen_rtx_REG (Pmode
, i
), cfa_restores
);
11534 /* Fetch return address from stack before load multiple,
11535 this will do good for scheduling.
11537 Only do this if we already decided that r14 needs to be
11538 saved to a stack slot. (And not just because r14 happens to
11539 be in between two GPRs which need saving.) Otherwise it
11540 would be difficult to take that decision back in
11541 s390_optimize_prologue.
11543 This optimization is only helpful on in-order machines. */
11545 && cfun_gpr_save_slot (RETURN_REGNUM
) == SAVE_SLOT_STACK
11546 && s390_tune
<= PROCESSOR_2097_Z10
)
11548 int return_regnum
= find_unused_clobbered_reg();
11550 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11552 && return_regnum
== INDIRECT_BRANCH_THUNK_REGNUM
))
11554 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM
!= 4);
11557 return_reg
= gen_rtx_REG (Pmode
, return_regnum
);
11559 addr
= plus_constant (Pmode
, frame_pointer
,
11560 offset
+ cfun_frame_layout
.gprs_offset
11562 - cfun_frame_layout
.first_save_gpr_slot
)
11564 addr
= gen_rtx_MEM (Pmode
, addr
);
11565 set_mem_alias_set (addr
, get_frame_alias_set ());
11566 emit_move_insn (return_reg
, addr
);
11568 /* Once we did that optimization we have to make sure
11569 s390_optimize_prologue does not try to remove the store
11570 of r14 since we will not be able to find the load issued
11572 cfun_frame_layout
.save_return_addr_p
= true;
11575 insn
= restore_gprs (frame_pointer
,
11576 offset
+ cfun_frame_layout
.gprs_offset
11577 + (cfun_frame_layout
.first_restore_gpr
11578 - cfun_frame_layout
.first_save_gpr_slot
)
11580 cfun_frame_layout
.first_restore_gpr
,
11581 cfun_frame_layout
.last_restore_gpr
);
11582 insn
= emit_insn (insn
);
11583 REG_NOTES (insn
) = cfa_restores
;
11584 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11585 plus_constant (Pmode
, stack_pointer_rtx
,
11586 STACK_POINTER_OFFSET
));
11587 RTX_FRAME_RELATED_P (insn
) = 1;
11590 s390_restore_gprs_from_fprs ();
11594 if (!return_reg
&& !s390_can_use_return_insn ())
11595 /* We planned to emit (return), be we are not allowed to. */
11596 return_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11599 /* Emit (return) and (use). */
11600 emit_jump_insn (gen_return_use (return_reg
));
11602 /* The fact that RETURN_REGNUM is used is already reflected by
11603 EPILOGUE_USES. Emit plain (return). */
11604 emit_jump_insn (gen_return ());
11608 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11611 s300_set_up_by_prologue (hard_reg_set_container
*regs
)
11613 if (cfun
->machine
->base_reg
11614 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
11615 SET_HARD_REG_BIT (regs
->set
, REGNO (cfun
->machine
->base_reg
));
/* NOTE(review): mangled extraction -- stale original line numbers are embedded
   in the text and some comment lines appear truncated.  Kept byte-identical.  */
11618 /* -fsplit-stack support. */
11620 /* A SYMBOL_REF for __morestack. */
11621 static GTY(()) rtx morestack_ref
;
11623 /* When using -fsplit-stack, the allocation routines set a field in
11624 the TCB to the bottom of the stack plus this much space, measured
11627 #define SPLIT_STACK_AVAILABLE 1024
/* NOTE(review): mangled extraction -- logical lines are split across physical
   lines and some original lines are missing.  In particular the 8-byte
   (.align 8 / .quad) and 4-byte (.align 4 / .long) emission variants appear
   below back-to-back with their guarding conditionals lost (presumably a
   64-bit/31-bit test) -- confirm against canonical gcc/config/s390/s390.c.
   Code text kept byte-identical.  */
11629 /* Emit the parmblock for __morestack into .rodata section. It
11630 consists of 3 pointer size entries:
11632 - size of stack arguments
11633 - offset between parm block and __morestack return label */
11636 s390_output_split_stack_data (rtx parm_block
, rtx call_done
,
11637 rtx frame_size
, rtx args_size
)
11639 rtx ops
[] = { parm_block
, call_done
};
11641 switch_to_section (targetm
.asm_out
.function_rodata_section
11642 (current_function_decl
));
11645 output_asm_insn (".align\t8", NULL
);
11647 output_asm_insn (".align\t4", NULL
);
11649 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
11650 CODE_LABEL_NUMBER (parm_block
));
11653 output_asm_insn (".quad\t%0", &frame_size
);
11654 output_asm_insn (".quad\t%0", &args_size
);
11655 output_asm_insn (".quad\t%1-%0", ops
);
11659 output_asm_insn (".long\t%0", &frame_size
);
11660 output_asm_insn (".long\t%0", &args_size
);
11661 output_asm_insn (".long\t%1-%0", ops
);
11664 switch_to_section (current_function_section ());
/* NOTE(review): mangled extraction -- logical lines are split across physical
   lines and some original lines (braces, conditions, call arguments of the
   split_stack call builders) are missing.  Code text kept byte-identical;
   verify against the canonical gcc/config/s390/s390.c.  */
11667 /* Emit -fsplit-stack prologue, which goes before the regular function
11671 s390_expand_split_stack_prologue (void)
11673 rtx r1
, guard
, cc
= NULL
;
11675 /* Offset from thread pointer to __private_ss. */
11676 int psso
= TARGET_64BIT
? 0x38 : 0x20;
11677 /* Pointer size in bytes. */
11678 /* Frame size and argument size - the two parameters to __morestack. */
11679 HOST_WIDE_INT frame_size
= cfun_frame_layout
.frame_size
;
11680 /* Align argument size to 8 bytes - simplifies __morestack code. */
11681 HOST_WIDE_INT args_size
= crtl
->args
.size
>= 0
11682 ? ((crtl
->args
.size
+ 7) & ~7)
11684 /* Label to be called by __morestack. */
11685 rtx_code_label
*call_done
= NULL
;
11686 rtx_code_label
*parm_base
= NULL
;
11689 gcc_assert (flag_split_stack
&& reload_completed
);
11691 r1
= gen_rtx_REG (Pmode
, 1);
11693 /* If no stack frame will be allocated, don't do anything. */
11696 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11698 /* If va_start is used, just use r15. */
11699 emit_move_insn (r1
,
11700 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11701 GEN_INT (STACK_POINTER_OFFSET
)));
11707 if (morestack_ref
== NULL_RTX
)
11709 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11710 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
11711 | SYMBOL_FLAG_FUNCTION
);
11714 if (CONST_OK_FOR_K (frame_size
) || CONST_OK_FOR_Op (frame_size
))
11716 /* If frame_size will fit in an add instruction, do a stack space
11717 check, and only call __morestack if there's not enough space. */
11719 /* Get thread pointer. r1 is the only register we can always destroy - r0
11720 could contain a static chain (and cannot be used to address memory
11721 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11722 emit_insn (gen_get_thread_pointer (Pmode
, r1
));
11723 /* Aim at __private_ss. */
11724 guard
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, r1
, psso
));
11726 /* If less that 1kiB used, skip addition and compare directly with
11728 if (frame_size
> SPLIT_STACK_AVAILABLE
)
11730 emit_move_insn (r1
, guard
);
11732 emit_insn (gen_adddi3 (r1
, r1
, GEN_INT (frame_size
)));
11734 emit_insn (gen_addsi3 (r1
, r1
, GEN_INT (frame_size
)));
11738 /* Compare the (maybe adjusted) guard with the stack pointer. */
11739 cc
= s390_emit_compare (LT
, stack_pointer_rtx
, guard
);
11742 call_done
= gen_label_rtx ();
11743 parm_base
= gen_label_rtx ();
11744 LABEL_NUSES (parm_base
)++;
11745 LABEL_NUSES (call_done
)++;
11747 /* %r1 = litbase. */
11748 insn
= emit_move_insn (r1
, gen_rtx_LABEL_REF (VOIDmode
, parm_base
));
11749 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11750 LABEL_NUSES (parm_base
)++;
11752 /* Now, we need to call __morestack. It has very special calling
11753 conventions: it preserves param/return/static chain registers for
11754 calling main function body, and looks for its own parameters at %r1. */
11756 tmp
= gen_split_stack_cond_call (Pmode
,
11760 GEN_INT (frame_size
),
11761 GEN_INT (args_size
),
11764 tmp
= gen_split_stack_call (Pmode
,
11768 GEN_INT (frame_size
),
11769 GEN_INT (args_size
));
11771 insn
= emit_jump_insn (tmp
);
11772 JUMP_LABEL (insn
) = call_done
;
11773 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11774 add_reg_note (insn
, REG_LABEL_OPERAND
, call_done
);
11778 /* Mark the jump as very unlikely to be taken. */
11779 add_reg_br_prob_note (insn
,
11780 profile_probability::very_unlikely ());
11782 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11784 /* If va_start is used, and __morestack was not called, just use
11786 emit_move_insn (r1
,
11787 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11788 GEN_INT (STACK_POINTER_OFFSET
)));
11796 /* __morestack will call us here. */
11798 emit_label (call_done
);
11801 /* We may have to tell the dataflow pass that the split stack prologue
11802 is initializing a register. */
11805 s390_live_on_entry (bitmap regs
)
11807 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11809 gcc_assert (flag_split_stack
);
11810 bitmap_set_bit (regs
, 1);
/* NOTE(review): mangled extraction -- the body of this function and the tail
   of its comment were lost.  Per the surviving comment it reports whether
   simple_return may be used outside a shrink-wrapped region; presumably the
   missing body is a trivial return -- confirm against canonical
   gcc/config/s390/s390.c.  Code text kept byte-identical.  */
11814 /* Return true if the function can use simple_return to return outside
11815 of a shrink-wrapped region. At present shrink-wrapping is supported
11819 s390_can_use_simple_return_insn (void)
/* NOTE(review): mangled extraction -- several original lines (return type,
   braces, `return false;` statements and parts of conditions) are missing.
   Code text kept byte-identical; verify against canonical
   gcc/config/s390/s390.c.  */
11824 /* Return true if the epilogue is guaranteed to contain only a return
11825 instruction and if a direct return can therefore be used instead.
11826 One of the main advantages of using direct return instructions
11827 is that we can then use conditional returns. */
11830 s390_can_use_return_insn (void)
11834 if (!reload_completed
)
11840 if (TARGET_TPF_PROFILING
)
11843 for (i
= 0; i
< 16; i
++)
11844 if (cfun_gpr_save_slot (i
) != SAVE_SLOT_NONE
)
11847 /* For 31 bit this is not covered by the frame_size check below
11848 since f4, f6 are saved in the register save area without needing
11849 additional stack space. */
11851 && (cfun_fpr_save_p (FPR4_REGNUM
) || cfun_fpr_save_p (FPR6_REGNUM
)))
11854 if (cfun
->machine
->base_reg
11855 && !call_used_regs
[REGNO (cfun
->machine
->base_reg
)])
11858 return cfun_frame_layout
.frame_size
== 0;
/* NOTE(review): mangled extraction -- at least one condition line and the
   final `: NULL);` arm of the conditional expression appear to be missing.
   Returns a diagnostic string for vector args passed to unprototyped
   functions under the VX ABI, NULL otherwise (per the surviving text).
   Code kept byte-identical; verify against canonical gcc/config/s390/s390.c.  */
11861 /* The VX ABI differs for vararg functions. Therefore we need the
11862 prototype of the callee to be available when passing vector type
11864 static const char *
11865 s390_invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
11867 return ((TARGET_VX_ABI
11869 && VECTOR_TYPE_P (TREE_TYPE (val
))
11870 && (funcdecl
== NULL_TREE
11871 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
11872 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
11873 ? N_("vector argument passed to unprototyped function")
11878 /* Return the size in bytes of a function argument of
11879 type TYPE and/or mode MODE. At least one of TYPE or
11880 MODE must be specified. */
11883 s390_function_arg_size (machine_mode mode
, const_tree type
)
11886 return int_size_in_bytes (type
);
11888 /* No type info available for some library calls ... */
11889 if (mode
!= BLKmode
)
11890 return GET_MODE_SIZE (mode
);
11892 /* If we have neither type nor mode, abort */
11893 gcc_unreachable ();
/* NOTE(review): mangled extraction -- braces, `return` statements and a few
   condition lines are missing.  Decides whether an argument of TYPE/MODE is
   passed in a vector register under the VX ABI, unwrapping single-member
   records per the surviving comments.  Code kept byte-identical; verify
   against canonical gcc/config/s390/s390.c.  */
11896 /* Return true if a function argument of type TYPE and mode MODE
11897 is to be passed in a vector register, if available. */
11900 s390_function_arg_vector (machine_mode mode
, const_tree type
)
11902 if (!TARGET_VX_ABI
)
11905 if (s390_function_arg_size (mode
, type
) > 16)
11908 /* No type info available for some library calls ... */
11910 return VECTOR_MODE_P (mode
);
11912 /* The ABI says that record types with a single member are treated
11913 just like that member would be. */
11914 int empty_base_seen
= 0;
11915 const_tree orig_type
= type
;
11916 while (TREE_CODE (type
) == RECORD_TYPE
)
11918 tree field
, single
= NULL_TREE
;
11920 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
11922 if (TREE_CODE (field
) != FIELD_DECL
)
11925 if (DECL_FIELD_ABI_IGNORED (field
))
11927 if (lookup_attribute ("no_unique_address",
11928 DECL_ATTRIBUTES (field
)))
11929 empty_base_seen
|= 2;
11931 empty_base_seen
|= 1;
11935 if (single
== NULL_TREE
)
11936 single
= TREE_TYPE (field
);
11941 if (single
== NULL_TREE
)
11945 /* If the field declaration adds extra byte due to
11946 e.g. padding this is not accepted as vector type. */
11947 if (int_size_in_bytes (single
) <= 0
11948 || int_size_in_bytes (single
) != int_size_in_bytes (type
))
11954 if (!VECTOR_TYPE_P (type
))
11957 if (warn_psabi
&& empty_base_seen
)
11959 static unsigned last_reported_type_uid
;
11960 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (orig_type
));
11961 if (uid
!= last_reported_type_uid
)
11963 const char *url
= CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
11964 last_reported_type_uid
= uid
;
11965 if (empty_base_seen
& 1)
11966 inform (input_location
,
11967 "parameter passing for argument of type %qT when C++17 "
11968 "is enabled changed to match C++14 %{in GCC 10.1%}",
11971 inform (input_location
,
11972 "parameter passing for argument of type %qT with "
11973 "%<[[no_unique_address]]%> members changed "
11974 "%{in GCC 10.1%}", orig_type
, url
);
/* NOTE(review): mangled extraction -- braces, `return` statements and parts
   of conditions are missing; the structure parallels
   s390_function_arg_vector above (single-member record unwrapping, psABI
   empty-base diagnostics).  Code kept byte-identical; verify against
   canonical gcc/config/s390/s390.c.  */
11980 /* Return true if a function argument of type TYPE and mode MODE
11981 is to be passed in a floating-point register, if available. */
11984 s390_function_arg_float (machine_mode mode
, const_tree type
)
11986 if (s390_function_arg_size (mode
, type
) > 8)
11989 /* Soft-float changes the ABI: no floating-point registers are used. */
11990 if (TARGET_SOFT_FLOAT
)
11993 /* No type info available for some library calls ... */
11995 return mode
== SFmode
|| mode
== DFmode
|| mode
== SDmode
|| mode
== DDmode
;
11997 /* The ABI says that record types with a single member are treated
11998 just like that member would be. */
11999 int empty_base_seen
= 0;
12000 const_tree orig_type
= type
;
12001 while (TREE_CODE (type
) == RECORD_TYPE
)
12003 tree field
, single
= NULL_TREE
;
12005 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
12007 if (TREE_CODE (field
) != FIELD_DECL
)
12009 if (DECL_FIELD_ABI_IGNORED (field
))
12011 if (lookup_attribute ("no_unique_address",
12012 DECL_ATTRIBUTES (field
)))
12013 empty_base_seen
|= 2;
12015 empty_base_seen
|= 1;
12019 if (single
== NULL_TREE
)
12020 single
= TREE_TYPE (field
);
12025 if (single
== NULL_TREE
)
12031 if (TREE_CODE (type
) != REAL_TYPE
)
12034 if (warn_psabi
&& empty_base_seen
)
12036 static unsigned last_reported_type_uid
;
12037 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (orig_type
));
12038 if (uid
!= last_reported_type_uid
)
12040 const char *url
= CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
12041 last_reported_type_uid
= uid
;
12042 if (empty_base_seen
& 1)
12043 inform (input_location
,
12044 "parameter passing for argument of type %qT when C++17 "
12045 "is enabled changed to match C++14 %{in GCC 10.1%}",
12048 inform (input_location
,
12049 "parameter passing for argument of type %qT with "
12050 "%<[[no_unique_address]]%> members changed "
12051 "%{in GCC 10.1%}", orig_type
, url
);
/* NOTE(review): mangled extraction -- braces, `return` statements and the
   `type == NULL` guard before the mode-based return appear to be missing.
   Code kept byte-identical; verify against canonical gcc/config/s390/s390.c.  */
12058 /* Return true if a function argument of type TYPE and mode MODE
12059 is to be passed in an integer register, or a pair of integer
12060 registers, if available. */
12063 s390_function_arg_integer (machine_mode mode
, const_tree type
)
12065 int size
= s390_function_arg_size (mode
, type
);
12069 /* No type info available for some library calls ... */
12071 return GET_MODE_CLASS (mode
) == MODE_INT
12072 || (TARGET_SOFT_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
));
12074 /* We accept small integral (and similar) types. */
12075 if (INTEGRAL_TYPE_P (type
)
12076 || POINTER_TYPE_P (type
)
12077 || TREE_CODE (type
) == NULLPTR_TYPE
12078 || TREE_CODE (type
) == OFFSET_TYPE
12079 || (TARGET_SOFT_FLOAT
&& TREE_CODE (type
) == REAL_TYPE
))
12082 /* We also accept structs of size 1, 2, 4, 8 that are not
12083 passed in floating-point registers. */
12084 if (AGGREGATE_TYPE_P (type
)
12085 && exact_log2 (size
) >= 0
12086 && !s390_function_arg_float (mode
, type
))
/* NOTE(review): mangled extraction -- return type, braces and the `return`
   statements of each branch are missing.  Per the surviving comment, only
   structures of size 1, 2, 4, 8 are passed by value; everything else by
   reference.  Code kept byte-identical; verify against canonical
   gcc/config/s390/s390.c.  */
12092 /* Return 1 if a function argument ARG is to be passed by reference.
12093 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12094 are passed by value, all other structures (and complex numbers) are
12095 passed by reference. */
12098 s390_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
12100 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12102 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12108 if (tree type
= arg
.type
)
12110 if (AGGREGATE_TYPE_P (type
) && exact_log2 (size
) < 0)
12113 if (TREE_CODE (type
) == COMPLEX_TYPE
12114 || TREE_CODE (type
) == VECTOR_TYPE
)
/* NOTE(review): mangled extraction -- return type, braces, and the
   fpr-advance statement of the float branch are missing.  Advances CUM past
   ARG: vector args on the stack need no bookkeeping, integer args consume
   one GPR per UNITS_PER_LONG.  Code kept byte-identical; verify against
   canonical gcc/config/s390/s390.c.  */
12121 /* Update the data in CUM to advance over argument ARG. */
12124 s390_function_arg_advance (cumulative_args_t cum_v
,
12125 const function_arg_info
&arg
)
12127 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12129 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12131 /* We are called for unnamed vector stdarg arguments which are
12132 passed on the stack. In this case this hook does not have to
12133 do anything since stack arguments are tracked by common
12139 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12143 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12145 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12146 cum
->gprs
+= ((size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
);
12149 gcc_unreachable ();
/* NOTE(review): mangled extraction -- return type, braces, several `return`
   statements (stack-pass cases returning NULL) and parts of the
   register-pair construction are missing.  Code kept byte-identical; verify
   against canonical gcc/config/s390/s390.c.  */
12152 /* Define where to put the arguments to a function.
12153 Value is zero to push the argument on the stack,
12154 or a hard register in which to store the argument.
12156 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12157 the preceding args and about the function being called.
12158 ARG is a description of the argument.
12160 On S/390, we use general purpose registers 2 through 6 to
12161 pass integer, pointer, and certain structure arguments, and
12162 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12163 to pass floating point arguments. All remaining arguments
12164 are pushed to the stack. */
12167 s390_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
12169 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12172 s390_check_type_for_vector_abi (arg
.type
, true, false);
12174 if (s390_function_arg_vector (arg
.mode
, arg
.type
))
12176 /* Vector arguments being part of the ellipsis are passed on the
12178 if (!arg
.named
|| (cum
->vrs
+ 1 > VEC_ARG_NUM_REG
))
12181 return gen_rtx_REG (arg
.mode
, cum
->vrs
+ FIRST_VEC_ARG_REGNO
);
12183 else if (s390_function_arg_float (arg
.mode
, arg
.type
))
12185 if (cum
->fprs
+ 1 > FP_ARG_NUM_REG
)
12188 return gen_rtx_REG (arg
.mode
, cum
->fprs
+ 16);
12190 else if (s390_function_arg_integer (arg
.mode
, arg
.type
))
12192 int size
= s390_function_arg_size (arg
.mode
, arg
.type
);
12193 int n_gprs
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12195 if (cum
->gprs
+ n_gprs
> GP_ARG_NUM_REG
)
12197 else if (n_gprs
== 1 || UNITS_PER_WORD
== UNITS_PER_LONG
)
12198 return gen_rtx_REG (arg
.mode
, cum
->gprs
+ 2);
12199 else if (n_gprs
== 2)
12201 rtvec p
= rtvec_alloc (2);
12204 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 2),
12207 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 3),
12210 return gen_rtx_PARALLEL (arg
.mode
, p
);
12214 /* After the real arguments, expand_call calls us once again with an
12215 end marker. Whatever we return here is passed as operand 2 to the
12218 We don't need this feature ... */
12219 else if (arg
.end_marker_p ())
12222 gcc_unreachable ();
/* NOTE(review): the hook implemented here is TARGET_FUNCTION_ARG_PADDING
   (see the `pad_direction` return type and the function name), not
   TARGET_FUNCTION_ARG_BOUNDARY as the comment below says.  Also mangled
   extraction: the `return PAD_UPWARD;`-style body of the vector branch
   appears to be missing -- confirm against canonical gcc/config/s390/s390.c.  */
12225 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12226 left-justified when placed on the stack during parameter passing. */
12228 static pad_direction
12229 s390_function_arg_padding (machine_mode mode
, const_tree type
)
12231 if (s390_function_arg_vector (mode
, type
))
12234 return default_function_arg_padding (mode
, type
);
/* NOTE(review): mangled extraction -- return type, braces, and the `return`
   statements of the vector/aggregate/fallback branches are missing.  Code
   kept byte-identical; verify against canonical gcc/config/s390/s390.c.  */
12237 /* Return true if return values of type TYPE should be returned
12238 in a memory buffer whose address is passed by the caller as
12239 hidden first argument. */
12242 s390_return_in_memory (const_tree type
, const_tree fundecl ATTRIBUTE_UNUSED
)
12244 /* We accept small integral (and similar) types. */
12245 if (INTEGRAL_TYPE_P (type
)
12246 || POINTER_TYPE_P (type
)
12247 || TREE_CODE (type
) == OFFSET_TYPE
12248 || TREE_CODE (type
) == REAL_TYPE
)
12249 return int_size_in_bytes (type
) > 8;
12251 /* vector types which fit into a VR. */
12253 && VECTOR_TYPE_P (type
)
12254 && int_size_in_bytes (type
) <= 16)
12257 /* Aggregates and similar constructs are always returned
12259 if (AGGREGATE_TYPE_P (type
)
12260 || TREE_CODE (type
) == COMPLEX_TYPE
12261 || VECTOR_TYPE_P (type
))
12264 /* ??? We get called on all sorts of random stuff from
12265 aggregate_value_p. We can't abort, but it's not clear
12266 what's safe to return. Pretend it's a struct I guess. */
/* NOTE(review): mangled extraction -- the `int *punsignedp` parameter line,
   braces, the word-size promotion assignment and the final `return mode;`
   are missing (`*punsignedp` is referenced below but not declared in the
   surviving text).  Code kept byte-identical; verify against canonical
   gcc/config/s390/s390.c.  */
12270 /* Function arguments and return values are promoted to word size. */
12272 static machine_mode
12273 s390_promote_function_mode (const_tree type
, machine_mode mode
,
12275 const_tree fntype ATTRIBUTE_UNUSED
,
12276 int for_return ATTRIBUTE_UNUSED
)
12278 if (INTEGRAL_MODE_P (mode
)
12279 && GET_MODE_SIZE (mode
) < UNITS_PER_LONG
)
12281 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
12282 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
/* NOTE(review): mangled extraction -- return type, braces, the condition
   guarding the promotion block, and the assignments of the register-pair
   EXPR_LISTs into the rtvec are missing.  Picks the return register(s):
   vector reg under the VX ABI, f0 for hard-float scalars, r2 (or the
   r2/r3 pair for 2*UNITS_PER_LONG values) otherwise, per the surviving
   text.  Code kept byte-identical; verify against canonical
   gcc/config/s390/s390.c.  */
12289 /* Define where to return a (scalar) value of type RET_TYPE.
12290 If RET_TYPE is null, define where to return a (scalar)
12291 value of mode MODE from a libcall. */
12294 s390_function_and_libcall_value (machine_mode mode
,
12295 const_tree ret_type
,
12296 const_tree fntype_or_decl
,
12297 bool outgoing ATTRIBUTE_UNUSED
)
12299 /* For vector return types it is important to use the RET_TYPE
12300 argument whenever available since the middle-end might have
12301 changed the mode to a scalar mode. */
12302 bool vector_ret_type_p
= ((ret_type
&& VECTOR_TYPE_P (ret_type
))
12303 || (!ret_type
&& VECTOR_MODE_P (mode
)));
12305 /* For normal functions perform the promotion as
12306 promote_function_mode would do. */
12309 int unsignedp
= TYPE_UNSIGNED (ret_type
);
12310 mode
= promote_function_mode (ret_type
, mode
, &unsignedp
,
12311 fntype_or_decl
, 1);
12314 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
12315 || SCALAR_FLOAT_MODE_P (mode
)
12316 || (TARGET_VX_ABI
&& vector_ret_type_p
));
12317 gcc_assert (GET_MODE_SIZE (mode
) <= (TARGET_VX_ABI
? 16 : 8));
12319 if (TARGET_VX_ABI
&& vector_ret_type_p
)
12320 return gen_rtx_REG (mode
, FIRST_VEC_ARG_REGNO
);
12321 else if (TARGET_HARD_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
))
12322 return gen_rtx_REG (mode
, 16);
12323 else if (GET_MODE_SIZE (mode
) <= UNITS_PER_LONG
12324 || UNITS_PER_LONG
== UNITS_PER_WORD
)
12325 return gen_rtx_REG (mode
, 2);
12326 else if (GET_MODE_SIZE (mode
) == 2 * UNITS_PER_LONG
)
12328 /* This case is triggered when returning a 64 bit value with
12329 -m31 -mzarch. Although the value would fit into a single
12330 register it has to be forced into a 32 bit register pair in
12331 order to match the ABI. */
12332 rtvec p
= rtvec_alloc (2);
12335 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 2), const0_rtx
);
12337 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 3), GEN_INT (4));
12339 return gen_rtx_PARALLEL (mode
, p
);
12342 gcc_unreachable ();
/* NOTE(review): mangled extraction -- return type, the `bool outgoing`
   parameter line and braces are missing (`outgoing` is referenced below
   but not declared in the surviving text).  Thin wrapper that forwards to
   s390_function_and_libcall_value with the type's mode.  Code kept
   byte-identical.  */
12345 /* Define where to return a scalar return value of type RET_TYPE. */
12348 s390_function_value (const_tree ret_type
, const_tree fn_decl_or_type
,
12351 return s390_function_and_libcall_value (TYPE_MODE (ret_type
), ret_type
,
12352 fn_decl_or_type
, outgoing
);
/* NOTE(review): mangled extraction -- return type, braces and the trailing
   arguments of the forwarded call are missing.  Thin wrapper that forwards
   to s390_function_and_libcall_value with a NULL_TREE return type (libcall
   case).  Code kept byte-identical.  */
12355 /* Define where to return a scalar libcall return value of mode
12359 s390_libcall_value (machine_mode mode
, const_rtx fun ATTRIBUTE_UNUSED
)
12361 return s390_function_and_libcall_value (mode
, NULL_TREE
,
/* NOTE(review): mangled extraction -- return type, braces, the assignment of
   `type_decl`, and the type arguments of the __overflow_arg_area /
   __reg_save_area field declarations are missing.  Builds the four-field
   __va_list_tag record (__gpr, __fpr, __overflow_arg_area, __reg_save_area)
   and returns a one-element array of it, per the surviving text.  Code kept
   byte-identical; verify against canonical gcc/config/s390/s390.c.  */
12366 /* Create and return the va_list datatype.
12368 On S/390, va_list is an array type equivalent to
12370 typedef struct __va_list_tag
12374 void *__overflow_arg_area;
12375 void *__reg_save_area;
12378 where __gpr and __fpr hold the number of general purpose
12379 or floating point arguments used up to now, respectively,
12380 __overflow_arg_area points to the stack location of the
12381 next argument passed on the stack, and __reg_save_area
12382 always points to the start of the register area in the
12383 call frame of the current function. The function prologue
12384 saves all registers used for argument passing into this
12385 area if the function uses variable arguments. */
12388 s390_build_builtin_va_list (void)
12390 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
12392 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
12395 build_decl (BUILTINS_LOCATION
,
12396 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
12398 f_gpr
= build_decl (BUILTINS_LOCATION
,
12399 FIELD_DECL
, get_identifier ("__gpr"),
12400 long_integer_type_node
);
12401 f_fpr
= build_decl (BUILTINS_LOCATION
,
12402 FIELD_DECL
, get_identifier ("__fpr"),
12403 long_integer_type_node
);
12404 f_ovf
= build_decl (BUILTINS_LOCATION
,
12405 FIELD_DECL
, get_identifier ("__overflow_arg_area"),
12407 f_sav
= build_decl (BUILTINS_LOCATION
,
12408 FIELD_DECL
, get_identifier ("__reg_save_area"),
12411 va_list_gpr_counter_field
= f_gpr
;
12412 va_list_fpr_counter_field
= f_fpr
;
12414 DECL_FIELD_CONTEXT (f_gpr
) = record
;
12415 DECL_FIELD_CONTEXT (f_fpr
) = record
;
12416 DECL_FIELD_CONTEXT (f_ovf
) = record
;
12417 DECL_FIELD_CONTEXT (f_sav
) = record
;
12419 TYPE_STUB_DECL (record
) = type_decl
;
12420 TYPE_NAME (record
) = type_decl
;
12421 TYPE_FIELDS (record
) = f_gpr
;
12422 DECL_CHAIN (f_gpr
) = f_fpr
;
12423 DECL_CHAIN (f_fpr
) = f_ovf
;
12424 DECL_CHAIN (f_ovf
) = f_sav
;
12426 layout_type (record
);
12428 /* The correct type is an array type of one element. */
12429 return build_array_type (record
, build_index_type (size_zero_node
));
12432 /* Implement va_start by filling the va_list structure VALIST.
12433 STDARG_P is always true, and ignored.
12434 NEXTARG points to the first anonymous stack argument.
12436 The following global variables are used to initialize
12437 the va_list structure:
12440 holds number of gprs and fprs used for named arguments.
12441 crtl->args.arg_offset_rtx:
12442 holds the offset of the first anonymous stack argument
12443 (relative to the virtual arg pointer). */
12446 s390_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
12448 HOST_WIDE_INT n_gpr
, n_fpr
;
12450 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12451 tree gpr
, fpr
, ovf
, sav
, t
;
12453 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12454 f_fpr
= DECL_CHAIN (f_gpr
);
12455 f_ovf
= DECL_CHAIN (f_fpr
);
12456 f_sav
= DECL_CHAIN (f_ovf
);
12458 valist
= build_simple_mem_ref (valist
);
12459 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12460 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12461 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12462 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12464 /* Count number of gp and fp argument registers used. */
12466 n_gpr
= crtl
->args
.info
.gprs
;
12467 n_fpr
= crtl
->args
.info
.fprs
;
12469 if (cfun
->va_list_gpr_size
)
12471 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
12472 build_int_cst (NULL_TREE
, n_gpr
));
12473 TREE_SIDE_EFFECTS (t
) = 1;
12474 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12477 if (cfun
->va_list_fpr_size
)
12479 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
12480 build_int_cst (NULL_TREE
, n_fpr
));
12481 TREE_SIDE_EFFECTS (t
) = 1;
12482 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12485 if (flag_split_stack
12486 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
12488 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12493 reg
= gen_reg_rtx (Pmode
);
12494 cfun
->machine
->split_stack_varargs_pointer
= reg
;
12497 emit_move_insn (reg
, gen_rtx_REG (Pmode
, 1));
12498 seq
= get_insns ();
12501 push_topmost_sequence ();
12502 emit_insn_after (seq
, entry_of_function ());
12503 pop_topmost_sequence ();
12506 /* Find the overflow area.
12507 FIXME: This currently is too pessimistic when the vector ABI is
12508 enabled. In that case we *always* set up the overflow area
12510 if (n_gpr
+ cfun
->va_list_gpr_size
> GP_ARG_NUM_REG
12511 || n_fpr
+ cfun
->va_list_fpr_size
> FP_ARG_NUM_REG
12514 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12515 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
12517 t
= make_tree (TREE_TYPE (ovf
), cfun
->machine
->split_stack_varargs_pointer
);
12519 off
= INTVAL (crtl
->args
.arg_offset_rtx
);
12520 off
= off
< 0 ? 0 : off
;
12521 if (TARGET_DEBUG_ARG
)
12522 fprintf (stderr
, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12523 (int)n_gpr
, (int)n_fpr
, off
);
12525 t
= fold_build_pointer_plus_hwi (t
, off
);
12527 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
12528 TREE_SIDE_EFFECTS (t
) = 1;
12529 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12532 /* Find the register save area. */
12533 if ((cfun
->va_list_gpr_size
&& n_gpr
< GP_ARG_NUM_REG
)
12534 || (cfun
->va_list_fpr_size
&& n_fpr
< FP_ARG_NUM_REG
))
12536 t
= make_tree (TREE_TYPE (sav
), return_address_pointer_rtx
);
12537 t
= fold_build_pointer_plus_hwi (t
, -RETURN_REGNUM
* UNITS_PER_LONG
);
12539 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
12540 TREE_SIDE_EFFECTS (t
) = 1;
12541 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12545 /* Implement va_arg by updating the va_list structure
12546 VALIST as required to retrieve an argument of type
12547 TYPE, and returning that argument.
12549 Generates code equivalent to:
12551 if (integral value) {
12552 if (size <= 4 && args.gpr < 5 ||
12553 size > 4 && args.gpr < 4 )
12554 ret = args.reg_save_area[args.gpr+8]
12556 ret = *args.overflow_arg_area++;
12557 } else if (vector value) {
12558 ret = *args.overflow_arg_area;
12559 args.overflow_arg_area += size / 8;
12560 } else if (float value) {
12562 ret = args.reg_save_area[args.fpr+64]
12564 ret = *args.overflow_arg_area++;
12565 } else if (aggregate value) {
12567 ret = *args.reg_save_area[args.gpr]
12569 ret = **args.overflow_arg_area++;
12573 s390_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
12574 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
12576 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12577 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
12578 int indirect_p
, size
, n_reg
, sav_ofs
, sav_scale
, max_reg
;
12579 tree lab_false
, lab_over
= NULL_TREE
;
12580 tree addr
= create_tmp_var (ptr_type_node
, "addr");
12581 bool left_align_p
; /* How a value < UNITS_PER_LONG is aligned within
12584 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12585 f_fpr
= DECL_CHAIN (f_gpr
);
12586 f_ovf
= DECL_CHAIN (f_fpr
);
12587 f_sav
= DECL_CHAIN (f_ovf
);
12589 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12590 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12591 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12593 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12594 both appear on a lhs. */
12595 valist
= unshare_expr (valist
);
12596 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12598 size
= int_size_in_bytes (type
);
12600 s390_check_type_for_vector_abi (type
, true, false);
12602 if (pass_va_arg_by_reference (type
))
12604 if (TARGET_DEBUG_ARG
)
12606 fprintf (stderr
, "va_arg: aggregate type");
12610 /* Aggregates are passed by reference. */
12615 /* kernel stack layout on 31 bit: It is assumed here that no padding
12616 will be added by s390_frame_info because for va_args always an even
12617 number of gprs has to be saved r15-r2 = 14 regs. */
12618 sav_ofs
= 2 * UNITS_PER_LONG
;
12619 sav_scale
= UNITS_PER_LONG
;
12620 size
= UNITS_PER_LONG
;
12621 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12622 left_align_p
= false;
12624 else if (s390_function_arg_vector (TYPE_MODE (type
), type
))
12626 if (TARGET_DEBUG_ARG
)
12628 fprintf (stderr
, "va_arg: vector type");
12638 left_align_p
= true;
12640 else if (s390_function_arg_float (TYPE_MODE (type
), type
))
12642 if (TARGET_DEBUG_ARG
)
12644 fprintf (stderr
, "va_arg: float type");
12648 /* FP args go in FP registers, if present. */
12652 sav_ofs
= 16 * UNITS_PER_LONG
;
12654 max_reg
= FP_ARG_NUM_REG
- n_reg
;
12655 left_align_p
= false;
12659 if (TARGET_DEBUG_ARG
)
12661 fprintf (stderr
, "va_arg: other type");
12665 /* Otherwise into GP registers. */
12668 n_reg
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12670 /* kernel stack layout on 31 bit: It is assumed here that no padding
12671 will be added by s390_frame_info because for va_args always an even
12672 number of gprs has to be saved r15-r2 = 14 regs. */
12673 sav_ofs
= 2 * UNITS_PER_LONG
;
12675 if (size
< UNITS_PER_LONG
)
12676 sav_ofs
+= UNITS_PER_LONG
- size
;
12678 sav_scale
= UNITS_PER_LONG
;
12679 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12680 left_align_p
= false;
12683 /* Pull the value out of the saved registers ... */
12685 if (reg
!= NULL_TREE
)
12688 if (reg > ((typeof (reg))max_reg))
12691 addr = sav + sav_ofs + reg * save_scale;
12698 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
12699 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
12701 t
= fold_convert (TREE_TYPE (reg
), size_int (max_reg
));
12702 t
= build2 (GT_EXPR
, boolean_type_node
, reg
, t
);
12703 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
12704 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
12705 gimplify_and_add (t
, pre_p
);
12707 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
12708 u
= build2 (MULT_EXPR
, TREE_TYPE (reg
), reg
,
12709 fold_convert (TREE_TYPE (reg
), size_int (sav_scale
)));
12710 t
= fold_build_pointer_plus (t
, u
);
12712 gimplify_assign (addr
, t
, pre_p
);
12714 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
12716 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
12719 /* ... Otherwise out of the overflow area. */
12722 if (size
< UNITS_PER_LONG
&& !left_align_p
)
12723 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
- size
);
12725 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
12727 gimplify_assign (addr
, t
, pre_p
);
12729 if (size
< UNITS_PER_LONG
&& left_align_p
)
12730 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
);
12732 t
= fold_build_pointer_plus_hwi (t
, size
);
12734 gimplify_assign (ovf
, t
, pre_p
);
12736 if (reg
!= NULL_TREE
)
12737 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
12740 /* Increment register save count. */
12744 u
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (reg
), reg
,
12745 fold_convert (TREE_TYPE (reg
), size_int (n_reg
)));
12746 gimplify_and_add (u
, pre_p
);
12751 t
= build_pointer_type_for_mode (build_pointer_type (type
),
12753 addr
= fold_convert (t
, addr
);
12754 addr
= build_va_arg_indirect_ref (addr
);
12758 t
= build_pointer_type_for_mode (type
, ptr_mode
, true);
12759 addr
= fold_convert (t
, addr
);
12762 return build_va_arg_indirect_ref (addr
);
12765 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12767 DEST - Register location where CC will be stored.
12768 TDB - Pointer to a 256 byte area where to store the transaction.
12769 diagnostic block. NULL if TDB is not needed.
12770 RETRY - Retry count value. If non-NULL a retry loop for CC2
12772 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12773 of the tbegin instruction pattern. */
12776 s390_expand_tbegin (rtx dest
, rtx tdb
, rtx retry
, bool clobber_fprs_p
)
12778 rtx retry_plus_two
= gen_reg_rtx (SImode
);
12779 rtx retry_reg
= gen_reg_rtx (SImode
);
12780 rtx_code_label
*retry_label
= NULL
;
12782 if (retry
!= NULL_RTX
)
12784 emit_move_insn (retry_reg
, retry
);
12785 emit_insn (gen_addsi3 (retry_plus_two
, retry_reg
, const2_rtx
));
12786 emit_insn (gen_addsi3 (retry_reg
, retry_reg
, const1_rtx
));
12787 retry_label
= gen_label_rtx ();
12788 emit_label (retry_label
);
12791 if (clobber_fprs_p
)
12794 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12797 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12801 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12804 emit_move_insn (dest
, gen_rtx_UNSPEC (SImode
,
12805 gen_rtvec (1, gen_rtx_REG (CCRAWmode
,
12807 UNSPEC_CC_TO_INT
));
12808 if (retry
!= NULL_RTX
)
12810 const int CC0
= 1 << 3;
12811 const int CC1
= 1 << 2;
12812 const int CC3
= 1 << 0;
12814 rtx count
= gen_reg_rtx (SImode
);
12815 rtx_code_label
*leave_label
= gen_label_rtx ();
12817 /* Exit for success and permanent failures. */
12818 jump
= s390_emit_jump (leave_label
,
12819 gen_rtx_EQ (VOIDmode
,
12820 gen_rtx_REG (CCRAWmode
, CC_REGNUM
),
12821 gen_rtx_CONST_INT (VOIDmode
, CC0
| CC1
| CC3
)));
12822 LABEL_NUSES (leave_label
) = 1;
12824 /* CC2 - transient failure. Perform retry with ppa. */
12825 emit_move_insn (count
, retry_plus_two
);
12826 emit_insn (gen_subsi3 (count
, count
, retry_reg
));
12827 emit_insn (gen_tx_assist (count
));
12828 jump
= emit_jump_insn (gen_doloop_si64 (retry_label
,
12831 JUMP_LABEL (jump
) = retry_label
;
12832 LABEL_NUSES (retry_label
) = 1;
12833 emit_label (leave_label
);
12838 /* Return the decl for the target specific builtin with the function
12842 s390_builtin_decl (unsigned fcode
, bool initialized_p ATTRIBUTE_UNUSED
)
12844 if (fcode
>= S390_BUILTIN_MAX
)
12845 return error_mark_node
;
12847 return s390_builtin_decls
[fcode
];
12850 /* We call mcount before the function prologue. So a profiled leaf
12851 function should stay a leaf function. */
12854 s390_keep_leaf_when_profiled ()
12859 /* Output assembly code for the trampoline template to
12862 On S/390, we use gpr 1 internally in the trampoline code;
12863 gpr 0 is used to hold the static chain. */
12866 s390_asm_trampoline_template (FILE *file
)
12869 op
[0] = gen_rtx_REG (Pmode
, 0);
12870 op
[1] = gen_rtx_REG (Pmode
, 1);
12874 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
12875 output_asm_insn ("lmg\t%0,%1,14(%1)", op
); /* 6 byte */
12876 output_asm_insn ("br\t%1", op
); /* 2 byte */
12877 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 10));
12881 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
12882 output_asm_insn ("lm\t%0,%1,6(%1)", op
); /* 4 byte */
12883 output_asm_insn ("br\t%1", op
); /* 2 byte */
12884 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 8));
12888 /* Emit RTL insns to initialize the variable parts of a trampoline.
12889 FNADDR is an RTX for the address of the function's pure code.
12890 CXT is an RTX for the static chain value for the function. */
12893 s390_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
12895 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
12898 emit_block_move (m_tramp
, assemble_trampoline_template (),
12899 GEN_INT (2 * UNITS_PER_LONG
), BLOCK_OP_NORMAL
);
12901 mem
= adjust_address (m_tramp
, Pmode
, 2 * UNITS_PER_LONG
);
12902 emit_move_insn (mem
, cxt
);
12903 mem
= adjust_address (m_tramp
, Pmode
, 3 * UNITS_PER_LONG
);
12904 emit_move_insn (mem
, fnaddr
);
12908 output_asm_nops (const char *user
, int hw
)
12910 asm_fprintf (asm_out_file
, "\t# NOPs for %s (%d halfwords)\n", user
, hw
);
12915 output_asm_insn ("brcl\t0,0", NULL
);
12920 output_asm_insn ("bc\t0,0", NULL
);
12925 output_asm_insn ("bcr\t0,0", NULL
);
12931 /* Output assembler code to FILE to increment profiler label # LABELNO
12932 for profiling a function entry. */
12935 s390_function_profiler (FILE *file
, int labelno
)
12940 ASM_GENERATE_INTERNAL_LABEL (label
, "LP", labelno
);
12942 fprintf (file
, "# function profiler \n");
12944 op
[0] = gen_rtx_REG (Pmode
, RETURN_REGNUM
);
12945 op
[1] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
12946 op
[1] = gen_rtx_MEM (Pmode
, plus_constant (Pmode
, op
[1], UNITS_PER_LONG
));
12947 op
[7] = GEN_INT (UNITS_PER_LONG
);
12949 op
[2] = gen_rtx_REG (Pmode
, 1);
12950 op
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
12951 SYMBOL_REF_FLAGS (op
[3]) = SYMBOL_FLAG_LOCAL
;
12953 op
[4] = gen_rtx_SYMBOL_REF (Pmode
, flag_fentry
? "__fentry__" : "_mcount");
12956 op
[4] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[4]), UNSPEC_PLT
);
12957 op
[4] = gen_rtx_CONST (Pmode
, op
[4]);
12960 if (flag_record_mcount
)
12961 fprintf (file
, "1:\n");
12965 if (flag_nop_mcount
)
12966 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12967 else if (cfun
->static_chain_decl
)
12968 warning (OPT_Wcannot_profile
, "nested functions cannot be profiled "
12969 "with %<-mfentry%> on s390");
12971 output_asm_insn ("brasl\t0,%4", op
);
12973 else if (TARGET_64BIT
)
12975 if (flag_nop_mcount
)
12976 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12977 /* brasl */ 3 + /* lg */ 3);
12980 output_asm_insn ("stg\t%0,%1", op
);
12981 if (flag_dwarf2_cfi_asm
)
12982 output_asm_insn (".cfi_rel_offset\t%0,%7", op
);
12983 output_asm_insn ("larl\t%2,%3", op
);
12984 output_asm_insn ("brasl\t%0,%4", op
);
12985 output_asm_insn ("lg\t%0,%1", op
);
12986 if (flag_dwarf2_cfi_asm
)
12987 output_asm_insn (".cfi_restore\t%0", op
);
12992 if (flag_nop_mcount
)
12993 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12994 /* brasl */ 3 + /* l */ 2);
12997 output_asm_insn ("st\t%0,%1", op
);
12998 if (flag_dwarf2_cfi_asm
)
12999 output_asm_insn (".cfi_rel_offset\t%0,%7", op
);
13000 output_asm_insn ("larl\t%2,%3", op
);
13001 output_asm_insn ("brasl\t%0,%4", op
);
13002 output_asm_insn ("l\t%0,%1", op
);
13003 if (flag_dwarf2_cfi_asm
)
13004 output_asm_insn (".cfi_restore\t%0", op
);
13008 if (flag_record_mcount
)
13010 fprintf (file
, "\t.section __mcount_loc, \"a\",@progbits\n");
13011 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
13012 fprintf (file
, "\t.previous\n");
13016 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13017 into its SYMBOL_REF_FLAGS. */
13020 s390_encode_section_info (tree decl
, rtx rtl
, int first
)
13022 default_encode_section_info (decl
, rtl
, first
);
13024 if (TREE_CODE (decl
) == VAR_DECL
)
13026 /* Store the alignment to be able to check if we can use
13027 a larl/load-relative instruction. We only handle the cases
13028 that can go wrong (i.e. no FUNC_DECLs). */
13029 if (DECL_ALIGN (decl
) == 0 || DECL_ALIGN (decl
) % 16)
13030 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13031 else if (DECL_ALIGN (decl
) % 32)
13032 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13033 else if (DECL_ALIGN (decl
) % 64)
13034 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13037 /* Literal pool references don't have a decl so they are handled
13038 differently here. We rely on the information in the MEM_ALIGN
13039 entry to decide upon the alignment. */
13041 && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
13042 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl
, 0)))
13044 if (MEM_ALIGN (rtl
) == 0 || MEM_ALIGN (rtl
) % 16)
13045 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13046 else if (MEM_ALIGN (rtl
) % 32)
13047 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13048 else if (MEM_ALIGN (rtl
) % 64)
13049 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13053 /* Output thunk to FILE that implements a C++ virtual function call (with
13054 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13055 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13056 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13057 relative to the resulting this pointer. */
13060 s390_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
13061 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
13064 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
13068 assemble_start_function (thunk
, fnname
);
13069 /* Make sure unwind info is emitted for the thunk if needed. */
13070 final_start_function (emit_barrier (), file
, 1);
13072 /* Operand 0 is the target function. */
13073 op
[0] = XEXP (DECL_RTL (function
), 0);
13074 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (op
[0]))
13077 op
[0] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[0]),
13078 TARGET_64BIT
? UNSPEC_PLT
: UNSPEC_GOT
);
13079 op
[0] = gen_rtx_CONST (Pmode
, op
[0]);
13082 /* Operand 1 is the 'this' pointer. */
13083 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
13084 op
[1] = gen_rtx_REG (Pmode
, 3);
13086 op
[1] = gen_rtx_REG (Pmode
, 2);
13088 /* Operand 2 is the delta. */
13089 op
[2] = GEN_INT (delta
);
13091 /* Operand 3 is the vcall_offset. */
13092 op
[3] = GEN_INT (vcall_offset
);
13094 /* Operand 4 is the temporary register. */
13095 op
[4] = gen_rtx_REG (Pmode
, 1);
13097 /* Operands 5 to 8 can be used as labels. */
13103 /* Operand 9 can be used for temporary register. */
13106 /* Generate code. */
13109 /* Setup literal pool pointer if required. */
13110 if ((!DISP_IN_RANGE (delta
)
13111 && !CONST_OK_FOR_K (delta
)
13112 && !CONST_OK_FOR_Os (delta
))
13113 || (!DISP_IN_RANGE (vcall_offset
)
13114 && !CONST_OK_FOR_K (vcall_offset
)
13115 && !CONST_OK_FOR_Os (vcall_offset
)))
13117 op
[5] = gen_label_rtx ();
13118 output_asm_insn ("larl\t%4,%5", op
);
13121 /* Add DELTA to this pointer. */
13124 if (CONST_OK_FOR_J (delta
))
13125 output_asm_insn ("la\t%1,%2(%1)", op
);
13126 else if (DISP_IN_RANGE (delta
))
13127 output_asm_insn ("lay\t%1,%2(%1)", op
);
13128 else if (CONST_OK_FOR_K (delta
))
13129 output_asm_insn ("aghi\t%1,%2", op
);
13130 else if (CONST_OK_FOR_Os (delta
))
13131 output_asm_insn ("agfi\t%1,%2", op
);
13134 op
[6] = gen_label_rtx ();
13135 output_asm_insn ("agf\t%1,%6-%5(%4)", op
);
13139 /* Perform vcall adjustment. */
13142 if (DISP_IN_RANGE (vcall_offset
))
13144 output_asm_insn ("lg\t%4,0(%1)", op
);
13145 output_asm_insn ("ag\t%1,%3(%4)", op
);
13147 else if (CONST_OK_FOR_K (vcall_offset
))
13149 output_asm_insn ("lghi\t%4,%3", op
);
13150 output_asm_insn ("ag\t%4,0(%1)", op
);
13151 output_asm_insn ("ag\t%1,0(%4)", op
);
13153 else if (CONST_OK_FOR_Os (vcall_offset
))
13155 output_asm_insn ("lgfi\t%4,%3", op
);
13156 output_asm_insn ("ag\t%4,0(%1)", op
);
13157 output_asm_insn ("ag\t%1,0(%4)", op
);
13161 op
[7] = gen_label_rtx ();
13162 output_asm_insn ("llgf\t%4,%7-%5(%4)", op
);
13163 output_asm_insn ("ag\t%4,0(%1)", op
);
13164 output_asm_insn ("ag\t%1,0(%4)", op
);
13168 /* Jump to target. */
13169 output_asm_insn ("jg\t%0", op
);
13171 /* Output literal pool if required. */
13174 output_asm_insn (".align\t4", op
);
13175 targetm
.asm_out
.internal_label (file
, "L",
13176 CODE_LABEL_NUMBER (op
[5]));
13180 targetm
.asm_out
.internal_label (file
, "L",
13181 CODE_LABEL_NUMBER (op
[6]));
13182 output_asm_insn (".long\t%2", op
);
13186 targetm
.asm_out
.internal_label (file
, "L",
13187 CODE_LABEL_NUMBER (op
[7]));
13188 output_asm_insn (".long\t%3", op
);
13193 /* Setup base pointer if required. */
13195 || (!DISP_IN_RANGE (delta
)
13196 && !CONST_OK_FOR_K (delta
)
13197 && !CONST_OK_FOR_Os (delta
))
13198 || (!DISP_IN_RANGE (delta
)
13199 && !CONST_OK_FOR_K (vcall_offset
)
13200 && !CONST_OK_FOR_Os (vcall_offset
)))
13202 op
[5] = gen_label_rtx ();
13203 output_asm_insn ("basr\t%4,0", op
);
13204 targetm
.asm_out
.internal_label (file
, "L",
13205 CODE_LABEL_NUMBER (op
[5]));
13208 /* Add DELTA to this pointer. */
13211 if (CONST_OK_FOR_J (delta
))
13212 output_asm_insn ("la\t%1,%2(%1)", op
);
13213 else if (DISP_IN_RANGE (delta
))
13214 output_asm_insn ("lay\t%1,%2(%1)", op
);
13215 else if (CONST_OK_FOR_K (delta
))
13216 output_asm_insn ("ahi\t%1,%2", op
);
13217 else if (CONST_OK_FOR_Os (delta
))
13218 output_asm_insn ("afi\t%1,%2", op
);
13221 op
[6] = gen_label_rtx ();
13222 output_asm_insn ("a\t%1,%6-%5(%4)", op
);
13226 /* Perform vcall adjustment. */
13229 if (CONST_OK_FOR_J (vcall_offset
))
13231 output_asm_insn ("l\t%4,0(%1)", op
);
13232 output_asm_insn ("a\t%1,%3(%4)", op
);
13234 else if (DISP_IN_RANGE (vcall_offset
))
13236 output_asm_insn ("l\t%4,0(%1)", op
);
13237 output_asm_insn ("ay\t%1,%3(%4)", op
);
13239 else if (CONST_OK_FOR_K (vcall_offset
))
13241 output_asm_insn ("lhi\t%4,%3", op
);
13242 output_asm_insn ("a\t%4,0(%1)", op
);
13243 output_asm_insn ("a\t%1,0(%4)", op
);
13245 else if (CONST_OK_FOR_Os (vcall_offset
))
13247 output_asm_insn ("iilf\t%4,%3", op
);
13248 output_asm_insn ("a\t%4,0(%1)", op
);
13249 output_asm_insn ("a\t%1,0(%4)", op
);
13253 op
[7] = gen_label_rtx ();
13254 output_asm_insn ("l\t%4,%7-%5(%4)", op
);
13255 output_asm_insn ("a\t%4,0(%1)", op
);
13256 output_asm_insn ("a\t%1,0(%4)", op
);
13259 /* We had to clobber the base pointer register.
13260 Re-setup the base pointer (with a different base). */
13261 op
[5] = gen_label_rtx ();
13262 output_asm_insn ("basr\t%4,0", op
);
13263 targetm
.asm_out
.internal_label (file
, "L",
13264 CODE_LABEL_NUMBER (op
[5]));
13267 /* Jump to target. */
13268 op
[8] = gen_label_rtx ();
13271 output_asm_insn ("l\t%4,%8-%5(%4)", op
);
13272 else if (!nonlocal
)
13273 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13274 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13275 else if (flag_pic
== 1)
13277 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13278 output_asm_insn ("l\t%4,%0(%4)", op
);
13280 else if (flag_pic
== 2)
13282 op
[9] = gen_rtx_REG (Pmode
, 0);
13283 output_asm_insn ("l\t%9,%8-4-%5(%4)", op
);
13284 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13285 output_asm_insn ("ar\t%4,%9", op
);
13286 output_asm_insn ("l\t%4,0(%4)", op
);
13289 output_asm_insn ("br\t%4", op
);
13291 /* Output literal pool. */
13292 output_asm_insn (".align\t4", op
);
13294 if (nonlocal
&& flag_pic
== 2)
13295 output_asm_insn (".long\t%0", op
);
13298 op
[0] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
13299 SYMBOL_REF_FLAGS (op
[0]) = SYMBOL_FLAG_LOCAL
;
13302 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[8]));
13304 output_asm_insn (".long\t%0", op
);
13306 output_asm_insn (".long\t%0-%5", op
);
13310 targetm
.asm_out
.internal_label (file
, "L",
13311 CODE_LABEL_NUMBER (op
[6]));
13312 output_asm_insn (".long\t%2", op
);
13316 targetm
.asm_out
.internal_label (file
, "L",
13317 CODE_LABEL_NUMBER (op
[7]));
13318 output_asm_insn (".long\t%3", op
);
13321 final_end_function ();
13322 assemble_end_function (thunk
, fnname
);
13325 /* Output either an indirect jump or an indirect call
13326 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13327 using a branch trampoline disabling branch target prediction. */
13330 s390_indirect_branch_via_thunk (unsigned int regno
,
13331 unsigned int return_addr_regno
,
13332 rtx comparison_operator
,
13333 enum s390_indirect_branch_type type
)
13335 enum s390_indirect_branch_option option
;
13337 if (type
== s390_indirect_branch_type_return
)
13339 if (s390_return_addr_from_memory ())
13340 option
= s390_opt_function_return_mem
;
13342 option
= s390_opt_function_return_reg
;
13344 else if (type
== s390_indirect_branch_type_jump
)
13345 option
= s390_opt_indirect_branch_jump
;
13346 else if (type
== s390_indirect_branch_type_call
)
13347 option
= s390_opt_indirect_branch_call
;
13349 gcc_unreachable ();
13351 if (TARGET_INDIRECT_BRANCH_TABLE
)
13355 ASM_GENERATE_INTERNAL_LABEL (label
,
13356 indirect_branch_table_label
[option
],
13357 indirect_branch_table_label_no
[option
]++);
13358 ASM_OUTPUT_LABEL (asm_out_file
, label
);
13361 if (return_addr_regno
!= INVALID_REGNUM
)
13363 gcc_assert (comparison_operator
== NULL_RTX
);
13364 fprintf (asm_out_file
, " \tbrasl\t%%r%d,", return_addr_regno
);
13368 fputs (" \tjg", asm_out_file
);
13369 if (comparison_operator
!= NULL_RTX
)
13370 print_operand (asm_out_file
, comparison_operator
, 'C');
13372 fputs ("\t", asm_out_file
);
13375 if (TARGET_CPU_Z10
)
13376 fprintf (asm_out_file
,
13377 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
"\n",
13380 fprintf (asm_out_file
,
13381 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
"\n",
13382 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
13384 if ((option
== s390_opt_indirect_branch_jump
13385 && cfun
->machine
->indirect_branch_jump
== indirect_branch_thunk
)
13386 || (option
== s390_opt_indirect_branch_call
13387 && cfun
->machine
->indirect_branch_call
== indirect_branch_thunk
)
13388 || (option
== s390_opt_function_return_reg
13389 && cfun
->machine
->function_return_reg
== indirect_branch_thunk
)
13390 || (option
== s390_opt_function_return_mem
13391 && cfun
->machine
->function_return_mem
== indirect_branch_thunk
))
13393 if (TARGET_CPU_Z10
)
13394 indirect_branch_z10thunk_mask
|= (1 << regno
);
13396 indirect_branch_prez10thunk_mask
|= (1 << regno
);
13400 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13401 either be an address register or a label pointing to the location
13402 of the jump instruction. */
13405 s390_indirect_branch_via_inline_thunk (rtx execute_target
)
13407 if (TARGET_INDIRECT_BRANCH_TABLE
)
13411 ASM_GENERATE_INTERNAL_LABEL (label
,
13412 indirect_branch_table_label
[s390_opt_indirect_branch_jump
],
13413 indirect_branch_table_label_no
[s390_opt_indirect_branch_jump
]++);
13414 ASM_OUTPUT_LABEL (asm_out_file
, label
);
13418 fputs ("\t.machinemode zarch\n", asm_out_file
);
13420 if (REG_P (execute_target
))
13421 fprintf (asm_out_file
, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target
));
13423 output_asm_insn ("\texrl\t%%r0,%0", &execute_target
);
13426 fputs ("\t.machinemode esa\n", asm_out_file
);
13428 fputs ("0:\tj\t0b\n", asm_out_file
);
13432 s390_valid_pointer_mode (scalar_int_mode mode
)
13434 return (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
));
13437 /* Checks whether the given CALL_EXPR would use a caller
13438 saved register. This is used to decide whether sibling call
13439 optimization could be performed on the respective function
13443 s390_call_saved_register_used (tree call_expr
)
13445 CUMULATIVE_ARGS cum_v
;
13446 cumulative_args_t cum
;
13451 INIT_CUMULATIVE_ARGS (cum_v
, NULL
, NULL
, 0, 0);
13452 cum
= pack_cumulative_args (&cum_v
);
13454 for (i
= 0; i
< call_expr_nargs (call_expr
); i
++)
13456 parameter
= CALL_EXPR_ARG (call_expr
, i
);
13457 gcc_assert (parameter
);
13459 /* For an undeclared variable passed as parameter we will get
13460 an ERROR_MARK node here. */
13461 if (TREE_CODE (parameter
) == ERROR_MARK
)
13464 /* We assume that in the target function all parameters are
13465 named. This only has an impact on vector argument register
13466 usage none of which is call-saved. */
13467 function_arg_info
arg (TREE_TYPE (parameter
), /*named=*/true);
13468 apply_pass_by_reference_rules (&cum_v
, arg
);
13470 parm_rtx
= s390_function_arg (cum
, arg
);
13472 s390_function_arg_advance (cum
, arg
);
13477 if (REG_P (parm_rtx
))
13479 for (reg
= 0; reg
< REG_NREGS (parm_rtx
); reg
++)
13480 if (!call_used_or_fixed_reg_p (reg
+ REGNO (parm_rtx
)))
13484 if (GET_CODE (parm_rtx
) == PARALLEL
)
13488 for (i
= 0; i
< XVECLEN (parm_rtx
, 0); i
++)
13490 rtx r
= XEXP (XVECEXP (parm_rtx
, 0, i
), 0);
13492 gcc_assert (REG_P (r
));
13494 for (reg
= 0; reg
< REG_NREGS (r
); reg
++)
13495 if (!call_used_or_fixed_reg_p (reg
+ REGNO (r
)))
13504 /* Return true if the given call expression can be
13505 turned into a sibling call.
13506 DECL holds the declaration of the function to be called whereas
13507 EXP is the call expression itself. */
13510 s390_function_ok_for_sibcall (tree decl
, tree exp
)
13512 /* The TPF epilogue uses register 1. */
13513 if (TARGET_TPF_PROFILING
)
13516 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13517 which would have to be restored before the sibcall. */
13518 if (!TARGET_64BIT
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
13521 /* The thunks for indirect branches require r1 if no exrl is
13522 available. r1 might not be available when doing a sibling
13524 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13529 /* Register 6 on s390 is available as an argument register but unfortunately
13530 "caller saved". This makes functions needing this register for arguments
13531 not suitable for sibcalls. */
13532 return !s390_call_saved_register_used (exp
);
13535 /* Return the fixed registers used for condition codes. */
13538 s390_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
13541 *p2
= INVALID_REGNUM
;
13546 /* This function is used by the call expanders of the machine description.
13547 It emits the call insn itself together with the necessary operations
13548 to adjust the target address and returns the emitted insn.
13549 ADDR_LOCATION is the target address rtx
13550 TLS_CALL the location of the thread-local symbol
13551 RESULT_REG the register where the result of the call should be stored
13552 RETADDR_REG the register where the return address should be stored
13553 If this parameter is NULL_RTX the call is considered
13554 to be a sibling call. */
13557 s390_emit_call (rtx addr_location
, rtx tls_call
, rtx result_reg
,
13560 bool plt_call
= false;
13562 rtx vec
[4] = { NULL_RTX
};
13564 rtx
*call
= &vec
[0];
13565 rtx
*clobber_ret_reg
= &vec
[1];
13566 rtx
*use
= &vec
[2];
13567 rtx
*clobber_thunk_reg
= &vec
[3];
13570 /* Direct function calls need special treatment. */
13571 if (GET_CODE (addr_location
) == SYMBOL_REF
)
13573 /* When calling a global routine in PIC mode, we must
13574 replace the symbol itself with the PLT stub. */
13575 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (addr_location
))
13577 if (TARGET_64BIT
|| retaddr_reg
!= NULL_RTX
)
13579 addr_location
= gen_rtx_UNSPEC (Pmode
,
13580 gen_rtvec (1, addr_location
),
13582 addr_location
= gen_rtx_CONST (Pmode
, addr_location
);
13586 /* For -fpic code the PLT entries might use r12 which is
13587 call-saved. Therefore we cannot do a sibcall when
13588 calling directly using a symbol ref. When reaching
13589 this point we decided (in s390_function_ok_for_sibcall)
13590 to do a sibcall for a function pointer but one of the
13591 optimizers was able to get rid of the function pointer
13592 by propagating the symbol ref into the call. This
13593 optimization is illegal for S/390 so we turn the direct
13594 call into a indirect call again. */
13595 addr_location
= force_reg (Pmode
, addr_location
);
13599 /* If it is already an indirect call or the code above moved the
13600 SYMBOL_REF to somewhere else make sure the address can be found in
13602 if (retaddr_reg
== NULL_RTX
13603 && GET_CODE (addr_location
) != SYMBOL_REF
13606 emit_move_insn (gen_rtx_REG (Pmode
, SIBCALL_REGNUM
), addr_location
);
13607 addr_location
= gen_rtx_REG (Pmode
, SIBCALL_REGNUM
);
13610 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13611 && GET_CODE (addr_location
) != SYMBOL_REF
13614 /* Indirect branch thunks require the target to be a single GPR. */
13615 addr_location
= force_reg (Pmode
, addr_location
);
13617 /* Without exrl the indirect branch thunks need an additional
13618 register for larl;ex */
13619 if (!TARGET_CPU_Z10
)
13621 *clobber_thunk_reg
= gen_rtx_REG (Pmode
, INDIRECT_BRANCH_THUNK_REGNUM
);
13622 *clobber_thunk_reg
= gen_rtx_CLOBBER (VOIDmode
, *clobber_thunk_reg
);
13626 addr_location
= gen_rtx_MEM (QImode
, addr_location
);
13627 *call
= gen_rtx_CALL (VOIDmode
, addr_location
, const0_rtx
);
13629 if (result_reg
!= NULL_RTX
)
13630 *call
= gen_rtx_SET (result_reg
, *call
);
13632 if (retaddr_reg
!= NULL_RTX
)
13634 *clobber_ret_reg
= gen_rtx_CLOBBER (VOIDmode
, retaddr_reg
);
13636 if (tls_call
!= NULL_RTX
)
13637 *use
= gen_rtx_USE (VOIDmode
, tls_call
);
13641 for (i
= 0; i
< 4; i
++)
13642 if (vec
[i
] != NULL_RTX
)
13650 v
= rtvec_alloc (elts
);
13651 for (i
= 0; i
< 4; i
++)
13652 if (vec
[i
] != NULL_RTX
)
13654 RTVEC_ELT (v
, e
) = vec
[i
];
13658 *call
= gen_rtx_PARALLEL (VOIDmode
, v
);
13661 insn
= emit_call_insn (*call
);
13663 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13664 if ((!TARGET_64BIT
&& plt_call
) || tls_call
!= NULL_RTX
)
13666 /* s390_function_ok_for_sibcall should
13667 have denied sibcalls in this case. */
13668 gcc_assert (retaddr_reg
!= NULL_RTX
);
13669 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, 12));
13674 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13677 s390_conditional_register_usage (void)
13682 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
13683 fixed_regs
[BASE_REGNUM
] = 0;
13684 fixed_regs
[RETURN_REGNUM
] = 0;
13687 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13688 call_used_regs
[i
] = 0;
13692 call_used_regs
[FPR4_REGNUM
] = 0;
13693 call_used_regs
[FPR6_REGNUM
] = 0;
13696 if (TARGET_SOFT_FLOAT
)
13698 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13702 /* Disable v16 - v31 for non-vector target. */
13705 for (i
= VR16_REGNUM
; i
<= VR31_REGNUM
; i
++)
13706 fixed_regs
[i
] = call_used_regs
[i
] = 1;
13710 /* Corresponding function to eh_return expander. */
13712 static GTY(()) rtx s390_tpf_eh_return_symbol
;
13714 s390_emit_tpf_eh_return (rtx target
)
13719 if (!s390_tpf_eh_return_symbol
)
13720 s390_tpf_eh_return_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tpf_eh_return");
13722 reg
= gen_rtx_REG (Pmode
, 2);
13723 orig_ra
= gen_rtx_REG (Pmode
, 3);
13725 emit_move_insn (reg
, target
);
13726 emit_move_insn (orig_ra
, get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
));
13727 insn
= s390_emit_call (s390_tpf_eh_return_symbol
, NULL_RTX
, reg
,
13728 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
13729 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), reg
);
13730 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), orig_ra
);
13732 emit_move_insn (EH_RETURN_HANDLER_RTX
, reg
);
13735 /* Rework the prologue/epilogue to avoid saving/restoring
13736 registers unnecessarily. */
13739 s390_optimize_prologue (void)
13741 rtx_insn
*insn
, *new_insn
, *next_insn
;
13743 /* Do a final recompute of the frame-related data. */
13744 s390_optimize_register_info ();
13746 /* If all special registers are in fact used, there's nothing we
13747 can do, so no point in walking the insn list. */
13749 if (cfun_frame_layout
.first_save_gpr
<= BASE_REGNUM
13750 && cfun_frame_layout
.last_save_gpr
>= BASE_REGNUM
)
13753 /* Search for prologue/epilogue insns and replace them. */
13754 for (insn
= get_insns (); insn
; insn
= next_insn
)
13756 int first
, last
, off
;
13757 rtx set
, base
, offset
;
13760 next_insn
= NEXT_INSN (insn
);
13762 if (! NONJUMP_INSN_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
13765 pat
= PATTERN (insn
);
13767 /* Remove ldgr/lgdr instructions used for saving and restore
13768 GPRs if possible. */
13773 if (INSN_CODE (insn
) == CODE_FOR_stack_restore_from_fpr
)
13774 tmp_pat
= XVECEXP (pat
, 0, 0);
13776 if (GET_CODE (tmp_pat
) == SET
13777 && GET_MODE (SET_SRC (tmp_pat
)) == DImode
13778 && REG_P (SET_SRC (tmp_pat
))
13779 && REG_P (SET_DEST (tmp_pat
)))
13781 int src_regno
= REGNO (SET_SRC (tmp_pat
));
13782 int dest_regno
= REGNO (SET_DEST (tmp_pat
));
13786 if (!((GENERAL_REGNO_P (src_regno
)
13787 && FP_REGNO_P (dest_regno
))
13788 || (FP_REGNO_P (src_regno
)
13789 && GENERAL_REGNO_P (dest_regno
))))
13792 gpr_regno
= GENERAL_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13793 fpr_regno
= FP_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13795 /* GPR must be call-saved, FPR must be call-clobbered. */
13796 if (!call_used_regs
[fpr_regno
]
13797 || call_used_regs
[gpr_regno
])
13800 /* It must not happen that what we once saved in an FPR now
13801 needs a stack slot. */
13802 gcc_assert (cfun_gpr_save_slot (gpr_regno
) != SAVE_SLOT_STACK
);
13804 if (cfun_gpr_save_slot (gpr_regno
) == SAVE_SLOT_NONE
)
13806 remove_insn (insn
);
13812 if (GET_CODE (pat
) == PARALLEL
13813 && store_multiple_operation (pat
, VOIDmode
))
13815 set
= XVECEXP (pat
, 0, 0);
13816 first
= REGNO (SET_SRC (set
));
13817 last
= first
+ XVECLEN (pat
, 0) - 1;
13818 offset
= const0_rtx
;
13819 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13820 off
= INTVAL (offset
);
13822 if (GET_CODE (base
) != REG
|| off
< 0)
13824 if (cfun_frame_layout
.first_save_gpr
!= -1
13825 && (cfun_frame_layout
.first_save_gpr
< first
13826 || cfun_frame_layout
.last_save_gpr
> last
))
13828 if (REGNO (base
) != STACK_POINTER_REGNUM
13829 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13831 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13834 if (cfun_frame_layout
.first_save_gpr
!= -1)
13836 rtx s_pat
= save_gprs (base
,
13837 off
+ (cfun_frame_layout
.first_save_gpr
13838 - first
) * UNITS_PER_LONG
,
13839 cfun_frame_layout
.first_save_gpr
,
13840 cfun_frame_layout
.last_save_gpr
);
13841 new_insn
= emit_insn_before (s_pat
, insn
);
13842 INSN_ADDRESSES_NEW (new_insn
, -1);
13845 remove_insn (insn
);
13849 if (cfun_frame_layout
.first_save_gpr
== -1
13850 && GET_CODE (pat
) == SET
13851 && GENERAL_REG_P (SET_SRC (pat
))
13852 && GET_CODE (SET_DEST (pat
)) == MEM
)
13855 first
= REGNO (SET_SRC (set
));
13856 offset
= const0_rtx
;
13857 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13858 off
= INTVAL (offset
);
13860 if (GET_CODE (base
) != REG
|| off
< 0)
13862 if (REGNO (base
) != STACK_POINTER_REGNUM
13863 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13866 remove_insn (insn
);
13870 if (GET_CODE (pat
) == PARALLEL
13871 && load_multiple_operation (pat
, VOIDmode
))
13873 set
= XVECEXP (pat
, 0, 0);
13874 first
= REGNO (SET_DEST (set
));
13875 last
= first
+ XVECLEN (pat
, 0) - 1;
13876 offset
= const0_rtx
;
13877 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13878 off
= INTVAL (offset
);
13880 if (GET_CODE (base
) != REG
|| off
< 0)
13883 if (cfun_frame_layout
.first_restore_gpr
!= -1
13884 && (cfun_frame_layout
.first_restore_gpr
< first
13885 || cfun_frame_layout
.last_restore_gpr
> last
))
13887 if (REGNO (base
) != STACK_POINTER_REGNUM
13888 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13890 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13893 if (cfun_frame_layout
.first_restore_gpr
!= -1)
13895 rtx rpat
= restore_gprs (base
,
13896 off
+ (cfun_frame_layout
.first_restore_gpr
13897 - first
) * UNITS_PER_LONG
,
13898 cfun_frame_layout
.first_restore_gpr
,
13899 cfun_frame_layout
.last_restore_gpr
);
13901 /* Remove REG_CFA_RESTOREs for registers that we no
13902 longer need to save. */
13903 REG_NOTES (rpat
) = REG_NOTES (insn
);
13904 for (rtx
*ptr
= ®_NOTES (rpat
); *ptr
; )
13905 if (REG_NOTE_KIND (*ptr
) == REG_CFA_RESTORE
13906 && ((int) REGNO (XEXP (*ptr
, 0))
13907 < cfun_frame_layout
.first_restore_gpr
))
13908 *ptr
= XEXP (*ptr
, 1);
13910 ptr
= &XEXP (*ptr
, 1);
13911 new_insn
= emit_insn_before (rpat
, insn
);
13912 RTX_FRAME_RELATED_P (new_insn
) = 1;
13913 INSN_ADDRESSES_NEW (new_insn
, -1);
13916 remove_insn (insn
);
13920 if (cfun_frame_layout
.first_restore_gpr
== -1
13921 && GET_CODE (pat
) == SET
13922 && GENERAL_REG_P (SET_DEST (pat
))
13923 && GET_CODE (SET_SRC (pat
)) == MEM
)
13926 first
= REGNO (SET_DEST (set
));
13927 offset
= const0_rtx
;
13928 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13929 off
= INTVAL (offset
);
13931 if (GET_CODE (base
) != REG
|| off
< 0)
13934 if (REGNO (base
) != STACK_POINTER_REGNUM
13935 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13938 remove_insn (insn
);
13944 /* On z10 and later the dynamic branch prediction must see the
13945 backward jump within a certain windows. If not it falls back to
13946 the static prediction. This function rearranges the loop backward
13947 branch in a way which makes the static prediction always correct.
13948 The function returns true if it added an instruction. */
13950 s390_fix_long_loop_prediction (rtx_insn
*insn
)
13952 rtx set
= single_set (insn
);
13953 rtx code_label
, label_ref
;
13954 rtx_insn
*uncond_jump
;
13955 rtx_insn
*cur_insn
;
13959 /* This will exclude branch on count and branch on index patterns
13960 since these are correctly statically predicted.
13962 The additional check for a PARALLEL is required here since
13963 single_set might be != NULL for PARALLELs where the set of the
13964 iteration variable is dead. */
13965 if (GET_CODE (PATTERN (insn
)) == PARALLEL
13967 || SET_DEST (set
) != pc_rtx
13968 || GET_CODE (SET_SRC(set
)) != IF_THEN_ELSE
)
13971 /* Skip conditional returns. */
13972 if (ANY_RETURN_P (XEXP (SET_SRC (set
), 1))
13973 && XEXP (SET_SRC (set
), 2) == pc_rtx
)
13976 label_ref
= (GET_CODE (XEXP (SET_SRC (set
), 1)) == LABEL_REF
?
13977 XEXP (SET_SRC (set
), 1) : XEXP (SET_SRC (set
), 2));
13979 gcc_assert (GET_CODE (label_ref
) == LABEL_REF
);
13981 code_label
= XEXP (label_ref
, 0);
13983 if (INSN_ADDRESSES (INSN_UID (code_label
)) == -1
13984 || INSN_ADDRESSES (INSN_UID (insn
)) == -1
13985 || (INSN_ADDRESSES (INSN_UID (insn
))
13986 - INSN_ADDRESSES (INSN_UID (code_label
)) < PREDICT_DISTANCE
))
13989 for (distance
= 0, cur_insn
= PREV_INSN (insn
);
13990 distance
< PREDICT_DISTANCE
- 6;
13991 distance
+= get_attr_length (cur_insn
), cur_insn
= PREV_INSN (cur_insn
))
13992 if (!cur_insn
|| JUMP_P (cur_insn
) || LABEL_P (cur_insn
))
13995 rtx_code_label
*new_label
= gen_label_rtx ();
13996 uncond_jump
= emit_jump_insn_after (
13997 gen_rtx_SET (pc_rtx
,
13998 gen_rtx_LABEL_REF (VOIDmode
, code_label
)),
14000 emit_label_after (new_label
, uncond_jump
);
14002 tmp
= XEXP (SET_SRC (set
), 1);
14003 XEXP (SET_SRC (set
), 1) = XEXP (SET_SRC (set
), 2);
14004 XEXP (SET_SRC (set
), 2) = tmp
;
14005 INSN_CODE (insn
) = -1;
14007 XEXP (label_ref
, 0) = new_label
;
14008 JUMP_LABEL (insn
) = new_label
;
14009 JUMP_LABEL (uncond_jump
) = code_label
;
14014 /* Returns 1 if INSN reads the value of REG for purposes not related
14015 to addressing of memory, and 0 otherwise. */
14017 s390_non_addr_reg_read_p (rtx reg
, rtx_insn
*insn
)
14019 return reg_referenced_p (reg
, PATTERN (insn
))
14020 && !reg_used_in_mem_p (REGNO (reg
), PATTERN (insn
));
14023 /* Starting from INSN find_cond_jump looks downwards in the insn
14024 stream for a single jump insn which is the last user of the
14025 condition code set in INSN. */
14027 find_cond_jump (rtx_insn
*insn
)
14029 for (; insn
; insn
= NEXT_INSN (insn
))
14033 if (LABEL_P (insn
))
14036 if (!JUMP_P (insn
))
14038 if (reg_mentioned_p (gen_rtx_REG (CCmode
, CC_REGNUM
), insn
))
14043 /* This will be triggered by a return. */
14044 if (GET_CODE (PATTERN (insn
)) != SET
)
14047 gcc_assert (SET_DEST (PATTERN (insn
)) == pc_rtx
);
14048 ite
= SET_SRC (PATTERN (insn
));
14050 if (GET_CODE (ite
) != IF_THEN_ELSE
)
14053 cc
= XEXP (XEXP (ite
, 0), 0);
14054 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
)))
14057 if (find_reg_note (insn
, REG_DEAD
, cc
))
14065 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14066 the semantics does not change. If NULL_RTX is passed as COND the
14067 function tries to find the conditional jump starting with INSN. */
14069 s390_swap_cmp (rtx cond
, rtx
*op0
, rtx
*op1
, rtx_insn
*insn
)
14073 if (cond
== NULL_RTX
)
14075 rtx_insn
*jump
= find_cond_jump (NEXT_INSN (insn
));
14076 rtx set
= jump
? single_set (jump
) : NULL_RTX
;
14078 if (set
== NULL_RTX
)
14081 cond
= XEXP (SET_SRC (set
), 0);
14086 PUT_CODE (cond
, swap_condition (GET_CODE (cond
)));
14089 /* On z10, instructions of the compare-and-branch family have the
14090 property to access the register occurring as second operand with
14091 its bits complemented. If such a compare is grouped with a second
14092 instruction that accesses the same register non-complemented, and
14093 if that register's value is delivered via a bypass, then the
14094 pipeline recycles, thereby causing significant performance decline.
14095 This function locates such situations and exchanges the two
14096 operands of the compare. The function return true whenever it
14099 s390_z10_optimize_cmp (rtx_insn
*insn
)
14101 rtx_insn
*prev_insn
, *next_insn
;
14102 bool insn_added_p
= false;
14103 rtx cond
, *op0
, *op1
;
14105 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
14107 /* Handle compare and branch and branch on count
14109 rtx pattern
= single_set (insn
);
14112 || SET_DEST (pattern
) != pc_rtx
14113 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
)
14116 cond
= XEXP (SET_SRC (pattern
), 0);
14117 op0
= &XEXP (cond
, 0);
14118 op1
= &XEXP (cond
, 1);
14120 else if (GET_CODE (PATTERN (insn
)) == SET
)
14124 /* Handle normal compare instructions. */
14125 src
= SET_SRC (PATTERN (insn
));
14126 dest
= SET_DEST (PATTERN (insn
));
14129 || !CC_REGNO_P (REGNO (dest
))
14130 || GET_CODE (src
) != COMPARE
)
14133 /* s390_swap_cmp will try to find the conditional
14134 jump when passing NULL_RTX as condition. */
14136 op0
= &XEXP (src
, 0);
14137 op1
= &XEXP (src
, 1);
14142 if (!REG_P (*op0
) || !REG_P (*op1
))
14145 if (GET_MODE_CLASS (GET_MODE (*op0
)) != MODE_INT
)
14148 /* Swap the COMPARE arguments and its mask if there is a
14149 conflicting access in the previous insn. */
14150 prev_insn
= prev_active_insn (insn
);
14151 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14152 && reg_referenced_p (*op1
, PATTERN (prev_insn
)))
14153 s390_swap_cmp (cond
, op0
, op1
, insn
);
14155 /* Check if there is a conflict with the next insn. If there
14156 was no conflict with the previous insn, then swap the
14157 COMPARE arguments and its mask. If we already swapped
14158 the operands, or if swapping them would cause a conflict
14159 with the previous insn, issue a NOP after the COMPARE in
14160 order to separate the two instuctions. */
14161 next_insn
= next_active_insn (insn
);
14162 if (next_insn
!= NULL_RTX
&& INSN_P (next_insn
)
14163 && s390_non_addr_reg_read_p (*op1
, next_insn
))
14165 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14166 && s390_non_addr_reg_read_p (*op0
, prev_insn
))
14168 if (REGNO (*op1
) == 0)
14169 emit_insn_after (gen_nop_lr1 (), insn
);
14171 emit_insn_after (gen_nop_lr0 (), insn
);
14172 insn_added_p
= true;
14175 s390_swap_cmp (cond
, op0
, op1
, insn
);
14177 return insn_added_p
;
14180 /* Number of INSNs to be scanned backward in the last BB of the loop
14181 and forward in the first BB of the loop. This usually should be a
14182 bit more than the number of INSNs which could go into one
14184 #define S390_OSC_SCAN_INSN_NUM 5
14186 /* Scan LOOP for static OSC collisions and return true if a osc_break
14187 should be issued for this loop. */
14189 s390_adjust_loop_scan_osc (struct loop
* loop
)
14192 HARD_REG_SET modregs
, newregs
;
14193 rtx_insn
*insn
, *store_insn
= NULL
;
14195 struct s390_address addr_store
, addr_load
;
14196 subrtx_iterator::array_type array
;
14199 CLEAR_HARD_REG_SET (modregs
);
14202 FOR_BB_INSNS_REVERSE (loop
->latch
, insn
)
14204 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14208 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14211 find_all_hard_reg_sets (insn
, &newregs
, true);
14212 modregs
|= newregs
;
14214 set
= single_set (insn
);
14218 if (MEM_P (SET_DEST (set
))
14219 && s390_decompose_address (XEXP (SET_DEST (set
), 0), &addr_store
))
14226 if (store_insn
== NULL_RTX
)
14230 FOR_BB_INSNS (loop
->header
, insn
)
14232 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14235 if (insn
== store_insn
)
14239 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14242 find_all_hard_reg_sets (insn
, &newregs
, true);
14243 modregs
|= newregs
;
14245 set
= single_set (insn
);
14249 /* An intermediate store disrupts static OSC checking
14251 if (MEM_P (SET_DEST (set
))
14252 && s390_decompose_address (XEXP (SET_DEST (set
), 0), NULL
))
14255 FOR_EACH_SUBRTX (iter
, array
, SET_SRC (set
), NONCONST
)
14257 && s390_decompose_address (XEXP (*iter
, 0), &addr_load
)
14258 && rtx_equal_p (addr_load
.base
, addr_store
.base
)
14259 && rtx_equal_p (addr_load
.indx
, addr_store
.indx
)
14260 && rtx_equal_p (addr_load
.disp
, addr_store
.disp
))
14262 if ((addr_load
.base
!= NULL_RTX
14263 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.base
)))
14264 || (addr_load
.indx
!= NULL_RTX
14265 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.indx
))))
14272 /* Look for adjustments which can be done on simple innermost
14275 s390_adjust_loops ()
14277 struct loop
*loop
= NULL
;
14280 compute_bb_for_insn ();
14282 /* Find the loops. */
14283 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
14285 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
14289 flow_loop_dump (loop
, dump_file
, NULL
, 0);
14290 fprintf (dump_file
, ";; OSC loop scan Loop: ");
14292 if (loop
->latch
== NULL
14293 || pc_set (BB_END (loop
->latch
)) == NULL_RTX
14294 || !s390_adjust_loop_scan_osc (loop
))
14298 if (loop
->latch
== NULL
)
14299 fprintf (dump_file
, " muliple backward jumps\n");
14302 fprintf (dump_file
, " header insn: %d latch insn: %d ",
14303 INSN_UID (BB_HEAD (loop
->header
)),
14304 INSN_UID (BB_END (loop
->latch
)));
14305 if (pc_set (BB_END (loop
->latch
)) == NULL_RTX
)
14306 fprintf (dump_file
, " loop does not end with jump\n");
14308 fprintf (dump_file
, " not instrumented\n");
14314 rtx_insn
*new_insn
;
14317 fprintf (dump_file
, " adding OSC break insn: ");
14318 new_insn
= emit_insn_before (gen_osc_break (),
14319 BB_END (loop
->latch
));
14320 INSN_ADDRESSES_NEW (new_insn
, -1);
14324 loop_optimizer_finalize ();
14326 df_finish_pass (false);
14329 /* Perform machine-dependent processing. */
14334 struct constant_pool
*pool
;
14336 int hw_before
, hw_after
;
14338 if (s390_tune
== PROCESSOR_2964_Z13
)
14339 s390_adjust_loops ();
14341 /* Make sure all splits have been performed; splits after
14342 machine_dependent_reorg might confuse insn length counts. */
14343 split_all_insns_noflow ();
14345 /* Install the main literal pool and the associated base
14346 register load insns. The literal pool might be > 4096 bytes in
14347 size, so that some of its elements cannot be directly accessed.
14349 To fix this, we split the single literal pool into multiple
14350 pool chunks, reloading the pool base register at various
14351 points throughout the function to ensure it always points to
14352 the pool chunk the following code expects. */
14354 /* Collect the literal pool. */
14355 pool
= s390_mainpool_start ();
14358 /* Finish up literal pool related changes. */
14359 s390_mainpool_finish (pool
);
14363 /* If literal pool overflowed, chunkify it. */
14364 pool
= s390_chunkify_start ();
14365 s390_chunkify_finish (pool
);
14368 /* Generate out-of-pool execute target insns. */
14369 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14374 label
= s390_execute_label (insn
);
14378 gcc_assert (label
!= const0_rtx
);
14380 target
= emit_label (XEXP (label
, 0));
14381 INSN_ADDRESSES_NEW (target
, -1);
14385 target
= emit_jump_insn (s390_execute_target (insn
));
14386 /* This is important in order to keep a table jump
14387 pointing at the jump table label. Only this makes it
14388 being recognized as table jump. */
14389 JUMP_LABEL (target
) = JUMP_LABEL (insn
);
14392 target
= emit_insn (s390_execute_target (insn
));
14393 INSN_ADDRESSES_NEW (target
, -1);
14396 /* Try to optimize prologue and epilogue further. */
14397 s390_optimize_prologue ();
14399 /* Walk over the insns and do some >=z10 specific changes. */
14400 if (s390_tune
>= PROCESSOR_2097_Z10
)
14403 bool insn_added_p
= false;
14405 /* The insn lengths and addresses have to be up to date for the
14406 following manipulations. */
14407 shorten_branches (get_insns ());
14409 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14411 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14415 insn_added_p
|= s390_fix_long_loop_prediction (insn
);
14417 if ((GET_CODE (PATTERN (insn
)) == PARALLEL
14418 || GET_CODE (PATTERN (insn
)) == SET
)
14419 && s390_tune
== PROCESSOR_2097_Z10
)
14420 insn_added_p
|= s390_z10_optimize_cmp (insn
);
14423 /* Adjust branches if we added new instructions. */
14425 shorten_branches (get_insns ());
14428 s390_function_num_hotpatch_hw (current_function_decl
, &hw_before
, &hw_after
);
14433 /* Insert NOPs for hotpatching. */
14434 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14436 1. inside the area covered by debug information to allow setting
14437 breakpoints at the NOPs,
14438 2. before any insn which results in an asm instruction,
14439 3. before in-function labels to avoid jumping to the NOPs, for
14440 example as part of a loop,
14441 4. before any barrier in case the function is completely empty
14442 (__builtin_unreachable ()) and has neither internal labels nor
14445 if (active_insn_p (insn
) || BARRIER_P (insn
) || LABEL_P (insn
))
14447 /* Output a series of NOPs before the first active insn. */
14448 while (insn
&& hw_after
> 0)
14452 emit_insn_before (gen_nop_6_byte (), insn
);
14455 else if (hw_after
>= 2)
14457 emit_insn_before (gen_nop_4_byte (), insn
);
14462 emit_insn_before (gen_nop_2_byte (), insn
);
14469 /* Return true if INSN is a fp load insn writing register REGNO. */
14471 s390_fpload_toreg (rtx_insn
*insn
, unsigned int regno
)
14474 enum attr_type flag
= s390_safe_attr_type (insn
);
14476 if (flag
!= TYPE_FLOADSF
&& flag
!= TYPE_FLOADDF
)
14479 set
= single_set (insn
);
14481 if (set
== NULL_RTX
)
14484 if (!REG_P (SET_DEST (set
)) || !MEM_P (SET_SRC (set
)))
14487 if (REGNO (SET_DEST (set
)) != regno
)
14493 /* This value describes the distance to be avoided between an
14494 arithmetic fp instruction and an fp load writing the same register.
14495 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14496 fine but the exact value has to be avoided. Otherwise the FP
14497 pipeline will throw an exception causing a major penalty. */
14498 #define Z10_EARLYLOAD_DISTANCE 7
14500 /* Rearrange the ready list in order to avoid the situation described
14501 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14502 moved to the very end of the ready list. */
14504 s390_z10_prevent_earlyload_conflicts (rtx_insn
**ready
, int *nready_p
)
14506 unsigned int regno
;
14507 int nready
= *nready_p
;
14512 enum attr_type flag
;
14515 /* Skip DISTANCE - 1 active insns. */
14516 for (insn
= last_scheduled_insn
, distance
= Z10_EARLYLOAD_DISTANCE
- 1;
14517 distance
> 0 && insn
!= NULL_RTX
;
14518 distance
--, insn
= prev_active_insn (insn
))
14519 if (CALL_P (insn
) || JUMP_P (insn
))
14522 if (insn
== NULL_RTX
)
14525 set
= single_set (insn
);
14527 if (set
== NULL_RTX
|| !REG_P (SET_DEST (set
))
14528 || GET_MODE_CLASS (GET_MODE (SET_DEST (set
))) != MODE_FLOAT
)
14531 flag
= s390_safe_attr_type (insn
);
14533 if (flag
== TYPE_FLOADSF
|| flag
== TYPE_FLOADDF
)
14536 regno
= REGNO (SET_DEST (set
));
14539 while (!s390_fpload_toreg (ready
[i
], regno
) && i
> 0)
14546 memmove (&ready
[1], &ready
[0], sizeof (rtx_insn
*) * i
);
14550 /* Returns TRUE if BB is entered via a fallthru edge and all other
14551 incoming edges are less than likely. */
14553 s390_bb_fallthru_entry_likely (basic_block bb
)
14555 edge e
, fallthru_edge
;
14561 fallthru_edge
= find_fallthru_edge (bb
->preds
);
14562 if (!fallthru_edge
)
14565 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14566 if (e
!= fallthru_edge
14567 && e
->probability
>= profile_probability::likely ())
14573 struct s390_sched_state
14575 /* Number of insns in the group. */
14577 /* Execution side of the group. */
14579 /* Group can only hold two insns. */
14581 } s390_sched_state
;
14583 static struct s390_sched_state sched_state
= {0, 1, false};
14585 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14586 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14587 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14588 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14589 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14591 static unsigned int
14592 s390_get_sched_attrmask (rtx_insn
*insn
)
14594 unsigned int mask
= 0;
14598 case PROCESSOR_2827_ZEC12
:
14599 if (get_attr_zEC12_cracked (insn
))
14600 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14601 if (get_attr_zEC12_expanded (insn
))
14602 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14603 if (get_attr_zEC12_endgroup (insn
))
14604 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14605 if (get_attr_zEC12_groupalone (insn
))
14606 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14608 case PROCESSOR_2964_Z13
:
14609 if (get_attr_z13_cracked (insn
))
14610 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14611 if (get_attr_z13_expanded (insn
))
14612 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14613 if (get_attr_z13_endgroup (insn
))
14614 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14615 if (get_attr_z13_groupalone (insn
))
14616 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14617 if (get_attr_z13_groupoftwo (insn
))
14618 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14620 case PROCESSOR_3906_Z14
:
14621 if (get_attr_z14_cracked (insn
))
14622 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14623 if (get_attr_z14_expanded (insn
))
14624 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14625 if (get_attr_z14_endgroup (insn
))
14626 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14627 if (get_attr_z14_groupalone (insn
))
14628 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14629 if (get_attr_z14_groupoftwo (insn
))
14630 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14632 case PROCESSOR_8561_Z15
:
14633 if (get_attr_z15_cracked (insn
))
14634 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14635 if (get_attr_z15_expanded (insn
))
14636 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14637 if (get_attr_z15_endgroup (insn
))
14638 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14639 if (get_attr_z15_groupalone (insn
))
14640 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14641 if (get_attr_z15_groupoftwo (insn
))
14642 mask
|= S390_SCHED_ATTR_MASK_GROUPOFTWO
;
14645 gcc_unreachable ();
14650 static unsigned int
14651 s390_get_unit_mask (rtx_insn
*insn
, int *units
)
14653 unsigned int mask
= 0;
14657 case PROCESSOR_2964_Z13
:
14659 if (get_attr_z13_unit_lsu (insn
))
14661 if (get_attr_z13_unit_fxa (insn
))
14663 if (get_attr_z13_unit_fxb (insn
))
14665 if (get_attr_z13_unit_vfu (insn
))
14668 case PROCESSOR_3906_Z14
:
14670 if (get_attr_z14_unit_lsu (insn
))
14672 if (get_attr_z14_unit_fxa (insn
))
14674 if (get_attr_z14_unit_fxb (insn
))
14676 if (get_attr_z14_unit_vfu (insn
))
14679 case PROCESSOR_8561_Z15
:
14681 if (get_attr_z15_unit_lsu (insn
))
14683 if (get_attr_z15_unit_fxa (insn
))
14685 if (get_attr_z15_unit_fxb (insn
))
14687 if (get_attr_z15_unit_vfu (insn
))
14691 gcc_unreachable ();
14697 s390_is_fpd (rtx_insn
*insn
)
14699 if (insn
== NULL_RTX
)
14702 return get_attr_z13_unit_fpd (insn
) || get_attr_z14_unit_fpd (insn
)
14703 || get_attr_z15_unit_fpd (insn
);
14707 s390_is_fxd (rtx_insn
*insn
)
14709 if (insn
== NULL_RTX
)
14712 return get_attr_z13_unit_fxd (insn
) || get_attr_z14_unit_fxd (insn
)
14713 || get_attr_z15_unit_fxd (insn
);
14716 /* Returns TRUE if INSN is a long-running instruction. */
14718 s390_is_longrunning (rtx_insn
*insn
)
14720 if (insn
== NULL_RTX
)
14723 return s390_is_fxd (insn
) || s390_is_fpd (insn
);
14727 /* Return the scheduling score for INSN. The higher the score the
14728 better. The score is calculated from the OOO scheduling attributes
14729 of INSN and the scheduling state sched_state. */
14731 s390_sched_score (rtx_insn
*insn
)
14733 unsigned int mask
= s390_get_sched_attrmask (insn
);
14736 switch (sched_state
.group_state
)
14739 /* Try to put insns into the first slot which would otherwise
14741 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14742 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
14744 if ((mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
14748 /* Prefer not cracked insns while trying to put together a
14750 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14751 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14752 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14754 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) == 0)
14756 /* If we are in a group of two already, try to schedule another
14757 group-of-two insn to avoid shortening another group. */
14758 if (sched_state
.group_of_two
14759 && (mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
14763 /* Prefer not cracked insns while trying to put together a
14765 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14766 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14767 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14769 /* Prefer endgroup insns in the last slot. */
14770 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
14772 /* Try to avoid group-of-two insns in the last slot as they will
14773 shorten this group as well as the next one. */
14774 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
14775 score
= MAX (0, score
- 15);
14779 if (s390_tune
>= PROCESSOR_2964_Z13
)
14782 unsigned unit_mask
, m
= 1;
14784 unit_mask
= s390_get_unit_mask (insn
, &units
);
14785 gcc_assert (units
<= MAX_SCHED_UNITS
);
14787 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14788 ago the last insn of this unit type got scheduled. This is
14789 supposed to help providing a proper instruction mix to the
14791 for (i
= 0; i
< units
; i
++, m
<<= 1)
14793 score
+= (last_scheduled_unit_distance
[i
][sched_state
.side
]
14794 * MAX_SCHED_MIX_SCORE
/ MAX_SCHED_MIX_DISTANCE
);
14796 int other_side
= 1 - sched_state
.side
;
14798 /* Try to delay long-running insns when side is busy. */
14799 if (s390_is_longrunning (insn
))
14801 if (s390_is_fxd (insn
))
14803 if (fxd_longrunning
[sched_state
.side
]
14804 && fxd_longrunning
[other_side
]
14805 <= fxd_longrunning
[sched_state
.side
])
14806 score
= MAX (0, score
- 10);
14808 else if (fxd_longrunning
[other_side
]
14809 >= fxd_longrunning
[sched_state
.side
])
14813 if (s390_is_fpd (insn
))
14815 if (fpd_longrunning
[sched_state
.side
]
14816 && fpd_longrunning
[other_side
]
14817 <= fpd_longrunning
[sched_state
.side
])
14818 score
= MAX (0, score
- 10);
14820 else if (fpd_longrunning
[other_side
]
14821 >= fpd_longrunning
[sched_state
.side
])
14830 /* This function is called via hook TARGET_SCHED_REORDER before
14831 issuing one insn from list READY which contains *NREADYP entries.
14832 For target z10 it reorders load instructions to avoid early load
14833 conflicts in the floating point pipeline */
14835 s390_sched_reorder (FILE *file
, int verbose
,
14836 rtx_insn
**ready
, int *nreadyp
, int clock ATTRIBUTE_UNUSED
)
14838 if (s390_tune
== PROCESSOR_2097_Z10
14839 && reload_completed
14841 s390_z10_prevent_earlyload_conflicts (ready
, nreadyp
);
14843 if (s390_tune
>= PROCESSOR_2827_ZEC12
14844 && reload_completed
14848 int last_index
= *nreadyp
- 1;
14849 int max_index
= -1;
14850 int max_score
= -1;
14853 /* Just move the insn with the highest score to the top (the
14854 end) of the list. A full sort is not needed since a conflict
14855 in the hazard recognition cannot happen. So the top insn in
14856 the ready list will always be taken. */
14857 for (i
= last_index
; i
>= 0; i
--)
14861 if (recog_memoized (ready
[i
]) < 0)
14864 score
= s390_sched_score (ready
[i
]);
14865 if (score
> max_score
)
14872 if (max_index
!= -1)
14874 if (max_index
!= last_index
)
14876 tmp
= ready
[max_index
];
14877 ready
[max_index
] = ready
[last_index
];
14878 ready
[last_index
] = tmp
;
14882 ";;\t\tBACKEND: move insn %d to the top of list\n",
14883 INSN_UID (ready
[last_index
]));
14885 else if (verbose
> 5)
14887 ";;\t\tBACKEND: best insn %d already on top\n",
14888 INSN_UID (ready
[last_index
]));
14893 fprintf (file
, "ready list ooo attributes - sched state: %d\n",
14894 sched_state
.group_state
);
14896 for (i
= last_index
; i
>= 0; i
--)
14898 unsigned int sched_mask
;
14899 rtx_insn
*insn
= ready
[i
];
14901 if (recog_memoized (insn
) < 0)
14904 sched_mask
= s390_get_sched_attrmask (insn
);
14905 fprintf (file
, ";;\t\tBACKEND: insn %d score: %d: ",
14907 s390_sched_score (insn
));
14908 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14909 ((M) & sched_mask) ? #ATTR : "");
14910 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
14911 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
14912 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
14913 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
14914 #undef PRINT_SCHED_ATTR
14915 if (s390_tune
>= PROCESSOR_2964_Z13
)
14917 unsigned int unit_mask
, m
= 1;
14920 unit_mask
= s390_get_unit_mask (insn
, &units
);
14921 fprintf (file
, "(units:");
14922 for (j
= 0; j
< units
; j
++, m
<<= 1)
14924 fprintf (file
, " u%d", j
);
14925 fprintf (file
, ")");
14927 fprintf (file
, "\n");
14932 return s390_issue_rate ();
14936 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14937 the scheduler has issued INSN. It stores the last issued insn into
14938 last_scheduled_insn in order to make it available for
14939 s390_sched_reorder. */
14941 s390_sched_variable_issue (FILE *file
, int verbose
, rtx_insn
*insn
, int more
)
14943 last_scheduled_insn
= insn
;
14945 bool ends_group
= false;
14947 if (s390_tune
>= PROCESSOR_2827_ZEC12
14948 && reload_completed
14949 && recog_memoized (insn
) >= 0)
14951 unsigned int mask
= s390_get_sched_attrmask (insn
);
14953 if ((mask
& S390_SCHED_ATTR_MASK_GROUPOFTWO
) != 0)
14954 sched_state
.group_of_two
= true;
14956 /* If this is a group-of-two insn, we actually ended the last group
14957 and this insn is the first one of the new group. */
14958 if (sched_state
.group_state
== 2 && sched_state
.group_of_two
)
14960 sched_state
.side
= sched_state
.side
? 0 : 1;
14961 sched_state
.group_state
= 0;
14964 /* Longrunning and side bookkeeping. */
14965 for (int i
= 0; i
< 2; i
++)
14967 fxd_longrunning
[i
] = MAX (0, fxd_longrunning
[i
] - 1);
14968 fpd_longrunning
[i
] = MAX (0, fpd_longrunning
[i
] - 1);
14971 unsigned latency
= insn_default_latency (insn
);
14972 if (s390_is_longrunning (insn
))
14974 if (s390_is_fxd (insn
))
14975 fxd_longrunning
[sched_state
.side
] = latency
;
14977 fpd_longrunning
[sched_state
.side
] = latency
;
14980 if (s390_tune
>= PROCESSOR_2964_Z13
)
14983 unsigned unit_mask
, m
= 1;
14985 unit_mask
= s390_get_unit_mask (insn
, &units
);
14986 gcc_assert (units
<= MAX_SCHED_UNITS
);
14988 for (i
= 0; i
< units
; i
++, m
<<= 1)
14990 last_scheduled_unit_distance
[i
][sched_state
.side
] = 0;
14991 else if (last_scheduled_unit_distance
[i
][sched_state
.side
]
14992 < MAX_SCHED_MIX_DISTANCE
)
14993 last_scheduled_unit_distance
[i
][sched_state
.side
]++;
14996 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14997 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0
14998 || (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0
14999 || (mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
15001 sched_state
.group_state
= 0;
15006 switch (sched_state
.group_state
)
15009 sched_state
.group_state
++;
15012 sched_state
.group_state
++;
15013 if (sched_state
.group_of_two
)
15015 sched_state
.group_state
= 0;
15020 sched_state
.group_state
++;
15028 unsigned int sched_mask
;
15030 sched_mask
= s390_get_sched_attrmask (insn
);
15032 fprintf (file
, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn
));
15033 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15034 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
15035 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
15036 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
15037 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
15038 #undef PRINT_SCHED_ATTR
15040 if (s390_tune
>= PROCESSOR_2964_Z13
)
15042 unsigned int unit_mask
, m
= 1;
15045 unit_mask
= s390_get_unit_mask (insn
, &units
);
15046 fprintf (file
, "(units:");
15047 for (j
= 0; j
< units
; j
++, m
<<= 1)
15049 fprintf (file
, " %d", j
);
15050 fprintf (file
, ")");
15052 fprintf (file
, " sched state: %d\n", sched_state
.group_state
);
15054 if (s390_tune
>= PROCESSOR_2964_Z13
)
15058 s390_get_unit_mask (insn
, &units
);
15060 fprintf (file
, ";;\t\tBACKEND: units on this side unused for: ");
15061 for (j
= 0; j
< units
; j
++)
15062 fprintf (file
, "%d:%d ", j
,
15063 last_scheduled_unit_distance
[j
][sched_state
.side
]);
15064 fprintf (file
, "\n");
15068 /* If this insn ended a group, the next will be on the other side. */
15071 sched_state
.group_state
= 0;
15072 sched_state
.side
= sched_state
.side
? 0 : 1;
15073 sched_state
.group_of_two
= false;
15077 if (GET_CODE (PATTERN (insn
)) != USE
15078 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
15085 s390_sched_init (FILE *file ATTRIBUTE_UNUSED
,
15086 int verbose ATTRIBUTE_UNUSED
,
15087 int max_ready ATTRIBUTE_UNUSED
)
15089 /* If the next basic block is most likely entered via a fallthru edge
15090 we keep the last sched state. Otherwise we start a new group.
15091 The scheduler traverses basic blocks in "instruction stream" ordering
15092 so if we see a fallthru edge here, sched_state will be of its
15095 current_sched_info->prev_head is the insn before the first insn of the
15096 block of insns to be scheduled.
15098 rtx_insn
*insn
= current_sched_info
->prev_head
15099 ? NEXT_INSN (current_sched_info
->prev_head
) : NULL
;
15100 basic_block bb
= insn
? BLOCK_FOR_INSN (insn
) : NULL
;
15101 if (s390_tune
< PROCESSOR_2964_Z13
|| !s390_bb_fallthru_entry_likely (bb
))
15103 last_scheduled_insn
= NULL
;
15104 memset (last_scheduled_unit_distance
, 0,
15105 MAX_SCHED_UNITS
* NUM_SIDES
* sizeof (int));
15106 sched_state
.group_state
= 0;
15107 sched_state
.group_of_two
= false;
15111 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15112 a new number struct loop *loop should be unrolled if tuned for cpus with
15113 a built-in stride prefetcher.
15114 The loop is analyzed for memory accesses by calling check_dpu for
15115 each rtx of the loop. Depending on the loop_depth and the amount of
15116 memory accesses a new number <=nunroll is returned to improve the
15117 behavior of the hardware prefetch unit. */
15119 s390_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
15124 unsigned mem_count
= 0;
15126 if (s390_tune
< PROCESSOR_2097_Z10
)
15129 /* Count the number of memory references within the loop body. */
15130 bbs
= get_loop_body (loop
);
15131 subrtx_iterator::array_type array
;
15132 for (i
= 0; i
< loop
->num_nodes
; i
++)
15133 FOR_BB_INSNS (bbs
[i
], insn
)
15134 if (INSN_P (insn
) && INSN_CODE (insn
) != -1)
15138 /* The runtime of small loops with memory block operations
15139 will be determined by the memory operation. Doing
15140 unrolling doesn't help here. Measurements to confirm
15141 this where only done on recent CPU levels. So better do
15142 not change anything for older CPUs. */
15143 if (s390_tune
>= PROCESSOR_2964_Z13
15144 && loop
->ninsns
<= BLOCK_MEM_OPS_LOOP_INSNS
15145 && ((set
= single_set (insn
)) != NULL_RTX
)
15146 && ((GET_MODE (SET_DEST (set
)) == BLKmode
15147 && (GET_MODE (SET_SRC (set
)) == BLKmode
15148 || SET_SRC (set
) == const0_rtx
))
15149 || (GET_CODE (SET_SRC (set
)) == COMPARE
15150 && GET_MODE (XEXP (SET_SRC (set
), 0)) == BLKmode
15151 && GET_MODE (XEXP (SET_SRC (set
), 1)) == BLKmode
)))
15154 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
15160 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15161 if (mem_count
== 0)
15164 switch (loop_depth(loop
))
15167 return MIN (nunroll
, 28 / mem_count
);
15169 return MIN (nunroll
, 22 / mem_count
);
15171 return MIN (nunroll
, 16 / mem_count
);
15175 /* Restore the current options. This is a hook function and also called
15179 s390_function_specific_restore (struct gcc_options
*opts
,
15180 struct cl_target_option
*ptr ATTRIBUTE_UNUSED
)
15182 opts
->x_s390_cost_pointer
= (long)processor_table
[opts
->x_s390_tune
].cost
;
15186 s390_default_align (struct gcc_options
*opts
)
15188 /* Set the default function alignment to 16 in order to get rid of
15189 some unwanted performance effects. */
15190 if (opts
->x_flag_align_functions
&& !opts
->x_str_align_functions
15191 && opts
->x_s390_tune
>= PROCESSOR_2964_Z13
)
15192 opts
->x_str_align_functions
= "16";
15196 s390_override_options_after_change (void)
15198 s390_default_align (&global_options
);
15202 s390_option_override_internal (struct gcc_options
*opts
,
15203 const struct gcc_options
*opts_set
)
15205 /* Architecture mode defaults according to ABI. */
15206 if (!(opts_set
->x_target_flags
& MASK_ZARCH
))
15209 opts
->x_target_flags
|= MASK_ZARCH
;
15211 opts
->x_target_flags
&= ~MASK_ZARCH
;
15214 /* Set the march default in case it hasn't been specified on cmdline. */
15215 if (!opts_set
->x_s390_arch
)
15216 opts
->x_s390_arch
= PROCESSOR_2064_Z900
;
15218 opts
->x_s390_arch_flags
= processor_flags_table
[(int) opts
->x_s390_arch
];
15220 /* Determine processor to tune for. */
15221 if (!opts_set
->x_s390_tune
)
15222 opts
->x_s390_tune
= opts
->x_s390_arch
;
15224 opts
->x_s390_tune_flags
= processor_flags_table
[opts
->x_s390_tune
];
15226 /* Sanity checks. */
15227 if (opts
->x_s390_arch
== PROCESSOR_NATIVE
15228 || opts
->x_s390_tune
== PROCESSOR_NATIVE
)
15229 gcc_unreachable ();
15230 if (TARGET_64BIT
&& !TARGET_ZARCH_P (opts
->x_target_flags
))
15231 error ("64-bit ABI not supported in ESA/390 mode");
15233 if (opts
->x_s390_indirect_branch
== indirect_branch_thunk_inline
15234 || opts
->x_s390_indirect_branch_call
== indirect_branch_thunk_inline
15235 || opts
->x_s390_function_return
== indirect_branch_thunk_inline
15236 || opts
->x_s390_function_return_reg
== indirect_branch_thunk_inline
15237 || opts
->x_s390_function_return_mem
== indirect_branch_thunk_inline
)
15238 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15240 if (opts
->x_s390_indirect_branch
!= indirect_branch_keep
)
15242 if (!opts_set
->x_s390_indirect_branch_call
)
15243 opts
->x_s390_indirect_branch_call
= opts
->x_s390_indirect_branch
;
15245 if (!opts_set
->x_s390_indirect_branch_jump
)
15246 opts
->x_s390_indirect_branch_jump
= opts
->x_s390_indirect_branch
;
15249 if (opts
->x_s390_function_return
!= indirect_branch_keep
)
15251 if (!opts_set
->x_s390_function_return_reg
)
15252 opts
->x_s390_function_return_reg
= opts
->x_s390_function_return
;
15254 if (!opts_set
->x_s390_function_return_mem
)
15255 opts
->x_s390_function_return_mem
= opts
->x_s390_function_return
;
15258 /* Enable hardware transactions if available and not explicitly
15259 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15260 if (!TARGET_OPT_HTM_P (opts_set
->x_target_flags
))
15262 if (TARGET_CPU_HTM_P (opts
) && TARGET_ZARCH_P (opts
->x_target_flags
))
15263 opts
->x_target_flags
|= MASK_OPT_HTM
;
15265 opts
->x_target_flags
&= ~MASK_OPT_HTM
;
15268 if (TARGET_OPT_VX_P (opts_set
->x_target_flags
))
15270 if (TARGET_OPT_VX_P (opts
->x_target_flags
))
15272 if (!TARGET_CPU_VX_P (opts
))
15273 error ("hardware vector support not available on %s",
15274 processor_table
[(int)opts
->x_s390_arch
].name
);
15275 if (TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15276 error ("hardware vector support not available with "
15277 "%<-msoft-float%>");
15282 if (TARGET_CPU_VX_P (opts
))
15283 /* Enable vector support if available and not explicitly disabled
15284 by user. E.g. with -m31 -march=z13 -mzarch */
15285 opts
->x_target_flags
|= MASK_OPT_VX
;
15287 opts
->x_target_flags
&= ~MASK_OPT_VX
;
15290 /* Use hardware DFP if available and not explicitly disabled by
15291 user. E.g. with -m31 -march=z10 -mzarch */
15292 if (!TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15294 if (TARGET_DFP_P (opts
))
15295 opts
->x_target_flags
|= MASK_HARD_DFP
;
15297 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15300 if (TARGET_HARD_DFP_P (opts
->x_target_flags
) && !TARGET_DFP_P (opts
))
15302 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15304 if (!TARGET_CPU_DFP_P (opts
))
15305 error ("hardware decimal floating point instructions"
15306 " not available on %s",
15307 processor_table
[(int)opts
->x_s390_arch
].name
);
15308 if (!TARGET_ZARCH_P (opts
->x_target_flags
))
15309 error ("hardware decimal floating point instructions"
15310 " not available in ESA/390 mode");
15313 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15316 if (TARGET_SOFT_FLOAT_P (opts_set
->x_target_flags
)
15317 && TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15319 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
)
15320 && TARGET_HARD_DFP_P (opts
->x_target_flags
))
15321 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15322 "%<-msoft-float%>");
15324 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15327 if (TARGET_BACKCHAIN_P (opts
->x_target_flags
)
15328 && TARGET_PACKED_STACK_P (opts
->x_target_flags
)
15329 && TARGET_HARD_FLOAT_P (opts
->x_target_flags
))
15330 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15331 "supported in combination");
15333 if (opts
->x_s390_stack_size
)
15335 if (opts
->x_s390_stack_guard
>= opts
->x_s390_stack_size
)
15336 error ("stack size must be greater than the stack guard value");
15337 else if (opts
->x_s390_stack_size
> 1 << 16)
15338 error ("stack size must not be greater than 64k");
15340 else if (opts
->x_s390_stack_guard
)
15341 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15343 /* Our implementation of the stack probe requires the probe interval
15344 to be used as displacement in an address operand. The maximum
15345 probe interval currently is 64k. This would exceed short
15346 displacements. Trim that value down to 4k if that happens. This
15347 might result in too many probes being generated only on the
15348 oldest supported machine level z900. */
15349 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval
)))
15350 param_stack_clash_protection_probe_interval
= 12;
15352 #if TARGET_TPF != 0
15353 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_check
))
15354 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15356 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_prologue_target
))
15357 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15359 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_check
))
15360 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15362 if (!CONST_OK_FOR_J (opts
->x_s390_tpf_trace_hook_epilogue_target
))
15363 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15365 if (s390_tpf_trace_skip
)
15367 opts
->x_s390_tpf_trace_hook_prologue_target
= TPF_TRACE_PROLOGUE_SKIP_TARGET
;
15368 opts
->x_s390_tpf_trace_hook_epilogue_target
= TPF_TRACE_EPILOGUE_SKIP_TARGET
;
15372 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15373 if (!TARGET_LONG_DOUBLE_128_P (opts_set
->x_target_flags
))
15374 opts
->x_target_flags
|= MASK_LONG_DOUBLE_128
;
15377 if (opts
->x_s390_tune
>= PROCESSOR_2097_Z10
)
15379 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unrolled_insns
,
15381 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_unroll_times
, 32);
15382 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peeled_insns
,
15384 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_completely_peel_times
,
15388 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_pending_list_length
,
15390 /* values for loop prefetching */
15391 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_line_size
, 256);
15392 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l1_cache_size
, 128);
15393 /* s390 has more than 2 levels and the size is much larger. Since
15394 we are always running virtualized assume that we only get a small
15395 part of the caches above l1. */
15396 SET_OPTION_IF_UNSET (opts
, opts_set
, param_l2_cache_size
, 1500);
15397 SET_OPTION_IF_UNSET (opts
, opts_set
,
15398 param_prefetch_min_insn_to_mem_ratio
, 2);
15399 SET_OPTION_IF_UNSET (opts
, opts_set
, param_simultaneous_prefetches
, 6);
15401 /* Use the alternative scheduling-pressure algorithm by default. */
15402 SET_OPTION_IF_UNSET (opts
, opts_set
, param_sched_pressure_algorithm
, 2);
15403 SET_OPTION_IF_UNSET (opts
, opts_set
, param_min_vect_loop_bound
, 2);
15405 /* Use aggressive inlining parameters. */
15406 if (opts
->x_s390_tune
>= PROCESSOR_2964_Z13
)
15408 SET_OPTION_IF_UNSET (opts
, opts_set
, param_inline_min_speedup
, 2);
15409 SET_OPTION_IF_UNSET (opts
, opts_set
, param_max_inline_insns_auto
, 80);
15412 /* Set the default alignment. */
15413 s390_default_align (opts
);
15415 /* Call target specific restore function to do post-init work. At the moment,
15416 this just sets opts->x_s390_cost_pointer. */
15417 s390_function_specific_restore (opts
, NULL
);
15419 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15420 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
15421 not the case when the code runs before the prolog. */
15422 if (opts
->x_flag_fentry
&& !TARGET_64BIT
)
15423 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15427 s390_option_override (void)
15430 cl_deferred_option
*opt
;
15431 vec
<cl_deferred_option
> *v
=
15432 (vec
<cl_deferred_option
> *) s390_deferred_options
;
15435 FOR_EACH_VEC_ELT (*v
, i
, opt
)
15437 switch (opt
->opt_index
)
15439 case OPT_mhotpatch_
:
15443 char *s
= strtok (ASTRDUP (opt
->arg
), ",");
15444 char *t
= strtok (NULL
, "\0");
15448 val1
= integral_argument (s
);
15449 val2
= integral_argument (t
);
15456 if (val1
== -1 || val2
== -1)
15458 /* argument is not a plain number */
15459 error ("arguments to %qs should be non-negative integers",
15463 else if (val1
> s390_hotpatch_hw_max
15464 || val2
> s390_hotpatch_hw_max
)
15466 error ("argument to %qs is too large (max. %d)",
15467 "-mhotpatch=n,m", s390_hotpatch_hw_max
);
15470 s390_hotpatch_hw_before_label
= val1
;
15471 s390_hotpatch_hw_after_label
= val2
;
15475 gcc_unreachable ();
15479 /* Set up function hooks. */
15480 init_machine_status
= s390_init_machine_status
;
15482 s390_option_override_internal (&global_options
, &global_options_set
);
15484 /* Save the initial options in case the user does function specific
15486 target_option_default_node
= build_target_option_node (&global_options
);
15487 target_option_current_node
= target_option_default_node
;
15489 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15490 requires the arch flags to be evaluated already. Since prefetching
15491 is beneficial on s390, we enable it if available. */
15492 if (flag_prefetch_loop_arrays
< 0 && HAVE_prefetch
&& optimize
>= 3)
15493 flag_prefetch_loop_arrays
= 1;
15495 if (!s390_pic_data_is_text_relative
&& !flag_pic
)
15496 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15497 "%<-fpic%>/%<-fPIC%>");
15501 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15502 debuggers do not yet support DWARF 3/4. */
15503 if (!global_options_set
.x_dwarf_strict
)
15505 if (!global_options_set
.x_dwarf_version
)
15510 #if S390_USE_TARGET_ATTRIBUTE
15511 /* Inner function to process the attribute((target(...))), take an argument and
15512 set the current options from the argument. If we have a list, recursively go
15516 s390_valid_target_attribute_inner_p (tree args
,
15517 struct gcc_options
*opts
,
15518 struct gcc_options
*new_opts_set
,
15524 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15525 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15526 static const struct
15528 const char *string
;
15532 int only_as_pragma
;
15535 S390_ATTRIB ("arch=", OPT_march_
, 1),
15536 S390_ATTRIB ("tune=", OPT_mtune_
, 1),
15537 /* uinteger options */
15538 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_
, 1),
15539 S390_ATTRIB ("stack-size=", OPT_mstack_size_
, 1),
15540 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_
, 1),
15541 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_
, 1),
15543 S390_ATTRIB ("backchain", OPT_mbackchain
, 0),
15544 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp
, 0),
15545 S390_ATTRIB ("hard-float", OPT_mhard_float
, 0),
15546 S390_ATTRIB ("htm", OPT_mhtm
, 0),
15547 S390_ATTRIB ("vx", OPT_mvx
, 0),
15548 S390_ATTRIB ("packed-stack", OPT_mpacked_stack
, 0),
15549 S390_ATTRIB ("small-exec", OPT_msmall_exec
, 0),
15550 S390_ATTRIB ("soft-float", OPT_msoft_float
, 0),
15551 S390_ATTRIB ("mvcle", OPT_mmvcle
, 0),
15552 S390_PRAGMA ("zvector", OPT_mzvector
, 0),
15553 /* boolean options */
15554 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack
, 0),
15559 /* If this is a list, recurse to get the options. */
15560 if (TREE_CODE (args
) == TREE_LIST
)
15563 int num_pragma_values
;
15566 /* Note: attribs.c:decl_attributes prepends the values from
15567 current_target_pragma to the list of target attributes. To determine
15568 whether we're looking at a value of the attribute or the pragma we
15569 assume that the first [list_length (current_target_pragma)] values in
15570 the list are the values from the pragma. */
15571 num_pragma_values
= (!force_pragma
&& current_target_pragma
!= NULL
)
15572 ? list_length (current_target_pragma
) : 0;
15573 for (i
= 0; args
; args
= TREE_CHAIN (args
), i
++)
15577 is_pragma
= (force_pragma
|| i
< num_pragma_values
);
15578 if (TREE_VALUE (args
)
15579 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args
),
15580 opts
, new_opts_set
,
15589 else if (TREE_CODE (args
) != STRING_CST
)
15591 error ("attribute %<target%> argument not a string");
15595 /* Handle multiple arguments separated by commas. */
15596 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
15598 while (next_optstr
&& *next_optstr
!= '\0')
15600 char *p
= next_optstr
;
15602 char *comma
= strchr (next_optstr
, ',');
15603 size_t len
, opt_len
;
15609 enum cl_var_type var_type
;
15615 len
= comma
- next_optstr
;
15616 next_optstr
= comma
+ 1;
15621 next_optstr
= NULL
;
15624 /* Recognize no-xxx. */
15625 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
15634 /* Find the option. */
15637 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
15639 opt_len
= attrs
[i
].len
;
15640 if (ch
== attrs
[i
].string
[0]
15641 && ((attrs
[i
].has_arg
) ? len
> opt_len
: len
== opt_len
)
15642 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
15644 opt
= attrs
[i
].opt
;
15645 if (!opt_set_p
&& cl_options
[opt
].cl_reject_negative
)
15647 mask
= cl_options
[opt
].var_value
;
15648 var_type
= cl_options
[opt
].var_type
;
15654 /* Process the option. */
15657 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15660 else if (attrs
[i
].only_as_pragma
&& !force_pragma
)
15662 /* Value is not allowed for the target attribute. */
15663 error ("value %qs is not supported by attribute %<target%>",
15668 else if (var_type
== CLVC_BIT_SET
|| var_type
== CLVC_BIT_CLEAR
)
15670 if (var_type
== CLVC_BIT_CLEAR
)
15671 opt_set_p
= !opt_set_p
;
15674 opts
->x_target_flags
|= mask
;
15676 opts
->x_target_flags
&= ~mask
;
15677 new_opts_set
->x_target_flags
|= mask
;
15680 else if (cl_options
[opt
].var_type
== CLVC_BOOLEAN
)
15684 if (cl_options
[opt
].cl_uinteger
)
15686 /* Unsigned integer argument. Code based on the function
15687 decode_cmdline_option () in opts-common.c. */
15688 value
= integral_argument (p
+ opt_len
);
15691 value
= (opt_set_p
) ? 1 : 0;
15695 struct cl_decoded_option decoded
;
15697 /* Value range check; only implemented for numeric and boolean
15698 options at the moment. */
15699 generate_option (opt
, NULL
, value
, CL_TARGET
, &decoded
);
15700 s390_handle_option (opts
, new_opts_set
, &decoded
, input_location
);
15701 set_option (opts
, new_opts_set
, opt
, value
,
15702 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15707 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15712 else if (cl_options
[opt
].var_type
== CLVC_ENUM
)
15717 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
15719 set_option (opts
, new_opts_set
, opt
, value
,
15720 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15724 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15730 gcc_unreachable ();
15735 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15738 s390_valid_target_attribute_tree (tree args
,
15739 struct gcc_options
*opts
,
15740 const struct gcc_options
*opts_set
,
15743 tree t
= NULL_TREE
;
15744 struct gcc_options new_opts_set
;
15746 memset (&new_opts_set
, 0, sizeof (new_opts_set
));
15748 /* Process each of the options on the chain. */
15749 if (! s390_valid_target_attribute_inner_p (args
, opts
, &new_opts_set
,
15751 return error_mark_node
;
15753 /* If some option was set (even if it has not changed), rerun
15754 s390_option_override_internal, and then save the options away. */
15755 if (new_opts_set
.x_target_flags
15756 || new_opts_set
.x_s390_arch
15757 || new_opts_set
.x_s390_tune
15758 || new_opts_set
.x_s390_stack_guard
15759 || new_opts_set
.x_s390_stack_size
15760 || new_opts_set
.x_s390_branch_cost
15761 || new_opts_set
.x_s390_warn_framesize
15762 || new_opts_set
.x_s390_warn_dynamicstack_p
)
15764 const unsigned char *src
= (const unsigned char *)opts_set
;
15765 unsigned char *dest
= (unsigned char *)&new_opts_set
;
15768 /* Merge the original option flags into the new ones. */
15769 for (i
= 0; i
< sizeof(*opts_set
); i
++)
15772 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15773 s390_option_override_internal (opts
, &new_opts_set
);
15774 /* Save the current options unless we are validating options for
15776 t
= build_target_option_node (opts
);
15781 /* Hook to validate attribute((target("string"))). */
15784 s390_valid_target_attribute_p (tree fndecl
,
15785 tree
ARG_UNUSED (name
),
15787 int ARG_UNUSED (flags
))
15789 struct gcc_options func_options
;
15790 tree new_target
, new_optimize
;
15793 /* attribute((target("default"))) does nothing, beyond
15794 affecting multi-versioning. */
15795 if (TREE_VALUE (args
)
15796 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
15797 && TREE_CHAIN (args
) == NULL_TREE
15798 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
15801 tree old_optimize
= build_optimization_node (&global_options
);
15803 /* Get the optimization options of the current function. */
15804 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
15806 if (!func_optimize
)
15807 func_optimize
= old_optimize
;
15809 /* Init func_options. */
15810 memset (&func_options
, 0, sizeof (func_options
));
15811 init_options_struct (&func_options
, NULL
);
15812 lang_hooks
.init_options_struct (&func_options
);
15814 cl_optimization_restore (&func_options
, TREE_OPTIMIZATION (func_optimize
));
15816 /* Initialize func_options to the default before its target options can
15818 cl_target_option_restore (&func_options
,
15819 TREE_TARGET_OPTION (target_option_default_node
));
15821 new_target
= s390_valid_target_attribute_tree (args
, &func_options
,
15822 &global_options_set
,
15824 current_target_pragma
));
15825 new_optimize
= build_optimization_node (&func_options
);
15826 if (new_target
== error_mark_node
)
15828 else if (fndecl
&& new_target
)
15830 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
15831 if (old_optimize
!= new_optimize
)
15832 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
15837 /* Hook to determine if one function can safely inline another. */
15840 s390_can_inline_p (tree caller
, tree callee
)
15842 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
15843 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
15846 callee_tree
= target_option_default_node
;
15848 caller_tree
= target_option_default_node
;
15849 if (callee_tree
== caller_tree
)
15852 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
15853 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
15856 if ((caller_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
))
15857 != (callee_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
)))
15860 /* Don't inline functions to be compiled for a more recent arch into a
15861 function for an older arch. */
15862 else if (caller_opts
->x_s390_arch
< callee_opts
->x_s390_arch
)
15865 /* Inlining a hard float function into a soft float function is only
15866 allowed if the hard float function doesn't actually make use of
15869 We are called from FEs for multi-versioning call optimization, so
15870 beware of ipa_fn_summaries not available. */
15871 else if (((TARGET_SOFT_FLOAT_P (caller_opts
->x_target_flags
)
15872 && !TARGET_SOFT_FLOAT_P (callee_opts
->x_target_flags
))
15873 || (!TARGET_HARD_DFP_P (caller_opts
->x_target_flags
)
15874 && TARGET_HARD_DFP_P (callee_opts
->x_target_flags
)))
15875 && (! ipa_fn_summaries
15876 || ipa_fn_summaries
->get
15877 (cgraph_node::get (callee
))->fp_expressions
))
15884 /* Set VAL to correct enum value according to the indirect-branch or
15885 function-return attribute in ATTR. */
15888 s390_indirect_branch_attrvalue (tree attr
, enum indirect_branch
*val
)
15890 const char *str
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
15891 if (strcmp (str
, "keep") == 0)
15892 *val
= indirect_branch_keep
;
15893 else if (strcmp (str
, "thunk") == 0)
15894 *val
= indirect_branch_thunk
;
15895 else if (strcmp (str
, "thunk-inline") == 0)
15896 *val
= indirect_branch_thunk_inline
;
15897 else if (strcmp (str
, "thunk-extern") == 0)
15898 *val
= indirect_branch_thunk_extern
;
15901 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15902 from either the cmdline or the function attributes in
15906 s390_indirect_branch_settings (tree fndecl
)
15913 /* Initialize with the cmdline options and let the attributes
15915 cfun
->machine
->indirect_branch_jump
= s390_indirect_branch_jump
;
15916 cfun
->machine
->indirect_branch_call
= s390_indirect_branch_call
;
15918 cfun
->machine
->function_return_reg
= s390_function_return_reg
;
15919 cfun
->machine
->function_return_mem
= s390_function_return_mem
;
15921 if ((attr
= lookup_attribute ("indirect_branch",
15922 DECL_ATTRIBUTES (fndecl
))))
15924 s390_indirect_branch_attrvalue (attr
,
15925 &cfun
->machine
->indirect_branch_jump
);
15926 s390_indirect_branch_attrvalue (attr
,
15927 &cfun
->machine
->indirect_branch_call
);
15930 if ((attr
= lookup_attribute ("indirect_branch_jump",
15931 DECL_ATTRIBUTES (fndecl
))))
15932 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_jump
);
15934 if ((attr
= lookup_attribute ("indirect_branch_call",
15935 DECL_ATTRIBUTES (fndecl
))))
15936 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->indirect_branch_call
);
15938 if ((attr
= lookup_attribute ("function_return",
15939 DECL_ATTRIBUTES (fndecl
))))
15941 s390_indirect_branch_attrvalue (attr
,
15942 &cfun
->machine
->function_return_reg
);
15943 s390_indirect_branch_attrvalue (attr
,
15944 &cfun
->machine
->function_return_mem
);
15947 if ((attr
= lookup_attribute ("function_return_reg",
15948 DECL_ATTRIBUTES (fndecl
))))
15949 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_reg
);
15951 if ((attr
= lookup_attribute ("function_return_mem",
15952 DECL_ATTRIBUTES (fndecl
))))
15953 s390_indirect_branch_attrvalue (attr
, &cfun
->machine
->function_return_mem
);
#if S390_USE_TARGET_ATTRIBUTE
/* Restore targets globals from NEW_TREE and invalidate the
   s390_previous_fndecl cache so that the next call to
   s390_set_current_function recomputes the back-end state.  */

static void
s390_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    /* First time this option node is activated: build and cache the
       corresponding target globals.  */
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  s390_previous_fndecl = NULL_TREE;
}
#endif
15974 /* Establish appropriate back-end context for processing the function
15975 FNDECL. The argument might be NULL to indicate processing at top
15976 level, outside of any function scope. */
15978 s390_set_current_function (tree fndecl
)
15980 #if S390_USE_TARGET_ATTRIBUTE
15981 /* Only change the context if the function changes. This hook is called
15982 several times in the course of compiling a function, and we don't want to
15983 slow things down too much or call target_reinit when it isn't safe. */
15984 if (fndecl
== s390_previous_fndecl
)
15986 s390_indirect_branch_settings (fndecl
);
15991 if (s390_previous_fndecl
== NULL_TREE
)
15992 old_tree
= target_option_current_node
;
15993 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
))
15994 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
);
15996 old_tree
= target_option_default_node
;
15998 if (fndecl
== NULL_TREE
)
16000 if (old_tree
!= target_option_current_node
)
16001 s390_activate_target_options (target_option_current_node
);
16005 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
16006 if (new_tree
== NULL_TREE
)
16007 new_tree
= target_option_default_node
;
16009 if (old_tree
!= new_tree
)
16010 s390_activate_target_options (new_tree
);
16011 s390_previous_fndecl
= fndecl
;
16013 s390_indirect_branch_settings (fndecl
);
16016 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16019 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
16020 unsigned int align ATTRIBUTE_UNUSED
,
16021 enum by_pieces_operation op ATTRIBUTE_UNUSED
,
16022 bool speed_p ATTRIBUTE_UNUSED
)
16024 return (size
== 1 || size
== 2
16025 || size
== 4 || (TARGET_ZARCH
&& size
== 8));
16028 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16031 s390_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
16033 tree sfpc
= s390_builtin_decls
[S390_BUILTIN_s390_sfpc
];
16034 tree efpc
= s390_builtin_decls
[S390_BUILTIN_s390_efpc
];
16035 tree call_efpc
= build_call_expr (efpc
, 0);
16036 tree fenv_var
= create_tmp_var_raw (unsigned_type_node
);
16038 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16039 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16040 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16041 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16042 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16043 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16045 /* Generates the equivalent of feholdexcept (&fenv_var)
16047 fenv_var = __builtin_s390_efpc ();
16048 __builtin_s390_sfpc (fenv_var & mask) */
16049 tree old_fpc
= build2 (MODIFY_EXPR
, unsigned_type_node
, fenv_var
, call_efpc
);
16051 build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
,
16052 build_int_cst (unsigned_type_node
,
16053 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
|
16054 FPC_EXCEPTION_MASK
)));
16055 tree set_new_fpc
= build_call_expr (sfpc
, 1, new_fpc
);
16056 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, old_fpc
, set_new_fpc
);
16058 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16060 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16061 new_fpc
= build2 (BIT_AND_EXPR
, unsigned_type_node
, call_efpc
,
16062 build_int_cst (unsigned_type_node
,
16063 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
)));
16064 *clear
= build_call_expr (sfpc
, 1, new_fpc
);
16066 /* Generates the equivalent of feupdateenv (fenv_var)
16068 old_fpc = __builtin_s390_efpc ();
16069 __builtin_s390_sfpc (fenv_var);
16070 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16072 old_fpc
= create_tmp_var_raw (unsigned_type_node
);
16073 tree store_old_fpc
= build2 (MODIFY_EXPR
, void_type_node
,
16074 old_fpc
, call_efpc
);
16076 set_new_fpc
= build_call_expr (sfpc
, 1, fenv_var
);
16078 tree raise_old_except
= build2 (BIT_AND_EXPR
, unsigned_type_node
, old_fpc
,
16079 build_int_cst (unsigned_type_node
,
16081 raise_old_except
= build2 (RSHIFT_EXPR
, unsigned_type_node
, raise_old_except
,
16082 build_int_cst (unsigned_type_node
,
16084 tree atomic_feraiseexcept
16085 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
16086 raise_old_except
= build_call_expr (atomic_feraiseexcept
,
16087 1, raise_old_except
);
16089 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
16090 build2 (COMPOUND_EXPR
, void_type_node
,
16091 store_old_fpc
, set_new_fpc
),
16094 #undef FPC_EXCEPTION_MASK
16095 #undef FPC_FLAGS_MASK
16096 #undef FPC_DXC_MASK
16097 #undef FPC_EXCEPTION_MASK_SHIFT
16098 #undef FPC_FLAGS_SHIFT
16099 #undef FPC_DXC_SHIFT
16102 /* Return the vector mode to be used for inner mode MODE when doing
16104 static machine_mode
16105 s390_preferred_simd_mode (scalar_mode mode
)
16133 /* Our hardware does not require vectors to be strictly aligned. */
16135 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED
,
16136 const_tree type ATTRIBUTE_UNUSED
,
16137 int misalignment ATTRIBUTE_UNUSED
,
16138 bool is_packed ATTRIBUTE_UNUSED
)
16143 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
16147 /* The vector ABI requires vector types to be aligned on an 8 byte
16148 boundary (our stack alignment). However, we allow this to be
16149 overriden by the user, while this definitely breaks the ABI. */
16150 static HOST_WIDE_INT
16151 s390_vector_alignment (const_tree type
)
16153 tree size
= TYPE_SIZE (type
);
16155 if (!TARGET_VX_ABI
)
16156 return default_vector_alignment (type
);
16158 if (TYPE_USER_ALIGN (type
))
16159 return TYPE_ALIGN (type
);
16161 if (tree_fits_uhwi_p (size
)
16162 && tree_to_uhwi (size
) < BIGGEST_ALIGNMENT
)
16163 return tree_to_uhwi (size
);
16165 return BIGGEST_ALIGNMENT
;
16168 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
16169 LARL instruction. */
16171 static HOST_WIDE_INT
16172 s390_constant_alignment (const_tree
, HOST_WIDE_INT align
)
16174 return MAX (align
, 16);
#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  Emit the default preamble plus a
   ".machine" directive matching the selected architecture level.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif
16187 /* Implement TARGET_ASM_FILE_END. */
16189 s390_asm_file_end (void)
16191 #ifdef HAVE_AS_GNU_ATTRIBUTE
16192 varpool_node
*vnode
;
16193 cgraph_node
*cnode
;
16195 FOR_EACH_VARIABLE (vnode
)
16196 if (TREE_PUBLIC (vnode
->decl
))
16197 s390_check_type_for_vector_abi (TREE_TYPE (vnode
->decl
), false, false);
16199 FOR_EACH_FUNCTION (cnode
)
16200 if (TREE_PUBLIC (cnode
->decl
))
16201 s390_check_type_for_vector_abi (TREE_TYPE (cnode
->decl
), false, false);
16204 if (s390_vector_abi
!= 0)
16205 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
16208 file_end_indicate_exec_stack ();
16210 if (flag_split_stack
)
16211 file_end_indicate_split_stack ();
16214 /* Return true if TYPE is a vector bool type. */
16216 s390_vector_bool_type_p (const_tree type
)
16218 return TYPE_VECTOR_OPAQUE (type
);
16221 /* Return the diagnostic message string if the binary operation OP is
16222 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16224 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
16226 bool bool1_p
, bool2_p
;
16230 machine_mode mode1
, mode2
;
16232 if (!TARGET_ZVECTOR
)
16235 if (!VECTOR_TYPE_P (type1
) || !VECTOR_TYPE_P (type2
))
16238 bool1_p
= s390_vector_bool_type_p (type1
);
16239 bool2_p
= s390_vector_bool_type_p (type2
);
16241 /* Mixing signed and unsigned types is forbidden for all
16243 if (!bool1_p
&& !bool2_p
16244 && TYPE_UNSIGNED (type1
) != TYPE_UNSIGNED (type2
))
16245 return N_("types differ in signedness");
16247 plusminus_p
= (op
== PLUS_EXPR
|| op
== MINUS_EXPR
);
16248 muldiv_p
= (op
== MULT_EXPR
|| op
== RDIV_EXPR
|| op
== TRUNC_DIV_EXPR
16249 || op
== CEIL_DIV_EXPR
|| op
== FLOOR_DIV_EXPR
16250 || op
== ROUND_DIV_EXPR
);
16251 compare_p
= (op
== LT_EXPR
|| op
== LE_EXPR
|| op
== GT_EXPR
|| op
== GE_EXPR
16252 || op
== EQ_EXPR
|| op
== NE_EXPR
);
16254 if (bool1_p
&& bool2_p
&& (plusminus_p
|| muldiv_p
))
16255 return N_("binary operator does not support two vector bool operands");
16257 if (bool1_p
!= bool2_p
&& (muldiv_p
|| compare_p
))
16258 return N_("binary operator does not support vector bool operand");
16260 mode1
= TYPE_MODE (type1
);
16261 mode2
= TYPE_MODE (type2
);
16263 if (bool1_p
!= bool2_p
&& plusminus_p
16264 && (GET_MODE_CLASS (mode1
) == MODE_VECTOR_FLOAT
16265 || GET_MODE_CLASS (mode2
) == MODE_VECTOR_FLOAT
))
16266 return N_("binary operator does not support mixing vector "
16267 "bool with floating point vector operands");
16272 /* Implement TARGET_C_EXCESS_PRECISION.
16274 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16275 double on s390, causing operations on float_t to operate in a higher
16276 precision than is necessary. However, it is not the case that SFmode
16277 operations have implicit excess precision, and we generate more optimal
16278 code if we let the compiler know no implicit extra precision is added.
16280 That means when we are compiling with -fexcess-precision=fast, the value
16281 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16282 float_t (though they would be correct for -fexcess-precision=standard).
16284 A complete fix would modify glibc to remove the unnecessary typedef
16285 of float_t to double. */
16287 static enum flt_eval_method
16288 s390_excess_precision (enum excess_precision_type type
)
16292 case EXCESS_PRECISION_TYPE_IMPLICIT
:
16293 case EXCESS_PRECISION_TYPE_FAST
:
16294 /* The fastest type to promote to will always be the native type,
16295 whether that occurs with implicit excess precision or
16297 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
16298 case EXCESS_PRECISION_TYPE_STANDARD
:
16299 /* Otherwise, when we are in a standards compliant mode, to
16300 ensure consistency with the implementation in glibc, report that
16301 float is evaluated to the range and precision of double. */
16302 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE
;
16304 gcc_unreachable ();
16306 return FLT_EVAL_METHOD_UNPREDICTABLE
;
16309 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16311 static unsigned HOST_WIDE_INT
16312 s390_asan_shadow_offset (void)
16314 return TARGET_64BIT
? HOST_WIDE_INT_1U
<< 52 : HOST_WIDE_INT_UC (0x20000000);
16317 #ifdef HAVE_GAS_HIDDEN
16318 # define USE_HIDDEN_LINKONCE 1
16320 # define USE_HIDDEN_LINKONCE 0
16323 /* Output an indirect branch trampoline for target register REGNO. */
16326 s390_output_indirect_thunk_function (unsigned int regno
, bool z10_p
)
16329 char thunk_label
[32];
16333 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL
, regno
);
16335 sprintf (thunk_label
, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX
,
16336 INDIRECT_BRANCH_THUNK_REGNUM
, regno
);
16338 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
16339 get_identifier (thunk_label
),
16340 build_function_type_list (void_type_node
, NULL_TREE
));
16341 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
16342 NULL_TREE
, void_type_node
);
16343 TREE_PUBLIC (decl
) = 1;
16344 TREE_STATIC (decl
) = 1;
16345 DECL_IGNORED_P (decl
) = 1;
16347 if (USE_HIDDEN_LINKONCE
)
16349 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
16351 targetm
.asm_out
.unique_section (decl
, 0);
16352 switch_to_section (get_named_section (decl
, NULL
, 0));
16354 targetm
.asm_out
.globalize_label (asm_out_file
, thunk_label
);
16355 fputs ("\t.hidden\t", asm_out_file
);
16356 assemble_name (asm_out_file
, thunk_label
);
16357 putc ('\n', asm_out_file
);
16358 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, thunk_label
, decl
);
16362 switch_to_section (text_section
);
16363 ASM_OUTPUT_LABEL (asm_out_file
, thunk_label
);
16366 DECL_INITIAL (decl
) = make_node (BLOCK
);
16367 current_function_decl
= decl
;
16368 allocate_struct_function (decl
, false);
16369 init_function_start (decl
);
16370 cfun
->is_thunk
= true;
16371 first_function_block_is_cold
= false;
16372 final_start_function (emit_barrier (), asm_out_file
, 1);
16374 /* This makes CFI at least usable for indirect jumps.
16376 Stopping in the thunk: backtrace will point to the thunk target
16377 is if it was interrupted by a signal. For a call this means that
16378 the call chain will be: caller->callee->thunk */
16379 if (flag_asynchronous_unwind_tables
&& flag_dwarf2_cfi_asm
)
16381 fputs ("\t.cfi_signal_frame\n", asm_out_file
);
16382 fprintf (asm_out_file
, "\t.cfi_return_column %d\n", regno
);
16383 for (i
= 0; i
< FPR15_REGNUM
; i
++)
16384 fprintf (asm_out_file
, "\t.cfi_same_value %s\n", reg_names
[i
]);
16391 /* We generate a thunk for z10 compiled code although z10 is
16392 currently not enabled. Tell the assembler to accept the
16394 if (!TARGET_CPU_Z10
)
16396 fputs ("\t.machine push\n", asm_out_file
);
16397 fputs ("\t.machine z10\n", asm_out_file
);
16399 /* We use exrl even if -mzarch hasn't been specified on the
16400 command line so we have to tell the assembler to accept
16403 fputs ("\t.machinemode zarch\n", asm_out_file
);
16405 fputs ("\texrl\t0,1f\n", asm_out_file
);
16408 fputs ("\t.machinemode esa\n", asm_out_file
);
16410 if (!TARGET_CPU_Z10
)
16411 fputs ("\t.machine pop\n", asm_out_file
);
16416 fprintf (asm_out_file
, "\tlarl\t%%r%d,1f\n",
16417 INDIRECT_BRANCH_THUNK_REGNUM
);
16420 fprintf (asm_out_file
, "\tex\t0,0(%%r%d)\n",
16421 INDIRECT_BRANCH_THUNK_REGNUM
);
16425 fputs ("0:\tj\t0b\n", asm_out_file
);
16427 /* 1: br <regno> */
16428 fprintf (asm_out_file
, "1:\tbr\t%%r%d\n", regno
);
16430 final_end_function ();
16431 init_insn_lengths ();
16432 free_after_compilation (cfun
);
16434 current_function_decl
= NULL
;
16437 /* Implement the asm.code_end target hook. */
16440 s390_code_end (void)
16444 for (i
= 1; i
< 16; i
++)
16446 if (indirect_branch_z10thunk_mask
& (1 << i
))
16447 s390_output_indirect_thunk_function (i
, true);
16449 if (indirect_branch_prez10thunk_mask
& (1 << i
))
16450 s390_output_indirect_thunk_function (i
, false);
16453 if (TARGET_INDIRECT_BRANCH_TABLE
)
16458 for (o
= 0; o
< INDIRECT_BRANCH_NUM_OPTIONS
; o
++)
16460 if (indirect_branch_table_label_no
[o
] == 0)
16463 switch_to_section (get_section (indirect_branch_table_name
[o
],
16466 for (i
= 0; i
< indirect_branch_table_label_no
[o
]; i
++)
16468 char label_start
[32];
16470 ASM_GENERATE_INTERNAL_LABEL (label_start
,
16471 indirect_branch_table_label
[o
], i
);
16473 fputs ("\t.long\t", asm_out_file
);
16474 assemble_name_raw (asm_out_file
, label_start
);
16475 fputs ("-.\n", asm_out_file
);
16477 switch_to_section (current_function_section ());
16482 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16485 s390_case_values_threshold (void)
16487 /* Disabling branch prediction for indirect jumps makes jump tables
16488 much more expensive. */
16489 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP
)
16492 return default_case_values_threshold ();
16495 /* Evaluate the insns between HEAD and TAIL and do back-end to install
16496 back-end specific dependencies.
16498 Establish an ANTI dependency between r11 and r15 restores from FPRs
16499 to prevent the instructions scheduler from reordering them since
16500 this would break CFI. No further handling in the sched_reorder
16501 hook is required since the r11 and r15 restore will never appear in
16502 the same ready list with that change. */
16504 s390_sched_dependencies_evaluation (rtx_insn
*head
, rtx_insn
*tail
)
16506 if (!frame_pointer_needed
|| !epilogue_completed
)
16509 while (head
!= tail
&& DEBUG_INSN_P (head
))
16510 head
= NEXT_INSN (head
);
16512 rtx_insn
*r15_restore
= NULL
, *r11_restore
= NULL
;
16514 for (rtx_insn
*insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
16516 rtx set
= single_set (insn
);
16518 || !RTX_FRAME_RELATED_P (insn
)
16520 || !REG_P (SET_DEST (set
))
16521 || !FP_REG_P (SET_SRC (set
)))
16524 if (REGNO (SET_DEST (set
)) == HARD_FRAME_POINTER_REGNUM
)
16525 r11_restore
= insn
;
16527 if (REGNO (SET_DEST (set
)) == STACK_POINTER_REGNUM
)
16528 r15_restore
= insn
;
16531 if (r11_restore
== NULL
|| r15_restore
== NULL
)
16533 add_dependence (r11_restore
, r15_restore
, REG_DEP_ANTI
);
16536 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16538 static unsigned HOST_WIDE_INT
16539 s390_shift_truncation_mask (machine_mode mode
)
16541 return mode
== DImode
|| mode
== SImode
? 63 : 0;
16544 /* Initialize GCC target structure. */
16546 #undef TARGET_ASM_ALIGNED_HI_OP
16547 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16548 #undef TARGET_ASM_ALIGNED_DI_OP
16549 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16550 #undef TARGET_ASM_INTEGER
16551 #define TARGET_ASM_INTEGER s390_assemble_integer
16553 #undef TARGET_ASM_OPEN_PAREN
16554 #define TARGET_ASM_OPEN_PAREN ""
16556 #undef TARGET_ASM_CLOSE_PAREN
16557 #define TARGET_ASM_CLOSE_PAREN ""
16559 #undef TARGET_OPTION_OVERRIDE
16560 #define TARGET_OPTION_OVERRIDE s390_option_override
16562 #ifdef TARGET_THREAD_SSP_OFFSET
16563 #undef TARGET_STACK_PROTECT_GUARD
16564 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16567 #undef TARGET_ENCODE_SECTION_INFO
16568 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16570 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16571 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16574 #undef TARGET_HAVE_TLS
16575 #define TARGET_HAVE_TLS true
16577 #undef TARGET_CANNOT_FORCE_CONST_MEM
16578 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16580 #undef TARGET_DELEGITIMIZE_ADDRESS
16581 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16583 #undef TARGET_LEGITIMIZE_ADDRESS
16584 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16586 #undef TARGET_RETURN_IN_MEMORY
16587 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16589 #undef TARGET_INIT_BUILTINS
16590 #define TARGET_INIT_BUILTINS s390_init_builtins
16591 #undef TARGET_EXPAND_BUILTIN
16592 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16593 #undef TARGET_BUILTIN_DECL
16594 #define TARGET_BUILTIN_DECL s390_builtin_decl
16596 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16597 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16599 #undef TARGET_ASM_OUTPUT_MI_THUNK
16600 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16601 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16602 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16604 #undef TARGET_C_EXCESS_PRECISION
16605 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16607 #undef TARGET_SCHED_ADJUST_PRIORITY
16608 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16609 #undef TARGET_SCHED_ISSUE_RATE
16610 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16611 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16612 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16614 #undef TARGET_SCHED_VARIABLE_ISSUE
16615 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16616 #undef TARGET_SCHED_REORDER
16617 #define TARGET_SCHED_REORDER s390_sched_reorder
16618 #undef TARGET_SCHED_INIT
16619 #define TARGET_SCHED_INIT s390_sched_init
16621 #undef TARGET_CANNOT_COPY_INSN_P
16622 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16623 #undef TARGET_RTX_COSTS
16624 #define TARGET_RTX_COSTS s390_rtx_costs
16625 #undef TARGET_ADDRESS_COST
16626 #define TARGET_ADDRESS_COST s390_address_cost
16627 #undef TARGET_REGISTER_MOVE_COST
16628 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16629 #undef TARGET_MEMORY_MOVE_COST
16630 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16631 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16632 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16633 s390_builtin_vectorization_cost
16635 #undef TARGET_MACHINE_DEPENDENT_REORG
16636 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16638 #undef TARGET_VALID_POINTER_MODE
16639 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16641 #undef TARGET_BUILD_BUILTIN_VA_LIST
16642 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16643 #undef TARGET_EXPAND_BUILTIN_VA_START
16644 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16645 #undef TARGET_ASAN_SHADOW_OFFSET
16646 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16648 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16650 #undef TARGET_PROMOTE_FUNCTION_MODE
16651 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16652 #undef TARGET_PASS_BY_REFERENCE
16653 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16655 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16656 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16658 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16659 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16660 #undef TARGET_FUNCTION_ARG
16661 #define TARGET_FUNCTION_ARG s390_function_arg
16662 #undef TARGET_FUNCTION_ARG_ADVANCE
16663 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16664 #undef TARGET_FUNCTION_ARG_PADDING
16665 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16666 #undef TARGET_FUNCTION_VALUE
16667 #define TARGET_FUNCTION_VALUE s390_function_value
16668 #undef TARGET_LIBCALL_VALUE
16669 #define TARGET_LIBCALL_VALUE s390_libcall_value
16670 #undef TARGET_STRICT_ARGUMENT_NAMING
16671 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16673 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16674 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16676 #undef TARGET_FIXED_CONDITION_CODE_REGS
16677 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16679 #undef TARGET_CC_MODES_COMPATIBLE
16680 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16682 #undef TARGET_INVALID_WITHIN_DOLOOP
16683 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16690 #undef TARGET_DWARF_FRAME_REG_MODE
16691 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16693 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16694 #undef TARGET_MANGLE_TYPE
16695 #define TARGET_MANGLE_TYPE s390_mangle_type
16698 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16699 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16701 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16702 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16704 #undef TARGET_PREFERRED_RELOAD_CLASS
16705 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16707 #undef TARGET_SECONDARY_RELOAD
16708 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16709 #undef TARGET_SECONDARY_MEMORY_NEEDED
16710 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16711 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16712 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16714 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16715 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16717 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16718 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16720 #undef TARGET_LEGITIMATE_ADDRESS_P
16721 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16723 #undef TARGET_LEGITIMATE_CONSTANT_P
16724 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16726 #undef TARGET_LRA_P
16727 #define TARGET_LRA_P s390_lra_p
16729 #undef TARGET_CAN_ELIMINATE
16730 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16732 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16733 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16735 #undef TARGET_LOOP_UNROLL_ADJUST
16736 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16738 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16739 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16740 #undef TARGET_TRAMPOLINE_INIT
16741 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16744 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16745 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16747 #undef TARGET_UNWIND_WORD_MODE
16748 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16750 #undef TARGET_CANONICALIZE_COMPARISON
16751 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16753 #undef TARGET_HARD_REGNO_SCRATCH_OK
16754 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16756 #undef TARGET_HARD_REGNO_NREGS
16757 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16758 #undef TARGET_HARD_REGNO_MODE_OK
16759 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16760 #undef TARGET_MODES_TIEABLE_P
16761 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16763 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16764 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16765 s390_hard_regno_call_part_clobbered
16767 #undef TARGET_ATTRIBUTE_TABLE
16768 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16770 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16771 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16773 #undef TARGET_SET_UP_BY_PROLOGUE
16774 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16776 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16777 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16779 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16780 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16781 s390_use_by_pieces_infrastructure_p
16783 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16784 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16786 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16787 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16789 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16790 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16792 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16793 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16795 #undef TARGET_VECTOR_ALIGNMENT
16796 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16798 #undef TARGET_INVALID_BINARY_OP
16799 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16801 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16802 #undef TARGET_ASM_FILE_START
16803 #define TARGET_ASM_FILE_START s390_asm_file_start
16806 #undef TARGET_ASM_FILE_END
16807 #define TARGET_ASM_FILE_END s390_asm_file_end
16809 #undef TARGET_SET_CURRENT_FUNCTION
16810 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16812 #if S390_USE_TARGET_ATTRIBUTE
16813 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16814 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16816 #undef TARGET_CAN_INLINE_P
16817 #define TARGET_CAN_INLINE_P s390_can_inline_p
16820 #undef TARGET_OPTION_RESTORE
16821 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16823 #undef TARGET_CAN_CHANGE_MODE_CLASS
16824 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16826 #undef TARGET_CONSTANT_ALIGNMENT
16827 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16829 #undef TARGET_ASM_CODE_END
16830 #define TARGET_ASM_CODE_END s390_code_end
16832 #undef TARGET_CASE_VALUES_THRESHOLD
16833 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16835 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16836 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16837 s390_sched_dependencies_evaluation
16839 #undef TARGET_SHIFT_TRUNCATION_MASK
16840 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16842 /* Use only short displacement, since long displacement is not available for
16843 the floating point instructions. */
16844 #undef TARGET_MAX_ANCHOR_OFFSET
16845 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16847 struct gcc_target targetm
= TARGET_INITIALIZER
;
16849 #include "gt-s390.h"