1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
28 #include "target-globals.h"
37 #include "stringpool.h"
45 #include "diagnostic-core.h"
46 #include "diagnostic.h"
48 #include "fold-const.h"
49 #include "print-tree.h"
50 #include "stor-layout.h"
53 #include "conditions.h"
55 #include "insn-attr.h"
67 #include "cfgcleanup.h"
69 #include "langhooks.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
76 #include "tree-pass.h"
81 #include "tm-constrs.h"
83 #include "symbol-summary.h"
85 #include "ipa-fnsummary.h"
87 /* This file should be included last. */
88 #include "target-def.h"
90 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode
);
92 /* Remember the last target of s390_set_current_function. */
93 static GTY(()) tree s390_previous_fndecl
;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of multiplication in TFmode.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of division in TFmode.  */
  const int ddbr;     /* cost of division in DFmode.  */
  const int debr;     /* cost of division in SFmode.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};
132 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
135 struct processor_costs z900_cost
=
137 COSTS_N_INSNS (5), /* M */
138 COSTS_N_INSNS (10), /* MGHI */
139 COSTS_N_INSNS (5), /* MH */
140 COSTS_N_INSNS (4), /* MHI */
141 COSTS_N_INSNS (5), /* ML */
142 COSTS_N_INSNS (5), /* MR */
143 COSTS_N_INSNS (4), /* MS */
144 COSTS_N_INSNS (15), /* MSG */
145 COSTS_N_INSNS (7), /* MSGF */
146 COSTS_N_INSNS (7), /* MSGFR */
147 COSTS_N_INSNS (10), /* MSGR */
148 COSTS_N_INSNS (4), /* MSR */
149 COSTS_N_INSNS (7), /* multiplication in DFmode */
150 COSTS_N_INSNS (13), /* MXBR */
151 COSTS_N_INSNS (136), /* SQXBR */
152 COSTS_N_INSNS (44), /* SQDBR */
153 COSTS_N_INSNS (35), /* SQEBR */
154 COSTS_N_INSNS (18), /* MADBR */
155 COSTS_N_INSNS (13), /* MAEBR */
156 COSTS_N_INSNS (134), /* DXBR */
157 COSTS_N_INSNS (30), /* DDBR */
158 COSTS_N_INSNS (27), /* DEBR */
159 COSTS_N_INSNS (220), /* DLGR */
160 COSTS_N_INSNS (34), /* DLR */
161 COSTS_N_INSNS (34), /* DR */
162 COSTS_N_INSNS (32), /* DSGFR */
163 COSTS_N_INSNS (32), /* DSGR */
167 struct processor_costs z990_cost
=
169 COSTS_N_INSNS (4), /* M */
170 COSTS_N_INSNS (2), /* MGHI */
171 COSTS_N_INSNS (2), /* MH */
172 COSTS_N_INSNS (2), /* MHI */
173 COSTS_N_INSNS (4), /* ML */
174 COSTS_N_INSNS (4), /* MR */
175 COSTS_N_INSNS (5), /* MS */
176 COSTS_N_INSNS (6), /* MSG */
177 COSTS_N_INSNS (4), /* MSGF */
178 COSTS_N_INSNS (4), /* MSGFR */
179 COSTS_N_INSNS (4), /* MSGR */
180 COSTS_N_INSNS (4), /* MSR */
181 COSTS_N_INSNS (1), /* multiplication in DFmode */
182 COSTS_N_INSNS (28), /* MXBR */
183 COSTS_N_INSNS (130), /* SQXBR */
184 COSTS_N_INSNS (66), /* SQDBR */
185 COSTS_N_INSNS (38), /* SQEBR */
186 COSTS_N_INSNS (1), /* MADBR */
187 COSTS_N_INSNS (1), /* MAEBR */
188 COSTS_N_INSNS (60), /* DXBR */
189 COSTS_N_INSNS (40), /* DDBR */
190 COSTS_N_INSNS (26), /* DEBR */
191 COSTS_N_INSNS (176), /* DLGR */
192 COSTS_N_INSNS (31), /* DLR */
193 COSTS_N_INSNS (31), /* DR */
194 COSTS_N_INSNS (31), /* DSGFR */
195 COSTS_N_INSNS (31), /* DSGR */
199 struct processor_costs z9_109_cost
=
201 COSTS_N_INSNS (4), /* M */
202 COSTS_N_INSNS (2), /* MGHI */
203 COSTS_N_INSNS (2), /* MH */
204 COSTS_N_INSNS (2), /* MHI */
205 COSTS_N_INSNS (4), /* ML */
206 COSTS_N_INSNS (4), /* MR */
207 COSTS_N_INSNS (5), /* MS */
208 COSTS_N_INSNS (6), /* MSG */
209 COSTS_N_INSNS (4), /* MSGF */
210 COSTS_N_INSNS (4), /* MSGFR */
211 COSTS_N_INSNS (4), /* MSGR */
212 COSTS_N_INSNS (4), /* MSR */
213 COSTS_N_INSNS (1), /* multiplication in DFmode */
214 COSTS_N_INSNS (28), /* MXBR */
215 COSTS_N_INSNS (130), /* SQXBR */
216 COSTS_N_INSNS (66), /* SQDBR */
217 COSTS_N_INSNS (38), /* SQEBR */
218 COSTS_N_INSNS (1), /* MADBR */
219 COSTS_N_INSNS (1), /* MAEBR */
220 COSTS_N_INSNS (60), /* DXBR */
221 COSTS_N_INSNS (40), /* DDBR */
222 COSTS_N_INSNS (26), /* DEBR */
223 COSTS_N_INSNS (30), /* DLGR */
224 COSTS_N_INSNS (23), /* DLR */
225 COSTS_N_INSNS (23), /* DR */
226 COSTS_N_INSNS (24), /* DSGFR */
227 COSTS_N_INSNS (24), /* DSGR */
231 struct processor_costs z10_cost
=
233 COSTS_N_INSNS (10), /* M */
234 COSTS_N_INSNS (10), /* MGHI */
235 COSTS_N_INSNS (10), /* MH */
236 COSTS_N_INSNS (10), /* MHI */
237 COSTS_N_INSNS (10), /* ML */
238 COSTS_N_INSNS (10), /* MR */
239 COSTS_N_INSNS (10), /* MS */
240 COSTS_N_INSNS (10), /* MSG */
241 COSTS_N_INSNS (10), /* MSGF */
242 COSTS_N_INSNS (10), /* MSGFR */
243 COSTS_N_INSNS (10), /* MSGR */
244 COSTS_N_INSNS (10), /* MSR */
245 COSTS_N_INSNS (1) , /* multiplication in DFmode */
246 COSTS_N_INSNS (50), /* MXBR */
247 COSTS_N_INSNS (120), /* SQXBR */
248 COSTS_N_INSNS (52), /* SQDBR */
249 COSTS_N_INSNS (38), /* SQEBR */
250 COSTS_N_INSNS (1), /* MADBR */
251 COSTS_N_INSNS (1), /* MAEBR */
252 COSTS_N_INSNS (111), /* DXBR */
253 COSTS_N_INSNS (39), /* DDBR */
254 COSTS_N_INSNS (32), /* DEBR */
255 COSTS_N_INSNS (160), /* DLGR */
256 COSTS_N_INSNS (71), /* DLR */
257 COSTS_N_INSNS (71), /* DR */
258 COSTS_N_INSNS (71), /* DSGFR */
259 COSTS_N_INSNS (71), /* DSGR */
263 struct processor_costs z196_cost
=
265 COSTS_N_INSNS (7), /* M */
266 COSTS_N_INSNS (5), /* MGHI */
267 COSTS_N_INSNS (5), /* MH */
268 COSTS_N_INSNS (5), /* MHI */
269 COSTS_N_INSNS (7), /* ML */
270 COSTS_N_INSNS (7), /* MR */
271 COSTS_N_INSNS (6), /* MS */
272 COSTS_N_INSNS (8), /* MSG */
273 COSTS_N_INSNS (6), /* MSGF */
274 COSTS_N_INSNS (6), /* MSGFR */
275 COSTS_N_INSNS (8), /* MSGR */
276 COSTS_N_INSNS (6), /* MSR */
277 COSTS_N_INSNS (1) , /* multiplication in DFmode */
278 COSTS_N_INSNS (40), /* MXBR B+40 */
279 COSTS_N_INSNS (100), /* SQXBR B+100 */
280 COSTS_N_INSNS (42), /* SQDBR B+42 */
281 COSTS_N_INSNS (28), /* SQEBR B+28 */
282 COSTS_N_INSNS (1), /* MADBR B */
283 COSTS_N_INSNS (1), /* MAEBR B */
284 COSTS_N_INSNS (101), /* DXBR B+101 */
285 COSTS_N_INSNS (29), /* DDBR */
286 COSTS_N_INSNS (22), /* DEBR */
287 COSTS_N_INSNS (160), /* DLGR cracked */
288 COSTS_N_INSNS (160), /* DLR cracked */
289 COSTS_N_INSNS (160), /* DR expanded */
290 COSTS_N_INSNS (160), /* DSGFR cracked */
291 COSTS_N_INSNS (160), /* DSGR cracked */
295 struct processor_costs zEC12_cost
=
297 COSTS_N_INSNS (7), /* M */
298 COSTS_N_INSNS (5), /* MGHI */
299 COSTS_N_INSNS (5), /* MH */
300 COSTS_N_INSNS (5), /* MHI */
301 COSTS_N_INSNS (7), /* ML */
302 COSTS_N_INSNS (7), /* MR */
303 COSTS_N_INSNS (6), /* MS */
304 COSTS_N_INSNS (8), /* MSG */
305 COSTS_N_INSNS (6), /* MSGF */
306 COSTS_N_INSNS (6), /* MSGFR */
307 COSTS_N_INSNS (8), /* MSGR */
308 COSTS_N_INSNS (6), /* MSR */
309 COSTS_N_INSNS (1) , /* multiplication in DFmode */
310 COSTS_N_INSNS (40), /* MXBR B+40 */
311 COSTS_N_INSNS (100), /* SQXBR B+100 */
312 COSTS_N_INSNS (42), /* SQDBR B+42 */
313 COSTS_N_INSNS (28), /* SQEBR B+28 */
314 COSTS_N_INSNS (1), /* MADBR B */
315 COSTS_N_INSNS (1), /* MAEBR B */
316 COSTS_N_INSNS (131), /* DXBR B+131 */
317 COSTS_N_INSNS (29), /* DDBR */
318 COSTS_N_INSNS (22), /* DEBR */
319 COSTS_N_INSNS (160), /* DLGR cracked */
320 COSTS_N_INSNS (160), /* DLR cracked */
321 COSTS_N_INSNS (160), /* DR expanded */
322 COSTS_N_INSNS (160), /* DSGFR cracked */
323 COSTS_N_INSNS (160), /* DSGR cracked */
328 /* The preferred name to be used in user visible output. */
329 const char *const name
;
330 /* CPU name as it should be passed to Binutils via .machine */
331 const char *const binutils_name
;
332 const enum processor_type processor
;
333 const struct processor_costs
*cost
;
335 const processor_table
[] =
337 { "g5", "g5", PROCESSOR_9672_G5
, &z900_cost
},
338 { "g6", "g6", PROCESSOR_9672_G6
, &z900_cost
},
339 { "z900", "z900", PROCESSOR_2064_Z900
, &z900_cost
},
340 { "z990", "z990", PROCESSOR_2084_Z990
, &z990_cost
},
341 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109
, &z9_109_cost
},
342 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC
, &z9_109_cost
},
343 { "z10", "z10", PROCESSOR_2097_Z10
, &z10_cost
},
344 { "z196", "z196", PROCESSOR_2817_Z196
, &z196_cost
},
345 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12
, &zEC12_cost
},
346 { "z13", "z13", PROCESSOR_2964_Z13
, &zEC12_cost
},
347 { "z14", "arch12", PROCESSOR_3906_Z14
, &zEC12_cost
},
348 { "native", "", PROCESSOR_NATIVE
, NULL
}
extern int reload_completed;
353 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
354 static rtx_insn
*last_scheduled_insn
;
355 #define MAX_SCHED_UNITS 3
356 static int last_scheduled_unit_distance
[MAX_SCHED_UNITS
];
/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
362 #define MAX_SCHED_MIX_SCORE 8
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
366 Increase this with the OOO windows size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 100
369 /* Structure used to hold the components of a S/390 memory
370 address. A legitimate address on S/390 is of the general
372 base + index + displacement
373 where any of the components is optional.
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
387 /* The following structure is embedded in the machine
388 specific part of struct function. */
390 struct GTY (()) s390_frame_layout
392 /* Offset within stack frame. */
393 HOST_WIDE_INT gprs_offset
;
394 HOST_WIDE_INT f0_offset
;
395 HOST_WIDE_INT f4_offset
;
396 HOST_WIDE_INT f8_offset
;
397 HOST_WIDE_INT backchain_offset
;
399 /* Number of first and last gpr where slots in the register
400 save area are reserved for. */
401 int first_save_gpr_slot
;
402 int last_save_gpr_slot
;
404 /* Location (FP register number) where GPRs (r0-r15) should
406 0 - does not need to be saved at all
408 #define SAVE_SLOT_NONE 0
409 #define SAVE_SLOT_STACK -1
410 signed char gpr_save_slots
[16];
412 /* Number of first and last gpr to be saved, restored. */
414 int first_restore_gpr
;
416 int last_restore_gpr
;
418 /* Bits standing for floating point registers. Set, if the
419 respective register has to be saved. Starting with reg 16 (f0)
420 at the rightmost bit.
421 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
422 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
423 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
424 unsigned int fpr_bitmap
;
426 /* Number of floating point registers f8-f15 which must be saved. */
429 /* Set if return address needs to be saved.
430 This flag is set by s390_return_addr_rtx if it could not use
431 the initial value of r14 and therefore depends on r14 saved
433 bool save_return_addr_p
;
435 /* Size of stack frame. */
436 HOST_WIDE_INT frame_size
;
439 /* Define the structure for the machine field in struct function. */
441 struct GTY(()) machine_function
443 struct s390_frame_layout frame_layout
;
445 /* Literal pool base register. */
448 /* True if we may need to perform branch splitting. */
449 bool split_branches_pending_p
;
451 bool has_landing_pad_p
;
453 /* True if the current function may contain a tbegin clobbering
457 /* For -fsplit-stack support: A stack local which holds a pointer to
458 the stack arguments for a function with a variable number of
459 arguments. This is set at the start of the function and is used
460 to initialize the overflow_arg_area field of the va_list
462 rtx split_stack_varargs_pointer
;
465 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
467 #define cfun_frame_layout (cfun->machine->frame_layout)
468 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
469 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
470 ? cfun_frame_layout.fpr_bitmap & 0x0f \
471 : cfun_frame_layout.fpr_bitmap & 0x03))
472 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
473 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
474 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
475 (1 << (REGNO - FPR0_REGNUM)))
476 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
477 (1 << (REGNO - FPR0_REGNUM))))
478 #define cfun_gpr_save_slot(REGNO) \
479 cfun->machine->frame_layout.gpr_save_slots[REGNO]
481 /* Number of GPRs and FPRs used for argument passing. */
482 #define GP_ARG_NUM_REG 5
483 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
484 #define VEC_ARG_NUM_REG 8
486 /* A couple of shortcuts. */
487 #define CONST_OK_FOR_J(x) \
488 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
489 #define CONST_OK_FOR_K(x) \
490 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
491 #define CONST_OK_FOR_Os(x) \
492 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
493 #define CONST_OK_FOR_Op(x) \
494 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
495 #define CONST_OK_FOR_On(x) \
496 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
498 #define REGNO_PAIR_OK(REGNO, MODE) \
499 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
501 /* That's the read ahead of the dynamic branch prediction unit in
502 bytes on a z10 (or higher) CPU. */
503 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
513 /* Set the vector ABI marker if TYPE is subject to the vector ABI
514 switch. The vector ABI affects only vector data types. There are
515 two aspects of the vector ABI relevant here:
517 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
518 ABI and natural alignment with the old.
520 2. vector <= 16 bytes are passed in VRs or by value on the stack
521 with the new ABI but by reference on the stack with the old.
523 If ARG_P is true TYPE is used for a function argument or return
524 value. The ABI marker then is set for all vector data types. If
525 ARG_P is false only type 1 vectors are being checked. */
528 s390_check_type_for_vector_abi (const_tree type
, bool arg_p
, bool in_struct_p
)
530 static hash_set
<const_tree
> visited_types_hash
;
535 if (type
== NULL_TREE
|| TREE_CODE (type
) == ERROR_MARK
)
538 if (visited_types_hash
.contains (type
))
541 visited_types_hash
.add (type
);
543 if (VECTOR_TYPE_P (type
))
545 int type_size
= int_size_in_bytes (type
);
547 /* Outside arguments only the alignment is changing and this
548 only happens for vector types >= 16 bytes. */
549 if (!arg_p
&& type_size
< 16)
552 /* In arguments vector types > 16 are passed as before (GCC
553 never enforced the bigger alignment for arguments which was
554 required by the old vector ABI). However, it might still be
555 ABI relevant due to the changed alignment if it is a struct
557 if (arg_p
&& type_size
> 16 && !in_struct_p
)
560 s390_vector_abi
= TARGET_VX_ABI
? 2 : 1;
562 else if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == ARRAY_TYPE
)
564 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
565 natural alignment there will never be ABI dependent padding
566 in an array type. That's why we do not set in_struct_p to
568 s390_check_type_for_vector_abi (TREE_TYPE (type
), arg_p
, in_struct_p
);
570 else if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
574 /* Check the return type. */
575 s390_check_type_for_vector_abi (TREE_TYPE (type
), true, false);
577 for (arg_chain
= TYPE_ARG_TYPES (type
);
579 arg_chain
= TREE_CHAIN (arg_chain
))
580 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain
), true, false);
582 else if (RECORD_OR_UNION_TYPE_P (type
))
586 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
588 if (TREE_CODE (field
) != FIELD_DECL
)
591 s390_check_type_for_vector_abi (TREE_TYPE (field
), arg_p
, true);
597 /* System z builtins. */
599 #include "s390-builtins.h"
601 const unsigned int bflags_builtin
[S390_BUILTIN_MAX
+ 1] =
606 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
608 #define OB_DEF_VAR(...)
609 #include "s390-builtins.def"
613 const unsigned int opflags_builtin
[S390_BUILTIN_MAX
+ 1] =
618 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
620 #define OB_DEF_VAR(...)
621 #include "s390-builtins.def"
625 const unsigned int bflags_overloaded_builtin
[S390_OVERLOADED_BUILTIN_MAX
+ 1] =
631 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
632 #define OB_DEF_VAR(...)
633 #include "s390-builtins.def"
638 bflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
645 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
646 #include "s390-builtins.def"
651 opflags_overloaded_builtin_var
[S390_OVERLOADED_BUILTIN_VAR_MAX
+ 1] =
658 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
659 #include "s390-builtins.def"
663 tree s390_builtin_types
[BT_MAX
];
664 tree s390_builtin_fn_types
[BT_FN_MAX
];
665 tree s390_builtin_decls
[S390_BUILTIN_MAX
+
666 S390_OVERLOADED_BUILTIN_MAX
+
667 S390_OVERLOADED_BUILTIN_VAR_MAX
];
669 static enum insn_code
const code_for_builtin
[S390_BUILTIN_MAX
+ 1] = {
673 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
675 #define OB_DEF_VAR(...)
677 #include "s390-builtins.def"
682 s390_init_builtins (void)
684 /* These definitions are being used in s390-builtins.def. */
685 tree returns_twice_attr
= tree_cons (get_identifier ("returns_twice"),
687 tree noreturn_attr
= tree_cons (get_identifier ("noreturn"), NULL
, NULL
);
688 tree c_uint64_type_node
;
690 /* The uint64_type_node from tree.c is not compatible to the C99
691 uint64_t data type. What we want is c_uint64_type_node from
692 c-common.c. But since backend code is not supposed to interface
693 with the frontend we recreate it here. */
695 c_uint64_type_node
= long_unsigned_type_node
;
697 c_uint64_type_node
= long_long_unsigned_type_node
;
700 #define DEF_TYPE(INDEX, NODE, CONST_P) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = (!CONST_P) ? \
703 (NODE) : build_type_variant ((NODE), 1, 0);
705 #undef DEF_POINTER_TYPE
706 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
707 if (s390_builtin_types[INDEX] == NULL) \
708 s390_builtin_types[INDEX] = \
709 build_pointer_type (s390_builtin_types[INDEX_BASE]);
711 #undef DEF_DISTINCT_TYPE
712 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
713 if (s390_builtin_types[INDEX] == NULL) \
714 s390_builtin_types[INDEX] = \
715 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
717 #undef DEF_VECTOR_TYPE
718 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
719 if (s390_builtin_types[INDEX] == NULL) \
720 s390_builtin_types[INDEX] = \
721 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
723 #undef DEF_OPAQUE_VECTOR_TYPE
724 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
725 if (s390_builtin_types[INDEX] == NULL) \
726 s390_builtin_types[INDEX] = \
727 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
730 #define DEF_FN_TYPE(INDEX, args...) \
731 if (s390_builtin_fn_types[INDEX] == NULL) \
732 s390_builtin_fn_types[INDEX] = \
733 build_function_type_list (args, NULL_TREE);
735 #define DEF_OV_TYPE(...)
736 #include "s390-builtin-types.def"
739 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
740 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
741 s390_builtin_decls[S390_BUILTIN_##NAME] = \
742 add_builtin_function ("__builtin_" #NAME, \
743 s390_builtin_fn_types[FNTYPE], \
744 S390_BUILTIN_##NAME, \
749 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
750 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
752 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
753 add_builtin_function ("__builtin_" #NAME, \
754 s390_builtin_fn_types[FNTYPE], \
755 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
760 #define OB_DEF_VAR(...)
761 #include "s390-builtins.def"
765 /* Return true if ARG is appropriate as argument number ARGNUM of
766 builtin DECL. The operand flags from s390-builtins.def have to
767 passed as OP_FLAGS. */
769 s390_const_operand_ok (tree arg
, int argnum
, int op_flags
, tree decl
)
771 if (O_UIMM_P (op_flags
))
773 int bitwidths
[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
774 int bitwidth
= bitwidths
[op_flags
- O_U1
];
776 if (!tree_fits_uhwi_p (arg
)
777 || tree_to_uhwi (arg
) > (HOST_WIDE_INT_1U
<< bitwidth
) - 1)
779 error("constant argument %d for builtin %qF is out of range (0.."
780 HOST_WIDE_INT_PRINT_UNSIGNED
")",
782 (HOST_WIDE_INT_1U
<< bitwidth
) - 1);
787 if (O_SIMM_P (op_flags
))
789 int bitwidths
[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
790 int bitwidth
= bitwidths
[op_flags
- O_S2
];
792 if (!tree_fits_shwi_p (arg
)
793 || tree_to_shwi (arg
) < -(HOST_WIDE_INT_1
<< (bitwidth
- 1))
794 || tree_to_shwi (arg
) > ((HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1))
796 error("constant argument %d for builtin %qF is out of range ("
797 HOST_WIDE_INT_PRINT_DEC
".."
798 HOST_WIDE_INT_PRINT_DEC
")",
800 -(HOST_WIDE_INT_1
<< (bitwidth
- 1)),
801 (HOST_WIDE_INT_1
<< (bitwidth
- 1)) - 1);
808 /* Expand an expression EXP that calls a built-in function,
809 with result going to TARGET if that's convenient
810 (and in mode MODE if that's convenient).
811 SUBTARGET may be used as the target for computing one of EXP's operands.
812 IGNORE is nonzero if the value is to be ignored. */
815 s390_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
816 machine_mode mode ATTRIBUTE_UNUSED
,
817 int ignore ATTRIBUTE_UNUSED
)
821 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
822 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
823 enum insn_code icode
;
824 rtx op
[MAX_ARGS
], pat
;
828 call_expr_arg_iterator iter
;
829 unsigned int all_op_flags
= opflags_for_builtin (fcode
);
830 machine_mode last_vec_mode
= VOIDmode
;
832 if (TARGET_DEBUG_ARG
)
835 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
836 (int)fcode
, IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
837 bflags_for_builtin (fcode
));
840 if (S390_USE_TARGET_ATTRIBUTE
)
844 bflags
= bflags_for_builtin (fcode
);
845 if ((bflags
& B_HTM
) && !TARGET_HTM
)
847 error ("builtin %qF is not supported without -mhtm "
848 "(default with -march=zEC12 and higher).", fndecl
);
851 if (((bflags
& B_VX
) || (bflags
& B_VXE
)) && !TARGET_VX
)
853 error ("builtin %qF requires -mvx "
854 "(default with -march=z13 and higher).", fndecl
);
858 if ((bflags
& B_VXE
) && !TARGET_VXE
)
860 error ("Builtin %qF requires z14 or higher.", fndecl
);
864 if (fcode
>= S390_OVERLOADED_BUILTIN_VAR_OFFSET
865 && fcode
< S390_ALL_BUILTIN_MAX
)
869 else if (fcode
< S390_OVERLOADED_BUILTIN_OFFSET
)
871 icode
= code_for_builtin
[fcode
];
872 /* Set a flag in the machine specific cfun part in order to support
873 saving/restoring of FPRs. */
874 if (fcode
== S390_BUILTIN_tbegin
|| fcode
== S390_BUILTIN_tbegin_retry
)
875 cfun
->machine
->tbegin_p
= true;
877 else if (fcode
< S390_OVERLOADED_BUILTIN_VAR_OFFSET
)
879 error ("unresolved overloaded builtin");
883 internal_error ("bad builtin fcode");
886 internal_error ("bad builtin icode");
888 nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
892 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
894 || GET_MODE (target
) != tmode
895 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
896 target
= gen_reg_rtx (tmode
);
898 /* There are builtins (e.g. vec_promote) with no vector
899 arguments but an element selector. So we have to also look
900 at the vector return type when emitting the modulo
902 if (VECTOR_MODE_P (insn_data
[icode
].operand
[0].mode
))
903 last_vec_mode
= insn_data
[icode
].operand
[0].mode
;
907 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
910 const struct insn_operand_data
*insn_op
;
911 unsigned int op_flags
= all_op_flags
& ((1 << O_SHIFT
) - 1);
913 all_op_flags
= all_op_flags
>> O_SHIFT
;
915 if (arg
== error_mark_node
)
917 if (arity
>= MAX_ARGS
)
920 if (O_IMM_P (op_flags
)
921 && TREE_CODE (arg
) != INTEGER_CST
)
923 error ("constant value required for builtin %qF argument %d",
928 if (!s390_const_operand_ok (arg
, arity
+ 1, op_flags
, fndecl
))
931 insn_op
= &insn_data
[icode
].operand
[arity
+ nonvoid
];
932 op
[arity
] = expand_expr (arg
, NULL_RTX
, insn_op
->mode
, EXPAND_NORMAL
);
934 /* expand_expr truncates constants to the target mode only if it
935 is "convenient". However, our checks below rely on this
937 if (CONST_INT_P (op
[arity
])
938 && SCALAR_INT_MODE_P (insn_op
->mode
)
939 && GET_MODE (op
[arity
]) != insn_op
->mode
)
940 op
[arity
] = GEN_INT (trunc_int_for_mode (INTVAL (op
[arity
]),
943 /* Wrap the expanded RTX for pointer types into a MEM expr with
944 the proper mode. This allows us to use e.g. (match_operand
945 "memory_operand"..) in the insn patterns instead of (mem
946 (match_operand "address_operand)). This is helpful for
947 patterns not just accepting MEMs. */
948 if (POINTER_TYPE_P (TREE_TYPE (arg
))
949 && insn_op
->predicate
!= address_operand
)
950 op
[arity
] = gen_rtx_MEM (insn_op
->mode
, op
[arity
]);
952 /* Expand the modulo operation required on element selectors. */
953 if (op_flags
== O_ELEM
)
955 gcc_assert (last_vec_mode
!= VOIDmode
);
956 op
[arity
] = simplify_expand_binop (SImode
, code_to_optab (AND
),
958 GEN_INT (GET_MODE_NUNITS (last_vec_mode
) - 1),
959 NULL_RTX
, 1, OPTAB_DIRECT
);
962 /* Record the vector mode used for an element selector. This assumes:
963 1. There is no builtin with two different vector modes and an element selector
964 2. The element selector comes after the vector type it is referring to.
965 This is currently true for all the builtins but FIXME we
966 should better check for that. */
967 if (VECTOR_MODE_P (insn_op
->mode
))
968 last_vec_mode
= insn_op
->mode
;
970 if (insn_op
->predicate (op
[arity
], insn_op
->mode
))
976 if (MEM_P (op
[arity
])
977 && insn_op
->predicate
== memory_operand
978 && (GET_MODE (XEXP (op
[arity
], 0)) == Pmode
979 || GET_MODE (XEXP (op
[arity
], 0)) == VOIDmode
))
981 op
[arity
] = replace_equiv_address (op
[arity
],
982 copy_to_mode_reg (Pmode
,
983 XEXP (op
[arity
], 0)));
985 /* Some of the builtins require different modes/types than the
986 pattern in order to implement a specific API. Instead of
987 adding many expanders which do the mode change we do it here.
988 E.g. s390_vec_add_u128 required to have vector unsigned char
989 arguments is mapped to addti3. */
990 else if (insn_op
->mode
!= VOIDmode
991 && GET_MODE (op
[arity
]) != VOIDmode
992 && GET_MODE (op
[arity
]) != insn_op
->mode
993 && ((tmp_rtx
= simplify_gen_subreg (insn_op
->mode
, op
[arity
],
994 GET_MODE (op
[arity
]), 0))
999 else if (GET_MODE (op
[arity
]) == insn_op
->mode
1000 || GET_MODE (op
[arity
]) == VOIDmode
1001 || (insn_op
->predicate
== address_operand
1002 && GET_MODE (op
[arity
]) == Pmode
))
1004 /* An address_operand usually has VOIDmode in the expander
1005 so we cannot use this. */
1006 machine_mode target_mode
=
1007 (insn_op
->predicate
== address_operand
1008 ? (machine_mode
) Pmode
: insn_op
->mode
);
1009 op
[arity
] = copy_to_mode_reg (target_mode
, op
[arity
]);
1012 if (!insn_op
->predicate (op
[arity
], insn_op
->mode
))
1014 error ("invalid argument %d for builtin %qF", arity
+ 1, fndecl
);
1023 pat
= GEN_FCN (icode
) (target
);
1027 pat
= GEN_FCN (icode
) (target
, op
[0]);
1029 pat
= GEN_FCN (icode
) (op
[0]);
1033 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
1035 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
1039 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
1041 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
1045 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
1047 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
1051 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
1053 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
1057 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
1059 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4], op
[5]);
/* Hotpatching limits and defaults for the "hotpatch" attribute /
   -mhotpatch option (number of halfwords before/after the label).  */
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
1079 /* Check whether the hotpatch attribute is applied to a function and, if it has
1080 an argument, the argument is valid. */
1083 s390_handle_hotpatch_attribute (tree
*node
, tree name
, tree args
,
1084 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1090 if (TREE_CODE (*node
) != FUNCTION_DECL
)
1092 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
1094 *no_add_attrs
= true;
1096 if (args
!= NULL
&& TREE_CHAIN (args
) != NULL
)
1098 expr
= TREE_VALUE (args
);
1099 expr2
= TREE_VALUE (TREE_CHAIN (args
));
1101 if (args
== NULL
|| TREE_CHAIN (args
) == NULL
)
1103 else if (TREE_CODE (expr
) != INTEGER_CST
1104 || !INTEGRAL_TYPE_P (TREE_TYPE (expr
))
1105 || wi::gtu_p (expr
, s390_hotpatch_hw_max
))
1107 else if (TREE_CODE (expr2
) != INTEGER_CST
1108 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2
))
1109 || wi::gtu_p (expr2
, s390_hotpatch_hw_max
))
1115 error ("requested %qE attribute is not a comma separated pair of"
1116 " non-negative integer constants or too large (max. %d)", name
,
1117 s390_hotpatch_hw_max
);
1118 *no_add_attrs
= true;
1124 /* Expand the s390_vector_bool type attribute. */
1127 s390_handle_vectorbool_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
1128 tree args ATTRIBUTE_UNUSED
,
1129 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1131 tree type
= *node
, result
= NULL_TREE
;
1134 while (POINTER_TYPE_P (type
)
1135 || TREE_CODE (type
) == FUNCTION_TYPE
1136 || TREE_CODE (type
) == METHOD_TYPE
1137 || TREE_CODE (type
) == ARRAY_TYPE
)
1138 type
= TREE_TYPE (type
);
1140 mode
= TYPE_MODE (type
);
1143 case E_DImode
: case E_V2DImode
:
1144 result
= s390_builtin_types
[BT_BV2DI
];
1146 case E_SImode
: case E_V4SImode
:
1147 result
= s390_builtin_types
[BT_BV4SI
];
1149 case E_HImode
: case E_V8HImode
:
1150 result
= s390_builtin_types
[BT_BV8HI
];
1152 case E_QImode
: case E_V16QImode
:
1153 result
= s390_builtin_types
[BT_BV16QI
];
1159 *no_add_attrs
= true; /* No need to hang on to the attribute. */
1162 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
1167 static const struct attribute_spec s390_attribute_table
[] = {
1168 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute
, false },
1169 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute
, true },
1171 { NULL
, 0, 0, false, false, false, NULL
, false }
1174 /* Return the alignment for LABEL. We default to the -falign-labels
1175 value except for the literal pool base label. */
1177 s390_label_align (rtx_insn
*label
)
1179 rtx_insn
*prev_insn
= prev_active_insn (label
);
1182 if (prev_insn
== NULL_RTX
)
1185 set
= single_set (prev_insn
);
1187 if (set
== NULL_RTX
)
1190 src
= SET_SRC (set
);
1192 /* Don't align literal pool base labels. */
1193 if (GET_CODE (src
) == UNSPEC
1194 && XINT (src
, 1) == UNSPEC_MAIN_BASE
)
1198 return align_labels_log
;
1201 static GTY(()) rtx got_symbol
;
1203 /* Return the GOT table symbol. The symbol will be created when the
1204 function is invoked for the first time. */
1207 s390_got_symbol (void)
1211 got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1212 SYMBOL_REF_FLAGS (got_symbol
) = SYMBOL_FLAG_LOCAL
;
1218 static scalar_int_mode
1219 s390_libgcc_cmp_return_mode (void)
1221 return TARGET_64BIT
? DImode
: SImode
;
1224 static scalar_int_mode
1225 s390_libgcc_shift_count_mode (void)
1227 return TARGET_64BIT
? DImode
: SImode
;
1230 static scalar_int_mode
1231 s390_unwind_word_mode (void)
1233 return TARGET_64BIT
? DImode
: SImode
;
1236 /* Return true if the back end supports mode MODE. */
1238 s390_scalar_mode_supported_p (scalar_mode mode
)
1240 /* In contrast to the default implementation reject TImode constants on 31bit
1241 TARGET_ZARCH for ABI compliance. */
1242 if (!TARGET_64BIT
&& TARGET_ZARCH
&& mode
== TImode
)
1245 if (DECIMAL_FLOAT_MODE_P (mode
))
1246 return default_decimal_float_supported_p ();
1248 return default_scalar_mode_supported_p (mode
);
1251 /* Return true if the back end supports vector mode MODE. */
1253 s390_vector_mode_supported_p (machine_mode mode
)
1257 if (!VECTOR_MODE_P (mode
)
1259 || GET_MODE_SIZE (mode
) > 16)
1262 inner
= GET_MODE_INNER (mode
);
1280 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1283 s390_set_has_landing_pad_p (bool value
)
1285 cfun
->machine
->has_landing_pad_p
= value
;
1288 /* If two condition code modes are compatible, return a condition code
1289 mode which is compatible with both. Otherwise, return
1293 s390_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
1301 if (m2
== CCUmode
|| m2
== CCTmode
|| m2
== CCZ1mode
1302 || m2
== CCSmode
|| m2
== CCSRmode
|| m2
== CCURmode
)
1323 /* Return true if SET either doesn't set the CC register, or else
1324 the source and destination have matching CC modes and that
1325 CC mode is at least as constrained as REQ_MODE. */
1328 s390_match_ccmode_set (rtx set
, machine_mode req_mode
)
1330 machine_mode set_mode
;
1332 gcc_assert (GET_CODE (set
) == SET
);
1334 /* These modes are supposed to be used only in CC consumer
1336 gcc_assert (req_mode
!= CCVIALLmode
&& req_mode
!= CCVIANYmode
1337 && req_mode
!= CCVFALLmode
&& req_mode
!= CCVFANYmode
);
1339 if (GET_CODE (SET_DEST (set
)) != REG
|| !CC_REGNO_P (REGNO (SET_DEST (set
))))
1342 set_mode
= GET_MODE (SET_DEST (set
));
1362 if (req_mode
!= set_mode
)
1367 if (req_mode
!= CCSmode
&& req_mode
!= CCUmode
&& req_mode
!= CCTmode
1368 && req_mode
!= CCSRmode
&& req_mode
!= CCURmode
1369 && req_mode
!= CCZ1mode
)
1375 if (req_mode
!= CCAmode
)
1383 return (GET_MODE (SET_SRC (set
)) == set_mode
);
1386 /* Return true if every SET in INSN that sets the CC register
1387 has source and destination with matching CC modes and that
1388 CC mode is at least as constrained as REQ_MODE.
1389 If REQ_MODE is VOIDmode, always return false. */
1392 s390_match_ccmode (rtx_insn
*insn
, machine_mode req_mode
)
1396 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1397 if (req_mode
== VOIDmode
)
1400 if (GET_CODE (PATTERN (insn
)) == SET
)
1401 return s390_match_ccmode_set (PATTERN (insn
), req_mode
);
1403 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
1404 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
1406 rtx set
= XVECEXP (PATTERN (insn
), 0, i
);
1407 if (GET_CODE (set
) == SET
)
1408 if (!s390_match_ccmode_set (set
, req_mode
))
1415 /* If a test-under-mask instruction can be used to implement
1416 (compare (and ... OP1) OP2), return the CC mode required
1417 to do that. Otherwise, return VOIDmode.
1418 MIXED is true if the instruction can distinguish between
1419 CC1 and CC2 for mixed selected bits (TMxx), it is false
1420 if the instruction cannot (TM). */
1423 s390_tm_ccmode (rtx op1
, rtx op2
, bool mixed
)
1427 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1428 if (GET_CODE (op1
) != CONST_INT
|| GET_CODE (op2
) != CONST_INT
)
1431 /* Selected bits all zero: CC0.
1432 e.g.: int a; if ((a & (16 + 128)) == 0) */
1433 if (INTVAL (op2
) == 0)
1436 /* Selected bits all one: CC3.
1437 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1438 if (INTVAL (op2
) == INTVAL (op1
))
1441 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1443 if ((a & (16 + 128)) == 16) -> CCT1
1444 if ((a & (16 + 128)) == 128) -> CCT2 */
1447 bit1
= exact_log2 (INTVAL (op2
));
1448 bit0
= exact_log2 (INTVAL (op1
) ^ INTVAL (op2
));
1449 if (bit0
!= -1 && bit1
!= -1)
1450 return bit0
> bit1
? CCT1mode
: CCT2mode
;
1456 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1457 OP0 and OP1 of a COMPARE, return the mode to be used for the
1461 s390_select_ccmode (enum rtx_code code
, rtx op0
, rtx op1
)
1467 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1468 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1470 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1471 && CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1))))
1473 if ((GET_CODE (op0
) == PLUS
|| GET_CODE (op0
) == MINUS
1474 || GET_CODE (op1
) == NEG
)
1475 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1478 if (GET_CODE (op0
) == AND
)
1480 /* Check whether we can potentially do it via TM. */
1481 machine_mode ccmode
;
1482 ccmode
= s390_tm_ccmode (XEXP (op0
, 1), op1
, 1);
1483 if (ccmode
!= VOIDmode
)
1485 /* Relax CCTmode to CCZmode to allow fall-back to AND
1486 if that turns out to be beneficial. */
1487 return ccmode
== CCTmode
? CCZmode
: ccmode
;
1491 if (register_operand (op0
, HImode
)
1492 && GET_CODE (op1
) == CONST_INT
1493 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 65535))
1495 if (register_operand (op0
, QImode
)
1496 && GET_CODE (op1
) == CONST_INT
1497 && (INTVAL (op1
) == -1 || INTVAL (op1
) == 255))
1506 /* The only overflow condition of NEG and ABS happens when
1507 -INT_MAX is used as parameter, which stays negative. So
1508 we have an overflow from a positive value to a negative.
1509 Using CCAP mode the resulting cc can be used for comparisons. */
1510 if ((GET_CODE (op0
) == NEG
|| GET_CODE (op0
) == ABS
)
1511 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1514 /* If constants are involved in an add instruction it is possible to use
1515 the resulting cc for comparisons with zero. Knowing the sign of the
1516 constant the overflow behavior gets predictable. e.g.:
1517 int a, b; if ((b = a + c) > 0)
1518 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1519 if (GET_CODE (op0
) == PLUS
&& GET_CODE (XEXP (op0
, 1)) == CONST_INT
1520 && (CONST_OK_FOR_K (INTVAL (XEXP (op0
, 1)))
1521 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0
, 1)), 'O', "Os")
1522 /* Avoid INT32_MIN on 32 bit. */
1523 && (!TARGET_ZARCH
|| INTVAL (XEXP (op0
, 1)) != -0x7fffffff - 1))))
1525 if (INTVAL (XEXP((op0
), 1)) < 0)
1539 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1540 && GET_CODE (op1
) != CONST_INT
)
1546 if (GET_CODE (op0
) == PLUS
1547 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1550 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1551 && GET_CODE (op1
) != CONST_INT
)
1557 if (GET_CODE (op0
) == MINUS
1558 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
1561 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
1562 && GET_CODE (op1
) != CONST_INT
)
1571 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1572 that we can implement more efficiently. */
1575 s390_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
1576 bool op0_preserve_value
)
1578 if (op0_preserve_value
)
1581 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1582 if ((*code
== EQ
|| *code
== NE
)
1583 && *op1
== const0_rtx
1584 && GET_CODE (*op0
) == ZERO_EXTRACT
1585 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1586 && GET_CODE (XEXP (*op0
, 2)) == CONST_INT
1587 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1589 rtx inner
= XEXP (*op0
, 0);
1590 HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (GET_MODE (inner
));
1591 HOST_WIDE_INT len
= INTVAL (XEXP (*op0
, 1));
1592 HOST_WIDE_INT pos
= INTVAL (XEXP (*op0
, 2));
1594 if (len
> 0 && len
< modesize
1595 && pos
>= 0 && pos
+ len
<= modesize
1596 && modesize
<= HOST_BITS_PER_WIDE_INT
)
1598 unsigned HOST_WIDE_INT block
;
1599 block
= (HOST_WIDE_INT_1U
<< len
) - 1;
1600 block
<<= modesize
- pos
- len
;
1602 *op0
= gen_rtx_AND (GET_MODE (inner
), inner
,
1603 gen_int_mode (block
, GET_MODE (inner
)));
1607 /* Narrow AND of memory against immediate to enable TM. */
1608 if ((*code
== EQ
|| *code
== NE
)
1609 && *op1
== const0_rtx
1610 && GET_CODE (*op0
) == AND
1611 && GET_CODE (XEXP (*op0
, 1)) == CONST_INT
1612 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0
, 0))))
1614 rtx inner
= XEXP (*op0
, 0);
1615 rtx mask
= XEXP (*op0
, 1);
1617 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1618 if (GET_CODE (inner
) == SUBREG
1619 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner
)))
1620 && (GET_MODE_SIZE (GET_MODE (inner
))
1621 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
1623 & GET_MODE_MASK (GET_MODE (inner
))
1624 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner
))))
1626 inner
= SUBREG_REG (inner
);
1628 /* Do not change volatile MEMs. */
1629 if (MEM_P (inner
) && !MEM_VOLATILE_P (inner
))
1631 int part
= s390_single_part (XEXP (*op0
, 1),
1632 GET_MODE (inner
), QImode
, 0);
1635 mask
= gen_int_mode (s390_extract_part (mask
, QImode
, 0), QImode
);
1636 inner
= adjust_address_nv (inner
, QImode
, part
);
1637 *op0
= gen_rtx_AND (QImode
, inner
, mask
);
1642 /* Narrow comparisons against 0xffff to HImode if possible. */
1643 if ((*code
== EQ
|| *code
== NE
)
1644 && GET_CODE (*op1
) == CONST_INT
1645 && INTVAL (*op1
) == 0xffff
1646 && SCALAR_INT_MODE_P (GET_MODE (*op0
))
1647 && (nonzero_bits (*op0
, GET_MODE (*op0
))
1648 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1650 *op0
= gen_lowpart (HImode
, *op0
);
1654 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1655 if (GET_CODE (*op0
) == UNSPEC
1656 && XINT (*op0
, 1) == UNSPEC_STRCMPCC_TO_INT
1657 && XVECLEN (*op0
, 0) == 1
1658 && GET_MODE (XVECEXP (*op0
, 0, 0)) == CCUmode
1659 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1660 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1661 && *op1
== const0_rtx
)
1663 enum rtx_code new_code
= UNKNOWN
;
1666 case EQ
: new_code
= EQ
; break;
1667 case NE
: new_code
= NE
; break;
1668 case LT
: new_code
= GTU
; break;
1669 case GT
: new_code
= LTU
; break;
1670 case LE
: new_code
= GEU
; break;
1671 case GE
: new_code
= LEU
; break;
1675 if (new_code
!= UNKNOWN
)
1677 *op0
= XVECEXP (*op0
, 0, 0);
1682 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1683 if (GET_CODE (*op0
) == UNSPEC
1684 && XINT (*op0
, 1) == UNSPEC_CC_TO_INT
1685 && XVECLEN (*op0
, 0) == 1
1686 && GET_CODE (XVECEXP (*op0
, 0, 0)) == REG
1687 && REGNO (XVECEXP (*op0
, 0, 0)) == CC_REGNUM
1688 && CONST_INT_P (*op1
))
1690 enum rtx_code new_code
= UNKNOWN
;
1691 switch (GET_MODE (XVECEXP (*op0
, 0, 0)))
1697 case EQ
: new_code
= EQ
; break;
1698 case NE
: new_code
= NE
; break;
1705 if (new_code
!= UNKNOWN
)
1707 /* For CCRAWmode put the required cc mask into the second
1709 if (GET_MODE (XVECEXP (*op0
, 0, 0)) == CCRAWmode
1710 && INTVAL (*op1
) >= 0 && INTVAL (*op1
) <= 3)
1711 *op1
= gen_rtx_CONST_INT (VOIDmode
, 1 << (3 - INTVAL (*op1
)));
1712 *op0
= XVECEXP (*op0
, 0, 0);
1717 /* Simplify cascaded EQ, NE with const0_rtx. */
1718 if ((*code
== NE
|| *code
== EQ
)
1719 && (GET_CODE (*op0
) == EQ
|| GET_CODE (*op0
) == NE
)
1720 && GET_MODE (*op0
) == SImode
1721 && GET_MODE (XEXP (*op0
, 0)) == CCZ1mode
1722 && REG_P (XEXP (*op0
, 0))
1723 && XEXP (*op0
, 1) == const0_rtx
1724 && *op1
== const0_rtx
)
1726 if ((*code
== EQ
&& GET_CODE (*op0
) == NE
)
1727 || (*code
== NE
&& GET_CODE (*op0
) == EQ
))
1731 *op0
= XEXP (*op0
, 0);
1734 /* Prefer register over memory as first operand. */
1735 if (MEM_P (*op0
) && REG_P (*op1
))
1737 rtx tem
= *op0
; *op0
= *op1
; *op1
= tem
;
1738 *code
= (int)swap_condition ((enum rtx_code
)*code
);
1741 /* A comparison result is compared against zero. Replace it with
1742 the (perhaps inverted) original comparison.
1743 This probably should be done by simplify_relational_operation. */
1744 if ((*code
== EQ
|| *code
== NE
)
1745 && *op1
== const0_rtx
1746 && COMPARISON_P (*op0
)
1747 && CC_REG_P (XEXP (*op0
, 0)))
1749 enum rtx_code new_code
;
1752 new_code
= reversed_comparison_code_parts (GET_CODE (*op0
),
1754 XEXP (*op1
, 0), NULL
);
1756 new_code
= GET_CODE (*op0
);
1758 if (new_code
!= UNKNOWN
)
1761 *op1
= XEXP (*op0
, 1);
1762 *op0
= XEXP (*op0
, 0);
1768 /* Emit a compare instruction suitable to implement the comparison
1769 OP0 CODE OP1. Return the correct condition RTL to be placed in
1770 the IF_THEN_ELSE of the conditional branch testing the result. */
1773 s390_emit_compare (enum rtx_code code
, rtx op0
, rtx op1
)
1775 machine_mode mode
= s390_select_ccmode (code
, op0
, op1
);
1778 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
1780 /* Do not output a redundant compare instruction if a
1781 compare_and_swap pattern already computed the result and the
1782 machine modes are compatible. */
1783 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0
), mode
)
1789 cc
= gen_rtx_REG (mode
, CC_REGNUM
);
1790 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (mode
, op0
, op1
)));
1793 return gen_rtx_fmt_ee (code
, VOIDmode
, cc
, const0_rtx
);
1796 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1798 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1799 conditional branch testing the result. */
1802 s390_emit_compare_and_swap (enum rtx_code code
, rtx old
, rtx mem
,
1803 rtx cmp
, rtx new_rtx
, machine_mode ccmode
)
1807 cc
= gen_rtx_REG (ccmode
, CC_REGNUM
);
1808 switch (GET_MODE (mem
))
1811 emit_insn (gen_atomic_compare_and_swapsi_internal (old
, mem
, cmp
,
1815 emit_insn (gen_atomic_compare_and_swapdi_internal (old
, mem
, cmp
,
1819 emit_insn (gen_atomic_compare_and_swapti_internal (old
, mem
, cmp
,
1827 return s390_emit_compare (code
, cc
, const0_rtx
);
1830 /* Emit a jump instruction to TARGET and return it. If COND is
1831 NULL_RTX, emit an unconditional jump, else a conditional jump under
1835 s390_emit_jump (rtx target
, rtx cond
)
1839 target
= gen_rtx_LABEL_REF (VOIDmode
, target
);
1841 target
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, target
, pc_rtx
);
1843 insn
= gen_rtx_SET (pc_rtx
, target
);
1844 return emit_jump_insn (insn
);
1847 /* Return branch condition mask to implement a branch
1848 specified by CODE. Return -1 for invalid comparisons. */
1851 s390_branch_condition_mask (rtx code
)
1853 const int CC0
= 1 << 3;
1854 const int CC1
= 1 << 2;
1855 const int CC2
= 1 << 1;
1856 const int CC3
= 1 << 0;
1858 gcc_assert (GET_CODE (XEXP (code
, 0)) == REG
);
1859 gcc_assert (REGNO (XEXP (code
, 0)) == CC_REGNUM
);
1860 gcc_assert (XEXP (code
, 1) == const0_rtx
1861 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
1862 && CONST_INT_P (XEXP (code
, 1))));
1865 switch (GET_MODE (XEXP (code
, 0)))
1869 switch (GET_CODE (code
))
1871 case EQ
: return CC0
;
1872 case NE
: return CC1
| CC2
| CC3
;
1878 switch (GET_CODE (code
))
1880 case EQ
: return CC1
;
1881 case NE
: return CC0
| CC2
| CC3
;
1887 switch (GET_CODE (code
))
1889 case EQ
: return CC2
;
1890 case NE
: return CC0
| CC1
| CC3
;
1896 switch (GET_CODE (code
))
1898 case EQ
: return CC3
;
1899 case NE
: return CC0
| CC1
| CC2
;
1905 switch (GET_CODE (code
))
1907 case EQ
: return CC0
| CC2
;
1908 case NE
: return CC1
| CC3
;
1914 switch (GET_CODE (code
))
1916 case LTU
: return CC2
| CC3
; /* carry */
1917 case GEU
: return CC0
| CC1
; /* no carry */
1923 switch (GET_CODE (code
))
1925 case GTU
: return CC0
| CC1
; /* borrow */
1926 case LEU
: return CC2
| CC3
; /* no borrow */
1932 switch (GET_CODE (code
))
1934 case EQ
: return CC0
| CC2
;
1935 case NE
: return CC1
| CC3
;
1936 case LTU
: return CC1
;
1937 case GTU
: return CC3
;
1938 case LEU
: return CC1
| CC2
;
1939 case GEU
: return CC2
| CC3
;
1944 switch (GET_CODE (code
))
1946 case EQ
: return CC0
;
1947 case NE
: return CC1
| CC2
| CC3
;
1948 case LTU
: return CC1
;
1949 case GTU
: return CC2
;
1950 case LEU
: return CC0
| CC1
;
1951 case GEU
: return CC0
| CC2
;
1957 switch (GET_CODE (code
))
1959 case EQ
: return CC0
;
1960 case NE
: return CC2
| CC1
| CC3
;
1961 case LTU
: return CC2
;
1962 case GTU
: return CC1
;
1963 case LEU
: return CC0
| CC2
;
1964 case GEU
: return CC0
| CC1
;
1970 switch (GET_CODE (code
))
1972 case EQ
: return CC0
;
1973 case NE
: return CC1
| CC2
| CC3
;
1974 case LT
: return CC1
| CC3
;
1975 case GT
: return CC2
;
1976 case LE
: return CC0
| CC1
| CC3
;
1977 case GE
: return CC0
| CC2
;
1983 switch (GET_CODE (code
))
1985 case EQ
: return CC0
;
1986 case NE
: return CC1
| CC2
| CC3
;
1987 case LT
: return CC1
;
1988 case GT
: return CC2
| CC3
;
1989 case LE
: return CC0
| CC1
;
1990 case GE
: return CC0
| CC2
| CC3
;
1996 switch (GET_CODE (code
))
1998 case EQ
: return CC0
;
1999 case NE
: return CC1
| CC2
| CC3
;
2000 case LT
: return CC1
;
2001 case GT
: return CC2
;
2002 case LE
: return CC0
| CC1
;
2003 case GE
: return CC0
| CC2
;
2004 case UNORDERED
: return CC3
;
2005 case ORDERED
: return CC0
| CC1
| CC2
;
2006 case UNEQ
: return CC0
| CC3
;
2007 case UNLT
: return CC1
| CC3
;
2008 case UNGT
: return CC2
| CC3
;
2009 case UNLE
: return CC0
| CC1
| CC3
;
2010 case UNGE
: return CC0
| CC2
| CC3
;
2011 case LTGT
: return CC1
| CC2
;
2017 switch (GET_CODE (code
))
2019 case EQ
: return CC0
;
2020 case NE
: return CC2
| CC1
| CC3
;
2021 case LT
: return CC2
;
2022 case GT
: return CC1
;
2023 case LE
: return CC0
| CC2
;
2024 case GE
: return CC0
| CC1
;
2025 case UNORDERED
: return CC3
;
2026 case ORDERED
: return CC0
| CC2
| CC1
;
2027 case UNEQ
: return CC0
| CC3
;
2028 case UNLT
: return CC2
| CC3
;
2029 case UNGT
: return CC1
| CC3
;
2030 case UNLE
: return CC0
| CC2
| CC3
;
2031 case UNGE
: return CC0
| CC1
| CC3
;
2032 case LTGT
: return CC2
| CC1
;
2037 /* Vector comparison modes. */
2038 /* CC2 will never be set. It however is part of the negated
2041 switch (GET_CODE (code
))
2046 case GE
: return CC0
;
2047 /* The inverted modes are in fact *any* modes. */
2051 case LT
: return CC3
| CC1
| CC2
;
2056 switch (GET_CODE (code
))
2061 case GE
: return CC0
| CC1
;
2062 /* The inverted modes are in fact *all* modes. */
2066 case LT
: return CC3
| CC2
;
2070 switch (GET_CODE (code
))
2074 case GE
: return CC0
;
2075 /* The inverted modes are in fact *any* modes. */
2078 case UNLT
: return CC3
| CC1
| CC2
;
2083 switch (GET_CODE (code
))
2087 case GE
: return CC0
| CC1
;
2088 /* The inverted modes are in fact *all* modes. */
2091 case UNLT
: return CC3
| CC2
;
2096 switch (GET_CODE (code
))
2099 return INTVAL (XEXP (code
, 1));
2101 return (INTVAL (XEXP (code
, 1))) ^ 0xf;
2112 /* Return branch condition mask to implement a compare and branch
2113 specified by CODE. Return -1 for invalid comparisons. */
2116 s390_compare_and_branch_condition_mask (rtx code
)
2118 const int CC0
= 1 << 3;
2119 const int CC1
= 1 << 2;
2120 const int CC2
= 1 << 1;
2122 switch (GET_CODE (code
))
2146 /* If INV is false, return assembler mnemonic string to implement
2147 a branch specified by CODE. If INV is true, return mnemonic
2148 for the corresponding inverted branch. */
2151 s390_branch_condition_mnemonic (rtx code
, int inv
)
2155 static const char *const mnemonic
[16] =
2157 NULL
, "o", "h", "nle",
2158 "l", "nhe", "lh", "ne",
2159 "e", "nlh", "he", "nl",
2160 "le", "nh", "no", NULL
2163 if (GET_CODE (XEXP (code
, 0)) == REG
2164 && REGNO (XEXP (code
, 0)) == CC_REGNUM
2165 && (XEXP (code
, 1) == const0_rtx
2166 || (GET_MODE (XEXP (code
, 0)) == CCRAWmode
2167 && CONST_INT_P (XEXP (code
, 1)))))
2168 mask
= s390_branch_condition_mask (code
);
2170 mask
= s390_compare_and_branch_condition_mask (code
);
2172 gcc_assert (mask
>= 0);
2177 gcc_assert (mask
>= 1 && mask
<= 14);
2179 return mnemonic
[mask
];
2182 /* Return the part of op which has a value different from def.
2183 The size of the part is determined by mode.
2184 Use this function only if you already know that op really
2185 contains such a part. */
2187 unsigned HOST_WIDE_INT
2188 s390_extract_part (rtx op
, machine_mode mode
, int def
)
2190 unsigned HOST_WIDE_INT value
= 0;
2191 int max_parts
= HOST_BITS_PER_WIDE_INT
/ GET_MODE_BITSIZE (mode
);
2192 int part_bits
= GET_MODE_BITSIZE (mode
);
2193 unsigned HOST_WIDE_INT part_mask
= (HOST_WIDE_INT_1U
<< part_bits
) - 1;
2196 for (i
= 0; i
< max_parts
; i
++)
2199 value
= UINTVAL (op
);
2201 value
>>= part_bits
;
2203 if ((value
& part_mask
) != (def
& part_mask
))
2204 return value
& part_mask
;
2210 /* If OP is an integer constant of mode MODE with exactly one
2211 part of mode PART_MODE unequal to DEF, return the number of that
2212 part. Otherwise, return -1. */
2215 s390_single_part (rtx op
,
2217 machine_mode part_mode
,
2220 unsigned HOST_WIDE_INT value
= 0;
2221 int n_parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (part_mode
);
2222 unsigned HOST_WIDE_INT part_mask
2223 = (HOST_WIDE_INT_1U
<< GET_MODE_BITSIZE (part_mode
)) - 1;
2226 if (GET_CODE (op
) != CONST_INT
)
2229 for (i
= 0; i
< n_parts
; i
++)
2232 value
= UINTVAL (op
);
2234 value
>>= GET_MODE_BITSIZE (part_mode
);
2236 if ((value
& part_mask
) != (def
& part_mask
))
2244 return part
== -1 ? -1 : n_parts
- 1 - part
;
2247 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2248 bits and no other bits are set in (the lower SIZE bits of) IN.
2250 PSTART and PEND can be used to obtain the start and end
2251 position (inclusive) of the bitfield relative to 64
2252 bits. *PSTART / *PEND gives the position of the first/last bit
2253 of the bitfield counting from the highest order bit starting
2257 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in
, int size
,
2258 int *pstart
, int *pend
)
2262 int lowbit
= HOST_BITS_PER_WIDE_INT
- 1;
2263 int highbit
= HOST_BITS_PER_WIDE_INT
- size
;
2264 unsigned HOST_WIDE_INT bitmask
= HOST_WIDE_INT_1U
;
2266 gcc_assert (!!pstart
== !!pend
);
2267 for (start
= lowbit
; start
>= highbit
; bitmask
<<= 1, start
--)
2270 /* Look for the rightmost bit of a contiguous range of ones. */
2277 /* Look for the firt zero bit after the range of ones. */
2278 if (! (bitmask
& in
))
2282 /* We're one past the last one-bit. */
2286 /* No one bits found. */
2289 if (start
> highbit
)
2291 unsigned HOST_WIDE_INT mask
;
2293 /* Calculate a mask for all bits beyond the contiguous bits. */
2294 mask
= ((~HOST_WIDE_INT_0U
>> highbit
)
2295 & (~HOST_WIDE_INT_0U
<< (lowbit
- start
+ 1)));
2297 /* There are more bits set beyond the first range of one bits. */
2310 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2311 if ~IN contains a contiguous bitfield. In that case, *END is <
2314 If WRAP_P is true, a bitmask that wraps around is also tested.
2315 When a wraparoud occurs *START is greater than *END (in
2316 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2317 part of the range. If WRAP_P is false, no wraparound is
2321 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in
, bool wrap_p
,
2322 int size
, int *start
, int *end
)
2324 int bs
= HOST_BITS_PER_WIDE_INT
;
2327 gcc_assert (!!start
== !!end
);
2328 if ((in
& ((~HOST_WIDE_INT_0U
) >> (bs
- size
))) == 0)
2329 /* This cannot be expressed as a contiguous bitmask. Exit early because
2330 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2333 b
= s390_contiguous_bitmask_nowrap_p (in
, size
, start
, end
);
2338 b
= s390_contiguous_bitmask_nowrap_p (~in
, size
, start
, end
);
2344 gcc_assert (s
>= 1);
2345 *start
= ((e
+ 1) & (bs
- 1));
2346 *end
= ((s
- 1 + bs
) & (bs
- 1));
2352 /* Return true if OP contains the same contiguous bitfield in *all*
2353 its elements. START and END can be used to obtain the start and
2354 end position of the bitfield.
2356 START/STOP give the position of the first/last bit of the bitfield
2357 counting from the lowest order bit starting with zero. In order to
2358 use these values for S/390 instructions this has to be converted to
2359 "bits big endian" style. */
2362 s390_contiguous_bitmask_vector_p (rtx op
, int *start
, int *end
)
2364 unsigned HOST_WIDE_INT mask
;
2369 gcc_assert (!!start
== !!end
);
2370 if (!const_vec_duplicate_p (op
, &elt
)
2371 || !CONST_INT_P (elt
))
2374 size
= GET_MODE_UNIT_BITSIZE (GET_MODE (op
));
2376 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2380 mask
= UINTVAL (elt
);
2382 b
= s390_contiguous_bitmask_p (mask
, true, size
, start
, end
);
2387 *start
-= (HOST_BITS_PER_WIDE_INT
- size
);
2388 *end
-= (HOST_BITS_PER_WIDE_INT
- size
);
2396 /* Return true if C consists only of byte chunks being either 0 or
2397 0xff. If MASK is !=NULL a byte mask is generated which is
2398 appropriate for the vector generate byte mask instruction. */
2401 s390_bytemask_vector_p (rtx op
, unsigned *mask
)
2404 unsigned tmp_mask
= 0;
2405 int nunit
, unit_size
;
2407 if (!VECTOR_MODE_P (GET_MODE (op
))
2408 || GET_CODE (op
) != CONST_VECTOR
2409 || !CONST_INT_P (XVECEXP (op
, 0, 0)))
2412 nunit
= GET_MODE_NUNITS (GET_MODE (op
));
2413 unit_size
= GET_MODE_UNIT_SIZE (GET_MODE (op
));
2415 for (i
= 0; i
< nunit
; i
++)
2417 unsigned HOST_WIDE_INT c
;
2420 if (!CONST_INT_P (XVECEXP (op
, 0, i
)))
2423 c
= UINTVAL (XVECEXP (op
, 0, i
));
2424 for (j
= 0; j
< unit_size
; j
++)
2426 if ((c
& 0xff) != 0 && (c
& 0xff) != 0xff)
2428 tmp_mask
|= (c
& 1) << ((nunit
- 1 - i
) * unit_size
+ j
);
2429 c
= c
>> BITS_PER_UNIT
;
2439 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2440 equivalent to a shift followed by the AND. In particular, CONTIG
2441 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2442 for ROTL indicate a rotate to the right. */
2445 s390_extzv_shift_ok (int bitsize
, int rotl
, unsigned HOST_WIDE_INT contig
)
2450 ok
= s390_contiguous_bitmask_nowrap_p (contig
, bitsize
, &start
, &end
);
2454 return (64 - end
>= rotl
);
2457 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2459 rotl
= -rotl
+ (64 - bitsize
);
2460 return (start
>= rotl
);
2464 /* Check whether we can (and want to) split a double-word
2465 move in mode MODE from SRC to DST into two single-word
2466 moves, moving the subword FIRST_SUBWORD first. */
2469 s390_split_ok_p (rtx dst
, rtx src
, machine_mode mode
, int first_subword
)
2471 /* Floating point and vector registers cannot be split. */
2472 if (FP_REG_P (src
) || FP_REG_P (dst
) || VECTOR_REG_P (src
) || VECTOR_REG_P (dst
))
2475 /* Non-offsettable memory references cannot be split. */
2476 if ((GET_CODE (src
) == MEM
&& !offsettable_memref_p (src
))
2477 || (GET_CODE (dst
) == MEM
&& !offsettable_memref_p (dst
)))
2480 /* Moving the first subword must not clobber a register
2481 needed to move the second subword. */
2482 if (register_operand (dst
, mode
))
2484 rtx subreg
= operand_subword (dst
, first_subword
, 0, mode
);
2485 if (reg_overlap_mentioned_p (subreg
, src
))
2492 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2493 and [MEM2, MEM2 + SIZE] do overlap and false
2497 s390_overlap_p (rtx mem1
, rtx mem2
, HOST_WIDE_INT size
)
2499 rtx addr1
, addr2
, addr_delta
;
2500 HOST_WIDE_INT delta
;
2502 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2508 addr1
= XEXP (mem1
, 0);
2509 addr2
= XEXP (mem2
, 0);
2511 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2513 /* This overlapping check is used by peepholes merging memory block operations.
2514 Overlapping operations would otherwise be recognized by the S/390 hardware
2515 and would fall back to a slower implementation. Allowing overlapping
2516 operations would lead to slow code but not to wrong code. Therefore we are
2517 somewhat optimistic if we cannot prove that the memory blocks are
2519 That's why we return false here although this may accept operations on
2520 overlapping memory areas. */
2521 if (!addr_delta
|| GET_CODE (addr_delta
) != CONST_INT
)
2524 delta
= INTVAL (addr_delta
);
2527 || (delta
> 0 && delta
< size
)
2528 || (delta
< 0 && -delta
< size
))
2534 /* Check whether the address of memory reference MEM2 equals exactly
2535 the address of memory reference MEM1 plus DELTA. Return true if
2536 we can prove this to be the case, false otherwise. */
2539 s390_offset_p (rtx mem1
, rtx mem2
, rtx delta
)
2541 rtx addr1
, addr2
, addr_delta
;
2543 if (GET_CODE (mem1
) != MEM
|| GET_CODE (mem2
) != MEM
)
2546 addr1
= XEXP (mem1
, 0);
2547 addr2
= XEXP (mem2
, 0);
2549 addr_delta
= simplify_binary_operation (MINUS
, Pmode
, addr2
, addr1
);
2550 if (!addr_delta
|| !rtx_equal_p (addr_delta
, delta
))
2556 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2559 s390_expand_logical_operator (enum rtx_code code
, machine_mode mode
,
2562 machine_mode wmode
= mode
;
2563 rtx dst
= operands
[0];
2564 rtx src1
= operands
[1];
2565 rtx src2
= operands
[2];
2568 /* If we cannot handle the operation directly, use a temp register. */
2569 if (!s390_logical_operator_ok_p (operands
))
2570 dst
= gen_reg_rtx (mode
);
2572 /* QImode and HImode patterns make sense only if we have a destination
2573 in memory. Otherwise perform the operation in SImode. */
2574 if ((mode
== QImode
|| mode
== HImode
) && GET_CODE (dst
) != MEM
)
2577 /* Widen operands if required. */
2580 if (GET_CODE (dst
) == SUBREG
2581 && (tem
= simplify_subreg (wmode
, dst
, mode
, 0)) != 0)
2583 else if (REG_P (dst
))
2584 dst
= gen_rtx_SUBREG (wmode
, dst
, 0);
2586 dst
= gen_reg_rtx (wmode
);
2588 if (GET_CODE (src1
) == SUBREG
2589 && (tem
= simplify_subreg (wmode
, src1
, mode
, 0)) != 0)
2591 else if (GET_MODE (src1
) != VOIDmode
)
2592 src1
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src1
), 0);
2594 if (GET_CODE (src2
) == SUBREG
2595 && (tem
= simplify_subreg (wmode
, src2
, mode
, 0)) != 0)
2597 else if (GET_MODE (src2
) != VOIDmode
)
2598 src2
= gen_rtx_SUBREG (wmode
, force_reg (mode
, src2
), 0);
2601 /* Emit the instruction. */
2602 op
= gen_rtx_SET (dst
, gen_rtx_fmt_ee (code
, wmode
, src1
, src2
));
2603 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
2604 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
2606 /* Fix up the destination if needed. */
2607 if (dst
!= operands
[0])
2608 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
2611 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2614 s390_logical_operator_ok_p (rtx
*operands
)
2616 /* If the destination operand is in memory, it needs to coincide
2617 with one of the source operands. After reload, it has to be
2618 the first source operand. */
2619 if (GET_CODE (operands
[0]) == MEM
)
2620 return rtx_equal_p (operands
[0], operands
[1])
2621 || (!reload_completed
&& rtx_equal_p (operands
[0], operands
[2]));
2626 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2627 operand IMMOP to switch from SS to SI type instructions. */
2630 s390_narrow_logical_operator (enum rtx_code code
, rtx
*memop
, rtx
*immop
)
2632 int def
= code
== AND
? -1 : 0;
2636 gcc_assert (GET_CODE (*memop
) == MEM
);
2637 gcc_assert (!MEM_VOLATILE_P (*memop
));
2639 mask
= s390_extract_part (*immop
, QImode
, def
);
2640 part
= s390_single_part (*immop
, GET_MODE (*memop
), QImode
, def
);
2641 gcc_assert (part
>= 0);
2643 *memop
= adjust_address (*memop
, QImode
, part
);
2644 *immop
= gen_int_mode (mask
, QImode
);
2648 /* How to allocate a 'struct machine_function'. */
2650 static struct machine_function
*
2651 s390_init_machine_status (void)
2653 return ggc_cleared_alloc
<machine_function
> ();
2656 /* Map for smallest class containing reg regno. */
2658 const enum reg_class regclass_map
[FIRST_PSEUDO_REGISTER
] =
2659 { GENERAL_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 0 */
2660 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 4 */
2661 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 8 */
2662 ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, ADDR_REGS
, /* 12 */
2663 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 16 */
2664 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 20 */
2665 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 24 */
2666 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
, /* 28 */
2667 ADDR_REGS
, CC_REGS
, ADDR_REGS
, ADDR_REGS
, /* 32 */
2668 ACCESS_REGS
, ACCESS_REGS
, VEC_REGS
, VEC_REGS
, /* 36 */
2669 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 40 */
2670 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 44 */
2671 VEC_REGS
, VEC_REGS
, VEC_REGS
, VEC_REGS
, /* 48 */
2672 VEC_REGS
, VEC_REGS
/* 52 */
2675 /* Return attribute type of insn. */
2677 static enum attr_type
2678 s390_safe_attr_type (rtx_insn
*insn
)
2680 if (recog_memoized (insn
) >= 0)
2681 return get_attr_type (insn
);
2686 /* Return true if DISP is a valid short displacement. */
2689 s390_short_displacement (rtx disp
)
2691 /* No displacement is OK. */
2695 /* Without the long displacement facility we don't need to
2696 distingiush between long and short displacement. */
2697 if (!TARGET_LONG_DISPLACEMENT
)
2700 /* Integer displacement in range. */
2701 if (GET_CODE (disp
) == CONST_INT
)
2702 return INTVAL (disp
) >= 0 && INTVAL (disp
) < 4096;
2704 /* GOT offset is not OK, the GOT can be large. */
2705 if (GET_CODE (disp
) == CONST
2706 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
2707 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
2708 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTNTPOFF
))
2711 /* All other symbolic constants are literal pool references,
2712 which are OK as the literal pool must be small. */
2713 if (GET_CODE (disp
) == CONST
)
2719 /* Decompose a RTL expression ADDR for a memory address into
2720 its components, returned in OUT.
2722 Returns false if ADDR is not a valid memory address, true
2723 otherwise. If OUT is NULL, don't return the components,
2724 but check for validity only.
2726 Note: Only addresses in canonical form are recognized.
2727 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2728 canonical form so that they will be recognized. */
2731 s390_decompose_address (rtx addr
, struct s390_address
*out
)
2733 HOST_WIDE_INT offset
= 0;
2734 rtx base
= NULL_RTX
;
2735 rtx indx
= NULL_RTX
;
2736 rtx disp
= NULL_RTX
;
2738 bool pointer
= false;
2739 bool base_ptr
= false;
2740 bool indx_ptr
= false;
2741 bool literal_pool
= false;
2743 /* We may need to substitute the literal pool base register into the address
2744 below. However, at this point we do not know which register is going to
2745 be used as base, so we substitute the arg pointer register. This is going
2746 to be treated as holding a pointer below -- it shouldn't be used for any
2748 rtx fake_pool_base
= gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
);
2750 /* Decompose address into base + index + displacement. */
2752 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == UNSPEC
)
2755 else if (GET_CODE (addr
) == PLUS
)
2757 rtx op0
= XEXP (addr
, 0);
2758 rtx op1
= XEXP (addr
, 1);
2759 enum rtx_code code0
= GET_CODE (op0
);
2760 enum rtx_code code1
= GET_CODE (op1
);
2762 if (code0
== REG
|| code0
== UNSPEC
)
2764 if (code1
== REG
|| code1
== UNSPEC
)
2766 indx
= op0
; /* index + base */
2772 base
= op0
; /* base + displacement */
2777 else if (code0
== PLUS
)
2779 indx
= XEXP (op0
, 0); /* index + base + disp */
2780 base
= XEXP (op0
, 1);
2791 disp
= addr
; /* displacement */
2793 /* Extract integer part of displacement. */
2797 if (GET_CODE (disp
) == CONST_INT
)
2799 offset
= INTVAL (disp
);
2802 else if (GET_CODE (disp
) == CONST
2803 && GET_CODE (XEXP (disp
, 0)) == PLUS
2804 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
2806 offset
= INTVAL (XEXP (XEXP (disp
, 0), 1));
2807 disp
= XEXP (XEXP (disp
, 0), 0);
2811 /* Strip off CONST here to avoid special case tests later. */
2812 if (disp
&& GET_CODE (disp
) == CONST
)
2813 disp
= XEXP (disp
, 0);
2815 /* We can convert literal pool addresses to
2816 displacements by basing them off the base register. */
2817 if (disp
&& GET_CODE (disp
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (disp
))
2822 base
= fake_pool_base
, literal_pool
= true;
2824 /* Mark up the displacement. */
2825 disp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, disp
),
2826 UNSPEC_LTREL_OFFSET
);
2829 /* Validate base register. */
2832 if (GET_CODE (base
) == UNSPEC
)
2833 switch (XINT (base
, 1))
2837 disp
= gen_rtx_UNSPEC (Pmode
,
2838 gen_rtvec (1, XVECEXP (base
, 0, 0)),
2839 UNSPEC_LTREL_OFFSET
);
2843 base
= XVECEXP (base
, 0, 1);
2846 case UNSPEC_LTREL_BASE
:
2847 if (XVECLEN (base
, 0) == 1)
2848 base
= fake_pool_base
, literal_pool
= true;
2850 base
= XVECEXP (base
, 0, 1);
2857 if (!REG_P (base
) || GET_MODE (base
) != Pmode
)
2860 if (REGNO (base
) == STACK_POINTER_REGNUM
2861 || REGNO (base
) == FRAME_POINTER_REGNUM
2862 || ((reload_completed
|| reload_in_progress
)
2863 && frame_pointer_needed
2864 && REGNO (base
) == HARD_FRAME_POINTER_REGNUM
)
2865 || REGNO (base
) == ARG_POINTER_REGNUM
2867 && REGNO (base
) == PIC_OFFSET_TABLE_REGNUM
))
2868 pointer
= base_ptr
= true;
2870 if ((reload_completed
|| reload_in_progress
)
2871 && base
== cfun
->machine
->base_reg
)
2872 pointer
= base_ptr
= literal_pool
= true;
2875 /* Validate index register. */
2878 if (GET_CODE (indx
) == UNSPEC
)
2879 switch (XINT (indx
, 1))
2883 disp
= gen_rtx_UNSPEC (Pmode
,
2884 gen_rtvec (1, XVECEXP (indx
, 0, 0)),
2885 UNSPEC_LTREL_OFFSET
);
2889 indx
= XVECEXP (indx
, 0, 1);
2892 case UNSPEC_LTREL_BASE
:
2893 if (XVECLEN (indx
, 0) == 1)
2894 indx
= fake_pool_base
, literal_pool
= true;
2896 indx
= XVECEXP (indx
, 0, 1);
2903 if (!REG_P (indx
) || GET_MODE (indx
) != Pmode
)
2906 if (REGNO (indx
) == STACK_POINTER_REGNUM
2907 || REGNO (indx
) == FRAME_POINTER_REGNUM
2908 || ((reload_completed
|| reload_in_progress
)
2909 && frame_pointer_needed
2910 && REGNO (indx
) == HARD_FRAME_POINTER_REGNUM
)
2911 || REGNO (indx
) == ARG_POINTER_REGNUM
2913 && REGNO (indx
) == PIC_OFFSET_TABLE_REGNUM
))
2914 pointer
= indx_ptr
= true;
2916 if ((reload_completed
|| reload_in_progress
)
2917 && indx
== cfun
->machine
->base_reg
)
2918 pointer
= indx_ptr
= literal_pool
= true;
2921 /* Prefer to use pointer as base, not index. */
2922 if (base
&& indx
&& !base_ptr
2923 && (indx_ptr
|| (!REG_POINTER (base
) && REG_POINTER (indx
))))
2930 /* Validate displacement. */
2933 /* If virtual registers are involved, the displacement will change later
2934 anyway as the virtual registers get eliminated. This could make a
2935 valid displacement invalid, but it is more likely to make an invalid
2936 displacement valid, because we sometimes access the register save area
2937 via negative offsets to one of those registers.
2938 Thus we don't check the displacement for validity here. If after
2939 elimination the displacement turns out to be invalid after all,
2940 this is fixed up by reload in any case. */
2941 /* LRA maintains always displacements up to date and we need to
2942 know the displacement is right during all LRA not only at the
2943 final elimination. */
2945 || (base
!= arg_pointer_rtx
2946 && indx
!= arg_pointer_rtx
2947 && base
!= return_address_pointer_rtx
2948 && indx
!= return_address_pointer_rtx
2949 && base
!= frame_pointer_rtx
2950 && indx
!= frame_pointer_rtx
2951 && base
!= virtual_stack_vars_rtx
2952 && indx
!= virtual_stack_vars_rtx
))
2953 if (!DISP_IN_RANGE (offset
))
2958 /* All the special cases are pointers. */
2961 /* In the small-PIC case, the linker converts @GOT
2962 and @GOTNTPOFF offsets to possible displacements. */
2963 if (GET_CODE (disp
) == UNSPEC
2964 && (XINT (disp
, 1) == UNSPEC_GOT
2965 || XINT (disp
, 1) == UNSPEC_GOTNTPOFF
)
2971 /* Accept pool label offsets. */
2972 else if (GET_CODE (disp
) == UNSPEC
2973 && XINT (disp
, 1) == UNSPEC_POOL_OFFSET
)
2976 /* Accept literal pool references. */
2977 else if (GET_CODE (disp
) == UNSPEC
2978 && XINT (disp
, 1) == UNSPEC_LTREL_OFFSET
)
2980 /* In case CSE pulled a non literal pool reference out of
2981 the pool we have to reject the address. This is
2982 especially important when loading the GOT pointer on non
2983 zarch CPUs. In this case the literal pool contains an lt
2984 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2985 will most likely exceed the displacement. */
2986 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
2987 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp
, 0, 0)))
2990 orig_disp
= gen_rtx_CONST (Pmode
, disp
);
2993 /* If we have an offset, make sure it does not
2994 exceed the size of the constant pool entry. */
2995 rtx sym
= XVECEXP (disp
, 0, 0);
2996 if (offset
>= GET_MODE_SIZE (get_pool_mode (sym
)))
2999 orig_disp
= plus_constant (Pmode
, orig_disp
, offset
);
3014 out
->disp
= orig_disp
;
3015 out
->pointer
= pointer
;
3016 out
->literal_pool
= literal_pool
;
3022 /* Decompose a RTL expression OP for an address style operand into its
3023 components, and return the base register in BASE and the offset in
3024 OFFSET. While OP looks like an address it is never supposed to be
3027 Return true if OP is a valid address operand, false if not. */
3030 s390_decompose_addrstyle_without_index (rtx op
, rtx
*base
,
3031 HOST_WIDE_INT
*offset
)
3035 /* We can have an integer constant, an address register,
3036 or a sum of the two. */
3037 if (CONST_SCALAR_INT_P (op
))
3042 if (op
&& GET_CODE (op
) == PLUS
&& CONST_SCALAR_INT_P (XEXP (op
, 1)))
3047 while (op
&& GET_CODE (op
) == SUBREG
)
3048 op
= SUBREG_REG (op
);
3050 if (op
&& GET_CODE (op
) != REG
)
3055 if (off
== NULL_RTX
)
3057 else if (CONST_INT_P (off
))
3058 *offset
= INTVAL (off
);
3059 else if (CONST_WIDE_INT_P (off
))
3060 /* The offset will anyway be cut down to 12 bits so take just
3061 the lowest order chunk of the wide int. */
3062 *offset
= CONST_WIDE_INT_ELT (off
, 0);
3073 /* Return true if CODE is a valid address without index. */
3076 s390_legitimate_address_without_index_p (rtx op
)
3078 struct s390_address addr
;
3080 if (!s390_decompose_address (XEXP (op
, 0), &addr
))
3089 /* Return TRUE if ADDR is an operand valid for a load/store relative
3090 instruction. Be aware that the alignment of the operand needs to
3091 be checked separately.
3092 Valid addresses are single references or a sum of a reference and a
3093 constant integer. Return these parts in SYMREF and ADDEND. You can
3094 pass NULL in REF and/or ADDEND if you are not interested in these
3095 values. Literal pool references are *not* considered symbol
3099 s390_loadrelative_operand_p (rtx addr
, rtx
*symref
, HOST_WIDE_INT
*addend
)
3101 HOST_WIDE_INT tmpaddend
= 0;
3103 if (GET_CODE (addr
) == CONST
)
3104 addr
= XEXP (addr
, 0);
3106 if (GET_CODE (addr
) == PLUS
)
3108 if (!CONST_INT_P (XEXP (addr
, 1)))
3111 tmpaddend
= INTVAL (XEXP (addr
, 1));
3112 addr
= XEXP (addr
, 0);
3115 if ((GET_CODE (addr
) == SYMBOL_REF
&& !CONSTANT_POOL_ADDRESS_P (addr
))
3116 || (GET_CODE (addr
) == UNSPEC
3117 && (XINT (addr
, 1) == UNSPEC_GOTENT
3118 || (TARGET_CPU_ZARCH
&& XINT (addr
, 1) == UNSPEC_PLT
))))
3123 *addend
= tmpaddend
;
3130 /* Return true if the address in OP is valid for constraint letter C
3131 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3132 pool MEMs should be accepted. Only the Q, R, S, T constraint
3133 letters are allowed for C. */
3136 s390_check_qrst_address (char c
, rtx op
, bool lit_pool_ok
)
3138 struct s390_address addr
;
3139 bool decomposed
= false;
3141 if (!address_operand (op
, GET_MODE (op
)))
3144 /* This check makes sure that no symbolic address (except literal
3145 pool references) are accepted by the R or T constraints. */
3146 if (s390_loadrelative_operand_p (op
, NULL
, NULL
))
3149 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3152 if (!s390_decompose_address (op
, &addr
))
3154 if (addr
.literal_pool
)
3159 /* With reload, we sometimes get intermediate address forms that are
3160 actually invalid as-is, but we need to accept them in the most
3161 generic cases below ('R' or 'T'), since reload will in fact fix
3162 them up. LRA behaves differently here; we never see such forms,
3163 but on the other hand, we need to strictly reject every invalid
3164 address form. Perform this check right up front. */
3165 if (lra_in_progress
)
3167 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3174 case 'Q': /* no index short displacement */
3175 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3179 if (!s390_short_displacement (addr
.disp
))
3183 case 'R': /* with index short displacement */
3184 if (TARGET_LONG_DISPLACEMENT
)
3186 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3188 if (!s390_short_displacement (addr
.disp
))
3191 /* Any invalid address here will be fixed up by reload,
3192 so accept it for the most generic constraint. */
3195 case 'S': /* no index long displacement */
3196 if (!decomposed
&& !s390_decompose_address (op
, &addr
))
3202 case 'T': /* with index long displacement */
3203 /* Any invalid address here will be fixed up by reload,
3204 so accept it for the most generic constraint. */
3214 /* Evaluates constraint strings described by the regular expression
3215 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3216 the constraint given in STR, or 0 else. */
3219 s390_mem_constraint (const char *str
, rtx op
)
3226 /* Check for offsettable variants of memory constraints. */
3227 if (!MEM_P (op
) || MEM_VOLATILE_P (op
))
3229 if ((reload_completed
|| reload_in_progress
)
3230 ? !offsettable_memref_p (op
) : !offsettable_nonstrict_memref_p (op
))
3232 return s390_check_qrst_address (str
[1], XEXP (op
, 0), true);
3234 /* Check for non-literal-pool variants of memory constraints. */
3237 return s390_check_qrst_address (str
[1], XEXP (op
, 0), false);
3242 if (GET_CODE (op
) != MEM
)
3244 return s390_check_qrst_address (c
, XEXP (op
, 0), true);
3246 /* Simply check for the basic form of a shift count. Reload will
3247 take care of making sure we have a proper base register. */
3248 if (!s390_decompose_addrstyle_without_index (op
, NULL
, NULL
))
3252 return s390_check_qrst_address (str
[1], op
, true);
3260 /* Evaluates constraint strings starting with letter O. Input
3261 parameter C is the second letter following the "O" in the constraint
3262 string. Returns 1 if VALUE meets the respective constraint and 0
3266 s390_O_constraint_str (const char c
, HOST_WIDE_INT value
)
3274 return trunc_int_for_mode (value
, SImode
) == value
;
3278 || s390_single_part (GEN_INT (value
), DImode
, SImode
, 0) == 1;
3281 return s390_single_part (GEN_INT (value
- 1), DImode
, SImode
, -1) == 1;
3289 /* Evaluates constraint strings starting with letter N. Parameter STR
3290 contains the letters following letter "N" in the constraint string.
3291 Returns true if VALUE matches the constraint. */
3294 s390_N_constraint_str (const char *str
, HOST_WIDE_INT value
)
3296 machine_mode mode
, part_mode
;
3298 int part
, part_goal
;
3304 part_goal
= str
[0] - '0';
3348 if (GET_MODE_SIZE (mode
) <= GET_MODE_SIZE (part_mode
))
3351 part
= s390_single_part (GEN_INT (value
), mode
, part_mode
, def
);
3354 if (part_goal
!= -1 && part_goal
!= part
)
3361 /* Returns true if the input parameter VALUE is a float zero. */
3364 s390_float_const_zero_p (rtx value
)
3366 return (GET_MODE_CLASS (GET_MODE (value
)) == MODE_FLOAT
3367 && value
== CONST0_RTX (GET_MODE (value
)));
3370 /* Implement TARGET_REGISTER_MOVE_COST. */
3373 s390_register_move_cost (machine_mode mode
,
3374 reg_class_t from
, reg_class_t to
)
3376 /* On s390, copy between fprs and gprs is expensive. */
3378 /* It becomes somewhat faster having ldgr/lgdr. */
3379 if (TARGET_Z10
&& GET_MODE_SIZE (mode
) == 8)
3381 /* ldgr is single cycle. */
3382 if (reg_classes_intersect_p (from
, GENERAL_REGS
)
3383 && reg_classes_intersect_p (to
, FP_REGS
))
3385 /* lgdr needs 3 cycles. */
3386 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
3387 && reg_classes_intersect_p (from
, FP_REGS
))
3391 /* Otherwise copying is done via memory. */
3392 if ((reg_classes_intersect_p (from
, GENERAL_REGS
)
3393 && reg_classes_intersect_p (to
, FP_REGS
))
3394 || (reg_classes_intersect_p (from
, FP_REGS
)
3395 && reg_classes_intersect_p (to
, GENERAL_REGS
)))
3401 /* Implement TARGET_MEMORY_MOVE_COST. */
3404 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
3405 reg_class_t rclass ATTRIBUTE_UNUSED
,
3406 bool in ATTRIBUTE_UNUSED
)
3411 /* Compute a (partial) cost for rtx X. Return true if the complete
3412 cost has been computed, and false if subexpressions should be
3413 scanned. In either case, *TOTAL contains the cost result. The
3414 initial value of *TOTAL is the default value computed by
3415 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3416 code of the superexpression of x. */
3419 s390_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
3420 int opno ATTRIBUTE_UNUSED
,
3421 int *total
, bool speed ATTRIBUTE_UNUSED
)
3423 int code
= GET_CODE (x
);
3431 case CONST_WIDE_INT
:
3438 /* Without this a conditional move instruction would be
3439 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3440 comparison operator). That's a bit pessimistic. */
3442 if (!TARGET_Z196
|| GET_CODE (SET_SRC (x
)) != IF_THEN_ELSE
)
3445 rtx cond
= XEXP (SET_SRC (x
), 0);
3447 if (!CC_REG_P (XEXP (cond
, 0)) || !CONST_INT_P (XEXP (cond
, 1)))
3450 /* It is going to be a load/store on condition. Make it
3451 slightly more expensive than a normal load. */
3452 *total
= COSTS_N_INSNS (1) + 1;
3454 rtx dst
= SET_DEST (x
);
3455 rtx then
= XEXP (SET_SRC (x
), 1);
3456 rtx els
= XEXP (SET_SRC (x
), 2);
3458 /* It is a real IF-THEN-ELSE. An additional move will be
3459 needed to implement that. */
3460 if (reload_completed
3461 && !rtx_equal_p (dst
, then
)
3462 && !rtx_equal_p (dst
, els
))
3463 *total
+= COSTS_N_INSNS (1) / 2;
3465 /* A minor penalty for constants we cannot directly handle. */
3466 if ((CONST_INT_P (then
) || CONST_INT_P (els
))
3467 && (!TARGET_Z13
|| MEM_P (dst
)
3468 || (CONST_INT_P (then
) && !satisfies_constraint_K (then
))
3469 || (CONST_INT_P (els
) && !satisfies_constraint_K (els
))))
3470 *total
+= COSTS_N_INSNS (1) / 2;
3472 /* A store on condition can only handle register src operands. */
3473 if (MEM_P (dst
) && (!REG_P (then
) || !REG_P (els
)))
3474 *total
+= COSTS_N_INSNS (1) / 2;
3480 if (GET_CODE (XEXP (x
, 0)) == AND
3481 && GET_CODE (XEXP (x
, 1)) == ASHIFT
3482 && REG_P (XEXP (XEXP (x
, 0), 0))
3483 && REG_P (XEXP (XEXP (x
, 1), 0))
3484 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3485 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3486 && (UINTVAL (XEXP (XEXP (x
, 0), 1)) ==
3487 (HOST_WIDE_INT_1U
<< UINTVAL (XEXP (XEXP (x
, 1), 1))) - 1))
3489 *total
= COSTS_N_INSNS (2);
3493 /* ~AND on a 128 bit mode. This can be done using a vector
3496 && GET_CODE (XEXP (x
, 0)) == NOT
3497 && GET_CODE (XEXP (x
, 1)) == NOT
3498 && REG_P (XEXP (XEXP (x
, 0), 0))
3499 && REG_P (XEXP (XEXP (x
, 1), 0))
3500 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x
, 0), 0))) == 16
3501 && s390_hard_regno_mode_ok (VR0_REGNUM
,
3502 GET_MODE (XEXP (XEXP (x
, 0), 0))))
3504 *total
= COSTS_N_INSNS (1);
3517 *total
= COSTS_N_INSNS (1);
3522 *total
= COSTS_N_INSNS (1);
3530 rtx left
= XEXP (x
, 0);
3531 rtx right
= XEXP (x
, 1);
3532 if (GET_CODE (right
) == CONST_INT
3533 && CONST_OK_FOR_K (INTVAL (right
)))
3534 *total
= s390_cost
->mhi
;
3535 else if (GET_CODE (left
) == SIGN_EXTEND
)
3536 *total
= s390_cost
->mh
;
3538 *total
= s390_cost
->ms
; /* msr, ms, msy */
3543 rtx left
= XEXP (x
, 0);
3544 rtx right
= XEXP (x
, 1);
3547 if (GET_CODE (right
) == CONST_INT
3548 && CONST_OK_FOR_K (INTVAL (right
)))
3549 *total
= s390_cost
->mghi
;
3550 else if (GET_CODE (left
) == SIGN_EXTEND
)
3551 *total
= s390_cost
->msgf
;
3553 *total
= s390_cost
->msg
; /* msgr, msg */
3555 else /* TARGET_31BIT */
3557 if (GET_CODE (left
) == SIGN_EXTEND
3558 && GET_CODE (right
) == SIGN_EXTEND
)
3559 /* mulsidi case: mr, m */
3560 *total
= s390_cost
->m
;
3561 else if (GET_CODE (left
) == ZERO_EXTEND
3562 && GET_CODE (right
) == ZERO_EXTEND
3563 && TARGET_CPU_ZARCH
)
3564 /* umulsidi case: ml, mlr */
3565 *total
= s390_cost
->ml
;
3567 /* Complex calculation is required. */
3568 *total
= COSTS_N_INSNS (40);
3574 *total
= s390_cost
->mult_df
;
3577 *total
= s390_cost
->mxbr
;
3588 *total
= s390_cost
->madbr
;
3591 *total
= s390_cost
->maebr
;
3596 /* Negate in the third argument is free: FMSUB. */
3597 if (GET_CODE (XEXP (x
, 2)) == NEG
)
3599 *total
+= (rtx_cost (XEXP (x
, 0), mode
, FMA
, 0, speed
)
3600 + rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
)
3601 + rtx_cost (XEXP (XEXP (x
, 2), 0), mode
, FMA
, 2, speed
));
3608 if (mode
== TImode
) /* 128 bit division */
3609 *total
= s390_cost
->dlgr
;
3610 else if (mode
== DImode
)
3612 rtx right
= XEXP (x
, 1);
3613 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3614 *total
= s390_cost
->dlr
;
3615 else /* 64 by 64 bit division */
3616 *total
= s390_cost
->dlgr
;
3618 else if (mode
== SImode
) /* 32 bit division */
3619 *total
= s390_cost
->dlr
;
3626 rtx right
= XEXP (x
, 1);
3627 if (GET_CODE (right
) == ZERO_EXTEND
) /* 64 by 32 bit division */
3629 *total
= s390_cost
->dsgfr
;
3631 *total
= s390_cost
->dr
;
3632 else /* 64 by 64 bit division */
3633 *total
= s390_cost
->dsgr
;
3635 else if (mode
== SImode
) /* 32 bit division */
3636 *total
= s390_cost
->dlr
;
3637 else if (mode
== SFmode
)
3639 *total
= s390_cost
->debr
;
3641 else if (mode
== DFmode
)
3643 *total
= s390_cost
->ddbr
;
3645 else if (mode
== TFmode
)
3647 *total
= s390_cost
->dxbr
;
3653 *total
= s390_cost
->sqebr
;
3654 else if (mode
== DFmode
)
3655 *total
= s390_cost
->sqdbr
;
3657 *total
= s390_cost
->sqxbr
;
3662 if (outer_code
== MULT
|| outer_code
== DIV
|| outer_code
== MOD
3663 || outer_code
== PLUS
|| outer_code
== MINUS
3664 || outer_code
== COMPARE
)
3669 *total
= COSTS_N_INSNS (1);
3670 if (GET_CODE (XEXP (x
, 0)) == AND
3671 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3672 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
3674 rtx op0
= XEXP (XEXP (x
, 0), 0);
3675 rtx op1
= XEXP (XEXP (x
, 0), 1);
3676 rtx op2
= XEXP (x
, 1);
3678 if (memory_operand (op0
, GET_MODE (op0
))
3679 && s390_tm_ccmode (op1
, op2
, 0) != VOIDmode
)
3681 if (register_operand (op0
, GET_MODE (op0
))
3682 && s390_tm_ccmode (op1
, op2
, 1) != VOIDmode
)
3692 /* Return the cost of an address rtx ADDR. */
3695 s390_address_cost (rtx addr
, machine_mode mode ATTRIBUTE_UNUSED
,
3696 addr_space_t as ATTRIBUTE_UNUSED
,
3697 bool speed ATTRIBUTE_UNUSED
)
3699 struct s390_address ad
;
3700 if (!s390_decompose_address (addr
, &ad
))
3703 return ad
.indx
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3706 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3708 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
3710 int misalign ATTRIBUTE_UNUSED
)
3712 switch (type_of_cost
)
3722 case cond_branch_not_taken
:
3724 case vec_promote_demote
:
3725 case unaligned_load
:
3726 case unaligned_store
:
3729 case cond_branch_taken
:
3733 return TYPE_VECTOR_SUBPARTS (vectype
) - 1;
3740 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3741 otherwise return 0. */
3744 tls_symbolic_operand (rtx op
)
3746 if (GET_CODE (op
) != SYMBOL_REF
)
3748 return SYMBOL_REF_TLS_MODEL (op
);
3751 /* Split DImode access register reference REG (on 64-bit) into its constituent
3752 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3753 gen_highpart cannot be used as they assume all registers are word-sized,
3754 while our access registers have only half that size. */
3757 s390_split_access_reg (rtx reg
, rtx
*lo
, rtx
*hi
)
3759 gcc_assert (TARGET_64BIT
);
3760 gcc_assert (ACCESS_REG_P (reg
));
3761 gcc_assert (GET_MODE (reg
) == DImode
);
3762 gcc_assert (!(REGNO (reg
) & 1));
3764 *lo
= gen_rtx_REG (SImode
, REGNO (reg
) + 1);
3765 *hi
= gen_rtx_REG (SImode
, REGNO (reg
));
3768 /* Return true if OP contains a symbol reference */
3771 symbolic_reference_mentioned_p (rtx op
)
3776 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3779 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3780 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3786 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3787 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3791 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3798 /* Return true if OP contains a reference to a thread-local symbol. */
3801 tls_symbolic_reference_mentioned_p (rtx op
)
3806 if (GET_CODE (op
) == SYMBOL_REF
)
3807 return tls_symbolic_operand (op
);
3809 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3810 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3816 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3817 if (tls_symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3821 else if (fmt
[i
] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op
, i
)))
3829 /* Return true if OP is a legitimate general operand when
3830 generating PIC code. It is given that flag_pic is on
3831 and that OP satisfies CONSTANT_P. */
3834 legitimate_pic_operand_p (rtx op
)
3836 /* Accept all non-symbolic constants. */
3837 if (!SYMBOLIC_CONST (op
))
3840 /* Reject everything else; must be handled
3841 via emit_symbolic_move. */
3845 /* Returns true if the constant value OP is a legitimate general operand.
3846 It is given that OP satisfies CONSTANT_P. */
3849 s390_legitimate_constant_p (machine_mode mode
, rtx op
)
3851 if (TARGET_VX
&& VECTOR_MODE_P (mode
) && GET_CODE (op
) == CONST_VECTOR
)
3853 if (GET_MODE_SIZE (mode
) != 16)
3856 if (!satisfies_constraint_j00 (op
)
3857 && !satisfies_constraint_jm1 (op
)
3858 && !satisfies_constraint_jKK (op
)
3859 && !satisfies_constraint_jxx (op
)
3860 && !satisfies_constraint_jyy (op
))
3864 /* Accept all non-symbolic constants. */
3865 if (!SYMBOLIC_CONST (op
))
3868 /* Accept immediate LARL operands. */
3869 if (TARGET_CPU_ZARCH
&& larl_operand (op
, mode
))
3872 /* Thread-local symbols are never legal constants. This is
3873 so that emit_call knows that computing such addresses
3874 might require a function call. */
3875 if (TLS_SYMBOLIC_CONST (op
))
3878 /* In the PIC case, symbolic constants must *not* be
3879 forced into the literal pool. We accept them here,
3880 so that they will be handled by emit_symbolic_move. */
3884 /* All remaining non-PIC symbolic constants are
3885 forced into the literal pool. */
3889 /* Determine if it's legal to put X into the constant pool. This
3890 is not possible if X contains the address of a symbol that is
3891 not constant (TLS) or not known at final link time (PIC). */
3894 s390_cannot_force_const_mem (machine_mode mode
, rtx x
)
3896 switch (GET_CODE (x
))
3900 case CONST_WIDE_INT
:
3902 /* Accept all non-symbolic constants. */
3906 /* Labels are OK iff we are non-PIC. */
3907 return flag_pic
!= 0;
3910 /* 'Naked' TLS symbol references are never OK,
3911 non-TLS symbols are OK iff we are non-PIC. */
3912 if (tls_symbolic_operand (x
))
3915 return flag_pic
!= 0;
3918 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0));
3921 return s390_cannot_force_const_mem (mode
, XEXP (x
, 0))
3922 || s390_cannot_force_const_mem (mode
, XEXP (x
, 1));
3925 switch (XINT (x
, 1))
3927 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3928 case UNSPEC_LTREL_OFFSET
:
3936 case UNSPEC_GOTNTPOFF
:
3937 case UNSPEC_INDNTPOFF
:
3940 /* If the literal pool shares the code section, be put
3941 execute template placeholders into the pool as well. */
3943 return TARGET_CPU_ZARCH
;
3955 /* Returns true if the constant value OP is a legitimate general
3956 operand during and after reload. The difference to
3957 legitimate_constant_p is that this function will not accept
3958 a constant that would need to be forced to the literal pool
3959 before it can be used as operand.
3960 This function accepts all constants which can be loaded directly
3964 legitimate_reload_constant_p (rtx op
)
3966 /* Accept la(y) operands. */
3967 if (GET_CODE (op
) == CONST_INT
3968 && DISP_IN_RANGE (INTVAL (op
)))
3971 /* Accept l(g)hi/l(g)fi operands. */
3972 if (GET_CODE (op
) == CONST_INT
3973 && (CONST_OK_FOR_K (INTVAL (op
)) || CONST_OK_FOR_Os (INTVAL (op
))))
3976 /* Accept lliXX operands. */
3978 && GET_CODE (op
) == CONST_INT
3979 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
3980 && s390_single_part (op
, word_mode
, HImode
, 0) >= 0)
3984 && GET_CODE (op
) == CONST_INT
3985 && trunc_int_for_mode (INTVAL (op
), word_mode
) == INTVAL (op
)
3986 && s390_single_part (op
, word_mode
, SImode
, 0) >= 0)
3989 /* Accept larl operands. */
3990 if (TARGET_CPU_ZARCH
3991 && larl_operand (op
, VOIDmode
))
3994 /* Accept floating-point zero operands that fit into a single GPR. */
3995 if (GET_CODE (op
) == CONST_DOUBLE
3996 && s390_float_const_zero_p (op
)
3997 && GET_MODE_SIZE (GET_MODE (op
)) <= UNITS_PER_WORD
)
4000 /* Accept double-word operands that can be split. */
4001 if (GET_CODE (op
) == CONST_WIDE_INT
4002 || (GET_CODE (op
) == CONST_INT
4003 && trunc_int_for_mode (INTVAL (op
), word_mode
) != INTVAL (op
)))
4005 machine_mode dword_mode
= word_mode
== SImode
? DImode
: TImode
;
4006 rtx hi
= operand_subword (op
, 0, 0, dword_mode
);
4007 rtx lo
= operand_subword (op
, 1, 0, dword_mode
);
4008 return legitimate_reload_constant_p (hi
)
4009 && legitimate_reload_constant_p (lo
);
4012 /* Everything else cannot be handled without reload. */
4016 /* Returns true if the constant value OP is a legitimate fp operand
4017 during and after reload.
4018 This function accepts all constants which can be loaded directly
4022 legitimate_reload_fp_constant_p (rtx op
)
4024 /* Accept floating-point zero operands if the load zero instruction
4025 can be used. Prior to z196 the load fp zero instruction caused a
4026 performance penalty if the result is used as BFP number. */
4028 && GET_CODE (op
) == CONST_DOUBLE
4029 && s390_float_const_zero_p (op
))
4035 /* Returns true if the constant value OP is a legitimate vector operand
4036 during and after reload.
4037 This function accepts all constants which can be loaded directly
4041 legitimate_reload_vector_constant_p (rtx op
)
4043 if (TARGET_VX
&& GET_MODE_SIZE (GET_MODE (op
)) == 16
4044 && (satisfies_constraint_j00 (op
)
4045 || satisfies_constraint_jm1 (op
)
4046 || satisfies_constraint_jKK (op
)
4047 || satisfies_constraint_jxx (op
)
4048 || satisfies_constraint_jyy (op
)))
4054 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4055 return the class of reg to actually use. */
4058 s390_preferred_reload_class (rtx op
, reg_class_t rclass
)
4060 switch (GET_CODE (op
))
4062 /* Constants we cannot reload into general registers
4063 must be forced into the literal pool. */
4067 case CONST_WIDE_INT
:
4068 if (reg_class_subset_p (GENERAL_REGS
, rclass
)
4069 && legitimate_reload_constant_p (op
))
4070 return GENERAL_REGS
;
4071 else if (reg_class_subset_p (ADDR_REGS
, rclass
)
4072 && legitimate_reload_constant_p (op
))
4074 else if (reg_class_subset_p (FP_REGS
, rclass
)
4075 && legitimate_reload_fp_constant_p (op
))
4077 else if (reg_class_subset_p (VEC_REGS
, rclass
)
4078 && legitimate_reload_vector_constant_p (op
))
4083 /* If a symbolic constant or a PLUS is reloaded,
4084 it is most likely being used as an address, so
4085 prefer ADDR_REGS. If 'class' is not a superset
4086 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4088 /* Symrefs cannot be pushed into the literal pool with -fPIC
4089 so we *MUST NOT* return NO_REGS for these cases
4090 (s390_cannot_force_const_mem will return true).
4092 On the other hand we MUST return NO_REGS for symrefs with
4093 invalid addend which might have been pushed to the literal
4094 pool (no -fPIC). Usually we would expect them to be
4095 handled via secondary reload but this does not happen if
4096 they are used as literal pool slot replacement in reload
4097 inheritance (see emit_input_reload_insns). */
4098 if (TARGET_CPU_ZARCH
4099 && GET_CODE (XEXP (op
, 0)) == PLUS
4100 && GET_CODE (XEXP (XEXP(op
, 0), 0)) == SYMBOL_REF
4101 && GET_CODE (XEXP (XEXP(op
, 0), 1)) == CONST_INT
)
4103 if (flag_pic
&& reg_class_subset_p (ADDR_REGS
, rclass
))
4111 if (!legitimate_reload_constant_p (op
))
4115 /* load address will be used. */
4116 if (reg_class_subset_p (ADDR_REGS
, rclass
))
4128 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4129 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4133 s390_check_symref_alignment (rtx addr
, HOST_WIDE_INT alignment
)
4135 HOST_WIDE_INT addend
;
4138 /* The "required alignment" might be 0 (e.g. for certain structs
4139 accessed via BLKmode). Early abort in this case, as well as when
4140 an alignment > 8 is required. */
4141 if (alignment
< 2 || alignment
> 8)
4144 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4147 if (addend
& (alignment
- 1))
4150 if (GET_CODE (symref
) == SYMBOL_REF
)
4152 /* We have load-relative instructions for 2-byte, 4-byte, and
4153 8-byte alignment so allow only these. */
4156 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref
);
4157 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref
);
4158 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref
);
4159 default: return false;
4163 if (GET_CODE (symref
) == UNSPEC
4164 && alignment
<= UNITS_PER_LONG
)
4170 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4171 operand SCRATCH is used to reload the even part of the address and
4175 s390_reload_larl_operand (rtx reg
, rtx addr
, rtx scratch
)
4177 HOST_WIDE_INT addend
;
4180 if (!s390_loadrelative_operand_p (addr
, &symref
, &addend
))
4184 /* Easy case. The addend is even so larl will do fine. */
4185 emit_move_insn (reg
, addr
);
4188 /* We can leave the scratch register untouched if the target
4189 register is a valid base register. */
4190 if (REGNO (reg
) < FIRST_PSEUDO_REGISTER
4191 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
)
4194 gcc_assert (REGNO (scratch
) < FIRST_PSEUDO_REGISTER
);
4195 gcc_assert (REGNO_REG_CLASS (REGNO (scratch
)) == ADDR_REGS
);
4198 emit_move_insn (scratch
,
4199 gen_rtx_CONST (Pmode
,
4200 gen_rtx_PLUS (Pmode
, symref
,
4201 GEN_INT (addend
- 1))));
4203 emit_move_insn (scratch
, symref
);
4205 /* Increment the address using la in order to avoid clobbering cc. */
4206 s390_load_address (reg
, gen_rtx_PLUS (Pmode
, scratch
, const1_rtx
));
4210 /* Generate what is necessary to move between REG and MEM using
4211 SCRATCH. The direction is given by TOMEM. */
4214 s390_reload_symref_address (rtx reg
, rtx mem
, rtx scratch
, bool tomem
)
4216 /* Reload might have pulled a constant out of the literal pool.
4217 Force it back in. */
4218 if (CONST_INT_P (mem
) || GET_CODE (mem
) == CONST_DOUBLE
4219 || GET_CODE (mem
) == CONST_WIDE_INT
4220 || GET_CODE (mem
) == CONST_VECTOR
4221 || GET_CODE (mem
) == CONST
)
4222 mem
= force_const_mem (GET_MODE (reg
), mem
);
4224 gcc_assert (MEM_P (mem
));
4226 /* For a load from memory we can leave the scratch register
4227 untouched if the target register is a valid base register. */
4229 && REGNO (reg
) < FIRST_PSEUDO_REGISTER
4230 && REGNO_REG_CLASS (REGNO (reg
)) == ADDR_REGS
4231 && GET_MODE (reg
) == GET_MODE (scratch
))
4234 /* Load address into scratch register. Since we can't have a
4235 secondary reload for a secondary reload we have to cover the case
4236 where larl would need a secondary reload here as well. */
4237 s390_reload_larl_operand (scratch
, XEXP (mem
, 0), scratch
);
4239 /* Now we can use a standard load/store to do the move. */
4241 emit_move_insn (replace_equiv_address (mem
, scratch
), reg
);
4243 emit_move_insn (reg
, replace_equiv_address (mem
, scratch
));
4246 /* Inform reload about cases where moving X with a mode MODE to a register in
4247 RCLASS requires an extra scratch or immediate register. Return the class
4248 needed for the immediate register. */
4251 s390_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
4252 machine_mode mode
, secondary_reload_info
*sri
)
4254 enum reg_class rclass
= (enum reg_class
) rclass_i
;
4256 /* Intermediate register needed. */
4257 if (reg_classes_intersect_p (CC_REGS
, rclass
))
4258 return GENERAL_REGS
;
4262 /* The vst/vl vector move instructions allow only for short
4265 && GET_CODE (XEXP (x
, 0)) == PLUS
4266 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4267 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x
, 0), 1)))
4268 && reg_class_subset_p (rclass
, VEC_REGS
)
4269 && (!reg_class_subset_p (rclass
, FP_REGS
)
4270 || (GET_MODE_SIZE (mode
) > 8
4271 && s390_class_max_nregs (FP_REGS
, mode
) == 1)))
4274 sri
->icode
= (TARGET_64BIT
?
4275 CODE_FOR_reloaddi_la_in
:
4276 CODE_FOR_reloadsi_la_in
);
4278 sri
->icode
= (TARGET_64BIT
?
4279 CODE_FOR_reloaddi_la_out
:
4280 CODE_FOR_reloadsi_la_out
);
4286 HOST_WIDE_INT offset
;
4289 /* On z10 several optimizer steps may generate larl operands with
4292 && s390_loadrelative_operand_p (x
, &symref
, &offset
)
4294 && !SYMBOL_FLAG_NOTALIGN2_P (symref
)
4295 && (offset
& 1) == 1)
4296 sri
->icode
= ((mode
== DImode
) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4297 : CODE_FOR_reloadsi_larl_odd_addend_z10
);
4299 /* Handle all the (mem (symref)) accesses we cannot use the z10
4300 instructions for. */
4302 && s390_loadrelative_operand_p (XEXP (x
, 0), NULL
, NULL
)
4304 || !reg_class_subset_p (rclass
, GENERAL_REGS
)
4305 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
4306 || !s390_check_symref_alignment (XEXP (x
, 0),
4307 GET_MODE_SIZE (mode
))))
4309 #define __SECONDARY_RELOAD_CASE(M,m) \
4312 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4313 CODE_FOR_reload##m##di_tomem_z10; \
4315 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4316 CODE_FOR_reload##m##si_tomem_z10; \
4319 switch (GET_MODE (x
))
4321 __SECONDARY_RELOAD_CASE (QI
, qi
);
4322 __SECONDARY_RELOAD_CASE (HI
, hi
);
4323 __SECONDARY_RELOAD_CASE (SI
, si
);
4324 __SECONDARY_RELOAD_CASE (DI
, di
);
4325 __SECONDARY_RELOAD_CASE (TI
, ti
);
4326 __SECONDARY_RELOAD_CASE (SF
, sf
);
4327 __SECONDARY_RELOAD_CASE (DF
, df
);
4328 __SECONDARY_RELOAD_CASE (TF
, tf
);
4329 __SECONDARY_RELOAD_CASE (SD
, sd
);
4330 __SECONDARY_RELOAD_CASE (DD
, dd
);
4331 __SECONDARY_RELOAD_CASE (TD
, td
);
4332 __SECONDARY_RELOAD_CASE (V1QI
, v1qi
);
4333 __SECONDARY_RELOAD_CASE (V2QI
, v2qi
);
4334 __SECONDARY_RELOAD_CASE (V4QI
, v4qi
);
4335 __SECONDARY_RELOAD_CASE (V8QI
, v8qi
);
4336 __SECONDARY_RELOAD_CASE (V16QI
, v16qi
);
4337 __SECONDARY_RELOAD_CASE (V1HI
, v1hi
);
4338 __SECONDARY_RELOAD_CASE (V2HI
, v2hi
);
4339 __SECONDARY_RELOAD_CASE (V4HI
, v4hi
);
4340 __SECONDARY_RELOAD_CASE (V8HI
, v8hi
);
4341 __SECONDARY_RELOAD_CASE (V1SI
, v1si
);
4342 __SECONDARY_RELOAD_CASE (V2SI
, v2si
);
4343 __SECONDARY_RELOAD_CASE (V4SI
, v4si
);
4344 __SECONDARY_RELOAD_CASE (V1DI
, v1di
);
4345 __SECONDARY_RELOAD_CASE (V2DI
, v2di
);
4346 __SECONDARY_RELOAD_CASE (V1TI
, v1ti
);
4347 __SECONDARY_RELOAD_CASE (V1SF
, v1sf
);
4348 __SECONDARY_RELOAD_CASE (V2SF
, v2sf
);
4349 __SECONDARY_RELOAD_CASE (V4SF
, v4sf
);
4350 __SECONDARY_RELOAD_CASE (V1DF
, v1df
);
4351 __SECONDARY_RELOAD_CASE (V2DF
, v2df
);
4352 __SECONDARY_RELOAD_CASE (V1TF
, v1tf
);
4356 #undef __SECONDARY_RELOAD_CASE
4360 /* We need a scratch register when loading a PLUS expression which
4361 is not a legitimate operand of the LOAD ADDRESS instruction. */
4362 /* LRA can deal with transformation of plus op very well -- so we
4363 don't need to prompt LRA in this case. */
4364 if (! lra_in_progress
&& in_p
&& s390_plus_operand (x
, mode
))
4365 sri
->icode
= (TARGET_64BIT
?
4366 CODE_FOR_reloaddi_plus
: CODE_FOR_reloadsi_plus
);
4368 /* Performing a multiword move from or to memory we have to make sure the
4369 second chunk in memory is addressable without causing a displacement
4370 overflow. If that would be the case we calculate the address in
4371 a scratch register. */
4373 && GET_CODE (XEXP (x
, 0)) == PLUS
4374 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4375 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x
, 0), 1))
4376 + GET_MODE_SIZE (mode
) - 1))
4378 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4379 in a s_operand address since we may fallback to lm/stm. So we only
4380 have to care about overflows in the b+i+d case. */
4381 if ((reg_classes_intersect_p (GENERAL_REGS
, rclass
)
4382 && s390_class_max_nregs (GENERAL_REGS
, mode
) > 1
4383 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == PLUS
)
4384 /* For FP_REGS no lm/stm is available so this check is triggered
4385 for displacement overflows in b+i+d and b+d like addresses. */
4386 || (reg_classes_intersect_p (FP_REGS
, rclass
)
4387 && s390_class_max_nregs (FP_REGS
, mode
) > 1))
4390 sri
->icode
= (TARGET_64BIT
?
4391 CODE_FOR_reloaddi_la_in
:
4392 CODE_FOR_reloadsi_la_in
);
4394 sri
->icode
= (TARGET_64BIT
?
4395 CODE_FOR_reloaddi_la_out
:
4396 CODE_FOR_reloadsi_la_out
);
4400 /* A scratch address register is needed when a symbolic constant is
4401 copied to r0 compiling with -fPIC. In other cases the target
4402 register might be used as temporary (see legitimize_pic_address). */
4403 if (in_p
&& SYMBOLIC_CONST (x
) && flag_pic
== 2 && rclass
!= ADDR_REGS
)
4404 sri
->icode
= (TARGET_64BIT
?
4405 CODE_FOR_reloaddi_PIC_addr
:
4406 CODE_FOR_reloadsi_PIC_addr
);
4408 /* Either scratch or no register needed. */
4412 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4414 We need secondary memory to move data between GPRs and FPRs.
4416 - With DFP the ldgr lgdr instructions are available. Due to the
4417 different alignment we cannot use them for SFmode. For 31 bit a
4418 64 bit value in GPR would be a register pair so here we still
4419 need to go via memory.
4421 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4422 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4423 in full VRs so as before also on z13 we do these moves via
4426 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4429 s390_secondary_memory_needed (machine_mode mode
,
4430 reg_class_t class1
, reg_class_t class2
)
4432 return (((reg_classes_intersect_p (class1
, VEC_REGS
)
4433 && reg_classes_intersect_p (class2
, GENERAL_REGS
))
4434 || (reg_classes_intersect_p (class1
, GENERAL_REGS
)
4435 && reg_classes_intersect_p (class2
, VEC_REGS
)))
4436 && (!TARGET_DFP
|| !TARGET_64BIT
|| GET_MODE_SIZE (mode
) != 8)
4437 && (!TARGET_VX
|| (SCALAR_FLOAT_MODE_P (mode
)
4438 && GET_MODE_SIZE (mode
) > 8)));
4441 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4443 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4444 because the movsi and movsf patterns don't handle r/f moves. */
4447 s390_secondary_memory_needed_mode (machine_mode mode
)
4449 if (GET_MODE_BITSIZE (mode
) < 32)
4450 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
4454 /* Generate code to load SRC, which is PLUS that is not a
4455 legitimate operand for the LA instruction, into TARGET.
4456 SCRATCH may be used as scratch register. */
4459 s390_expand_plus_operand (rtx target
, rtx src
,
4463 struct s390_address ad
;
4465 /* src must be a PLUS; get its two operands. */
4466 gcc_assert (GET_CODE (src
) == PLUS
);
4467 gcc_assert (GET_MODE (src
) == Pmode
);
4469 /* Check if any of the two operands is already scheduled
4470 for replacement by reload. This can happen e.g. when
4471 float registers occur in an address. */
4472 sum1
= find_replacement (&XEXP (src
, 0));
4473 sum2
= find_replacement (&XEXP (src
, 1));
4474 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4476 /* If the address is already strictly valid, there's nothing to do. */
4477 if (!s390_decompose_address (src
, &ad
)
4478 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4479 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
4481 /* Otherwise, one of the operands cannot be an address register;
4482 we reload its value into the scratch register. */
4483 if (true_regnum (sum1
) < 1 || true_regnum (sum1
) > 15)
4485 emit_move_insn (scratch
, sum1
);
4488 if (true_regnum (sum2
) < 1 || true_regnum (sum2
) > 15)
4490 emit_move_insn (scratch
, sum2
);
4494 /* According to the way these invalid addresses are generated
4495 in reload.c, it should never happen (at least on s390) that
4496 *neither* of the PLUS components, after find_replacements
4497 was applied, is an address register. */
4498 if (sum1
== scratch
&& sum2
== scratch
)
4504 src
= gen_rtx_PLUS (Pmode
, sum1
, sum2
);
4507 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4508 is only ever performed on addresses, so we can mark the
4509 sum as legitimate for LA in any case. */
4510 s390_load_address (target
, src
);
4514 /* Return true if ADDR is a valid memory address.
4515 STRICT specifies whether strict register checking applies. */
4518 s390_legitimate_address_p (machine_mode mode
, rtx addr
, bool strict
)
4520 struct s390_address ad
;
4523 && larl_operand (addr
, VOIDmode
)
4524 && (mode
== VOIDmode
4525 || s390_check_symref_alignment (addr
, GET_MODE_SIZE (mode
))))
4528 if (!s390_decompose_address (addr
, &ad
))
4533 if (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
4536 if (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
)))
4542 && !(REGNO (ad
.base
) >= FIRST_PSEUDO_REGISTER
4543 || REGNO_REG_CLASS (REGNO (ad
.base
)) == ADDR_REGS
))
4547 && !(REGNO (ad
.indx
) >= FIRST_PSEUDO_REGISTER
4548 || REGNO_REG_CLASS (REGNO (ad
.indx
)) == ADDR_REGS
))
4554 /* Return true if OP is a valid operand for the LA instruction.
4555 In 31-bit, we need to prove that the result is used as an
4556 address, as LA performs only a 31-bit addition. */
4559 legitimate_la_operand_p (rtx op
)
4561 struct s390_address addr
;
4562 if (!s390_decompose_address (op
, &addr
))
4565 return (TARGET_64BIT
|| addr
.pointer
);
4568 /* Return true if it is valid *and* preferable to use LA to
4569 compute the sum of OP1 and OP2. */
4572 preferred_la_operand_p (rtx op1
, rtx op2
)
4574 struct s390_address addr
;
4576 if (op2
!= const0_rtx
)
4577 op1
= gen_rtx_PLUS (Pmode
, op1
, op2
);
4579 if (!s390_decompose_address (op1
, &addr
))
4581 if (addr
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (addr
.base
)))
4583 if (addr
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (addr
.indx
)))
4586 /* Avoid LA instructions with index register on z196; it is
4587 preferable to use regular add instructions when possible.
4588 Starting with zEC12 the la with index register is "uncracked"
4590 if (addr
.indx
&& s390_tune
== PROCESSOR_2817_Z196
)
4593 if (!TARGET_64BIT
&& !addr
.pointer
)
4599 if ((addr
.base
&& REG_P (addr
.base
) && REG_POINTER (addr
.base
))
4600 || (addr
.indx
&& REG_P (addr
.indx
) && REG_POINTER (addr
.indx
)))
4606 /* Emit a forced load-address operation to load SRC into DST.
4607 This will use the LOAD ADDRESS instruction even in situations
4608 where legitimate_la_operand_p (SRC) returns false. */
4611 s390_load_address (rtx dst
, rtx src
)
4614 emit_move_insn (dst
, src
);
4616 emit_insn (gen_force_la_31 (dst
, src
));
4619 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4622 s390_rel_address_ok_p (rtx symbol_ref
)
4626 if (symbol_ref
== s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref
))
4629 decl
= SYMBOL_REF_DECL (symbol_ref
);
4631 if (!flag_pic
|| SYMBOL_REF_LOCAL_P (symbol_ref
))
4632 return (s390_pic_data_is_text_relative
4634 && TREE_CODE (decl
) == FUNCTION_DECL
));
4639 /* Return a legitimate reference for ORIG (an address) using the
4640 register REG. If REG is 0, a new pseudo is generated.
4642 There are two types of references that must be handled:
4644 1. Global data references must load the address from the GOT, via
4645 the PIC reg. An insn is emitted to do this load, and the reg is
4648 2. Static data references, constant pool addresses, and code labels
4649 compute the address as an offset from the GOT, whose base is in
4650 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4651 differentiate them from global data objects. The returned
4652 address is the PIC reg + an unspec constant.
4654 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4655 reg also appears in the address. */
4658 legitimize_pic_address (rtx orig
, rtx reg
)
4661 rtx addend
= const0_rtx
;
4664 gcc_assert (!TLS_SYMBOLIC_CONST (addr
));
4666 if (GET_CODE (addr
) == CONST
)
4667 addr
= XEXP (addr
, 0);
4669 if (GET_CODE (addr
) == PLUS
)
4671 addend
= XEXP (addr
, 1);
4672 addr
= XEXP (addr
, 0);
4675 if ((GET_CODE (addr
) == LABEL_REF
4676 || (SYMBOL_REF_P (addr
) && s390_rel_address_ok_p (addr
))
4677 || (GET_CODE (addr
) == UNSPEC
&&
4678 (XINT (addr
, 1) == UNSPEC_GOTENT
4679 || (TARGET_CPU_ZARCH
&& XINT (addr
, 1) == UNSPEC_PLT
))))
4680 && GET_CODE (addend
) == CONST_INT
)
4682 /* This can be locally addressed. */
4684 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4685 rtx const_addr
= (GET_CODE (addr
) == UNSPEC
?
4686 gen_rtx_CONST (Pmode
, addr
) : addr
);
4688 if (TARGET_CPU_ZARCH
4689 && larl_operand (const_addr
, VOIDmode
)
4690 && INTVAL (addend
) < HOST_WIDE_INT_1
<< 31
4691 && INTVAL (addend
) >= -(HOST_WIDE_INT_1
<< 31))
4693 if (INTVAL (addend
) & 1)
4695 /* LARL can't handle odd offsets, so emit a pair of LARL
4697 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4699 if (!DISP_IN_RANGE (INTVAL (addend
)))
4701 HOST_WIDE_INT even
= INTVAL (addend
) - 1;
4702 addr
= gen_rtx_PLUS (Pmode
, addr
, GEN_INT (even
));
4703 addr
= gen_rtx_CONST (Pmode
, addr
);
4704 addend
= const1_rtx
;
4707 emit_move_insn (temp
, addr
);
4708 new_rtx
= gen_rtx_PLUS (Pmode
, temp
, addend
);
4712 s390_load_address (reg
, new_rtx
);
4718 /* If the offset is even, we can just use LARL. This
4719 will happen automatically. */
4724 /* No larl - Access local symbols relative to the GOT. */
4726 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4728 if (reload_in_progress
|| reload_completed
)
4729 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4731 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
4732 if (addend
!= const0_rtx
)
4733 addr
= gen_rtx_PLUS (Pmode
, addr
, addend
);
4734 addr
= gen_rtx_CONST (Pmode
, addr
);
4735 addr
= force_const_mem (Pmode
, addr
);
4736 emit_move_insn (temp
, addr
);
4738 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
4741 s390_load_address (reg
, new_rtx
);
4746 else if (GET_CODE (addr
) == SYMBOL_REF
&& addend
== const0_rtx
)
4748 /* A non-local symbol reference without addend.
4750 The symbol ref is wrapped into an UNSPEC to make sure the
4751 proper operand modifier (@GOT or @GOTENT) will be emitted.
4752 This will tell the linker to put the symbol into the GOT.
4754 Additionally the code dereferencing the GOT slot is emitted here.
4756 An addend to the symref needs to be added afterwards.
4757 legitimize_pic_address calls itself recursively to handle
4758 that case. So no need to do it here. */
4761 reg
= gen_reg_rtx (Pmode
);
4765 /* Use load relative if possible.
4766 lgrl <target>, sym@GOTENT */
4767 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
4768 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
4769 new_rtx
= gen_const_mem (GET_MODE (reg
), new_rtx
);
4771 emit_move_insn (reg
, new_rtx
);
4774 else if (flag_pic
== 1)
4776 /* Assume GOT offset is a valid displacement operand (< 4k
4777 or < 512k with z990). This is handled the same way in
4778 both 31- and 64-bit code (@GOT).
4779 lg <target>, sym@GOT(r12) */
4781 if (reload_in_progress
|| reload_completed
)
4782 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4784 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
4785 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
4786 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
4787 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
4788 emit_move_insn (reg
, new_rtx
);
4791 else if (TARGET_CPU_ZARCH
)
4793 /* If the GOT offset might be >= 4k, we determine the position
4794 of the GOT entry via a PC-relative LARL (@GOTENT).
4795 larl temp, sym@GOTENT
4796 lg <target>, 0(temp) */
4798 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4800 gcc_assert (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
4801 || REGNO_REG_CLASS (REGNO (temp
)) == ADDR_REGS
);
4803 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTENT
);
4804 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
4805 emit_move_insn (temp
, new_rtx
);
4807 new_rtx
= gen_const_mem (Pmode
, temp
);
4808 emit_move_insn (reg
, new_rtx
);
4814 /* If the GOT offset might be >= 4k, we have to load it
4815 from the literal pool (@GOT).
4817 lg temp, lit-litbase(r13)
4818 lg <target>, 0(temp)
4819 lit: .long sym@GOT */
4821 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4823 gcc_assert (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
4824 || REGNO_REG_CLASS (REGNO (temp
)) == ADDR_REGS
);
4826 if (reload_in_progress
|| reload_completed
)
4827 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4829 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
4830 addr
= gen_rtx_CONST (Pmode
, addr
);
4831 addr
= force_const_mem (Pmode
, addr
);
4832 emit_move_insn (temp
, addr
);
4834 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
4835 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
4836 emit_move_insn (reg
, new_rtx
);
4840 else if (GET_CODE (addr
) == UNSPEC
&& GET_CODE (addend
) == CONST_INT
)
4842 gcc_assert (XVECLEN (addr
, 0) == 1);
4843 switch (XINT (addr
, 1))
4845 /* These address symbols (or PLT slots) relative to the GOT
4846 (not GOT slots!). In general this will exceed the
4847 displacement range so these value belong into the literal
4851 new_rtx
= force_const_mem (Pmode
, orig
);
4854 /* For -fPIC the GOT size might exceed the displacement
4855 range so make sure the value is in the literal pool. */
4858 new_rtx
= force_const_mem (Pmode
, orig
);
4861 /* For @GOTENT larl is used. This is handled like local
4867 /* @PLT is OK as is on 64-bit, must be converted to
4868 GOT-relative @PLTOFF on 31-bit. */
4870 if (!TARGET_CPU_ZARCH
)
4872 rtx temp
= reg
? reg
: gen_reg_rtx (Pmode
);
4874 if (reload_in_progress
|| reload_completed
)
4875 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
4877 addr
= XVECEXP (addr
, 0, 0);
4878 addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
4880 if (addend
!= const0_rtx
)
4881 addr
= gen_rtx_PLUS (Pmode
, addr
, addend
);
4882 addr
= gen_rtx_CONST (Pmode
, addr
);
4883 addr
= force_const_mem (Pmode
, addr
);
4884 emit_move_insn (temp
, addr
);
4886 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
4889 s390_load_address (reg
, new_rtx
);
4894 /* On 64 bit larl can be used. This case is handled like
4895 local symbol refs. */
4899 /* Everything else cannot happen. */
4904 else if (addend
!= const0_rtx
)
4906 /* Otherwise, compute the sum. */
4908 rtx base
= legitimize_pic_address (addr
, reg
);
4909 new_rtx
= legitimize_pic_address (addend
,
4910 base
== reg
? NULL_RTX
: reg
);
4911 if (GET_CODE (new_rtx
) == CONST_INT
)
4912 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
4915 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
4917 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
4918 new_rtx
= XEXP (new_rtx
, 1);
4920 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
4923 if (GET_CODE (new_rtx
) == CONST
)
4924 new_rtx
= XEXP (new_rtx
, 0);
4925 new_rtx
= force_operand (new_rtx
, 0);
4931 /* Load the thread pointer into a register. */
4934 s390_get_thread_pointer (void)
4936 rtx tp
= gen_reg_rtx (Pmode
);
4938 emit_move_insn (tp
, gen_rtx_REG (Pmode
, TP_REGNUM
));
4939 mark_reg_pointer (tp
, BITS_PER_WORD
);
4944 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4945 in s390_tls_symbol which always refers to __tls_get_offset.
4946 The returned offset is written to RESULT_REG and an USE rtx is
4947 generated for TLS_CALL. */
4949 static GTY(()) rtx s390_tls_symbol
;
4952 s390_emit_tls_call_insn (rtx result_reg
, rtx tls_call
)
4957 emit_insn (s390_load_got ());
4959 if (!s390_tls_symbol
)
4960 s390_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tls_get_offset");
4962 insn
= s390_emit_call (s390_tls_symbol
, tls_call
, result_reg
,
4963 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
4965 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), result_reg
);
4966 RTL_CONST_CALL_P (insn
) = 1;
4969 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4970 this (thread-local) address. REG may be used as temporary. */
4973 legitimize_tls_address (rtx addr
, rtx reg
)
4975 rtx new_rtx
, tls_call
, temp
, base
, r2
;
4978 if (GET_CODE (addr
) == SYMBOL_REF
)
4979 switch (tls_symbolic_operand (addr
))
4981 case TLS_MODEL_GLOBAL_DYNAMIC
:
4983 r2
= gen_rtx_REG (Pmode
, 2);
4984 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_TLSGD
);
4985 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
4986 new_rtx
= force_const_mem (Pmode
, new_rtx
);
4987 emit_move_insn (r2
, new_rtx
);
4988 s390_emit_tls_call_insn (r2
, tls_call
);
4989 insn
= get_insns ();
4992 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
4993 temp
= gen_reg_rtx (Pmode
);
4994 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
4996 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
4999 s390_load_address (reg
, new_rtx
);
5004 case TLS_MODEL_LOCAL_DYNAMIC
:
5006 r2
= gen_rtx_REG (Pmode
, 2);
5007 tls_call
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM
);
5008 new_rtx
= gen_rtx_CONST (Pmode
, tls_call
);
5009 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5010 emit_move_insn (r2
, new_rtx
);
5011 s390_emit_tls_call_insn (r2
, tls_call
);
5012 insn
= get_insns ();
5015 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TLSLDM_NTPOFF
);
5016 temp
= gen_reg_rtx (Pmode
);
5017 emit_libcall_block (insn
, temp
, r2
, new_rtx
);
5019 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5020 base
= gen_reg_rtx (Pmode
);
5021 s390_load_address (base
, new_rtx
);
5023 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_DTPOFF
);
5024 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5025 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5026 temp
= gen_reg_rtx (Pmode
);
5027 emit_move_insn (temp
, new_rtx
);
5029 new_rtx
= gen_rtx_PLUS (Pmode
, base
, temp
);
5032 s390_load_address (reg
, new_rtx
);
5037 case TLS_MODEL_INITIAL_EXEC
:
5040 /* Assume GOT offset < 4k. This is handled the same way
5041 in both 31- and 64-bit code. */
5043 if (reload_in_progress
|| reload_completed
)
5044 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5046 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTNTPOFF
);
5047 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5048 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
5049 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5050 temp
= gen_reg_rtx (Pmode
);
5051 emit_move_insn (temp
, new_rtx
);
5053 else if (TARGET_CPU_ZARCH
)
5055 /* If the GOT offset might be >= 4k, we determine the position
5056 of the GOT entry via a PC-relative LARL. */
5058 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_INDNTPOFF
);
5059 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5060 temp
= gen_reg_rtx (Pmode
);
5061 emit_move_insn (temp
, new_rtx
);
5063 new_rtx
= gen_const_mem (Pmode
, temp
);
5064 temp
= gen_reg_rtx (Pmode
);
5065 emit_move_insn (temp
, new_rtx
);
5069 /* If the GOT offset might be >= 4k, we have to load it
5070 from the literal pool. */
5072 if (reload_in_progress
|| reload_completed
)
5073 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
5075 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTNTPOFF
);
5076 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5077 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5078 temp
= gen_reg_rtx (Pmode
);
5079 emit_move_insn (temp
, new_rtx
);
5081 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, temp
);
5082 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5084 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, new_rtx
, addr
), UNSPEC_TLS_LOAD
);
5085 temp
= gen_reg_rtx (Pmode
);
5086 emit_insn (gen_rtx_SET (temp
, new_rtx
));
5090 /* In position-dependent code, load the absolute address of
5091 the GOT entry from the literal pool. */
5093 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_INDNTPOFF
);
5094 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5095 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5096 temp
= gen_reg_rtx (Pmode
);
5097 emit_move_insn (temp
, new_rtx
);
5100 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
5101 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, new_rtx
, addr
), UNSPEC_TLS_LOAD
);
5102 temp
= gen_reg_rtx (Pmode
);
5103 emit_insn (gen_rtx_SET (temp
, new_rtx
));
5106 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5109 s390_load_address (reg
, new_rtx
);
5114 case TLS_MODEL_LOCAL_EXEC
:
5115 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_NTPOFF
);
5116 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5117 new_rtx
= force_const_mem (Pmode
, new_rtx
);
5118 temp
= gen_reg_rtx (Pmode
);
5119 emit_move_insn (temp
, new_rtx
);
5121 new_rtx
= gen_rtx_PLUS (Pmode
, s390_get_thread_pointer (), temp
);
5124 s390_load_address (reg
, new_rtx
);
5133 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == UNSPEC
)
5135 switch (XINT (XEXP (addr
, 0), 1))
5137 case UNSPEC_INDNTPOFF
:
5138 gcc_assert (TARGET_CPU_ZARCH
);
5147 else if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
5148 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
5150 new_rtx
= XEXP (XEXP (addr
, 0), 0);
5151 if (GET_CODE (new_rtx
) != SYMBOL_REF
)
5152 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
5154 new_rtx
= legitimize_tls_address (new_rtx
, reg
);
5155 new_rtx
= plus_constant (Pmode
, new_rtx
,
5156 INTVAL (XEXP (XEXP (addr
, 0), 1)));
5157 new_rtx
= force_operand (new_rtx
, 0);
5161 gcc_unreachable (); /* for now ... */
5166 /* Emit insns making the address in operands[1] valid for a standard
5167 move to operands[0]. operands[1] is replaced by an address which
5168 should be used instead of the former RTX to emit the move
5172 emit_symbolic_move (rtx
*operands
)
5174 rtx temp
= !can_create_pseudo_p () ? operands
[0] : gen_reg_rtx (Pmode
);
5176 if (GET_CODE (operands
[0]) == MEM
)
5177 operands
[1] = force_reg (Pmode
, operands
[1]);
5178 else if (TLS_SYMBOLIC_CONST (operands
[1]))
5179 operands
[1] = legitimize_tls_address (operands
[1], temp
);
5181 operands
[1] = legitimize_pic_address (operands
[1], temp
);
5184 /* Try machine-dependent ways of modifying an illegitimate address X
5185 to be legitimate. If we find one, return the new, valid address.
5187 OLDX is the address as it was before break_out_memory_refs was called.
5188 In some cases it is useful to look at this to decide what needs to be done.
5190 MODE is the mode of the operand pointed to by X.
5192 When -fpic is used, special handling is needed for symbolic references.
5193 See comments by legitimize_pic_address for details. */
5196 s390_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
5197 machine_mode mode ATTRIBUTE_UNUSED
)
5199 rtx constant_term
= const0_rtx
;
5201 if (TLS_SYMBOLIC_CONST (x
))
5203 x
= legitimize_tls_address (x
, 0);
5205 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5208 else if (GET_CODE (x
) == PLUS
5209 && (TLS_SYMBOLIC_CONST (XEXP (x
, 0))
5210 || TLS_SYMBOLIC_CONST (XEXP (x
, 1))))
5216 if (SYMBOLIC_CONST (x
)
5217 || (GET_CODE (x
) == PLUS
5218 && (SYMBOLIC_CONST (XEXP (x
, 0))
5219 || SYMBOLIC_CONST (XEXP (x
, 1)))))
5220 x
= legitimize_pic_address (x
, 0);
5222 if (s390_legitimate_address_p (mode
, x
, FALSE
))
5226 x
= eliminate_constant_term (x
, &constant_term
);
5228 /* Optimize loading of large displacements by splitting them
5229 into the multiple of 4K and the rest; this allows the
5230 former to be CSE'd if possible.
5232 Don't do this if the displacement is added to a register
5233 pointing into the stack frame, as the offsets will
5234 change later anyway. */
5236 if (GET_CODE (constant_term
) == CONST_INT
5237 && !TARGET_LONG_DISPLACEMENT
5238 && !DISP_IN_RANGE (INTVAL (constant_term
))
5239 && !(REG_P (x
) && REGNO_PTR_FRAME_P (REGNO (x
))))
5241 HOST_WIDE_INT lower
= INTVAL (constant_term
) & 0xfff;
5242 HOST_WIDE_INT upper
= INTVAL (constant_term
) ^ lower
;
5244 rtx temp
= gen_reg_rtx (Pmode
);
5245 rtx val
= force_operand (GEN_INT (upper
), temp
);
5247 emit_move_insn (temp
, val
);
5249 x
= gen_rtx_PLUS (Pmode
, x
, temp
);
5250 constant_term
= GEN_INT (lower
);
5253 if (GET_CODE (x
) == PLUS
)
5255 if (GET_CODE (XEXP (x
, 0)) == REG
)
5257 rtx temp
= gen_reg_rtx (Pmode
);
5258 rtx val
= force_operand (XEXP (x
, 1), temp
);
5260 emit_move_insn (temp
, val
);
5262 x
= gen_rtx_PLUS (Pmode
, XEXP (x
, 0), temp
);
5265 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5267 rtx temp
= gen_reg_rtx (Pmode
);
5268 rtx val
= force_operand (XEXP (x
, 0), temp
);
5270 emit_move_insn (temp
, val
);
5272 x
= gen_rtx_PLUS (Pmode
, temp
, XEXP (x
, 1));
5276 if (constant_term
!= const0_rtx
)
5277 x
= gen_rtx_PLUS (Pmode
, x
, constant_term
);
5282 /* Try a machine-dependent way of reloading an illegitimate address AD
5283 operand. If we find one, push the reload and return the new address.
5285 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5286 and TYPE is the reload type of the current reload. */
5289 legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
5290 int opnum
, int type
)
5292 if (!optimize
|| TARGET_LONG_DISPLACEMENT
)
5295 if (GET_CODE (ad
) == PLUS
)
5297 rtx tem
= simplify_binary_operation (PLUS
, Pmode
,
5298 XEXP (ad
, 0), XEXP (ad
, 1));
5303 if (GET_CODE (ad
) == PLUS
5304 && GET_CODE (XEXP (ad
, 0)) == REG
5305 && GET_CODE (XEXP (ad
, 1)) == CONST_INT
5306 && !DISP_IN_RANGE (INTVAL (XEXP (ad
, 1))))
5308 HOST_WIDE_INT lower
= INTVAL (XEXP (ad
, 1)) & 0xfff;
5309 HOST_WIDE_INT upper
= INTVAL (XEXP (ad
, 1)) ^ lower
;
5310 rtx cst
, tem
, new_rtx
;
5312 cst
= GEN_INT (upper
);
5313 if (!legitimate_reload_constant_p (cst
))
5314 cst
= force_const_mem (Pmode
, cst
);
5316 tem
= gen_rtx_PLUS (Pmode
, XEXP (ad
, 0), cst
);
5317 new_rtx
= gen_rtx_PLUS (Pmode
, tem
, GEN_INT (lower
));
5319 push_reload (XEXP (tem
, 1), 0, &XEXP (tem
, 1), 0,
5320 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
5321 opnum
, (enum reload_type
) type
);
5328 /* Emit code to move LEN bytes from DST to SRC. */
5331 s390_expand_movmem (rtx dst
, rtx src
, rtx len
)
5333 /* When tuning for z10 or higher we rely on the Glibc functions to
5334 do the right thing. Only for constant lengths below 64k we will
5335 generate inline code. */
5336 if (s390_tune
>= PROCESSOR_2097_Z10
5337 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5340 /* Expand memcpy for constant length operands without a loop if it
5341 is shorter that way.
5343 With a constant length argument a
5344 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5345 if (GET_CODE (len
) == CONST_INT
5346 && INTVAL (len
) >= 0
5347 && INTVAL (len
) <= 256 * 6
5348 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5352 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5354 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5355 rtx newsrc
= adjust_address (src
, BLKmode
, o
);
5356 emit_insn (gen_movmem_short (newdst
, newsrc
,
5357 GEN_INT (l
> 256 ? 255 : l
- 1)));
5361 else if (TARGET_MVCLE
)
5363 emit_insn (gen_movmem_long (dst
, src
, convert_to_mode (Pmode
, len
, 1)));
5368 rtx dst_addr
, src_addr
, count
, blocks
, temp
;
5369 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5370 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5371 rtx_code_label
*end_label
= gen_label_rtx ();
5374 mode
= GET_MODE (len
);
5375 if (mode
== VOIDmode
)
5378 dst_addr
= gen_reg_rtx (Pmode
);
5379 src_addr
= gen_reg_rtx (Pmode
);
5380 count
= gen_reg_rtx (mode
);
5381 blocks
= gen_reg_rtx (mode
);
5383 convert_move (count
, len
, 1);
5384 emit_cmp_and_jump_insns (count
, const0_rtx
,
5385 EQ
, NULL_RTX
, mode
, 1, end_label
);
5387 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5388 emit_move_insn (src_addr
, force_operand (XEXP (src
, 0), NULL_RTX
));
5389 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5390 src
= change_address (src
, VOIDmode
, src_addr
);
5392 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5395 emit_move_insn (count
, temp
);
5397 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5400 emit_move_insn (blocks
, temp
);
5402 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5403 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5405 emit_label (loop_start_label
);
5408 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 768))
5412 /* Issue a read prefetch for the +3 cache line. */
5413 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (768)),
5414 const0_rtx
, const0_rtx
);
5415 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5416 emit_insn (prefetch
);
5418 /* Issue a write prefetch for the +3 cache line. */
5419 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (768)),
5420 const1_rtx
, const0_rtx
);
5421 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5422 emit_insn (prefetch
);
5425 emit_insn (gen_movmem_short (dst
, src
, GEN_INT (255)));
5426 s390_load_address (dst_addr
,
5427 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5428 s390_load_address (src_addr
,
5429 gen_rtx_PLUS (Pmode
, src_addr
, GEN_INT (256)));
5431 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5434 emit_move_insn (blocks
, temp
);
5436 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5437 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5439 emit_jump (loop_start_label
);
5440 emit_label (loop_end_label
);
5442 emit_insn (gen_movmem_short (dst
, src
,
5443 convert_to_mode (Pmode
, count
, 1)));
5444 emit_label (end_label
);
5449 /* Emit code to set LEN bytes at DST to VAL.
5450 Make use of clrmem if VAL is zero. */
5453 s390_expand_setmem (rtx dst
, rtx len
, rtx val
)
5455 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) <= 0)
5458 gcc_assert (GET_CODE (val
) == CONST_INT
|| GET_MODE (val
) == QImode
);
5460 /* Expand setmem/clrmem for a constant length operand without a
5461 loop if it will be shorter that way.
5462 With a constant length and without pfd argument a
5463 clrmem loop is 32 bytes -> 5.3 * xc
5464 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5465 if (GET_CODE (len
) == CONST_INT
5466 && ((INTVAL (len
) <= 256 * 5 && val
== const0_rtx
)
5467 || INTVAL (len
) <= 257 * 3)
5468 && (!TARGET_MVCLE
|| INTVAL (len
) <= 256))
5472 if (val
== const0_rtx
)
5473 /* clrmem: emit 256 byte blockwise XCs. */
5474 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 256, o
+= 256)
5476 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5477 emit_insn (gen_clrmem_short (newdst
,
5478 GEN_INT (l
> 256 ? 255 : l
- 1)));
5481 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5482 setting first byte to val and using a 256 byte mvc with one
5483 byte overlap to propagate the byte. */
5484 for (l
= INTVAL (len
), o
= 0; l
> 0; l
-= 257, o
+= 257)
5486 rtx newdst
= adjust_address (dst
, BLKmode
, o
);
5487 emit_move_insn (adjust_address (dst
, QImode
, o
), val
);
5490 rtx newdstp1
= adjust_address (dst
, BLKmode
, o
+ 1);
5491 emit_insn (gen_movmem_short (newdstp1
, newdst
,
5492 GEN_INT (l
> 257 ? 255 : l
- 2)));
5497 else if (TARGET_MVCLE
)
5499 val
= force_not_mem (convert_modes (Pmode
, QImode
, val
, 1));
5501 emit_insn (gen_setmem_long_di (dst
, convert_to_mode (Pmode
, len
, 1),
5504 emit_insn (gen_setmem_long_si (dst
, convert_to_mode (Pmode
, len
, 1),
5510 rtx dst_addr
, count
, blocks
, temp
, dstp1
= NULL_RTX
;
5511 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5512 rtx_code_label
*onebyte_end_label
= gen_label_rtx ();
5513 rtx_code_label
*zerobyte_end_label
= gen_label_rtx ();
5514 rtx_code_label
*restbyte_end_label
= gen_label_rtx ();
5517 mode
= GET_MODE (len
);
5518 if (mode
== VOIDmode
)
5521 dst_addr
= gen_reg_rtx (Pmode
);
5522 count
= gen_reg_rtx (mode
);
5523 blocks
= gen_reg_rtx (mode
);
5525 convert_move (count
, len
, 1);
5526 emit_cmp_and_jump_insns (count
, const0_rtx
,
5527 EQ
, NULL_RTX
, mode
, 1, zerobyte_end_label
,
5528 profile_probability::very_unlikely ());
5530 /* We need to make a copy of the target address since memset is
5531 supposed to return it unmodified. We have to make it here
5532 already since the new reg is used at onebyte_end_label. */
5533 emit_move_insn (dst_addr
, force_operand (XEXP (dst
, 0), NULL_RTX
));
5534 dst
= change_address (dst
, VOIDmode
, dst_addr
);
5536 if (val
!= const0_rtx
)
5538 /* When using the overlapping mvc the original target
5539 address is only accessed as single byte entity (even by
5540 the mvc reading this value). */
5541 set_mem_size (dst
, 1);
5542 dstp1
= adjust_address (dst
, VOIDmode
, 1);
5543 emit_cmp_and_jump_insns (count
,
5544 const1_rtx
, EQ
, NULL_RTX
, mode
, 1,
5546 profile_probability::very_unlikely ());
5549 /* There is one unconditional (mvi+mvc)/xc after the loop
5550 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5551 or one (xc) here leaves this number of bytes to be handled by
5553 temp
= expand_binop (mode
, add_optab
, count
,
5554 val
== const0_rtx
? constm1_rtx
: GEN_INT (-2),
5555 count
, 1, OPTAB_DIRECT
);
5557 emit_move_insn (count
, temp
);
5559 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5562 emit_move_insn (blocks
, temp
);
5564 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5565 EQ
, NULL_RTX
, mode
, 1, restbyte_end_label
);
5567 emit_jump (loop_start_label
);
5569 if (val
!= const0_rtx
)
5571 /* The 1 byte != 0 special case. Not handled efficiently
5572 since we require two jumps for that. However, this
5573 should be very rare. */
5574 emit_label (onebyte_end_label
);
5575 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5576 emit_jump (zerobyte_end_label
);
5579 emit_label (loop_start_label
);
5582 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 1024))
5584 /* Issue a write prefetch for the +4 cache line. */
5585 rtx prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, dst_addr
,
5587 const1_rtx
, const0_rtx
);
5588 emit_insn (prefetch
);
5589 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5592 if (val
== const0_rtx
)
5593 emit_insn (gen_clrmem_short (dst
, GEN_INT (255)));
5596 /* Set the first byte in the block to the value and use an
5597 overlapping mvc for the block. */
5598 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5599 emit_insn (gen_movmem_short (dstp1
, dst
, GEN_INT (254)));
5601 s390_load_address (dst_addr
,
5602 gen_rtx_PLUS (Pmode
, dst_addr
, GEN_INT (256)));
5604 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5607 emit_move_insn (blocks
, temp
);
5609 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5610 NE
, NULL_RTX
, mode
, 1, loop_start_label
);
5612 emit_label (restbyte_end_label
);
5614 if (val
== const0_rtx
)
5615 emit_insn (gen_clrmem_short (dst
, convert_to_mode (Pmode
, count
, 1)));
5618 /* Set the first byte in the block to the value and use an
5619 overlapping mvc for the block. */
5620 emit_move_insn (adjust_address (dst
, QImode
, 0), val
);
5621 /* execute only uses the lowest 8 bits of count that's
5622 exactly what we need here. */
5623 emit_insn (gen_movmem_short (dstp1
, dst
,
5624 convert_to_mode (Pmode
, count
, 1)));
5627 emit_label (zerobyte_end_label
);
5631 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5632 and return the result in TARGET. */
5635 s390_expand_cmpmem (rtx target
, rtx op0
, rtx op1
, rtx len
)
5637 rtx ccreg
= gen_rtx_REG (CCUmode
, CC_REGNUM
);
5640 /* When tuning for z10 or higher we rely on the Glibc functions to
5641 do the right thing. Only for constant lengths below 64k we will
5642 generate inline code. */
5643 if (s390_tune
>= PROCESSOR_2097_Z10
5644 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > (1<<16)))
5647 /* As the result of CMPINT is inverted compared to what we need,
5648 we have to swap the operands. */
5649 tmp
= op0
; op0
= op1
; op1
= tmp
;
5651 if (GET_CODE (len
) == CONST_INT
&& INTVAL (len
) >= 0 && INTVAL (len
) <= 256)
5653 if (INTVAL (len
) > 0)
5655 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (INTVAL (len
) - 1)));
5656 emit_insn (gen_cmpint (target
, ccreg
));
5659 emit_move_insn (target
, const0_rtx
);
5661 else if (TARGET_MVCLE
)
5663 emit_insn (gen_cmpmem_long (op0
, op1
, convert_to_mode (Pmode
, len
, 1)));
5664 emit_insn (gen_cmpint (target
, ccreg
));
5668 rtx addr0
, addr1
, count
, blocks
, temp
;
5669 rtx_code_label
*loop_start_label
= gen_label_rtx ();
5670 rtx_code_label
*loop_end_label
= gen_label_rtx ();
5671 rtx_code_label
*end_label
= gen_label_rtx ();
5674 mode
= GET_MODE (len
);
5675 if (mode
== VOIDmode
)
5678 addr0
= gen_reg_rtx (Pmode
);
5679 addr1
= gen_reg_rtx (Pmode
);
5680 count
= gen_reg_rtx (mode
);
5681 blocks
= gen_reg_rtx (mode
);
5683 convert_move (count
, len
, 1);
5684 emit_cmp_and_jump_insns (count
, const0_rtx
,
5685 EQ
, NULL_RTX
, mode
, 1, end_label
);
5687 emit_move_insn (addr0
, force_operand (XEXP (op0
, 0), NULL_RTX
));
5688 emit_move_insn (addr1
, force_operand (XEXP (op1
, 0), NULL_RTX
));
5689 op0
= change_address (op0
, VOIDmode
, addr0
);
5690 op1
= change_address (op1
, VOIDmode
, addr1
);
5692 temp
= expand_binop (mode
, add_optab
, count
, constm1_rtx
, count
, 1,
5695 emit_move_insn (count
, temp
);
5697 temp
= expand_binop (mode
, lshr_optab
, count
, GEN_INT (8), blocks
, 1,
5700 emit_move_insn (blocks
, temp
);
5702 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5703 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5705 emit_label (loop_start_label
);
5708 && (GET_CODE (len
) != CONST_INT
|| INTVAL (len
) > 512))
5712 /* Issue a read prefetch for the +2 cache line of operand 1. */
5713 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (512)),
5714 const0_rtx
, const0_rtx
);
5715 emit_insn (prefetch
);
5716 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5718 /* Issue a read prefetch for the +2 cache line of operand 2. */
5719 prefetch
= gen_prefetch (gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (512)),
5720 const0_rtx
, const0_rtx
);
5721 emit_insn (prefetch
);
5722 PREFETCH_SCHEDULE_BARRIER_P (prefetch
) = true;
5725 emit_insn (gen_cmpmem_short (op0
, op1
, GEN_INT (255)));
5726 temp
= gen_rtx_NE (VOIDmode
, ccreg
, const0_rtx
);
5727 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5728 gen_rtx_LABEL_REF (VOIDmode
, end_label
), pc_rtx
);
5729 temp
= gen_rtx_SET (pc_rtx
, temp
);
5730 emit_jump_insn (temp
);
5732 s390_load_address (addr0
,
5733 gen_rtx_PLUS (Pmode
, addr0
, GEN_INT (256)));
5734 s390_load_address (addr1
,
5735 gen_rtx_PLUS (Pmode
, addr1
, GEN_INT (256)));
5737 temp
= expand_binop (mode
, add_optab
, blocks
, constm1_rtx
, blocks
, 1,
5740 emit_move_insn (blocks
, temp
);
5742 emit_cmp_and_jump_insns (blocks
, const0_rtx
,
5743 EQ
, NULL_RTX
, mode
, 1, loop_end_label
);
5745 emit_jump (loop_start_label
);
5746 emit_label (loop_end_label
);
5748 emit_insn (gen_cmpmem_short (op0
, op1
,
5749 convert_to_mode (Pmode
, count
, 1)));
5750 emit_label (end_label
);
5752 emit_insn (gen_cmpint (target
, ccreg
));
5757 /* Emit a conditional jump to LABEL for condition code mask MASK using
5758 comparsion operator COMPARISON. Return the emitted jump insn. */
5761 s390_emit_ccraw_jump (HOST_WIDE_INT mask
, enum rtx_code comparison
, rtx label
)
5765 gcc_assert (comparison
== EQ
|| comparison
== NE
);
5766 gcc_assert (mask
> 0 && mask
< 15);
5768 temp
= gen_rtx_fmt_ee (comparison
, VOIDmode
,
5769 gen_rtx_REG (CCRAWmode
, CC_REGNUM
), GEN_INT (mask
));
5770 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
5771 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
5772 temp
= gen_rtx_SET (pc_rtx
, temp
);
5773 return emit_jump_insn (temp
);
5776 /* Emit the instructions to implement strlen of STRING and store the
5777 result in TARGET. The string has the known ALIGNMENT. This
5778 version uses vector instructions and is therefore not appropriate
5779 for targets prior to z13. */
5782 s390_expand_vec_strlen (rtx target
, rtx string
, rtx alignment
)
5784 rtx highest_index_to_load_reg
= gen_reg_rtx (Pmode
);
5785 rtx str_reg
= gen_reg_rtx (V16QImode
);
5786 rtx str_addr_base_reg
= gen_reg_rtx (Pmode
);
5787 rtx str_idx_reg
= gen_reg_rtx (Pmode
);
5788 rtx result_reg
= gen_reg_rtx (V16QImode
);
5789 rtx is_aligned_label
= gen_label_rtx ();
5790 rtx into_loop_label
= NULL_RTX
;
5791 rtx loop_start_label
= gen_label_rtx ();
5793 rtx len
= gen_reg_rtx (QImode
);
5796 s390_load_address (str_addr_base_reg
, XEXP (string
, 0));
5797 emit_move_insn (str_idx_reg
, const0_rtx
);
5799 if (INTVAL (alignment
) < 16)
5801 /* Check whether the address happens to be aligned properly so
5802 jump directly to the aligned loop. */
5803 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode
,
5804 str_addr_base_reg
, GEN_INT (15)),
5805 const0_rtx
, EQ
, NULL_RTX
,
5806 Pmode
, 1, is_aligned_label
);
5808 temp
= gen_reg_rtx (Pmode
);
5809 temp
= expand_binop (Pmode
, and_optab
, str_addr_base_reg
,
5810 GEN_INT (15), temp
, 1, OPTAB_DIRECT
);
5811 gcc_assert (REG_P (temp
));
5812 highest_index_to_load_reg
=
5813 expand_binop (Pmode
, sub_optab
, GEN_INT (15), temp
,
5814 highest_index_to_load_reg
, 1, OPTAB_DIRECT
);
5815 gcc_assert (REG_P (highest_index_to_load_reg
));
5816 emit_insn (gen_vllv16qi (str_reg
,
5817 convert_to_mode (SImode
, highest_index_to_load_reg
, 1),
5818 gen_rtx_MEM (BLKmode
, str_addr_base_reg
)));
5820 into_loop_label
= gen_label_rtx ();
5821 s390_emit_jump (into_loop_label
, NULL_RTX
);
5825 emit_label (is_aligned_label
);
5826 LABEL_NUSES (is_aligned_label
) = INTVAL (alignment
) < 16 ? 2 : 1;
5828 /* Reaching this point we are only performing 16 bytes aligned
5830 emit_move_insn (highest_index_to_load_reg
, GEN_INT (15));
5832 emit_label (loop_start_label
);
5833 LABEL_NUSES (loop_start_label
) = 1;
5835 /* Load 16 bytes of the string into VR. */
5836 emit_move_insn (str_reg
,
5837 gen_rtx_MEM (V16QImode
,
5838 gen_rtx_PLUS (Pmode
, str_idx_reg
,
5839 str_addr_base_reg
)));
5840 if (into_loop_label
!= NULL_RTX
)
5842 emit_label (into_loop_label
);
5843 LABEL_NUSES (into_loop_label
) = 1;
5846 /* Increment string index by 16 bytes. */
5847 expand_binop (Pmode
, add_optab
, str_idx_reg
, GEN_INT (16),
5848 str_idx_reg
, 1, OPTAB_DIRECT
);
5850 emit_insn (gen_vec_vfenesv16qi (result_reg
, str_reg
, str_reg
,
5851 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
5853 add_int_reg_note (s390_emit_ccraw_jump (8, NE
, loop_start_label
),
5855 profile_probability::very_likely ().to_reg_br_prob_note ());
5856 emit_insn (gen_vec_extractv16qiqi (len
, result_reg
, GEN_INT (7)));
5858 /* If the string pointer wasn't aligned we have loaded less then 16
5859 bytes and the remaining bytes got filled with zeros (by vll).
5860 Now we have to check whether the resulting index lies within the
5861 bytes actually part of the string. */
5863 cond
= s390_emit_compare (GT
, convert_to_mode (Pmode
, len
, 1),
5864 highest_index_to_load_reg
);
5865 s390_load_address (highest_index_to_load_reg
,
5866 gen_rtx_PLUS (Pmode
, highest_index_to_load_reg
,
5869 emit_insn (gen_movdicc (str_idx_reg
, cond
,
5870 highest_index_to_load_reg
, str_idx_reg
));
5872 emit_insn (gen_movsicc (str_idx_reg
, cond
,
5873 highest_index_to_load_reg
, str_idx_reg
));
5875 add_reg_br_prob_note (s390_emit_jump (is_aligned_label
, cond
),
5876 profile_probability::very_unlikely ());
5878 expand_binop (Pmode
, add_optab
, str_idx_reg
,
5879 GEN_INT (-16), str_idx_reg
, 1, OPTAB_DIRECT
);
5880 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5882 temp
= expand_binop (Pmode
, add_optab
, str_idx_reg
,
5883 convert_to_mode (Pmode
, len
, 1),
5884 target
, 1, OPTAB_DIRECT
);
5886 emit_move_insn (target
, temp
);
5890 s390_expand_vec_movstr (rtx result
, rtx dst
, rtx src
)
5892 rtx temp
= gen_reg_rtx (Pmode
);
5893 rtx src_addr
= XEXP (src
, 0);
5894 rtx dst_addr
= XEXP (dst
, 0);
5895 rtx src_addr_reg
= gen_reg_rtx (Pmode
);
5896 rtx dst_addr_reg
= gen_reg_rtx (Pmode
);
5897 rtx offset
= gen_reg_rtx (Pmode
);
5898 rtx vsrc
= gen_reg_rtx (V16QImode
);
5899 rtx vpos
= gen_reg_rtx (V16QImode
);
5900 rtx loadlen
= gen_reg_rtx (SImode
);
5901 rtx gpos_qi
= gen_reg_rtx(QImode
);
5902 rtx gpos
= gen_reg_rtx (SImode
);
5903 rtx done_label
= gen_label_rtx ();
5904 rtx loop_label
= gen_label_rtx ();
5905 rtx exit_label
= gen_label_rtx ();
5906 rtx full_label
= gen_label_rtx ();
5908 /* Perform a quick check for string ending on the first up to 16
5909 bytes and exit early if successful. */
5911 emit_insn (gen_vlbb (vsrc
, src
, GEN_INT (6)));
5912 emit_insn (gen_lcbb (loadlen
, src_addr
, GEN_INT (6)));
5913 emit_insn (gen_vfenezv16qi (vpos
, vsrc
, vsrc
));
5914 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
5915 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
5916 /* gpos is the byte index if a zero was found and 16 otherwise.
5917 So if it is lower than the loaded bytes we have a hit. */
5918 emit_cmp_and_jump_insns (gpos
, loadlen
, GE
, NULL_RTX
, SImode
, 1,
5920 emit_insn (gen_vstlv16qi (vsrc
, gpos
, dst
));
5922 force_expand_binop (Pmode
, add_optab
, dst_addr
, gpos
, result
,
5924 emit_jump (exit_label
);
5927 emit_label (full_label
);
5928 LABEL_NUSES (full_label
) = 1;
5930 /* Calculate `offset' so that src + offset points to the last byte
5931 before 16 byte alignment. */
5933 /* temp = src_addr & 0xf */
5934 force_expand_binop (Pmode
, and_optab
, src_addr
, GEN_INT (15), temp
,
5937 /* offset = 0xf - temp */
5938 emit_move_insn (offset
, GEN_INT (15));
5939 force_expand_binop (Pmode
, sub_optab
, offset
, temp
, offset
,
5942 /* Store `offset' bytes in the dstination string. The quick check
5943 has loaded at least `offset' bytes into vsrc. */
5945 emit_insn (gen_vstlv16qi (vsrc
, gen_lowpart (SImode
, offset
), dst
));
5947 /* Advance to the next byte to be loaded. */
5948 force_expand_binop (Pmode
, add_optab
, offset
, const1_rtx
, offset
,
5951 /* Make sure the addresses are single regs which can be used as a
5953 emit_move_insn (src_addr_reg
, src_addr
);
5954 emit_move_insn (dst_addr_reg
, dst_addr
);
5958 emit_label (loop_label
);
5959 LABEL_NUSES (loop_label
) = 1;
5961 emit_move_insn (vsrc
,
5962 gen_rtx_MEM (V16QImode
,
5963 gen_rtx_PLUS (Pmode
, src_addr_reg
, offset
)));
5965 emit_insn (gen_vec_vfenesv16qi (vpos
, vsrc
, vsrc
,
5966 GEN_INT (VSTRING_FLAG_ZS
| VSTRING_FLAG_CS
)));
5967 add_int_reg_note (s390_emit_ccraw_jump (8, EQ
, done_label
),
5968 REG_BR_PROB
, profile_probability::very_unlikely ()
5969 .to_reg_br_prob_note ());
5971 emit_move_insn (gen_rtx_MEM (V16QImode
,
5972 gen_rtx_PLUS (Pmode
, dst_addr_reg
, offset
)),
5975 force_expand_binop (Pmode
, add_optab
, offset
, GEN_INT (16),
5976 offset
, 1, OPTAB_DIRECT
);
5978 emit_jump (loop_label
);
5983 /* We are done. Add the offset of the zero character to the dst_addr
5984 pointer to get the result. */
5986 emit_label (done_label
);
5987 LABEL_NUSES (done_label
) = 1;
5989 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, offset
, dst_addr_reg
,
5992 emit_insn (gen_vec_extractv16qiqi (gpos_qi
, vpos
, GEN_INT (7)));
5993 emit_move_insn (gpos
, gen_rtx_SUBREG (SImode
, gpos_qi
, 0));
5995 emit_insn (gen_vstlv16qi (vsrc
, gpos
, gen_rtx_MEM (BLKmode
, dst_addr_reg
)));
5997 force_expand_binop (Pmode
, add_optab
, dst_addr_reg
, gpos
, result
,
6002 emit_label (exit_label
);
6003 LABEL_NUSES (exit_label
) = 1;
6007 /* Expand conditional increment or decrement using alc/slb instructions.
6008 Should generate code setting DST to either SRC or SRC + INCREMENT,
6009 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6010 Returns true if successful, false otherwise.
6012 That makes it possible to implement some if-constructs without jumps e.g.:
6013 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6014 unsigned int a, b, c;
6015 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6016 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6017 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6018 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6020 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6021 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6022 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6023 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6024 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6027 s390_expand_addcc (enum rtx_code cmp_code
, rtx cmp_op0
, rtx cmp_op1
,
6028 rtx dst
, rtx src
, rtx increment
)
6030 machine_mode cmp_mode
;
6031 machine_mode cc_mode
;
6037 if ((GET_MODE (cmp_op0
) == SImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6038 && (GET_MODE (cmp_op1
) == SImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6040 else if ((GET_MODE (cmp_op0
) == DImode
|| GET_MODE (cmp_op0
) == VOIDmode
)
6041 && (GET_MODE (cmp_op1
) == DImode
|| GET_MODE (cmp_op1
) == VOIDmode
))
6046 /* Try ADD LOGICAL WITH CARRY. */
6047 if (increment
== const1_rtx
)
6049 /* Determine CC mode to use. */
6050 if (cmp_code
== EQ
|| cmp_code
== NE
)
6052 if (cmp_op1
!= const0_rtx
)
6054 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6055 NULL_RTX
, 0, OPTAB_WIDEN
);
6056 cmp_op1
= const0_rtx
;
6059 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6062 if (cmp_code
== LTU
|| cmp_code
== LEU
)
6067 cmp_code
= swap_condition (cmp_code
);
6084 /* Emit comparison instruction pattern. */
6085 if (!register_operand (cmp_op0
, cmp_mode
))
6086 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6088 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6089 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6090 /* We use insn_invalid_p here to add clobbers if required. */
6091 ret
= insn_invalid_p (emit_insn (insn
), false);
6094 /* Emit ALC instruction pattern. */
6095 op_res
= gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6096 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6099 if (src
!= const0_rtx
)
6101 if (!register_operand (src
, GET_MODE (dst
)))
6102 src
= force_reg (GET_MODE (dst
), src
);
6104 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, src
);
6105 op_res
= gen_rtx_PLUS (GET_MODE (dst
), op_res
, const0_rtx
);
6108 p
= rtvec_alloc (2);
6110 gen_rtx_SET (dst
, op_res
);
6112 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6113 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6118 /* Try SUBTRACT LOGICAL WITH BORROW. */
6119 if (increment
== constm1_rtx
)
6121 /* Determine CC mode to use. */
6122 if (cmp_code
== EQ
|| cmp_code
== NE
)
6124 if (cmp_op1
!= const0_rtx
)
6126 cmp_op0
= expand_simple_binop (cmp_mode
, XOR
, cmp_op0
, cmp_op1
,
6127 NULL_RTX
, 0, OPTAB_WIDEN
);
6128 cmp_op1
= const0_rtx
;
6131 cmp_code
= cmp_code
== EQ
? LEU
: GTU
;
6134 if (cmp_code
== GTU
|| cmp_code
== GEU
)
6139 cmp_code
= swap_condition (cmp_code
);
6156 /* Emit comparison instruction pattern. */
6157 if (!register_operand (cmp_op0
, cmp_mode
))
6158 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
6160 insn
= gen_rtx_SET (gen_rtx_REG (cc_mode
, CC_REGNUM
),
6161 gen_rtx_COMPARE (cc_mode
, cmp_op0
, cmp_op1
));
6162 /* We use insn_invalid_p here to add clobbers if required. */
6163 ret
= insn_invalid_p (emit_insn (insn
), false);
6166 /* Emit SLB instruction pattern. */
6167 if (!register_operand (src
, GET_MODE (dst
)))
6168 src
= force_reg (GET_MODE (dst
), src
);
6170 op_res
= gen_rtx_MINUS (GET_MODE (dst
),
6171 gen_rtx_MINUS (GET_MODE (dst
), src
, const0_rtx
),
6172 gen_rtx_fmt_ee (cmp_code
, GET_MODE (dst
),
6173 gen_rtx_REG (cc_mode
, CC_REGNUM
),
6175 p
= rtvec_alloc (2);
6177 gen_rtx_SET (dst
, op_res
);
6179 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6180 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
6188 /* Expand code for the insv template. Return true if successful. */
6191 s390_expand_insv (rtx dest
, rtx op1
, rtx op2
, rtx src
)
6193 int bitsize
= INTVAL (op1
);
6194 int bitpos
= INTVAL (op2
);
6195 machine_mode mode
= GET_MODE (dest
);
6197 int smode_bsize
, mode_bsize
;
6200 if (bitsize
+ bitpos
> GET_MODE_BITSIZE (mode
))
6203 /* Generate INSERT IMMEDIATE (IILL et al). */
6204 /* (set (ze (reg)) (const_int)). */
6206 && register_operand (dest
, word_mode
)
6207 && (bitpos
% 16) == 0
6208 && (bitsize
% 16) == 0
6209 && const_int_operand (src
, VOIDmode
))
6211 HOST_WIDE_INT val
= INTVAL (src
);
6212 int regpos
= bitpos
+ bitsize
;
6214 while (regpos
> bitpos
)
6216 machine_mode putmode
;
6219 if (TARGET_EXTIMM
&& (regpos
% 32 == 0) && (regpos
>= bitpos
+ 32))
6224 putsize
= GET_MODE_BITSIZE (putmode
);
6226 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6229 gen_int_mode (val
, putmode
));
6232 gcc_assert (regpos
== bitpos
);
6236 smode
= smallest_int_mode_for_size (bitsize
);
6237 smode_bsize
= GET_MODE_BITSIZE (smode
);
6238 mode_bsize
= GET_MODE_BITSIZE (mode
);
6240 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6242 && (bitsize
% BITS_PER_UNIT
) == 0
6244 && (register_operand (src
, word_mode
)
6245 || const_int_operand (src
, VOIDmode
)))
6247 /* Emit standard pattern if possible. */
6248 if (smode_bsize
== bitsize
)
6250 emit_move_insn (adjust_address (dest
, smode
, 0),
6251 gen_lowpart (smode
, src
));
6255 /* (set (ze (mem)) (const_int)). */
6256 else if (const_int_operand (src
, VOIDmode
))
6258 int size
= bitsize
/ BITS_PER_UNIT
;
6259 rtx src_mem
= adjust_address (force_const_mem (word_mode
, src
),
6261 UNITS_PER_WORD
- size
);
6263 dest
= adjust_address (dest
, BLKmode
, 0);
6264 set_mem_size (dest
, size
);
6265 s390_expand_movmem (dest
, src_mem
, GEN_INT (size
));
6269 /* (set (ze (mem)) (reg)). */
6270 else if (register_operand (src
, word_mode
))
6273 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
, op1
,
6277 /* Emit st,stcmh sequence. */
6278 int stcmh_width
= bitsize
- 32;
6279 int size
= stcmh_width
/ BITS_PER_UNIT
;
6281 emit_move_insn (adjust_address (dest
, SImode
, size
),
6282 gen_lowpart (SImode
, src
));
6283 set_mem_size (dest
, size
);
6284 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode
, dest
,
6285 GEN_INT (stcmh_width
),
6287 gen_rtx_LSHIFTRT (word_mode
, src
, GEN_INT (32)));
6293 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6294 if ((bitpos
% BITS_PER_UNIT
) == 0
6295 && (bitsize
% BITS_PER_UNIT
) == 0
6296 && (bitpos
& 32) == ((bitpos
+ bitsize
- 1) & 32)
6298 && (mode
== DImode
|| mode
== SImode
)
6299 && register_operand (dest
, mode
))
6301 /* Emit a strict_low_part pattern if possible. */
6302 if (smode_bsize
== bitsize
&& bitpos
== mode_bsize
- smode_bsize
)
6304 op
= gen_rtx_STRICT_LOW_PART (VOIDmode
, gen_lowpart (smode
, dest
));
6305 op
= gen_rtx_SET (op
, gen_lowpart (smode
, src
));
6306 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6307 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
)));
6311 /* ??? There are more powerful versions of ICM that are not
6312 completely represented in the md file. */
6315 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6316 if (TARGET_Z10
&& (mode
== DImode
|| mode
== SImode
))
6318 machine_mode mode_s
= GET_MODE (src
);
6320 if (CONSTANT_P (src
))
6322 /* For constant zero values the representation with AND
6323 appears to be folded in more situations than the (set
6324 (zero_extract) ...).
6325 We only do this when the start and end of the bitfield
6326 remain in the same SImode chunk. That way nihf or nilf
6328 The AND patterns might still generate a risbg for this. */
6329 if (src
== const0_rtx
&& bitpos
/ 32 == (bitpos
+ bitsize
- 1) / 32)
6332 src
= force_reg (mode
, src
);
6334 else if (mode_s
!= mode
)
6336 gcc_assert (GET_MODE_BITSIZE (mode_s
) >= bitsize
);
6337 src
= force_reg (mode_s
, src
);
6338 src
= gen_lowpart (mode
, src
);
6341 op
= gen_rtx_ZERO_EXTRACT (mode
, dest
, op1
, op2
),
6342 op
= gen_rtx_SET (op
, src
);
6346 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, CC_REGNUM
));
6347 op
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clobber
));
6357 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6358 register that holds VAL of mode MODE shifted by COUNT bits. */
6361 s390_expand_mask_and_shift (rtx val
, machine_mode mode
, rtx count
)
6363 val
= expand_simple_binop (SImode
, AND
, val
, GEN_INT (GET_MODE_MASK (mode
)),
6364 NULL_RTX
, 1, OPTAB_DIRECT
);
6365 return expand_simple_binop (SImode
, ASHIFT
, val
, count
,
6366 NULL_RTX
, 1, OPTAB_DIRECT
);
6369 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6370 the result in TARGET. */
6373 s390_expand_vec_compare (rtx target
, enum rtx_code cond
,
6374 rtx cmp_op1
, rtx cmp_op2
)
6376 machine_mode mode
= GET_MODE (target
);
6377 bool neg_p
= false, swap_p
= false;
6380 if (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_VECTOR_FLOAT
)
6384 /* NE a != b -> !(a == b) */
6385 case NE
: cond
= EQ
; neg_p
= true; break;
6386 /* UNGT a u> b -> !(b >= a) */
6387 case UNGT
: cond
= GE
; neg_p
= true; swap_p
= true; break;
6388 /* UNGE a u>= b -> !(b > a) */
6389 case UNGE
: cond
= GT
; neg_p
= true; swap_p
= true; break;
6390 /* LE: a <= b -> b >= a */
6391 case LE
: cond
= GE
; swap_p
= true; break;
6392 /* UNLE: a u<= b -> !(a > b) */
6393 case UNLE
: cond
= GT
; neg_p
= true; break;
6394 /* LT: a < b -> b > a */
6395 case LT
: cond
= GT
; swap_p
= true; break;
6396 /* UNLT: a u< b -> !(a >= b) */
6397 case UNLT
: cond
= GE
; neg_p
= true; break;
6399 emit_insn (gen_vec_cmpuneqv2df (target
, cmp_op1
, cmp_op2
));
6402 emit_insn (gen_vec_cmpltgtv2df (target
, cmp_op1
, cmp_op2
));
6405 emit_insn (gen_vec_orderedv2df (target
, cmp_op1
, cmp_op2
));
6408 emit_insn (gen_vec_unorderedv2df (target
, cmp_op1
, cmp_op2
));
6417 /* NE: a != b -> !(a == b) */
6418 case NE
: cond
= EQ
; neg_p
= true; break;
6419 /* GE: a >= b -> !(b > a) */
6420 case GE
: cond
= GT
; neg_p
= true; swap_p
= true; break;
6421 /* GEU: a >= b -> !(b > a) */
6422 case GEU
: cond
= GTU
; neg_p
= true; swap_p
= true; break;
6423 /* LE: a <= b -> !(a > b) */
6424 case LE
: cond
= GT
; neg_p
= true; break;
6425 /* LEU: a <= b -> !(a > b) */
6426 case LEU
: cond
= GTU
; neg_p
= true; break;
6427 /* LT: a < b -> b > a */
6428 case LT
: cond
= GT
; swap_p
= true; break;
6429 /* LTU: a < b -> b > a */
6430 case LTU
: cond
= GTU
; swap_p
= true; break;
6437 tmp
= cmp_op1
; cmp_op1
= cmp_op2
; cmp_op2
= tmp
;
6440 emit_insn (gen_rtx_SET (target
, gen_rtx_fmt_ee (cond
,
6442 cmp_op1
, cmp_op2
)));
6444 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (mode
, target
)));
6447 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6448 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6449 elements in CMP1 and CMP2 fulfill the comparison.
6450 This function is only used to emit patterns for the vx builtins and
6451 therefore only handles comparison codes required by the
6454 s390_expand_vec_compare_cc (rtx target
, enum rtx_code code
,
6455 rtx cmp1
, rtx cmp2
, bool all_p
)
6457 machine_mode cc_producer_mode
, cc_consumer_mode
, scratch_mode
;
6458 rtx tmp_reg
= gen_reg_rtx (SImode
);
6459 bool swap_p
= false;
6461 if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_INT
)
6467 cc_producer_mode
= CCVEQmode
;
6471 code
= swap_condition (code
);
6476 cc_producer_mode
= CCVIHmode
;
6480 code
= swap_condition (code
);
6485 cc_producer_mode
= CCVIHUmode
;
6491 scratch_mode
= GET_MODE (cmp1
);
6492 /* These codes represent inverted CC interpretations. Inverting
6493 an ALL CC mode results in an ANY CC mode and the other way
6494 around. Invert the all_p flag here to compensate for
6496 if (code
== NE
|| code
== LE
|| code
== LEU
)
6499 cc_consumer_mode
= all_p
? CCVIALLmode
: CCVIANYmode
;
6501 else if (GET_MODE_CLASS (GET_MODE (cmp1
)) == MODE_VECTOR_FLOAT
)
6507 case EQ
: cc_producer_mode
= CCVEQmode
; break;
6508 case NE
: cc_producer_mode
= CCVEQmode
; inv_p
= true; break;
6509 case GT
: cc_producer_mode
= CCVFHmode
; break;
6510 case GE
: cc_producer_mode
= CCVFHEmode
; break;
6511 case UNLE
: cc_producer_mode
= CCVFHmode
; inv_p
= true; break;
6512 case UNLT
: cc_producer_mode
= CCVFHEmode
; inv_p
= true; break;
6513 case LT
: cc_producer_mode
= CCVFHmode
; code
= GT
; swap_p
= true; break;
6514 case LE
: cc_producer_mode
= CCVFHEmode
; code
= GE
; swap_p
= true; break;
6515 default: gcc_unreachable ();
6517 scratch_mode
= mode_for_int_vector (GET_MODE (cmp1
)).require ();
6522 cc_consumer_mode
= all_p
? CCVFALLmode
: CCVFANYmode
;
6534 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6535 gen_rtvec (2, gen_rtx_SET (
6536 gen_rtx_REG (cc_producer_mode
, CC_REGNUM
),
6537 gen_rtx_COMPARE (cc_producer_mode
, cmp1
, cmp2
)),
6538 gen_rtx_CLOBBER (VOIDmode
,
6539 gen_rtx_SCRATCH (scratch_mode
)))));
6540 emit_move_insn (target
, const0_rtx
);
6541 emit_move_insn (tmp_reg
, const1_rtx
);
6543 emit_move_insn (target
,
6544 gen_rtx_IF_THEN_ELSE (SImode
,
6545 gen_rtx_fmt_ee (code
, VOIDmode
,
6546 gen_rtx_REG (cc_consumer_mode
, CC_REGNUM
),
6551 /* Invert the comparison CODE applied to a CC mode. This is only safe
6552 if we know whether there result was created by a floating point
6553 compare or not. For the CCV modes this is encoded as part of the
6556 s390_reverse_condition (machine_mode mode
, enum rtx_code code
)
6558 /* Reversal of FP compares takes care -- an ordered compare
6559 becomes an unordered compare and vice versa. */
6560 if (mode
== CCVFALLmode
|| mode
== CCVFANYmode
)
6561 return reverse_condition_maybe_unordered (code
);
6562 else if (mode
== CCVIALLmode
|| mode
== CCVIANYmode
)
6563 return reverse_condition (code
);
6568 /* Generate a vector comparison expression loading either elements of
6569 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6573 s390_expand_vcond (rtx target
, rtx then
, rtx els
,
6574 enum rtx_code cond
, rtx cmp_op1
, rtx cmp_op2
)
6577 machine_mode result_mode
;
6580 machine_mode target_mode
= GET_MODE (target
);
6581 machine_mode cmp_mode
= GET_MODE (cmp_op1
);
6582 rtx op
= (cond
== LT
) ? els
: then
;
6584 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6585 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6586 for short and byte (x >> 15 and x >> 7 respectively). */
6587 if ((cond
== LT
|| cond
== GE
)
6588 && target_mode
== cmp_mode
6589 && cmp_op2
== CONST0_RTX (cmp_mode
)
6590 && op
== CONST0_RTX (target_mode
)
6591 && s390_vector_mode_supported_p (target_mode
)
6592 && GET_MODE_CLASS (target_mode
) == MODE_VECTOR_INT
)
6594 rtx negop
= (cond
== LT
) ? then
: els
;
6596 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (target_mode
)) - 1;
6598 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6599 if (negop
== CONST1_RTX (target_mode
))
6601 rtx res
= expand_simple_binop (cmp_mode
, LSHIFTRT
, cmp_op1
,
6602 GEN_INT (shift
), target
,
6605 emit_move_insn (target
, res
);
6609 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6610 else if (all_ones_operand (negop
, target_mode
))
6612 rtx res
= expand_simple_binop (cmp_mode
, ASHIFTRT
, cmp_op1
,
6613 GEN_INT (shift
), target
,
6616 emit_move_insn (target
, res
);
6621 /* We always use an integral type vector to hold the comparison
6623 result_mode
= mode_for_int_vector (cmp_mode
).require ();
6624 result_target
= gen_reg_rtx (result_mode
);
6626 /* We allow vector immediates as comparison operands that
6627 can be handled by the optimization above but not by the
6628 following code. Hence, force them into registers here. */
6629 if (!REG_P (cmp_op1
))
6630 cmp_op1
= force_reg (GET_MODE (cmp_op1
), cmp_op1
);
6632 if (!REG_P (cmp_op2
))
6633 cmp_op2
= force_reg (GET_MODE (cmp_op2
), cmp_op2
);
6635 s390_expand_vec_compare (result_target
, cond
,
6638 /* If the results are supposed to be either -1 or 0 we are done
6639 since this is what our compare instructions generate anyway. */
6640 if (all_ones_operand (then
, GET_MODE (then
))
6641 && const0_operand (els
, GET_MODE (els
)))
6643 emit_move_insn (target
, gen_rtx_SUBREG (target_mode
,
6648 /* Otherwise we will do a vsel afterwards. */
6649 /* This gets triggered e.g.
6650 with gcc.c-torture/compile/pr53410-1.c */
6652 then
= force_reg (target_mode
, then
);
6655 els
= force_reg (target_mode
, els
);
6657 tmp
= gen_rtx_fmt_ee (EQ
, VOIDmode
,
6659 CONST0_RTX (result_mode
));
6661 /* We compared the result against zero above so we have to swap then
6663 tmp
= gen_rtx_IF_THEN_ELSE (target_mode
, tmp
, els
, then
);
6665 gcc_assert (target_mode
== GET_MODE (then
));
6666 emit_insn (gen_rtx_SET (target
, tmp
));
6669 /* Emit the RTX necessary to initialize the vector TARGET with values
6672 s390_expand_vec_init (rtx target
, rtx vals
)
6674 machine_mode mode
= GET_MODE (target
);
6675 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6676 int n_elts
= GET_MODE_NUNITS (mode
);
6677 bool all_same
= true, all_regs
= true, all_const_int
= true;
6681 for (i
= 0; i
< n_elts
; ++i
)
6683 x
= XVECEXP (vals
, 0, i
);
6685 if (!CONST_INT_P (x
))
6686 all_const_int
= false;
6688 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6695 /* Use vector gen mask or vector gen byte mask if possible. */
6696 if (all_same
&& all_const_int
6697 && (XVECEXP (vals
, 0, 0) == const0_rtx
6698 || s390_contiguous_bitmask_vector_p (XVECEXP (vals
, 0, 0),
6700 || s390_bytemask_vector_p (XVECEXP (vals
, 0, 0), NULL
)))
6702 emit_insn (gen_rtx_SET (target
,
6703 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0))));
6709 emit_insn (gen_rtx_SET (target
,
6710 gen_rtx_VEC_DUPLICATE (mode
,
6711 XVECEXP (vals
, 0, 0))));
6718 && GET_MODE_SIZE (inner_mode
) == 8)
6720 /* Use vector load pair. */
6721 emit_insn (gen_rtx_SET (target
,
6722 gen_rtx_VEC_CONCAT (mode
,
6723 XVECEXP (vals
, 0, 0),
6724 XVECEXP (vals
, 0, 1))));
6728 /* Use vector load logical element and zero. */
6729 if (TARGET_VXE
&& (mode
== V4SImode
|| mode
== V4SFmode
))
6733 x
= XVECEXP (vals
, 0, 0);
6734 if (memory_operand (x
, inner_mode
))
6736 for (i
= 1; i
< n_elts
; ++i
)
6737 found
= found
&& XVECEXP (vals
, 0, i
) == const0_rtx
;
6741 machine_mode half_mode
= (inner_mode
== SFmode
6742 ? V2SFmode
: V2SImode
);
6743 emit_insn (gen_rtx_SET (target
,
6744 gen_rtx_VEC_CONCAT (mode
,
6745 gen_rtx_VEC_CONCAT (half_mode
,
6748 gen_rtx_VEC_CONCAT (half_mode
,
6756 /* We are about to set the vector elements one by one. Zero out the
6757 full register first in order to help the data flow framework to
6758 detect it as full VR set. */
6759 emit_insn (gen_rtx_SET (target
, CONST0_RTX (mode
)));
6761 /* Unfortunately the vec_init expander is not allowed to fail. So
6762 we have to implement the fallback ourselves. */
6763 for (i
= 0; i
< n_elts
; i
++)
6765 rtx elem
= XVECEXP (vals
, 0, i
);
6766 if (!general_operand (elem
, GET_MODE (elem
)))
6767 elem
= force_reg (inner_mode
, elem
);
6769 emit_insn (gen_rtx_SET (target
,
6770 gen_rtx_UNSPEC (mode
,
6772 GEN_INT (i
), target
),
6777 /* Structure to hold the initial parameters for a compare_and_swap operation
6778 in HImode and QImode. */
6780 struct alignment_context
6782 rtx memsi
; /* SI aligned memory location. */
6783 rtx shift
; /* Bit offset with regard to lsb. */
6784 rtx modemask
; /* Mask of the HQImode shifted by SHIFT bits. */
6785 rtx modemaski
; /* ~modemask */
6786 bool aligned
; /* True if memory is aligned, false else. */
6789 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6790 structure AC for transparent simplifying, if the memory alignment is known
6791 to be at least 32bit. MEM is the memory location for the actual operation
6792 and MODE its mode. */
6795 init_alignment_context (struct alignment_context
*ac
, rtx mem
,
6798 ac
->shift
= GEN_INT (GET_MODE_SIZE (SImode
) - GET_MODE_SIZE (mode
));
6799 ac
->aligned
= (MEM_ALIGN (mem
) >= GET_MODE_BITSIZE (SImode
));
6802 ac
->memsi
= adjust_address (mem
, SImode
, 0); /* Memory is aligned. */
6805 /* Alignment is unknown. */
6806 rtx byteoffset
, addr
, align
;
6808 /* Force the address into a register. */
6809 addr
= force_reg (Pmode
, XEXP (mem
, 0));
6811 /* Align it to SImode. */
6812 align
= expand_simple_binop (Pmode
, AND
, addr
,
6813 GEN_INT (-GET_MODE_SIZE (SImode
)),
6814 NULL_RTX
, 1, OPTAB_DIRECT
);
6816 ac
->memsi
= gen_rtx_MEM (SImode
, align
);
6817 MEM_VOLATILE_P (ac
->memsi
) = MEM_VOLATILE_P (mem
);
6818 set_mem_alias_set (ac
->memsi
, ALIAS_SET_MEMORY_BARRIER
);
6819 set_mem_align (ac
->memsi
, GET_MODE_BITSIZE (SImode
));
6821 /* Calculate shiftcount. */
6822 byteoffset
= expand_simple_binop (Pmode
, AND
, addr
,
6823 GEN_INT (GET_MODE_SIZE (SImode
) - 1),
6824 NULL_RTX
, 1, OPTAB_DIRECT
);
6825 /* As we already have some offset, evaluate the remaining distance. */
6826 ac
->shift
= expand_simple_binop (SImode
, MINUS
, ac
->shift
, byteoffset
,
6827 NULL_RTX
, 1, OPTAB_DIRECT
);
6830 /* Shift is the byte count, but we need the bitcount. */
6831 ac
->shift
= expand_simple_binop (SImode
, ASHIFT
, ac
->shift
, GEN_INT (3),
6832 NULL_RTX
, 1, OPTAB_DIRECT
);
6834 /* Calculate masks. */
6835 ac
->modemask
= expand_simple_binop (SImode
, ASHIFT
,
6836 GEN_INT (GET_MODE_MASK (mode
)),
6837 ac
->shift
, NULL_RTX
, 1, OPTAB_DIRECT
);
6838 ac
->modemaski
= expand_simple_unop (SImode
, NOT
, ac
->modemask
,
6842 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6843 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6844 perform the merge in SEQ2. */
6847 s390_two_part_insv (struct alignment_context
*ac
, rtx
*seq1
, rtx
*seq2
,
6848 machine_mode mode
, rtx val
, rtx ins
)
6855 tmp
= copy_to_mode_reg (SImode
, val
);
6856 if (s390_expand_insv (tmp
, GEN_INT (GET_MODE_BITSIZE (mode
)),
6860 *seq2
= get_insns ();
6867 /* Failed to use insv. Generate a two part shift and mask. */
6869 tmp
= s390_expand_mask_and_shift (ins
, mode
, ac
->shift
);
6870 *seq1
= get_insns ();
6874 tmp
= expand_simple_binop (SImode
, IOR
, tmp
, val
, NULL_RTX
, 1, OPTAB_DIRECT
);
6875 *seq2
= get_insns ();
6881 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6882 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6883 value to set if CMP == MEM. */
6886 s390_expand_cs_hqi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
6887 rtx cmp
, rtx new_rtx
, bool is_weak
)
6889 struct alignment_context ac
;
6890 rtx cmpv
, newv
, val
, cc
, seq0
, seq1
, seq2
, seq3
;
6891 rtx res
= gen_reg_rtx (SImode
);
6892 rtx_code_label
*csloop
= NULL
, *csend
= NULL
;
6894 gcc_assert (MEM_P (mem
));
6896 init_alignment_context (&ac
, mem
, mode
);
6898 /* Load full word. Subsequent loads are performed by CS. */
6899 val
= expand_simple_binop (SImode
, AND
, ac
.memsi
, ac
.modemaski
,
6900 NULL_RTX
, 1, OPTAB_DIRECT
);
6902 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6903 possible, we try to use insv to make this happen efficiently. If
6904 that fails we'll generate code both inside and outside the loop. */
6905 cmpv
= s390_two_part_insv (&ac
, &seq0
, &seq2
, mode
, val
, cmp
);
6906 newv
= s390_two_part_insv (&ac
, &seq1
, &seq3
, mode
, val
, new_rtx
);
6913 /* Start CS loop. */
6916 /* Begin assuming success. */
6917 emit_move_insn (btarget
, const1_rtx
);
6919 csloop
= gen_label_rtx ();
6920 csend
= gen_label_rtx ();
6921 emit_label (csloop
);
6924 /* val = "<mem>00..0<mem>"
6925 * cmp = "00..0<cmp>00..0"
6926 * new = "00..0<new>00..0"
6932 cc
= s390_emit_compare_and_swap (EQ
, res
, ac
.memsi
, cmpv
, newv
, CCZ1mode
);
6934 emit_insn (gen_cstorecc4 (btarget
, cc
, XEXP (cc
, 0), XEXP (cc
, 1)));
6939 /* Jump to end if we're done (likely?). */
6940 s390_emit_jump (csend
, cc
);
6942 /* Check for changes outside mode, and loop internal if so.
6943 Arrange the moves so that the compare is adjacent to the
6944 branch so that we can generate CRJ. */
6945 tmp
= copy_to_reg (val
);
6946 force_expand_binop (SImode
, and_optab
, res
, ac
.modemaski
, val
,
6948 cc
= s390_emit_compare (NE
, val
, tmp
);
6949 s390_emit_jump (csloop
, cc
);
6952 emit_move_insn (btarget
, const0_rtx
);
6956 /* Return the correct part of the bitfield. */
6957 convert_move (vtarget
, expand_simple_binop (SImode
, LSHIFTRT
, res
, ac
.shift
,
6958 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
6961 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6963 s390_expand_cs_tdsi (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
6964 rtx cmp
, rtx new_rtx
, bool is_weak
)
6966 rtx output
= vtarget
;
6967 rtx_code_label
*skip_cs_label
= NULL
;
6968 bool do_const_opt
= false;
6970 if (!register_operand (output
, mode
))
6971 output
= gen_reg_rtx (mode
);
6973 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6974 with the constant first and skip the compare_and_swap because its very
6975 expensive and likely to fail anyway.
6976 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6977 cause spurious in that case.
6978 Note 2: It may be useful to do this also for non-constant INPUT.
6979 Note 3: Currently only targets with "load on condition" are supported
6980 (z196 and newer). */
6983 && (mode
== SImode
|| mode
== DImode
))
6984 do_const_opt
= (is_weak
&& CONST_INT_P (cmp
));
6988 rtx cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
6990 skip_cs_label
= gen_label_rtx ();
6991 emit_move_insn (btarget
, const0_rtx
);
6992 if (CONST_INT_P (cmp
) && INTVAL (cmp
) == 0)
6994 rtvec lt
= rtvec_alloc (2);
6996 /* Load-and-test + conditional jump. */
6998 = gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, mem
, cmp
));
6999 RTVEC_ELT (lt
, 1) = gen_rtx_SET (output
, mem
);
7000 emit_insn (gen_rtx_PARALLEL (VOIDmode
, lt
));
7004 emit_move_insn (output
, mem
);
7005 emit_insn (gen_rtx_SET (cc
, gen_rtx_COMPARE (CCZmode
, output
, cmp
)));
7007 s390_emit_jump (skip_cs_label
, gen_rtx_NE (VOIDmode
, cc
, const0_rtx
));
7008 add_reg_br_prob_note (get_last_insn (),
7009 profile_probability::very_unlikely ());
7010 /* If the jump is not taken, OUTPUT is the expected value. */
7012 /* Reload newval to a register manually, *after* the compare and jump
7013 above. Otherwise Reload might place it before the jump. */
7016 cmp
= force_reg (mode
, cmp
);
7017 new_rtx
= force_reg (mode
, new_rtx
);
7018 s390_emit_compare_and_swap (EQ
, output
, mem
, cmp
, new_rtx
,
7019 (do_const_opt
) ? CCZmode
: CCZ1mode
);
7020 if (skip_cs_label
!= NULL
)
7021 emit_label (skip_cs_label
);
7023 /* We deliberately accept non-register operands in the predicate
7024 to ensure the write back to the output operand happens *before*
7025 the store-flags code below. This makes it easier for combine
7026 to merge the store-flags code with a potential test-and-branch
7027 pattern following (immediately!) afterwards. */
7028 if (output
!= vtarget
)
7029 emit_move_insn (vtarget
, output
);
7035 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7036 btarget has already been initialized with 0 above. */
7037 cc
= gen_rtx_REG (CCZmode
, CC_REGNUM
);
7038 cond
= gen_rtx_EQ (VOIDmode
, cc
, const0_rtx
);
7039 ite
= gen_rtx_IF_THEN_ELSE (SImode
, cond
, const1_rtx
, btarget
);
7040 emit_insn (gen_rtx_SET (btarget
, ite
));
7046 cc
= gen_rtx_REG (CCZ1mode
, CC_REGNUM
);
7047 cond
= gen_rtx_EQ (SImode
, cc
, const0_rtx
);
7048 emit_insn (gen_cstorecc4 (btarget
, cond
, cc
, const0_rtx
));
7052 /* Expand an atomic compare and swap operation. MEM is the memory location,
7053 CMP the old value to compare MEM with and NEW_RTX the value to set if
7057 s390_expand_cs (machine_mode mode
, rtx btarget
, rtx vtarget
, rtx mem
,
7058 rtx cmp
, rtx new_rtx
, bool is_weak
)
7065 s390_expand_cs_tdsi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7069 s390_expand_cs_hqi (mode
, btarget
, vtarget
, mem
, cmp
, new_rtx
, is_weak
);
7076 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7077 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7081 s390_expand_atomic_exchange_tdsi (rtx output
, rtx mem
, rtx input
)
7083 machine_mode mode
= GET_MODE (mem
);
7084 rtx_code_label
*csloop
;
7087 && (mode
== DImode
|| mode
== SImode
)
7088 && CONST_INT_P (input
) && INTVAL (input
) == 0)
7090 emit_move_insn (output
, const0_rtx
);
7092 emit_insn (gen_atomic_fetch_anddi (output
, mem
, const0_rtx
, input
));
7094 emit_insn (gen_atomic_fetch_andsi (output
, mem
, const0_rtx
, input
));
7098 input
= force_reg (mode
, input
);
7099 emit_move_insn (output
, mem
);
7100 csloop
= gen_label_rtx ();
7101 emit_label (csloop
);
7102 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, output
, mem
, output
,
7106 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7107 and VAL the value to play with. If AFTER is true then store the value
7108 MEM holds after the operation, if AFTER is false then store the value MEM
7109 holds before the operation. If TARGET is zero then discard that value, else
7110 store it to TARGET. */
7113 s390_expand_atomic (machine_mode mode
, enum rtx_code code
,
7114 rtx target
, rtx mem
, rtx val
, bool after
)
7116 struct alignment_context ac
;
7118 rtx new_rtx
= gen_reg_rtx (SImode
);
7119 rtx orig
= gen_reg_rtx (SImode
);
7120 rtx_code_label
*csloop
= gen_label_rtx ();
7122 gcc_assert (!target
|| register_operand (target
, VOIDmode
));
7123 gcc_assert (MEM_P (mem
));
7125 init_alignment_context (&ac
, mem
, mode
);
7127 /* Shift val to the correct bit positions.
7128 Preserve "icm", but prevent "ex icm". */
7129 if (!(ac
.aligned
&& code
== SET
&& MEM_P (val
)))
7130 val
= s390_expand_mask_and_shift (val
, mode
, ac
.shift
);
7132 /* Further preparation insns. */
7133 if (code
== PLUS
|| code
== MINUS
)
7134 emit_move_insn (orig
, val
);
7135 else if (code
== MULT
|| code
== AND
) /* val = "11..1<val>11..1" */
7136 val
= expand_simple_binop (SImode
, XOR
, val
, ac
.modemaski
,
7137 NULL_RTX
, 1, OPTAB_DIRECT
);
7139 /* Load full word. Subsequent loads are performed by CS. */
7140 cmp
= force_reg (SImode
, ac
.memsi
);
7142 /* Start CS loop. */
7143 emit_label (csloop
);
7144 emit_move_insn (new_rtx
, cmp
);
7146 /* Patch new with val at correct position. */
7151 val
= expand_simple_binop (SImode
, code
, new_rtx
, orig
,
7152 NULL_RTX
, 1, OPTAB_DIRECT
);
7153 val
= expand_simple_binop (SImode
, AND
, val
, ac
.modemask
,
7154 NULL_RTX
, 1, OPTAB_DIRECT
);
7157 if (ac
.aligned
&& MEM_P (val
))
7158 store_bit_field (new_rtx
, GET_MODE_BITSIZE (mode
), 0,
7159 0, 0, SImode
, val
, false);
7162 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, ac
.modemaski
,
7163 NULL_RTX
, 1, OPTAB_DIRECT
);
7164 new_rtx
= expand_simple_binop (SImode
, IOR
, new_rtx
, val
,
7165 NULL_RTX
, 1, OPTAB_DIRECT
);
7171 new_rtx
= expand_simple_binop (SImode
, code
, new_rtx
, val
,
7172 NULL_RTX
, 1, OPTAB_DIRECT
);
7174 case MULT
: /* NAND */
7175 new_rtx
= expand_simple_binop (SImode
, AND
, new_rtx
, val
,
7176 NULL_RTX
, 1, OPTAB_DIRECT
);
7177 new_rtx
= expand_simple_binop (SImode
, XOR
, new_rtx
, ac
.modemask
,
7178 NULL_RTX
, 1, OPTAB_DIRECT
);
7184 s390_emit_jump (csloop
, s390_emit_compare_and_swap (NE
, cmp
,
7185 ac
.memsi
, cmp
, new_rtx
,
7188 /* Return the correct part of the bitfield. */
7190 convert_move (target
, expand_simple_binop (SImode
, LSHIFTRT
,
7191 after
? new_rtx
: cmp
, ac
.shift
,
7192 NULL_RTX
, 1, OPTAB_DIRECT
), 1);
7195 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7196 We need to emit DTP-relative relocations. */
7198 static void s390_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
7201 s390_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7206 fputs ("\t.long\t", file
);
7209 fputs ("\t.quad\t", file
);
7214 output_addr_const (file
, x
);
7215 fputs ("@DTPOFF", file
);
7218 /* Return the proper mode for REGNO being represented in the dwarf
7221 s390_dwarf_frame_reg_mode (int regno
)
7223 machine_mode save_mode
= default_dwarf_frame_reg_mode (regno
);
7225 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7226 if (GENERAL_REGNO_P (regno
))
7229 /* The rightmost 64 bits of vector registers are call-clobbered. */
7230 if (GET_MODE_SIZE (save_mode
) > 8)
7236 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7237 /* Implement TARGET_MANGLE_TYPE. */
7240 s390_mangle_type (const_tree type
)
7242 type
= TYPE_MAIN_VARIANT (type
);
7244 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
7245 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
7248 if (type
== s390_builtin_types
[BT_BV16QI
]) return "U6__boolc";
7249 if (type
== s390_builtin_types
[BT_BV8HI
]) return "U6__bools";
7250 if (type
== s390_builtin_types
[BT_BV4SI
]) return "U6__booli";
7251 if (type
== s390_builtin_types
[BT_BV2DI
]) return "U6__booll";
7253 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
7254 && TARGET_LONG_DOUBLE_128
)
7257 /* For all other types, use normal C++ mangling. */
7262 /* In the name of slightly smaller debug output, and to cater to
7263 general assembler lossage, recognize various UNSPEC sequences
7264 and turn them back into a direct symbol reference. */
7267 s390_delegitimize_address (rtx orig_x
)
7271 orig_x
= delegitimize_mem_from_attrs (orig_x
);
7274 /* Extract the symbol ref from:
7275 (plus:SI (reg:SI 12 %r12)
7276 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7277 UNSPEC_GOTOFF/PLTOFF)))
7279 (plus:SI (reg:SI 12 %r12)
7280 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7281 UNSPEC_GOTOFF/PLTOFF)
7282 (const_int 4 [0x4])))) */
7283 if (GET_CODE (x
) == PLUS
7284 && REG_P (XEXP (x
, 0))
7285 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
7286 && GET_CODE (XEXP (x
, 1)) == CONST
)
7288 HOST_WIDE_INT offset
= 0;
7290 /* The const operand. */
7291 y
= XEXP (XEXP (x
, 1), 0);
7293 if (GET_CODE (y
) == PLUS
7294 && GET_CODE (XEXP (y
, 1)) == CONST_INT
)
7296 offset
= INTVAL (XEXP (y
, 1));
7300 if (GET_CODE (y
) == UNSPEC
7301 && (XINT (y
, 1) == UNSPEC_GOTOFF
7302 || XINT (y
, 1) == UNSPEC_PLTOFF
))
7303 return plus_constant (Pmode
, XVECEXP (y
, 0, 0), offset
);
7306 if (GET_CODE (x
) != MEM
)
7310 if (GET_CODE (x
) == PLUS
7311 && GET_CODE (XEXP (x
, 1)) == CONST
7312 && GET_CODE (XEXP (x
, 0)) == REG
7313 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7315 y
= XEXP (XEXP (x
, 1), 0);
7316 if (GET_CODE (y
) == UNSPEC
7317 && XINT (y
, 1) == UNSPEC_GOT
)
7318 y
= XVECEXP (y
, 0, 0);
7322 else if (GET_CODE (x
) == CONST
)
7324 /* Extract the symbol ref from:
7325 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7326 UNSPEC_PLT/GOTENT))) */
7329 if (GET_CODE (y
) == UNSPEC
7330 && (XINT (y
, 1) == UNSPEC_GOTENT
7331 || XINT (y
, 1) == UNSPEC_PLT
))
7332 y
= XVECEXP (y
, 0, 0);
7339 if (GET_MODE (orig_x
) != Pmode
)
7341 if (GET_MODE (orig_x
) == BLKmode
)
7343 y
= lowpart_subreg (GET_MODE (orig_x
), y
, Pmode
);
7350 /* Output operand OP to stdio stream FILE.
7351 OP is an address (register + offset) which is not used to address data;
7352 instead the rightmost bits are interpreted as the value. */
7355 print_addrstyle_operand (FILE *file
, rtx op
)
7357 HOST_WIDE_INT offset
;
7360 /* Extract base register and offset. */
7361 if (!s390_decompose_addrstyle_without_index (op
, &base
, &offset
))
7367 gcc_assert (GET_CODE (base
) == REG
);
7368 gcc_assert (REGNO (base
) < FIRST_PSEUDO_REGISTER
);
7369 gcc_assert (REGNO_REG_CLASS (REGNO (base
)) == ADDR_REGS
);
7372 /* Offsets are constricted to twelve bits. */
7373 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
& ((1 << 12) - 1));
7375 fprintf (file
, "(%s)", reg_names
[REGNO (base
)]);
7378 /* Assigns the number of NOP halfwords to be emitted before and after the
7379 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7380 If hotpatching is disabled for the function, the values are set to zero.
7384 s390_function_num_hotpatch_hw (tree decl
,
7390 attr
= lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl
));
7392 /* Handle the arguments of the hotpatch attribute. The values
7393 specified via attribute might override the cmdline argument
7397 tree args
= TREE_VALUE (attr
);
7399 *hw_before
= TREE_INT_CST_LOW (TREE_VALUE (args
));
7400 *hw_after
= TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args
)));
7404 /* Use the values specified by the cmdline arguments. */
7405 *hw_before
= s390_hotpatch_hw_before_label
;
7406 *hw_after
= s390_hotpatch_hw_after_label
;
7410 /* Write the current .machine and .machinemode specification to the assembler
7413 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7415 s390_asm_output_machine_for_arch (FILE *asm_out_file
)
7417 fprintf (asm_out_file
, "\t.machinemode %s\n",
7418 (TARGET_ZARCH
) ? "zarch" : "esa");
7419 fprintf (asm_out_file
, "\t.machine \"%s",
7420 processor_table
[s390_arch
].binutils_name
);
7421 if (S390_USE_ARCHITECTURE_MODIFIERS
)
7425 cpu_flags
= processor_flags_table
[(int) s390_arch
];
7426 if (TARGET_HTM
&& !(cpu_flags
& PF_TX
))
7427 fprintf (asm_out_file
, "+htm");
7428 else if (!TARGET_HTM
&& (cpu_flags
& PF_TX
))
7429 fprintf (asm_out_file
, "+nohtm");
7430 if (TARGET_VX
&& !(cpu_flags
& PF_VX
))
7431 fprintf (asm_out_file
, "+vx");
7432 else if (!TARGET_VX
&& (cpu_flags
& PF_VX
))
7433 fprintf (asm_out_file
, "+novx");
7435 fprintf (asm_out_file
, "\"\n");
7438 /* Write an extra function header before the very start of the function. */
7441 s390_asm_output_function_prefix (FILE *asm_out_file
,
7442 const char *fnname ATTRIBUTE_UNUSED
)
7444 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
) == NULL
)
7446 /* Since only the function specific options are saved but not the indications
7447 which options are set, it's too much work here to figure out which options
7448 have actually changed. Thus, generate .machine and .machinemode whenever a
7449 function has the target attribute or pragma. */
7450 fprintf (asm_out_file
, "\t.machinemode push\n");
7451 fprintf (asm_out_file
, "\t.machine push\n");
7452 s390_asm_output_machine_for_arch (asm_out_file
);
7455 /* Write an extra function footer after the very end of the function. */
7458 s390_asm_declare_function_size (FILE *asm_out_file
,
7459 const char *fnname
, tree decl
)
7461 if (!flag_inhibit_size_directive
)
7462 ASM_OUTPUT_MEASURED_SIZE (asm_out_file
, fnname
);
7463 if (DECL_FUNCTION_SPECIFIC_TARGET (decl
) == NULL
)
7465 fprintf (asm_out_file
, "\t.machine pop\n");
7466 fprintf (asm_out_file
, "\t.machinemode pop\n");
7470 /* Write the extra assembler code needed to declare a function properly. */
7473 s390_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
7476 int hw_before
, hw_after
;
7478 s390_function_num_hotpatch_hw (decl
, &hw_before
, &hw_after
);
7481 unsigned int function_alignment
;
7484 /* Add a trampoline code area before the function label and initialize it
7485 with two-byte nop instructions. This area can be overwritten with code
7486 that jumps to a patched version of the function. */
7487 asm_fprintf (asm_out_file
, "\tnopr\t%%r0"
7488 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7490 for (i
= 1; i
< hw_before
; i
++)
7491 fputs ("\tnopr\t%r0\n", asm_out_file
);
7493 /* Note: The function label must be aligned so that (a) the bytes of the
7494 following nop do not cross a cacheline boundary, and (b) a jump address
7495 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7496 stored directly before the label without crossing a cacheline
7497 boundary. All this is necessary to make sure the trampoline code can
7498 be changed atomically.
7499 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7500 if there are NOPs before the function label, the alignment is placed
7501 before them. So it is necessary to duplicate the alignment after the
7503 function_alignment
= MAX (8, DECL_ALIGN (decl
) / BITS_PER_UNIT
);
7504 if (! DECL_USER_ALIGN (decl
))
7505 function_alignment
= MAX (function_alignment
,
7506 (unsigned int) align_functions
);
7507 fputs ("\t# alignment for hotpatch\n", asm_out_file
);
7508 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (function_alignment
));
7511 if (S390_USE_TARGET_ATTRIBUTE
&& TARGET_DEBUG_ARG
)
7513 asm_fprintf (asm_out_file
, "\t# fn:%s ar%d\n", fname
, s390_arch
);
7514 asm_fprintf (asm_out_file
, "\t# fn:%s tu%d\n", fname
, s390_tune
);
7515 asm_fprintf (asm_out_file
, "\t# fn:%s sg%d\n", fname
, s390_stack_guard
);
7516 asm_fprintf (asm_out_file
, "\t# fn:%s ss%d\n", fname
, s390_stack_size
);
7517 asm_fprintf (asm_out_file
, "\t# fn:%s bc%d\n", fname
, s390_branch_cost
);
7518 asm_fprintf (asm_out_file
, "\t# fn:%s wf%d\n", fname
,
7519 s390_warn_framesize
);
7520 asm_fprintf (asm_out_file
, "\t# fn:%s ba%d\n", fname
, TARGET_BACKCHAIN
);
7521 asm_fprintf (asm_out_file
, "\t# fn:%s hd%d\n", fname
, TARGET_HARD_DFP
);
7522 asm_fprintf (asm_out_file
, "\t# fn:%s hf%d\n", fname
, !TARGET_SOFT_FLOAT
);
7523 asm_fprintf (asm_out_file
, "\t# fn:%s ht%d\n", fname
, TARGET_OPT_HTM
);
7524 asm_fprintf (asm_out_file
, "\t# fn:%s vx%d\n", fname
, TARGET_OPT_VX
);
7525 asm_fprintf (asm_out_file
, "\t# fn:%s ps%d\n", fname
,
7526 TARGET_PACKED_STACK
);
7527 asm_fprintf (asm_out_file
, "\t# fn:%s se%d\n", fname
, TARGET_SMALL_EXEC
);
7528 asm_fprintf (asm_out_file
, "\t# fn:%s mv%d\n", fname
, TARGET_MVCLE
);
7529 asm_fprintf (asm_out_file
, "\t# fn:%s zv%d\n", fname
, TARGET_ZVECTOR
);
7530 asm_fprintf (asm_out_file
, "\t# fn:%s wd%d\n", fname
,
7531 s390_warn_dynamicstack_p
);
7533 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
7535 asm_fprintf (asm_out_file
,
7536 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7540 /* Output machine-dependent UNSPECs occurring in address constant X
7541 in assembler syntax to stdio stream FILE. Returns true if the
7542 constant X could be recognized, false otherwise. */
7545 s390_output_addr_const_extra (FILE *file
, rtx x
)
7547 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 1)
7548 switch (XINT (x
, 1))
7551 output_addr_const (file
, XVECEXP (x
, 0, 0));
7552 fprintf (file
, "@GOTENT");
7555 output_addr_const (file
, XVECEXP (x
, 0, 0));
7556 fprintf (file
, "@GOT");
7559 output_addr_const (file
, XVECEXP (x
, 0, 0));
7560 fprintf (file
, "@GOTOFF");
7563 output_addr_const (file
, XVECEXP (x
, 0, 0));
7564 fprintf (file
, "@PLT");
7567 output_addr_const (file
, XVECEXP (x
, 0, 0));
7568 fprintf (file
, "@PLTOFF");
7571 output_addr_const (file
, XVECEXP (x
, 0, 0));
7572 fprintf (file
, "@TLSGD");
7575 assemble_name (file
, get_some_local_dynamic_name ());
7576 fprintf (file
, "@TLSLDM");
7579 output_addr_const (file
, XVECEXP (x
, 0, 0));
7580 fprintf (file
, "@DTPOFF");
7583 output_addr_const (file
, XVECEXP (x
, 0, 0));
7584 fprintf (file
, "@NTPOFF");
7586 case UNSPEC_GOTNTPOFF
:
7587 output_addr_const (file
, XVECEXP (x
, 0, 0));
7588 fprintf (file
, "@GOTNTPOFF");
7590 case UNSPEC_INDNTPOFF
:
7591 output_addr_const (file
, XVECEXP (x
, 0, 0));
7592 fprintf (file
, "@INDNTPOFF");
7596 if (GET_CODE (x
) == UNSPEC
&& XVECLEN (x
, 0) == 2)
7597 switch (XINT (x
, 1))
7599 case UNSPEC_POOL_OFFSET
:
7600 x
= gen_rtx_MINUS (GET_MODE (x
), XVECEXP (x
, 0, 0), XVECEXP (x
, 0, 1));
7601 output_addr_const (file
, x
);
7607 /* Output address operand ADDR in assembler syntax to
7608 stdio stream FILE. */
7611 print_operand_address (FILE *file
, rtx addr
)
7613 struct s390_address ad
;
7614 memset (&ad
, 0, sizeof (s390_address
));
7616 if (s390_loadrelative_operand_p (addr
, NULL
, NULL
))
7620 output_operand_lossage ("symbolic memory references are "
7621 "only supported on z10 or later");
7624 output_addr_const (file
, addr
);
7628 if (!s390_decompose_address (addr
, &ad
)
7629 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7630 || (ad
.indx
&& !REGNO_OK_FOR_INDEX_P (REGNO (ad
.indx
))))
7631 output_operand_lossage ("cannot decompose address");
7634 output_addr_const (file
, ad
.disp
);
7636 fprintf (file
, "0");
7638 if (ad
.base
&& ad
.indx
)
7639 fprintf (file
, "(%s,%s)", reg_names
[REGNO (ad
.indx
)],
7640 reg_names
[REGNO (ad
.base
)]);
7642 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
7645 /* Output operand X in assembler syntax to stdio stream FILE.
7646 CODE specified the format flag. The following format flags
7649 'C': print opcode suffix for branch condition.
7650 'D': print opcode suffix for inverse branch condition.
7651 'E': print opcode suffix for branch on index instruction.
7652 'G': print the size of the operand in bytes.
7653 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7654 'M': print the second word of a TImode operand.
7655 'N': print the second word of a DImode operand.
7656 'O': print only the displacement of a memory reference or address.
7657 'R': print only the base register of a memory reference or address.
7658 'S': print S-type memory reference (base+displacement).
7659 'Y': print address style operand without index (e.g. shift count or setmem
7662 'b': print integer X as if it's an unsigned byte.
7663 'c': print integer X as if it's an signed byte.
7664 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7665 'f': "end" contiguous bitmask X in SImode.
7666 'h': print integer X as if it's a signed halfword.
7667 'i': print the first nonzero HImode part of X.
7668 'j': print the first HImode part unequal to -1 of X.
7669 'k': print the first nonzero SImode part of X.
7670 'm': print the first SImode part unequal to -1 of X.
7671 'o': print integer X as if it's an unsigned 32bit word.
7672 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7673 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7674 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7675 'x': print integer X as if it's an unsigned halfword.
7676 'v': print register number as vector register (v1 instead of f1).
7680 print_operand (FILE *file
, rtx x
, int code
)
7687 fprintf (file
, s390_branch_condition_mnemonic (x
, FALSE
));
7691 fprintf (file
, s390_branch_condition_mnemonic (x
, TRUE
));
7695 if (GET_CODE (x
) == LE
)
7696 fprintf (file
, "l");
7697 else if (GET_CODE (x
) == GT
)
7698 fprintf (file
, "h");
7700 output_operand_lossage ("invalid comparison operator "
7701 "for 'E' output modifier");
7705 if (GET_CODE (x
) == SYMBOL_REF
)
7707 fprintf (file
, "%s", ":tls_load:");
7708 output_addr_const (file
, x
);
7710 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
7712 fprintf (file
, "%s", ":tls_gdcall:");
7713 output_addr_const (file
, XVECEXP (x
, 0, 0));
7715 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSLDM
)
7717 fprintf (file
, "%s", ":tls_ldcall:");
7718 const char *name
= get_some_local_dynamic_name ();
7720 assemble_name (file
, name
);
7723 output_operand_lossage ("invalid reference for 'J' output modifier");
7727 fprintf (file
, "%u", GET_MODE_SIZE (GET_MODE (x
)));
7732 struct s390_address ad
;
7735 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7738 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7741 output_operand_lossage ("invalid address for 'O' output modifier");
7746 output_addr_const (file
, ad
.disp
);
7748 fprintf (file
, "0");
7754 struct s390_address ad
;
7757 ret
= s390_decompose_address (MEM_P (x
) ? XEXP (x
, 0) : x
, &ad
);
7760 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7763 output_operand_lossage ("invalid address for 'R' output modifier");
7768 fprintf (file
, "%s", reg_names
[REGNO (ad
.base
)]);
7770 fprintf (file
, "0");
7776 struct s390_address ad
;
7781 output_operand_lossage ("memory reference expected for "
7782 "'S' output modifier");
7785 ret
= s390_decompose_address (XEXP (x
, 0), &ad
);
7788 || (ad
.base
&& !REGNO_OK_FOR_BASE_P (REGNO (ad
.base
)))
7791 output_operand_lossage ("invalid address for 'S' output modifier");
7796 output_addr_const (file
, ad
.disp
);
7798 fprintf (file
, "0");
7801 fprintf (file
, "(%s)", reg_names
[REGNO (ad
.base
)]);
7806 if (GET_CODE (x
) == REG
)
7807 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
7808 else if (GET_CODE (x
) == MEM
)
7809 x
= change_address (x
, VOIDmode
,
7810 plus_constant (Pmode
, XEXP (x
, 0), 4));
7812 output_operand_lossage ("register or memory expression expected "
7813 "for 'N' output modifier");
7817 if (GET_CODE (x
) == REG
)
7818 x
= gen_rtx_REG (GET_MODE (x
), REGNO (x
) + 1);
7819 else if (GET_CODE (x
) == MEM
)
7820 x
= change_address (x
, VOIDmode
,
7821 plus_constant (Pmode
, XEXP (x
, 0), 8));
7823 output_operand_lossage ("register or memory expression expected "
7824 "for 'M' output modifier");
7828 print_addrstyle_operand (file
, x
);
7832 switch (GET_CODE (x
))
7835 /* Print FP regs as fx instead of vx when they are accessed
7836 through non-vector mode. */
7838 || VECTOR_NOFP_REG_P (x
)
7839 || (FP_REG_P (x
) && VECTOR_MODE_P (GET_MODE (x
)))
7840 || (VECTOR_REG_P (x
)
7841 && (GET_MODE_SIZE (GET_MODE (x
)) /
7842 s390_class_max_nregs (FP_REGS
, GET_MODE (x
))) > 8))
7843 fprintf (file
, "%%v%s", reg_names
[REGNO (x
)] + 2);
7845 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
7849 output_address (GET_MODE (x
), XEXP (x
, 0));
7856 output_addr_const (file
, x
);
7869 ival
= ((ival
& 0xff) ^ 0x80) - 0x80;
7875 ival
= ((ival
& 0xffff) ^ 0x8000) - 0x8000;
7878 ival
= s390_extract_part (x
, HImode
, 0);
7881 ival
= s390_extract_part (x
, HImode
, -1);
7884 ival
= s390_extract_part (x
, SImode
, 0);
7887 ival
= s390_extract_part (x
, SImode
, -1);
7899 len
= (code
== 's' || code
== 'e' ? 64 : 32);
7900 ok
= s390_contiguous_bitmask_p (ival
, true, len
, &start
, &end
);
7902 if (code
== 's' || code
== 't')
7909 output_operand_lossage ("invalid constant for output modifier '%c'", code
);
7911 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
7914 case CONST_WIDE_INT
:
7916 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
7917 CONST_WIDE_INT_ELT (x
, 0) & 0xff);
7918 else if (code
== 'x')
7919 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
7920 CONST_WIDE_INT_ELT (x
, 0) & 0xffff);
7921 else if (code
== 'h')
7922 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
7923 ((CONST_WIDE_INT_ELT (x
, 0) & 0xffff) ^ 0x8000) - 0x8000);
7927 output_operand_lossage ("invalid constant - try using "
7928 "an output modifier");
7930 output_operand_lossage ("invalid constant for output modifier '%c'",
7938 gcc_assert (const_vec_duplicate_p (x
));
7939 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
7940 ((INTVAL (XVECEXP (x
, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7948 ok
= s390_contiguous_bitmask_vector_p (x
, &start
, &end
);
7950 ival
= (code
== 's') ? start
: end
;
7951 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ival
);
7957 bool ok
= s390_bytemask_vector_p (x
, &mask
);
7959 fprintf (file
, "%u", mask
);
7964 output_operand_lossage ("invalid constant vector for output "
7965 "modifier '%c'", code
);
7971 output_operand_lossage ("invalid expression - try using "
7972 "an output modifier");
7974 output_operand_lossage ("invalid expression for output "
7975 "modifier '%c'", code
);
7980 /* Target hook for assembling integer objects. We need to define it
7981 here to work a round a bug in some versions of GAS, which couldn't
7982 handle values smaller than INT_MIN when printed in decimal. */
7985 s390_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
7987 if (size
== 8 && aligned_p
7988 && GET_CODE (x
) == CONST_INT
&& INTVAL (x
) < INT_MIN
)
7990 fprintf (asm_out_file
, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX
"\n",
7994 return default_assemble_integer (x
, size
, aligned_p
);
7997 /* Returns true if register REGNO is used for forming
7998 a memory address in expression X. */
8001 reg_used_in_mem_p (int regno
, rtx x
)
8003 enum rtx_code code
= GET_CODE (x
);
8009 if (refers_to_regno_p (regno
, XEXP (x
, 0)))
8012 else if (code
== SET
8013 && GET_CODE (SET_DEST (x
)) == PC
)
8015 if (refers_to_regno_p (regno
, SET_SRC (x
)))
8019 fmt
= GET_RTX_FORMAT (code
);
8020 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8023 && reg_used_in_mem_p (regno
, XEXP (x
, i
)))
8026 else if (fmt
[i
] == 'E')
8027 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8028 if (reg_used_in_mem_p (regno
, XVECEXP (x
, i
, j
)))
8034 /* Returns true if expression DEP_RTX sets an address register
8035 used by instruction INSN to address memory. */
8038 addr_generation_dependency_p (rtx dep_rtx
, rtx_insn
*insn
)
8042 if (NONJUMP_INSN_P (dep_rtx
))
8043 dep_rtx
= PATTERN (dep_rtx
);
8045 if (GET_CODE (dep_rtx
) == SET
)
8047 target
= SET_DEST (dep_rtx
);
8048 if (GET_CODE (target
) == STRICT_LOW_PART
)
8049 target
= XEXP (target
, 0);
8050 while (GET_CODE (target
) == SUBREG
)
8051 target
= SUBREG_REG (target
);
8053 if (GET_CODE (target
) == REG
)
8055 int regno
= REGNO (target
);
8057 if (s390_safe_attr_type (insn
) == TYPE_LA
)
8059 pat
= PATTERN (insn
);
8060 if (GET_CODE (pat
) == PARALLEL
)
8062 gcc_assert (XVECLEN (pat
, 0) == 2);
8063 pat
= XVECEXP (pat
, 0, 0);
8065 gcc_assert (GET_CODE (pat
) == SET
);
8066 return refers_to_regno_p (regno
, SET_SRC (pat
));
8068 else if (get_attr_atype (insn
) == ATYPE_AGEN
)
8069 return reg_used_in_mem_p (regno
, PATTERN (insn
));
8075 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8078 s390_agen_dep_p (rtx_insn
*dep_insn
, rtx_insn
*insn
)
8080 rtx dep_rtx
= PATTERN (dep_insn
);
8083 if (GET_CODE (dep_rtx
) == SET
8084 && addr_generation_dependency_p (dep_rtx
, insn
))
8086 else if (GET_CODE (dep_rtx
) == PARALLEL
)
8088 for (i
= 0; i
< XVECLEN (dep_rtx
, 0); i
++)
8090 if (addr_generation_dependency_p (XVECEXP (dep_rtx
, 0, i
), insn
))
8098 /* A C statement (sans semicolon) to update the integer scheduling priority
8099 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8100 reduce the priority to execute INSN later. Do not define this macro if
8101 you do not need to adjust the scheduling priorities of insns.
8103 A STD instruction should be scheduled earlier,
8104 in order to use the bypass. */
8106 s390_adjust_priority (rtx_insn
*insn
, int priority
)
8108 if (! INSN_P (insn
))
8111 if (s390_tune
<= PROCESSOR_2064_Z900
)
8114 switch (s390_safe_attr_type (insn
))
8118 priority
= priority
<< 3;
8122 priority
= priority
<< 1;
8131 /* The number of instructions that can be issued per cycle. */
8134 s390_issue_rate (void)
8138 case PROCESSOR_2084_Z990
:
8139 case PROCESSOR_2094_Z9_109
:
8140 case PROCESSOR_2094_Z9_EC
:
8141 case PROCESSOR_2817_Z196
:
8143 case PROCESSOR_2097_Z10
:
8145 case PROCESSOR_9672_G5
:
8146 case PROCESSOR_9672_G6
:
8147 case PROCESSOR_2064_Z900
:
8148 /* Starting with EC12 we use the sched_reorder hook to take care
8149 of instruction dispatch constraints. The algorithm only
8150 picks the best instruction and assumes only a single
8151 instruction gets issued per cycle. */
8152 case PROCESSOR_2827_ZEC12
:
8153 case PROCESSOR_2964_Z13
:
8154 case PROCESSOR_3906_Z14
:
/* Number of insns the scheduler may look ahead within one cycle.  */

static int
s390_first_cycle_multipass_dfa_lookahead (void)
{
  return 4;
}
8166 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8167 Fix up MEMs as required. */
8170 annotate_constant_pool_refs (rtx
*x
)
8175 gcc_assert (GET_CODE (*x
) != SYMBOL_REF
8176 || !CONSTANT_POOL_ADDRESS_P (*x
));
8178 /* Literal pool references can only occur inside a MEM ... */
8179 if (GET_CODE (*x
) == MEM
)
8181 rtx memref
= XEXP (*x
, 0);
8183 if (GET_CODE (memref
) == SYMBOL_REF
8184 && CONSTANT_POOL_ADDRESS_P (memref
))
8186 rtx base
= cfun
->machine
->base_reg
;
8187 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, memref
, base
),
8190 *x
= replace_equiv_address (*x
, addr
);
8194 if (GET_CODE (memref
) == CONST
8195 && GET_CODE (XEXP (memref
, 0)) == PLUS
8196 && GET_CODE (XEXP (XEXP (memref
, 0), 1)) == CONST_INT
8197 && GET_CODE (XEXP (XEXP (memref
, 0), 0)) == SYMBOL_REF
8198 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref
, 0), 0)))
8200 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (memref
, 0), 1));
8201 rtx sym
= XEXP (XEXP (memref
, 0), 0);
8202 rtx base
= cfun
->machine
->base_reg
;
8203 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8206 *x
= replace_equiv_address (*x
, plus_constant (Pmode
, addr
, off
));
8211 /* ... or a load-address type pattern. */
8212 if (GET_CODE (*x
) == SET
)
8214 rtx addrref
= SET_SRC (*x
);
8216 if (GET_CODE (addrref
) == SYMBOL_REF
8217 && CONSTANT_POOL_ADDRESS_P (addrref
))
8219 rtx base
= cfun
->machine
->base_reg
;
8220 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addrref
, base
),
8223 SET_SRC (*x
) = addr
;
8227 if (GET_CODE (addrref
) == CONST
8228 && GET_CODE (XEXP (addrref
, 0)) == PLUS
8229 && GET_CODE (XEXP (XEXP (addrref
, 0), 1)) == CONST_INT
8230 && GET_CODE (XEXP (XEXP (addrref
, 0), 0)) == SYMBOL_REF
8231 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref
, 0), 0)))
8233 HOST_WIDE_INT off
= INTVAL (XEXP (XEXP (addrref
, 0), 1));
8234 rtx sym
= XEXP (XEXP (addrref
, 0), 0);
8235 rtx base
= cfun
->machine
->base_reg
;
8236 rtx addr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, sym
, base
),
8239 SET_SRC (*x
) = plus_constant (Pmode
, addr
, off
);
8244 /* Annotate LTREL_BASE as well. */
8245 if (GET_CODE (*x
) == UNSPEC
8246 && XINT (*x
, 1) == UNSPEC_LTREL_BASE
)
8248 rtx base
= cfun
->machine
->base_reg
;
8249 *x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, XVECEXP (*x
, 0, 0), base
),
8254 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8255 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8259 annotate_constant_pool_refs (&XEXP (*x
, i
));
8261 else if (fmt
[i
] == 'E')
8263 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8264 annotate_constant_pool_refs (&XVECEXP (*x
, i
, j
));
8269 /* Split all branches that exceed the maximum distance.
8270 Returns true if this created a new literal pool entry. */
8273 s390_split_branches (void)
8275 rtx temp_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
8276 int new_literal
= 0, ret
;
8281 /* We need correct insn addresses. */
8283 shorten_branches (get_insns ());
8285 /* Find all branches that exceed 64KB, and split them. */
8287 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8289 if (! JUMP_P (insn
) || tablejump_p (insn
, NULL
, NULL
))
8292 pat
= PATTERN (insn
);
8293 if (GET_CODE (pat
) == PARALLEL
)
8294 pat
= XVECEXP (pat
, 0, 0);
8295 if (GET_CODE (pat
) != SET
|| SET_DEST (pat
) != pc_rtx
)
8298 if (GET_CODE (SET_SRC (pat
)) == LABEL_REF
)
8300 label
= &SET_SRC (pat
);
8302 else if (GET_CODE (SET_SRC (pat
)) == IF_THEN_ELSE
)
8304 if (GET_CODE (XEXP (SET_SRC (pat
), 1)) == LABEL_REF
)
8305 label
= &XEXP (SET_SRC (pat
), 1);
8306 else if (GET_CODE (XEXP (SET_SRC (pat
), 2)) == LABEL_REF
)
8307 label
= &XEXP (SET_SRC (pat
), 2);
8314 if (get_attr_length (insn
) <= 4)
8317 /* We are going to use the return register as scratch register,
8318 make sure it will be saved/restored by the prologue/epilogue. */
8319 cfun_frame_layout
.save_return_addr_p
= 1;
8324 rtx mem
= force_const_mem (Pmode
, *label
);
8325 rtx_insn
*set_insn
= emit_insn_before (gen_rtx_SET (temp_reg
, mem
),
8327 INSN_ADDRESSES_NEW (set_insn
, -1);
8328 annotate_constant_pool_refs (&PATTERN (set_insn
));
8335 target
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, *label
),
8336 UNSPEC_LTREL_OFFSET
);
8337 target
= gen_rtx_CONST (Pmode
, target
);
8338 target
= force_const_mem (Pmode
, target
);
8339 rtx_insn
*set_insn
= emit_insn_before (gen_rtx_SET (temp_reg
, target
),
8341 INSN_ADDRESSES_NEW (set_insn
, -1);
8342 annotate_constant_pool_refs (&PATTERN (set_insn
));
8344 target
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, XEXP (target
, 0),
8345 cfun
->machine
->base_reg
),
8347 target
= gen_rtx_PLUS (Pmode
, temp_reg
, target
);
8350 ret
= validate_change (insn
, label
, target
, 0);
8358 /* Find an annotated literal pool symbol referenced in RTX X,
8359 and store it at REF. Will abort if X contains references to
8360 more than one such pool symbol; multiple references to the same
8361 symbol are allowed, however.
8363 The rtx pointed to by REF must be initialized to NULL_RTX
8364 by the caller before calling this routine. */
8367 find_constant_pool_ref (rtx x
, rtx
*ref
)
8372 /* Ignore LTREL_BASE references. */
8373 if (GET_CODE (x
) == UNSPEC
8374 && XINT (x
, 1) == UNSPEC_LTREL_BASE
)
8376 /* Likewise POOL_ENTRY insns. */
8377 if (GET_CODE (x
) == UNSPEC_VOLATILE
8378 && XINT (x
, 1) == UNSPECV_POOL_ENTRY
)
8381 gcc_assert (GET_CODE (x
) != SYMBOL_REF
8382 || !CONSTANT_POOL_ADDRESS_P (x
));
8384 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_LTREF
)
8386 rtx sym
= XVECEXP (x
, 0, 0);
8387 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
8388 && CONSTANT_POOL_ADDRESS_P (sym
));
8390 if (*ref
== NULL_RTX
)
8393 gcc_assert (*ref
== sym
);
8398 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8399 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8403 find_constant_pool_ref (XEXP (x
, i
), ref
);
8405 else if (fmt
[i
] == 'E')
8407 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8408 find_constant_pool_ref (XVECEXP (x
, i
, j
), ref
);
8413 /* Replace every reference to the annotated literal pool
8414 symbol REF in X by its base plus OFFSET. */
8417 replace_constant_pool_ref (rtx
*x
, rtx ref
, rtx offset
)
8422 gcc_assert (*x
!= ref
);
8424 if (GET_CODE (*x
) == UNSPEC
8425 && XINT (*x
, 1) == UNSPEC_LTREF
8426 && XVECEXP (*x
, 0, 0) == ref
)
8428 *x
= gen_rtx_PLUS (Pmode
, XVECEXP (*x
, 0, 1), offset
);
8432 if (GET_CODE (*x
) == PLUS
8433 && GET_CODE (XEXP (*x
, 1)) == CONST_INT
8434 && GET_CODE (XEXP (*x
, 0)) == UNSPEC
8435 && XINT (XEXP (*x
, 0), 1) == UNSPEC_LTREF
8436 && XVECEXP (XEXP (*x
, 0), 0, 0) == ref
)
8438 rtx addr
= gen_rtx_PLUS (Pmode
, XVECEXP (XEXP (*x
, 0), 0, 1), offset
);
8439 *x
= plus_constant (Pmode
, addr
, INTVAL (XEXP (*x
, 1)));
8443 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8444 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8448 replace_constant_pool_ref (&XEXP (*x
, i
), ref
, offset
);
8450 else if (fmt
[i
] == 'E')
8452 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8453 replace_constant_pool_ref (&XVECEXP (*x
, i
, j
), ref
, offset
);
8458 /* Check whether X contains an UNSPEC_LTREL_BASE.
8459 Return its constant pool symbol if found, NULL_RTX otherwise. */
8462 find_ltrel_base (rtx x
)
8467 if (GET_CODE (x
) == UNSPEC
8468 && XINT (x
, 1) == UNSPEC_LTREL_BASE
)
8469 return XVECEXP (x
, 0, 0);
8471 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8472 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8476 rtx fnd
= find_ltrel_base (XEXP (x
, i
));
8480 else if (fmt
[i
] == 'E')
8482 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
8484 rtx fnd
= find_ltrel_base (XVECEXP (x
, i
, j
));
8494 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8497 replace_ltrel_base (rtx
*x
)
8502 if (GET_CODE (*x
) == UNSPEC
8503 && XINT (*x
, 1) == UNSPEC_LTREL_BASE
)
8505 *x
= XVECEXP (*x
, 0, 1);
8509 fmt
= GET_RTX_FORMAT (GET_CODE (*x
));
8510 for (i
= GET_RTX_LENGTH (GET_CODE (*x
)) - 1; i
>= 0; i
--)
8514 replace_ltrel_base (&XEXP (*x
, i
));
8516 else if (fmt
[i
] == 'E')
8518 for (j
= 0; j
< XVECLEN (*x
, i
); j
++)
8519 replace_ltrel_base (&XVECEXP (*x
, i
, j
));
8525 /* We keep a list of constants which we have to add to internal
8526 constant tables in the middle of large functions. */
8528 #define NR_C_MODES 32
8529 machine_mode constant_modes
[NR_C_MODES
] =
8531 TFmode
, TImode
, TDmode
,
8532 V16QImode
, V8HImode
, V4SImode
, V2DImode
, V1TImode
,
8533 V4SFmode
, V2DFmode
, V1TFmode
,
8534 DFmode
, DImode
, DDmode
,
8535 V8QImode
, V4HImode
, V2SImode
, V1DImode
, V2SFmode
, V1DFmode
,
8536 SFmode
, SImode
, SDmode
,
8537 V4QImode
, V2HImode
, V1SImode
, V1SFmode
,
8546 struct constant
*next
;
8548 rtx_code_label
*label
;
8551 struct constant_pool
8553 struct constant_pool
*next
;
8554 rtx_insn
*first_insn
;
8555 rtx_insn
*pool_insn
;
8557 rtx_insn
*emit_pool_after
;
8559 struct constant
*constants
[NR_C_MODES
];
8560 struct constant
*execute
;
8561 rtx_code_label
*label
;
8565 /* Allocate new constant_pool structure. */
8567 static struct constant_pool
*
8568 s390_alloc_pool (void)
8570 struct constant_pool
*pool
;
8573 pool
= (struct constant_pool
*) xmalloc (sizeof *pool
);
8575 for (i
= 0; i
< NR_C_MODES
; i
++)
8576 pool
->constants
[i
] = NULL
;
8578 pool
->execute
= NULL
;
8579 pool
->label
= gen_label_rtx ();
8580 pool
->first_insn
= NULL
;
8581 pool
->pool_insn
= NULL
;
8582 pool
->insns
= BITMAP_ALLOC (NULL
);
8584 pool
->emit_pool_after
= NULL
;
8589 /* Create new constant pool covering instructions starting at INSN
8590 and chain it to the end of POOL_LIST. */
8592 static struct constant_pool
*
8593 s390_start_pool (struct constant_pool
**pool_list
, rtx_insn
*insn
)
8595 struct constant_pool
*pool
, **prev
;
8597 pool
= s390_alloc_pool ();
8598 pool
->first_insn
= insn
;
8600 for (prev
= pool_list
; *prev
; prev
= &(*prev
)->next
)
8607 /* End range of instructions covered by POOL at INSN and emit
8608 placeholder insn representing the pool. */
8611 s390_end_pool (struct constant_pool
*pool
, rtx_insn
*insn
)
8613 rtx pool_size
= GEN_INT (pool
->size
+ 8 /* alignment slop */);
8616 insn
= get_last_insn ();
8618 pool
->pool_insn
= emit_insn_after (gen_pool (pool_size
), insn
);
8619 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
8622 /* Add INSN to the list of insns covered by POOL. */
8625 s390_add_pool_insn (struct constant_pool
*pool
, rtx insn
)
8627 bitmap_set_bit (pool
->insns
, INSN_UID (insn
));
8630 /* Return pool out of POOL_LIST that covers INSN. */
8632 static struct constant_pool
*
8633 s390_find_pool (struct constant_pool
*pool_list
, rtx insn
)
8635 struct constant_pool
*pool
;
8637 for (pool
= pool_list
; pool
; pool
= pool
->next
)
8638 if (bitmap_bit_p (pool
->insns
, INSN_UID (insn
)))
8644 /* Add constant VAL of mode MODE to the constant pool POOL. */
8647 s390_add_constant (struct constant_pool
*pool
, rtx val
, machine_mode mode
)
8652 for (i
= 0; i
< NR_C_MODES
; i
++)
8653 if (constant_modes
[i
] == mode
)
8655 gcc_assert (i
!= NR_C_MODES
);
8657 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8658 if (rtx_equal_p (val
, c
->value
))
8663 c
= (struct constant
*) xmalloc (sizeof *c
);
8665 c
->label
= gen_label_rtx ();
8666 c
->next
= pool
->constants
[i
];
8667 pool
->constants
[i
] = c
;
8668 pool
->size
+= GET_MODE_SIZE (mode
);
8672 /* Return an rtx that represents the offset of X from the start of
8676 s390_pool_offset (struct constant_pool
*pool
, rtx x
)
8680 label
= gen_rtx_LABEL_REF (GET_MODE (x
), pool
->label
);
8681 x
= gen_rtx_UNSPEC (GET_MODE (x
), gen_rtvec (2, x
, label
),
8682 UNSPEC_POOL_OFFSET
);
8683 return gen_rtx_CONST (GET_MODE (x
), x
);
8686 /* Find constant VAL of mode MODE in the constant pool POOL.
8687 Return an RTX describing the distance from the start of
8688 the pool to the location of the new constant. */
8691 s390_find_constant (struct constant_pool
*pool
, rtx val
,
8697 for (i
= 0; i
< NR_C_MODES
; i
++)
8698 if (constant_modes
[i
] == mode
)
8700 gcc_assert (i
!= NR_C_MODES
);
8702 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8703 if (rtx_equal_p (val
, c
->value
))
8708 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
8711 /* Check whether INSN is an execute. Return the label_ref to its
8712 execute target template if so, NULL_RTX otherwise. */
8715 s390_execute_label (rtx insn
)
8717 if (NONJUMP_INSN_P (insn
)
8718 && GET_CODE (PATTERN (insn
)) == PARALLEL
8719 && GET_CODE (XVECEXP (PATTERN (insn
), 0, 0)) == UNSPEC
8720 && XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_EXECUTE
)
8721 return XVECEXP (XVECEXP (PATTERN (insn
), 0, 0), 0, 2);
8726 /* Add execute target for INSN to the constant pool POOL. */
8729 s390_add_execute (struct constant_pool
*pool
, rtx insn
)
8733 for (c
= pool
->execute
; c
!= NULL
; c
= c
->next
)
8734 if (INSN_UID (insn
) == INSN_UID (c
->value
))
8739 c
= (struct constant
*) xmalloc (sizeof *c
);
8741 c
->label
= gen_label_rtx ();
8742 c
->next
= pool
->execute
;
8748 /* Find execute target for INSN in the constant pool POOL.
8749 Return an RTX describing the distance from the start of
8750 the pool to the location of the execute target. */
8753 s390_find_execute (struct constant_pool
*pool
, rtx insn
)
8757 for (c
= pool
->execute
; c
!= NULL
; c
= c
->next
)
8758 if (INSN_UID (insn
) == INSN_UID (c
->value
))
8763 return s390_pool_offset (pool
, gen_rtx_LABEL_REF (Pmode
, c
->label
));
8766 /* For an execute INSN, extract the execute target template. */
8769 s390_execute_target (rtx insn
)
8771 rtx pattern
= PATTERN (insn
);
8772 gcc_assert (s390_execute_label (insn
));
8774 if (XVECLEN (pattern
, 0) == 2)
8776 pattern
= copy_rtx (XVECEXP (pattern
, 0, 1));
8780 rtvec vec
= rtvec_alloc (XVECLEN (pattern
, 0) - 1);
8783 for (i
= 0; i
< XVECLEN (pattern
, 0) - 1; i
++)
8784 RTVEC_ELT (vec
, i
) = copy_rtx (XVECEXP (pattern
, 0, i
+ 1));
8786 pattern
= gen_rtx_PARALLEL (VOIDmode
, vec
);
8792 /* Indicate that INSN cannot be duplicated. This is the case for
8793 execute insns that carry a unique label. */
8796 s390_cannot_copy_insn_p (rtx_insn
*insn
)
8798 rtx label
= s390_execute_label (insn
);
8799 return label
&& label
!= const0_rtx
;
8802 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8803 do not emit the pool base label. */
8806 s390_dump_pool (struct constant_pool
*pool
, bool remote_label
)
8809 rtx_insn
*insn
= pool
->pool_insn
;
8812 /* Switch to rodata section. */
8813 if (TARGET_CPU_ZARCH
)
8815 insn
= emit_insn_after (gen_pool_section_start (), insn
);
8816 INSN_ADDRESSES_NEW (insn
, -1);
8819 /* Ensure minimum pool alignment. */
8820 if (TARGET_CPU_ZARCH
)
8821 insn
= emit_insn_after (gen_pool_align (GEN_INT (8)), insn
);
8823 insn
= emit_insn_after (gen_pool_align (GEN_INT (4)), insn
);
8824 INSN_ADDRESSES_NEW (insn
, -1);
8826 /* Emit pool base label. */
8829 insn
= emit_label_after (pool
->label
, insn
);
8830 INSN_ADDRESSES_NEW (insn
, -1);
8833 /* Dump constants in descending alignment requirement order,
8834 ensuring proper alignment for every constant. */
8835 for (i
= 0; i
< NR_C_MODES
; i
++)
8836 for (c
= pool
->constants
[i
]; c
; c
= c
->next
)
8838 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8839 rtx value
= copy_rtx (c
->value
);
8840 if (GET_CODE (value
) == CONST
8841 && GET_CODE (XEXP (value
, 0)) == UNSPEC
8842 && XINT (XEXP (value
, 0), 1) == UNSPEC_LTREL_OFFSET
8843 && XVECLEN (XEXP (value
, 0), 0) == 1)
8844 value
= s390_pool_offset (pool
, XVECEXP (XEXP (value
, 0), 0, 0));
8846 insn
= emit_label_after (c
->label
, insn
);
8847 INSN_ADDRESSES_NEW (insn
, -1);
8849 value
= gen_rtx_UNSPEC_VOLATILE (constant_modes
[i
],
8850 gen_rtvec (1, value
),
8851 UNSPECV_POOL_ENTRY
);
8852 insn
= emit_insn_after (value
, insn
);
8853 INSN_ADDRESSES_NEW (insn
, -1);
8856 /* Ensure minimum alignment for instructions. */
8857 insn
= emit_insn_after (gen_pool_align (GEN_INT (2)), insn
);
8858 INSN_ADDRESSES_NEW (insn
, -1);
8860 /* Output in-pool execute template insns. */
8861 for (c
= pool
->execute
; c
; c
= c
->next
)
8863 insn
= emit_label_after (c
->label
, insn
);
8864 INSN_ADDRESSES_NEW (insn
, -1);
8866 insn
= emit_insn_after (s390_execute_target (c
->value
), insn
);
8867 INSN_ADDRESSES_NEW (insn
, -1);
8870 /* Switch back to previous section. */
8871 if (TARGET_CPU_ZARCH
)
8873 insn
= emit_insn_after (gen_pool_section_end (), insn
);
8874 INSN_ADDRESSES_NEW (insn
, -1);
8877 insn
= emit_barrier_after (insn
);
8878 INSN_ADDRESSES_NEW (insn
, -1);
8880 /* Remove placeholder insn. */
8881 remove_insn (pool
->pool_insn
);
8884 /* Free all memory used by POOL. */
8887 s390_free_pool (struct constant_pool
*pool
)
8889 struct constant
*c
, *next
;
8892 for (i
= 0; i
< NR_C_MODES
; i
++)
8893 for (c
= pool
->constants
[i
]; c
; c
= next
)
8899 for (c
= pool
->execute
; c
; c
= next
)
8905 BITMAP_FREE (pool
->insns
);
8910 /* Collect main literal pool. Return NULL on overflow. */
8912 static struct constant_pool
*
8913 s390_mainpool_start (void)
8915 struct constant_pool
*pool
;
8918 pool
= s390_alloc_pool ();
8920 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8922 if (NONJUMP_INSN_P (insn
)
8923 && GET_CODE (PATTERN (insn
)) == SET
8924 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC_VOLATILE
8925 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPECV_MAIN_POOL
)
8927 /* There might be two main_pool instructions if base_reg
8928 is call-clobbered; one for shrink-wrapped code and one
8929 for the rest. We want to keep the first. */
8930 if (pool
->pool_insn
)
8932 insn
= PREV_INSN (insn
);
8933 delete_insn (NEXT_INSN (insn
));
8936 pool
->pool_insn
= insn
;
8939 if (!TARGET_CPU_ZARCH
&& s390_execute_label (insn
))
8941 s390_add_execute (pool
, insn
);
8943 else if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
8945 rtx pool_ref
= NULL_RTX
;
8946 find_constant_pool_ref (PATTERN (insn
), &pool_ref
);
8949 rtx constant
= get_pool_constant (pool_ref
);
8950 machine_mode mode
= get_pool_mode (pool_ref
);
8951 s390_add_constant (pool
, constant
, mode
);
8955 /* If hot/cold partitioning is enabled we have to make sure that
8956 the literal pool is emitted in the same section where the
8957 initialization of the literal pool base pointer takes place.
8958 emit_pool_after is only used in the non-overflow case on non
8959 Z cpus where we can emit the literal pool at the end of the
8960 function body within the text section. */
8962 && NOTE_KIND (insn
) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8963 && !pool
->emit_pool_after
)
8964 pool
->emit_pool_after
= PREV_INSN (insn
);
8967 gcc_assert (pool
->pool_insn
|| pool
->size
== 0);
8969 if (pool
->size
>= 4096)
8971 /* We're going to chunkify the pool, so remove the main
8972 pool placeholder insn. */
8973 remove_insn (pool
->pool_insn
);
8975 s390_free_pool (pool
);
8979 /* If the functions ends with the section where the literal pool
8980 should be emitted set the marker to its end. */
8981 if (pool
&& !pool
->emit_pool_after
)
8982 pool
->emit_pool_after
= get_last_insn ();
8987 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8988 Modify the current function to output the pool constants as well as
8989 the pool register setup instruction. */
8992 s390_mainpool_finish (struct constant_pool
*pool
)
8994 rtx base_reg
= cfun
->machine
->base_reg
;
8996 /* If the pool is empty, we're done. */
8997 if (pool
->size
== 0)
8999 /* We don't actually need a base register after all. */
9000 cfun
->machine
->base_reg
= NULL_RTX
;
9002 if (pool
->pool_insn
)
9003 remove_insn (pool
->pool_insn
);
9004 s390_free_pool (pool
);
9008 /* We need correct insn addresses. */
9009 shorten_branches (get_insns ());
9011 /* On zSeries, we use a LARL to load the pool register. The pool is
9012 located in the .rodata section, so we emit it after the function. */
9013 if (TARGET_CPU_ZARCH
)
9015 rtx set
= gen_main_base_64 (base_reg
, pool
->label
);
9016 rtx_insn
*insn
= emit_insn_after (set
, pool
->pool_insn
);
9017 INSN_ADDRESSES_NEW (insn
, -1);
9018 remove_insn (pool
->pool_insn
);
9020 insn
= get_last_insn ();
9021 pool
->pool_insn
= emit_insn_after (gen_pool (const0_rtx
), insn
);
9022 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9024 s390_dump_pool (pool
, 0);
9027 /* On S/390, if the total size of the function's code plus literal pool
9028 does not exceed 4096 bytes, we use BASR to set up a function base
9029 pointer, and emit the literal pool at the end of the function. */
9030 else if (INSN_ADDRESSES (INSN_UID (pool
->emit_pool_after
))
9031 + pool
->size
+ 8 /* alignment slop */ < 4096)
9033 rtx set
= gen_main_base_31_small (base_reg
, pool
->label
);
9034 rtx_insn
*insn
= emit_insn_after (set
, pool
->pool_insn
);
9035 INSN_ADDRESSES_NEW (insn
, -1);
9036 remove_insn (pool
->pool_insn
);
9038 insn
= emit_label_after (pool
->label
, insn
);
9039 INSN_ADDRESSES_NEW (insn
, -1);
9041 /* emit_pool_after will be set by s390_mainpool_start to the
9042 last insn of the section where the literal pool should be
9044 insn
= pool
->emit_pool_after
;
9046 pool
->pool_insn
= emit_insn_after (gen_pool (const0_rtx
), insn
);
9047 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9049 s390_dump_pool (pool
, 1);
9052 /* Otherwise, we emit an inline literal pool and use BASR to branch
9053 over it, setting up the pool register at the same time. */
9056 rtx_code_label
*pool_end
= gen_label_rtx ();
9058 rtx pat
= gen_main_base_31_large (base_reg
, pool
->label
, pool_end
);
9059 rtx_insn
*insn
= emit_jump_insn_after (pat
, pool
->pool_insn
);
9060 JUMP_LABEL (insn
) = pool_end
;
9061 INSN_ADDRESSES_NEW (insn
, -1);
9062 remove_insn (pool
->pool_insn
);
9064 insn
= emit_label_after (pool
->label
, insn
);
9065 INSN_ADDRESSES_NEW (insn
, -1);
9067 pool
->pool_insn
= emit_insn_after (gen_pool (const0_rtx
), insn
);
9068 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
9070 insn
= emit_label_after (pool_end
, pool
->pool_insn
);
9071 INSN_ADDRESSES_NEW (insn
, -1);
9073 s390_dump_pool (pool
, 1);
9077 /* Replace all literal pool references. */
9079 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9082 replace_ltrel_base (&PATTERN (insn
));
9084 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9086 rtx addr
, pool_ref
= NULL_RTX
;
9087 find_constant_pool_ref (PATTERN (insn
), &pool_ref
);
9090 if (s390_execute_label (insn
))
9091 addr
= s390_find_execute (pool
, insn
);
9093 addr
= s390_find_constant (pool
, get_pool_constant (pool_ref
),
9094 get_pool_mode (pool_ref
));
9096 replace_constant_pool_ref (&PATTERN (insn
), pool_ref
, addr
);
9097 INSN_CODE (insn
) = -1;
9103 /* Free the pool. */
9104 s390_free_pool (pool
);
9107 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9108 We have decided we cannot use this pool, so revert all changes
9109 to the current function that were done by s390_mainpool_start. */
9111 s390_mainpool_cancel (struct constant_pool
*pool
)
9113 /* We didn't actually change the instruction stream, so simply
9114 free the pool memory. */
9115 s390_free_pool (pool
);
9119 /* Chunkify the literal pool. */
9121 #define S390_POOL_CHUNK_MIN 0xc00
9122 #define S390_POOL_CHUNK_MAX 0xe00
9124 static struct constant_pool
*
9125 s390_chunkify_start (void)
9127 struct constant_pool
*curr_pool
= NULL
, *pool_list
= NULL
;
9130 rtx pending_ltrel
= NULL_RTX
;
9133 rtx (*gen_reload_base
) (rtx
, rtx
) =
9134 TARGET_CPU_ZARCH
? gen_reload_base_64
: gen_reload_base_31
;
9137 /* We need correct insn addresses. */
9139 shorten_branches (get_insns ());
9141 /* Scan all insns and move literals to pool chunks. */
9143 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9145 bool section_switch_p
= false;
9147 /* Check for pending LTREL_BASE. */
9150 rtx ltrel_base
= find_ltrel_base (PATTERN (insn
));
9153 gcc_assert (ltrel_base
== pending_ltrel
);
9154 pending_ltrel
= NULL_RTX
;
9158 if (!TARGET_CPU_ZARCH
&& s390_execute_label (insn
))
9161 curr_pool
= s390_start_pool (&pool_list
, insn
);
9163 s390_add_execute (curr_pool
, insn
);
9164 s390_add_pool_insn (curr_pool
, insn
);
9166 else if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9168 rtx pool_ref
= NULL_RTX
;
9169 find_constant_pool_ref (PATTERN (insn
), &pool_ref
);
9172 rtx constant
= get_pool_constant (pool_ref
);
9173 machine_mode mode
= get_pool_mode (pool_ref
);
9176 curr_pool
= s390_start_pool (&pool_list
, insn
);
9178 s390_add_constant (curr_pool
, constant
, mode
);
9179 s390_add_pool_insn (curr_pool
, insn
);
9181 /* Don't split the pool chunk between a LTREL_OFFSET load
9182 and the corresponding LTREL_BASE. */
9183 if (GET_CODE (constant
) == CONST
9184 && GET_CODE (XEXP (constant
, 0)) == UNSPEC
9185 && XINT (XEXP (constant
, 0), 1) == UNSPEC_LTREL_OFFSET
)
9187 gcc_assert (!pending_ltrel
);
9188 pending_ltrel
= pool_ref
;
9193 if (JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
) || LABEL_P (insn
))
9196 s390_add_pool_insn (curr_pool
, insn
);
9197 /* An LTREL_BASE must follow within the same basic block. */
9198 gcc_assert (!pending_ltrel
);
9202 switch (NOTE_KIND (insn
))
9204 case NOTE_INSN_SWITCH_TEXT_SECTIONS
:
9205 section_switch_p
= true;
9207 case NOTE_INSN_VAR_LOCATION
:
9208 case NOTE_INSN_CALL_ARG_LOCATION
:
9215 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn
)
9216 || INSN_ADDRESSES (INSN_UID (insn
)) == -1)
9219 if (TARGET_CPU_ZARCH
)
9221 if (curr_pool
->size
< S390_POOL_CHUNK_MAX
)
9224 s390_end_pool (curr_pool
, NULL
);
9229 int chunk_size
= INSN_ADDRESSES (INSN_UID (insn
))
9230 - INSN_ADDRESSES (INSN_UID (curr_pool
->first_insn
))
9233 /* We will later have to insert base register reload insns.
9234 Those will have an effect on code size, which we need to
9235 consider here. This calculation makes rather pessimistic
9236 worst-case assumptions. */
9240 if (chunk_size
< S390_POOL_CHUNK_MIN
9241 && curr_pool
->size
< S390_POOL_CHUNK_MIN
9242 && !section_switch_p
)
9245 /* Pool chunks can only be inserted after BARRIERs ... */
9246 if (BARRIER_P (insn
))
9248 s390_end_pool (curr_pool
, insn
);
9253 /* ... so if we don't find one in time, create one. */
9254 else if (chunk_size
> S390_POOL_CHUNK_MAX
9255 || curr_pool
->size
> S390_POOL_CHUNK_MAX
9256 || section_switch_p
)
9258 rtx_insn
*label
, *jump
, *barrier
, *next
, *prev
;
9260 if (!section_switch_p
)
9262 /* We can insert the barrier only after a 'real' insn. */
9263 if (! NONJUMP_INSN_P (insn
) && ! CALL_P (insn
))
9265 if (get_attr_length (insn
) == 0)
9267 /* Don't separate LTREL_BASE from the corresponding
9268 LTREL_OFFSET load. */
9275 next
= NEXT_INSN (insn
);
9279 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
9280 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
));
9284 gcc_assert (!pending_ltrel
);
9286 /* The old pool has to end before the section switch
9287 note in order to make it part of the current
9289 insn
= PREV_INSN (insn
);
9292 label
= gen_label_rtx ();
9294 if (prev
&& NOTE_P (prev
))
9295 prev
= prev_nonnote_insn (prev
);
9297 jump
= emit_jump_insn_after_setloc (gen_jump (label
), insn
,
9298 INSN_LOCATION (prev
));
9300 jump
= emit_jump_insn_after_noloc (gen_jump (label
), insn
);
9301 barrier
= emit_barrier_after (jump
);
9302 insn
= emit_label_after (label
, barrier
);
9303 JUMP_LABEL (jump
) = label
;
9304 LABEL_NUSES (label
) = 1;
9306 INSN_ADDRESSES_NEW (jump
, -1);
9307 INSN_ADDRESSES_NEW (barrier
, -1);
9308 INSN_ADDRESSES_NEW (insn
, -1);
9310 s390_end_pool (curr_pool
, barrier
);
9318 s390_end_pool (curr_pool
, NULL
);
9319 gcc_assert (!pending_ltrel
);
9321 /* Find all labels that are branched into
9322 from an insn belonging to a different chunk. */
9324 far_labels
= BITMAP_ALLOC (NULL
);
9326 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9328 rtx_jump_table_data
*table
;
9330 /* Labels marked with LABEL_PRESERVE_P can be target
9331 of non-local jumps, so we have to mark them.
9332 The same holds for named labels.
9334 Don't do that, however, if it is the label before
9338 && (LABEL_PRESERVE_P (insn
) || LABEL_NAME (insn
)))
9340 rtx_insn
*vec_insn
= NEXT_INSN (insn
);
9341 if (! vec_insn
|| ! JUMP_TABLE_DATA_P (vec_insn
))
9342 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (insn
));
9344 /* Check potential targets in a table jump (casesi_jump). */
9345 else if (tablejump_p (insn
, NULL
, &table
))
9347 rtx vec_pat
= PATTERN (table
);
9348 int i
, diff_p
= GET_CODE (vec_pat
) == ADDR_DIFF_VEC
;
9350 for (i
= 0; i
< XVECLEN (vec_pat
, diff_p
); i
++)
9352 rtx label
= XEXP (XVECEXP (vec_pat
, diff_p
, i
), 0);
9354 if (s390_find_pool (pool_list
, label
)
9355 != s390_find_pool (pool_list
, insn
))
9356 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9359 /* If we have a direct jump (conditional or unconditional),
9360 check all potential targets. */
9361 else if (JUMP_P (insn
))
9363 rtx pat
= PATTERN (insn
);
9365 if (GET_CODE (pat
) == PARALLEL
)
9366 pat
= XVECEXP (pat
, 0, 0);
9368 if (GET_CODE (pat
) == SET
)
9370 rtx label
= JUMP_LABEL (insn
);
9371 if (label
&& !ANY_RETURN_P (label
))
9373 if (s390_find_pool (pool_list
, label
)
9374 != s390_find_pool (pool_list
, insn
))
9375 bitmap_set_bit (far_labels
, CODE_LABEL_NUMBER (label
));
9381 /* Insert base register reload insns before every pool. */
9383 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9385 rtx new_insn
= gen_reload_base (cfun
->machine
->base_reg
,
9387 rtx_insn
*insn
= curr_pool
->first_insn
;
9388 INSN_ADDRESSES_NEW (emit_insn_before (new_insn
, insn
), -1);
9391 /* Insert base register reload insns at every far label. */
9393 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9395 && bitmap_bit_p (far_labels
, CODE_LABEL_NUMBER (insn
)))
9397 struct constant_pool
*pool
= s390_find_pool (pool_list
, insn
);
9400 rtx new_insn
= gen_reload_base (cfun
->machine
->base_reg
,
9402 INSN_ADDRESSES_NEW (emit_insn_after (new_insn
, insn
), -1);
9407 BITMAP_FREE (far_labels
);
9410 /* Recompute insn addresses. */
9412 init_insn_lengths ();
9413 shorten_branches (get_insns ());
9418 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9419 After we have decided to use this list, finish implementing
9420 all changes to the current function as required. */
9423 s390_chunkify_finish (struct constant_pool
*pool_list
)
9425 struct constant_pool
*curr_pool
= NULL
;
9429 /* Replace all literal pool references. */
9431 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
9434 replace_ltrel_base (&PATTERN (insn
));
9436 curr_pool
= s390_find_pool (pool_list
, insn
);
9440 if (NONJUMP_INSN_P (insn
) || CALL_P (insn
))
9442 rtx addr
, pool_ref
= NULL_RTX
;
9443 find_constant_pool_ref (PATTERN (insn
), &pool_ref
);
9446 if (s390_execute_label (insn
))
9447 addr
= s390_find_execute (curr_pool
, insn
);
9449 addr
= s390_find_constant (curr_pool
,
9450 get_pool_constant (pool_ref
),
9451 get_pool_mode (pool_ref
));
9453 replace_constant_pool_ref (&PATTERN (insn
), pool_ref
, addr
);
9454 INSN_CODE (insn
) = -1;
9459 /* Dump out all literal pools. */
9461 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9462 s390_dump_pool (curr_pool
, 0);
9464 /* Free pool list. */
9468 struct constant_pool
*next
= pool_list
->next
;
9469 s390_free_pool (pool_list
);
9474 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9475 We have decided we cannot use this list, so revert all changes
9476 to the current function that were done by s390_chunkify_start. */
9479 s390_chunkify_cancel (struct constant_pool
*pool_list
)
9481 struct constant_pool
*curr_pool
= NULL
;
9484 /* Remove all pool placeholder insns. */
9486 for (curr_pool
= pool_list
; curr_pool
; curr_pool
= curr_pool
->next
)
9488 /* Did we insert an extra barrier? Remove it. */
9489 rtx_insn
*barrier
= PREV_INSN (curr_pool
->pool_insn
);
9490 rtx_insn
*jump
= barrier
? PREV_INSN (barrier
) : NULL
;
9491 rtx_insn
*label
= NEXT_INSN (curr_pool
->pool_insn
);
9493 if (jump
&& JUMP_P (jump
)
9494 && barrier
&& BARRIER_P (barrier
)
9495 && label
&& LABEL_P (label
)
9496 && GET_CODE (PATTERN (jump
)) == SET
9497 && SET_DEST (PATTERN (jump
)) == pc_rtx
9498 && GET_CODE (SET_SRC (PATTERN (jump
))) == LABEL_REF
9499 && XEXP (SET_SRC (PATTERN (jump
)), 0) == label
)
9502 remove_insn (barrier
);
9503 remove_insn (label
);
9506 remove_insn (curr_pool
->pool_insn
);
9509 /* Remove all base register reload insns. */
9511 for (insn
= get_insns (); insn
; )
9513 rtx_insn
*next_insn
= NEXT_INSN (insn
);
9515 if (NONJUMP_INSN_P (insn
)
9516 && GET_CODE (PATTERN (insn
)) == SET
9517 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC
9518 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPEC_RELOAD_BASE
)
9524 /* Free pool list. */
9528 struct constant_pool
*next
= pool_list
->next
;
9529 s390_free_pool (pool_list
);
9534 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9537 s390_output_pool_entry (rtx exp
, machine_mode mode
, unsigned int align
)
9539 switch (GET_MODE_CLASS (mode
))
9542 case MODE_DECIMAL_FLOAT
:
9543 gcc_assert (GET_CODE (exp
) == CONST_DOUBLE
);
9545 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp
),
9546 as_a
<scalar_float_mode
> (mode
), align
);
9550 assemble_integer (exp
, GET_MODE_SIZE (mode
), align
, 1);
9551 mark_symbol_refs_as_used (exp
);
9554 case MODE_VECTOR_INT
:
9555 case MODE_VECTOR_FLOAT
:
9558 machine_mode inner_mode
;
9559 gcc_assert (GET_CODE (exp
) == CONST_VECTOR
);
9561 inner_mode
= GET_MODE_INNER (GET_MODE (exp
));
9562 for (i
= 0; i
< XVECLEN (exp
, 0); i
++)
9563 s390_output_pool_entry (XVECEXP (exp
, 0, i
),
9567 : GET_MODE_BITSIZE (inner_mode
));
9577 /* Return an RTL expression representing the value of the return address
9578 for the frame COUNT steps up from the current frame. FRAME is the
9579 frame pointer of that frame. */
9582 s390_return_addr_rtx (int count
, rtx frame ATTRIBUTE_UNUSED
)
9587 /* Without backchain, we fail for all but the current frame. */
9589 if (!TARGET_BACKCHAIN
&& count
> 0)
9592 /* For the current frame, we need to make sure the initial
9593 value of RETURN_REGNUM is actually saved. */
9597 /* On non-z architectures branch splitting could overwrite r14. */
9598 if (TARGET_CPU_ZARCH
)
9599 return get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
);
9602 cfun_frame_layout
.save_return_addr_p
= true;
9603 return gen_rtx_MEM (Pmode
, return_address_pointer_rtx
);
9607 if (TARGET_PACKED_STACK
)
9608 offset
= -2 * UNITS_PER_LONG
;
9610 offset
= RETURN_REGNUM
* UNITS_PER_LONG
;
9612 addr
= plus_constant (Pmode
, frame
, offset
);
9613 addr
= memory_address (Pmode
, addr
);
9614 return gen_rtx_MEM (Pmode
, addr
);
9617 /* Return an RTL expression representing the back chain stored in
9618 the current stack frame. */
9621 s390_back_chain_rtx (void)
9625 gcc_assert (TARGET_BACKCHAIN
);
9627 if (TARGET_PACKED_STACK
)
9628 chain
= plus_constant (Pmode
, stack_pointer_rtx
,
9629 STACK_POINTER_OFFSET
- UNITS_PER_LONG
);
9631 chain
= stack_pointer_rtx
;
9633 chain
= gen_rtx_MEM (Pmode
, chain
);
9637 /* Find first call clobbered register unused in a function.
9638 This could be used as base register in a leaf function
9639 or for holding the return address before epilogue. */
9642 find_unused_clobbered_reg (void)
9645 for (i
= 0; i
< 6; i
++)
9646 if (!df_regs_ever_live_p (i
))
9652 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9653 clobbered hard regs in SETREG. */
9656 s390_reg_clobbered_rtx (rtx setreg
, const_rtx set_insn ATTRIBUTE_UNUSED
, void *data
)
9658 char *regs_ever_clobbered
= (char *)data
;
9659 unsigned int i
, regno
;
9660 machine_mode mode
= GET_MODE (setreg
);
9662 if (GET_CODE (setreg
) == SUBREG
)
9664 rtx inner
= SUBREG_REG (setreg
);
9665 if (!GENERAL_REG_P (inner
) && !FP_REG_P (inner
))
9667 regno
= subreg_regno (setreg
);
9669 else if (GENERAL_REG_P (setreg
) || FP_REG_P (setreg
))
9670 regno
= REGNO (setreg
);
9675 i
< end_hard_regno (mode
, regno
);
9677 regs_ever_clobbered
[i
] = 1;
9680 /* Walks through all basic blocks of the current function looking
9681 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9682 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9683 each of those regs. */
9686 s390_regs_ever_clobbered (char regs_ever_clobbered
[])
9692 memset (regs_ever_clobbered
, 0, 32);
9694 /* For non-leaf functions we have to consider all call clobbered regs to be
9698 for (i
= 0; i
< 32; i
++)
9699 regs_ever_clobbered
[i
] = call_really_used_regs
[i
];
9702 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9703 this work is done by liveness analysis (mark_regs_live_at_end).
9704 Special care is needed for functions containing landing pads. Landing pads
9705 may use the eh registers, but the code which sets these registers is not
9706 contained in that function. Hence s390_regs_ever_clobbered is not able to
9707 deal with this automatically. */
9708 if (crtl
->calls_eh_return
|| cfun
->machine
->has_landing_pad_p
)
9709 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; i
++)
9710 if (crtl
->calls_eh_return
9711 || (cfun
->machine
->has_landing_pad_p
9712 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i
))))
9713 regs_ever_clobbered
[EH_RETURN_DATA_REGNO (i
)] = 1;
9715 /* For nonlocal gotos all call-saved registers have to be saved.
9716 This flag is also set for the unwinding code in libgcc.
9717 See expand_builtin_unwind_init. For regs_ever_live this is done by
9719 if (crtl
->saves_all_registers
)
9720 for (i
= 0; i
< 32; i
++)
9721 if (!call_really_used_regs
[i
])
9722 regs_ever_clobbered
[i
] = 1;
9724 FOR_EACH_BB_FN (cur_bb
, cfun
)
9726 FOR_BB_INSNS (cur_bb
, cur_insn
)
9730 if (!INSN_P (cur_insn
))
9733 pat
= PATTERN (cur_insn
);
9735 /* Ignore GPR restore insns. */
9736 if (epilogue_completed
&& RTX_FRAME_RELATED_P (cur_insn
))
9738 if (GET_CODE (pat
) == SET
9739 && GENERAL_REG_P (SET_DEST (pat
)))
9742 if (GET_MODE (SET_SRC (pat
)) == DImode
9743 && FP_REG_P (SET_SRC (pat
)))
9747 if (GET_CODE (SET_SRC (pat
)) == MEM
)
9752 if (GET_CODE (pat
) == PARALLEL
9753 && load_multiple_operation (pat
, VOIDmode
))
9758 s390_reg_clobbered_rtx
,
9759 regs_ever_clobbered
);
9764 /* Determine the frame area which actually has to be accessed
9765 in the function epilogue. The values are stored at the
9766 given pointers AREA_BOTTOM (address of the lowest used stack
9767 address) and AREA_TOP (address of the first item which does
9768 not belong to the stack frame). */
9771 s390_frame_area (int *area_bottom
, int *area_top
)
9778 if (cfun_frame_layout
.first_restore_gpr
!= -1)
9780 b
= (cfun_frame_layout
.gprs_offset
9781 + cfun_frame_layout
.first_restore_gpr
* UNITS_PER_LONG
);
9782 t
= b
+ (cfun_frame_layout
.last_restore_gpr
9783 - cfun_frame_layout
.first_restore_gpr
+ 1) * UNITS_PER_LONG
;
9786 if (TARGET_64BIT
&& cfun_save_high_fprs_p
)
9788 b
= MIN (b
, cfun_frame_layout
.f8_offset
);
9789 t
= MAX (t
, (cfun_frame_layout
.f8_offset
9790 + cfun_frame_layout
.high_fprs
* 8));
9795 if (cfun_fpr_save_p (FPR4_REGNUM
))
9797 b
= MIN (b
, cfun_frame_layout
.f4_offset
);
9798 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 8);
9800 if (cfun_fpr_save_p (FPR6_REGNUM
))
9802 b
= MIN (b
, cfun_frame_layout
.f4_offset
+ 8);
9803 t
= MAX (t
, cfun_frame_layout
.f4_offset
+ 16);
9809 /* Update gpr_save_slots in the frame layout trying to make use of
9810 FPRs as GPR save slots.
9811 This is a helper routine of s390_register_info. */
9814 s390_register_info_gprtofpr ()
9816 int save_reg_slot
= FPR0_REGNUM
;
9819 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
9822 /* builtin_eh_return needs to be able to modify the return address
9823 on the stack. It could also adjust the FPR save slot instead but
9824 is it worth the trouble?! */
9825 if (crtl
->calls_eh_return
)
9828 for (i
= 15; i
>= 6; i
--)
9830 if (cfun_gpr_save_slot (i
) == SAVE_SLOT_NONE
)
9833 /* Advance to the next FP register which can be used as a
9835 while ((!call_really_used_regs
[save_reg_slot
]
9836 || df_regs_ever_live_p (save_reg_slot
)
9837 || cfun_fpr_save_p (save_reg_slot
))
9838 && FP_REGNO_P (save_reg_slot
))
9840 if (!FP_REGNO_P (save_reg_slot
))
9842 /* We only want to use ldgr/lgdr if we can get rid of
9843 stm/lm entirely. So undo the gpr slot allocation in
9844 case we ran out of FPR save slots. */
9845 for (j
= 6; j
<= 15; j
++)
9846 if (FP_REGNO_P (cfun_gpr_save_slot (j
)))
9847 cfun_gpr_save_slot (j
) = SAVE_SLOT_STACK
;
9850 cfun_gpr_save_slot (i
) = save_reg_slot
++;
9854 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9856 This is a helper routine for s390_register_info. */
9859 s390_register_info_stdarg_fpr ()
9865 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9866 f0-f4 for 64 bit. */
9868 || !TARGET_HARD_FLOAT
9869 || !cfun
->va_list_fpr_size
9870 || crtl
->args
.info
.fprs
>= FP_ARG_NUM_REG
)
9873 min_fpr
= crtl
->args
.info
.fprs
;
9874 max_fpr
= min_fpr
+ cfun
->va_list_fpr_size
- 1;
9875 if (max_fpr
>= FP_ARG_NUM_REG
)
9876 max_fpr
= FP_ARG_NUM_REG
- 1;
9878 /* FPR argument regs start at f0. */
9879 min_fpr
+= FPR0_REGNUM
;
9880 max_fpr
+= FPR0_REGNUM
;
9882 for (i
= min_fpr
; i
<= max_fpr
; i
++)
9883 cfun_set_fpr_save (i
);
9886 /* Reserve the GPR save slots for GPRs which need to be saved due to
9888 This is a helper routine for s390_register_info. */
9891 s390_register_info_stdarg_gpr ()
9898 || !cfun
->va_list_gpr_size
9899 || crtl
->args
.info
.gprs
>= GP_ARG_NUM_REG
)
9902 min_gpr
= crtl
->args
.info
.gprs
;
9903 max_gpr
= min_gpr
+ cfun
->va_list_gpr_size
- 1;
9904 if (max_gpr
>= GP_ARG_NUM_REG
)
9905 max_gpr
= GP_ARG_NUM_REG
- 1;
9907 /* GPR argument regs start at r2. */
9908 min_gpr
+= GPR2_REGNUM
;
9909 max_gpr
+= GPR2_REGNUM
;
9911 /* If r6 was supposed to be saved into an FPR and now needs to go to
9912 the stack for vararg we have to adjust the restore range to make
9913 sure that the restore is done from stack as well. */
9914 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM
))
9915 && min_gpr
<= GPR6_REGNUM
9916 && max_gpr
>= GPR6_REGNUM
)
9918 if (cfun_frame_layout
.first_restore_gpr
== -1
9919 || cfun_frame_layout
.first_restore_gpr
> GPR6_REGNUM
)
9920 cfun_frame_layout
.first_restore_gpr
= GPR6_REGNUM
;
9921 if (cfun_frame_layout
.last_restore_gpr
== -1
9922 || cfun_frame_layout
.last_restore_gpr
< GPR6_REGNUM
)
9923 cfun_frame_layout
.last_restore_gpr
= GPR6_REGNUM
;
9926 if (cfun_frame_layout
.first_save_gpr
== -1
9927 || cfun_frame_layout
.first_save_gpr
> min_gpr
)
9928 cfun_frame_layout
.first_save_gpr
= min_gpr
;
9930 if (cfun_frame_layout
.last_save_gpr
== -1
9931 || cfun_frame_layout
.last_save_gpr
< max_gpr
)
9932 cfun_frame_layout
.last_save_gpr
= max_gpr
;
9934 for (i
= min_gpr
; i
<= max_gpr
; i
++)
9935 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
9938 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9939 prologue and epilogue. */
9942 s390_register_info_set_ranges ()
9946 /* Find the first and the last save slot supposed to use the stack
9947 to set the restore range.
9948 Vararg regs might be marked as save to stack but only the
9949 call-saved regs really need restoring (i.e. r6). This code
9950 assumes that the vararg regs have not yet been recorded in
9951 cfun_gpr_save_slot. */
9952 for (i
= 0; i
< 16 && cfun_gpr_save_slot (i
) != SAVE_SLOT_STACK
; i
++);
9953 for (j
= 15; j
> i
&& cfun_gpr_save_slot (j
) != SAVE_SLOT_STACK
; j
--);
9954 cfun_frame_layout
.first_restore_gpr
= (i
== 16) ? -1 : i
;
9955 cfun_frame_layout
.last_restore_gpr
= (i
== 16) ? -1 : j
;
9956 cfun_frame_layout
.first_save_gpr
= (i
== 16) ? -1 : i
;
9957 cfun_frame_layout
.last_save_gpr
= (i
== 16) ? -1 : j
;
9960 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9961 for registers which need to be saved in function prologue.
9962 This function can be used until the insns emitted for save/restore
9963 of the regs are visible in the RTL stream. */
9966 s390_register_info ()
9969 char clobbered_regs
[32];
9971 gcc_assert (!epilogue_completed
);
9973 if (reload_completed
)
9974 /* After reload we rely on our own routine to determine which
9975 registers need saving. */
9976 s390_regs_ever_clobbered (clobbered_regs
);
9978 /* During reload we use regs_ever_live as a base since reload
9979 does changes in there which we otherwise would not be aware
9981 for (i
= 0; i
< 32; i
++)
9982 clobbered_regs
[i
] = df_regs_ever_live_p (i
);
9984 for (i
= 0; i
< 32; i
++)
9985 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_regs
[i
];
9987 /* Mark the call-saved FPRs which need to be saved.
9988 This needs to be done before checking the special GPRs since the
9989 stack pointer usage depends on whether high FPRs have to be saved
9991 cfun_frame_layout
.fpr_bitmap
= 0;
9992 cfun_frame_layout
.high_fprs
= 0;
9993 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
9994 if (clobbered_regs
[i
] && !call_really_used_regs
[i
])
9996 cfun_set_fpr_save (i
);
9997 if (i
>= FPR8_REGNUM
)
9998 cfun_frame_layout
.high_fprs
++;
10001 /* Register 12 is used for GOT address, but also as temp in prologue
10002 for split-stack stdarg functions (unless r14 is available). */
10004 |= ((flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
10005 || (flag_split_stack
&& cfun
->stdarg
10006 && (crtl
->is_leaf
|| TARGET_TPF_PROFILING
10007 || has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
))));
10009 clobbered_regs
[BASE_REGNUM
]
10010 |= (cfun
->machine
->base_reg
10011 && REGNO (cfun
->machine
->base_reg
) == BASE_REGNUM
);
10013 clobbered_regs
[HARD_FRAME_POINTER_REGNUM
]
10014 |= !!frame_pointer_needed
;
10016 /* On pre z900 machines this might take until machine dependent
10018 save_return_addr_p will only be set on non-zarch machines so
10019 there is no risk that r14 goes into an FPR instead of a stack
10021 clobbered_regs
[RETURN_REGNUM
]
10023 || TARGET_TPF_PROFILING
10024 || cfun
->machine
->split_branches_pending_p
10025 || cfun_frame_layout
.save_return_addr_p
10026 || crtl
->calls_eh_return
);
10028 clobbered_regs
[STACK_POINTER_REGNUM
]
10030 || TARGET_TPF_PROFILING
10031 || cfun_save_high_fprs_p
10032 || get_frame_size () > 0
10033 || (reload_completed
&& cfun_frame_layout
.frame_size
> 0)
10034 || cfun
->calls_alloca
);
10036 memset (cfun_frame_layout
.gpr_save_slots
, SAVE_SLOT_NONE
, 16);
10038 for (i
= 6; i
< 16; i
++)
10039 if (clobbered_regs
[i
])
10040 cfun_gpr_save_slot (i
) = SAVE_SLOT_STACK
;
10042 s390_register_info_stdarg_fpr ();
10043 s390_register_info_gprtofpr ();
10044 s390_register_info_set_ranges ();
10045 /* stdarg functions might need to save GPRs 2 to 6. This might
10046 override the GPR->FPR save decision made by
10047 s390_register_info_gprtofpr for r6 since vararg regs must go to
10049 s390_register_info_stdarg_gpr ();
10052 /* This function is called by s390_optimize_prologue in order to get
10053 rid of unnecessary GPR save/restore instructions. The register info
10054 for the GPRs is re-computed and the ranges are re-calculated. */
10057 s390_optimize_register_info ()
10059 char clobbered_regs
[32];
10062 gcc_assert (epilogue_completed
);
10063 gcc_assert (!cfun
->machine
->split_branches_pending_p
);
10065 s390_regs_ever_clobbered (clobbered_regs
);
10067 for (i
= 0; i
< 32; i
++)
10068 clobbered_regs
[i
] = clobbered_regs
[i
] && !global_regs
[i
];
10070 /* There is still special treatment needed for cases invisible to
10071 s390_regs_ever_clobbered. */
10072 clobbered_regs
[RETURN_REGNUM
]
10073 |= (TARGET_TPF_PROFILING
10074 /* When expanding builtin_return_addr in ESA mode we do not
10075 know whether r14 will later be needed as scratch reg when
10076 doing branch splitting. So the builtin always accesses the
10077 r14 save slot and we need to stick to the save/restore
10078 decision for r14 even if it turns out that it didn't get
10080 || cfun_frame_layout
.save_return_addr_p
10081 || crtl
->calls_eh_return
);
10083 memset (cfun_frame_layout
.gpr_save_slots
, SAVE_SLOT_NONE
, 6);
10085 for (i
= 6; i
< 16; i
++)
10086 if (!clobbered_regs
[i
])
10087 cfun_gpr_save_slot (i
) = SAVE_SLOT_NONE
;
10089 s390_register_info_set_ranges ();
10090 s390_register_info_stdarg_gpr ();
10093 /* Fill cfun->machine with info about frame of current function. */
10096 s390_frame_info (void)
10098 HOST_WIDE_INT lowest_offset
;
10100 cfun_frame_layout
.first_save_gpr_slot
= cfun_frame_layout
.first_save_gpr
;
10101 cfun_frame_layout
.last_save_gpr_slot
= cfun_frame_layout
.last_save_gpr
;
10103 /* The va_arg builtin uses a constant distance of 16 *
10104 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10105 pointer. So even if we are going to save the stack pointer in an
10106 FPR we need the stack space in order to keep the offsets
10108 if (cfun
->stdarg
&& cfun_save_arg_fprs_p
)
10110 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
10112 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
10113 cfun_frame_layout
.first_save_gpr_slot
= STACK_POINTER_REGNUM
;
10116 cfun_frame_layout
.frame_size
= get_frame_size ();
10117 if (!TARGET_64BIT
&& cfun_frame_layout
.frame_size
> 0x7fff0000)
10118 fatal_error (input_location
,
10119 "total size of local variables exceeds architecture limit");
10121 if (!TARGET_PACKED_STACK
)
10123 /* Fixed stack layout. */
10124 cfun_frame_layout
.backchain_offset
= 0;
10125 cfun_frame_layout
.f0_offset
= 16 * UNITS_PER_LONG
;
10126 cfun_frame_layout
.f4_offset
= cfun_frame_layout
.f0_offset
+ 2 * 8;
10127 cfun_frame_layout
.f8_offset
= -cfun_frame_layout
.high_fprs
* 8;
10128 cfun_frame_layout
.gprs_offset
= (cfun_frame_layout
.first_save_gpr_slot
10131 else if (TARGET_BACKCHAIN
)
10133 /* Kernel stack layout - packed stack, backchain, no float */
10134 gcc_assert (TARGET_SOFT_FLOAT
);
10135 cfun_frame_layout
.backchain_offset
= (STACK_POINTER_OFFSET
10138 /* The distance between the backchain and the return address
10139 save slot must not change. So we always need a slot for the
10140 stack pointer which resides in between. */
10141 cfun_frame_layout
.last_save_gpr_slot
= STACK_POINTER_REGNUM
;
10143 cfun_frame_layout
.gprs_offset
10144 = cfun_frame_layout
.backchain_offset
- cfun_gprs_save_area_size
;
10146 /* FPRs will not be saved. Nevertheless pick sane values to
10147 keep area calculations valid. */
10148 cfun_frame_layout
.f0_offset
=
10149 cfun_frame_layout
.f4_offset
=
10150 cfun_frame_layout
.f8_offset
= cfun_frame_layout
.gprs_offset
;
10156 /* Packed stack layout without backchain. */
10158 /* With stdarg FPRs need their dedicated slots. */
10159 num_fprs
= (TARGET_64BIT
&& cfun
->stdarg
? 2
10160 : (cfun_fpr_save_p (FPR4_REGNUM
) +
10161 cfun_fpr_save_p (FPR6_REGNUM
)));
10162 cfun_frame_layout
.f4_offset
= STACK_POINTER_OFFSET
- 8 * num_fprs
;
10164 num_fprs
= (cfun
->stdarg
? 2
10165 : (cfun_fpr_save_p (FPR0_REGNUM
)
10166 + cfun_fpr_save_p (FPR2_REGNUM
)));
10167 cfun_frame_layout
.f0_offset
= cfun_frame_layout
.f4_offset
- 8 * num_fprs
;
10169 cfun_frame_layout
.gprs_offset
10170 = cfun_frame_layout
.f0_offset
- cfun_gprs_save_area_size
;
10172 cfun_frame_layout
.f8_offset
= (cfun_frame_layout
.gprs_offset
10173 - cfun_frame_layout
.high_fprs
* 8);
10176 if (cfun_save_high_fprs_p
)
10177 cfun_frame_layout
.frame_size
+= cfun_frame_layout
.high_fprs
* 8;
10179 if (!crtl
->is_leaf
)
10180 cfun_frame_layout
.frame_size
+= crtl
->outgoing_args_size
;
10182 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10183 sized area at the bottom of the stack. This is required also for
10184 leaf functions. When GCC generates a local stack reference it
10185 will always add STACK_POINTER_OFFSET to all these references. */
10187 && !TARGET_TPF_PROFILING
10188 && cfun_frame_layout
.frame_size
== 0
10189 && !cfun
->calls_alloca
)
10192 /* Calculate the number of bytes we have used in our own register
10193 save area. With the packed stack layout we can re-use the
10194 remaining bytes for normal stack elements. */
10196 if (TARGET_PACKED_STACK
)
10197 lowest_offset
= MIN (MIN (cfun_frame_layout
.f0_offset
,
10198 cfun_frame_layout
.f4_offset
),
10199 cfun_frame_layout
.gprs_offset
);
10203 if (TARGET_BACKCHAIN
)
10204 lowest_offset
= MIN (lowest_offset
, cfun_frame_layout
.backchain_offset
);
10206 cfun_frame_layout
.frame_size
+= STACK_POINTER_OFFSET
- lowest_offset
;
10208 /* If under 31 bit an odd number of gprs has to be saved we have to
10209 adjust the frame size to sustain 8 byte alignment of stack
10211 cfun_frame_layout
.frame_size
= ((cfun_frame_layout
.frame_size
+
10212 STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
10213 & ~(STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
10216 /* Generate frame layout. Fills in register and frame data for the current
10217 function in cfun->machine. This routine can be called multiple times;
10218 it will re-do the complete frame layout every time. */
10221 s390_init_frame_layout (void)
10223 HOST_WIDE_INT frame_size
;
10226 /* After LRA the frame layout is supposed to be read-only and should
10227 not be re-computed. */
10228 if (reload_completed
)
10231 /* On S/390 machines, we may need to perform branch splitting, which
10232 will require both base and return address register. We have no
10233 choice but to assume we're going to need them until right at the
10234 end of the machine dependent reorg phase. */
10235 if (!TARGET_CPU_ZARCH
)
10236 cfun
->machine
->split_branches_pending_p
= true;
10240 frame_size
= cfun_frame_layout
.frame_size
;
10242 /* Try to predict whether we'll need the base register. */
10243 base_used
= cfun
->machine
->split_branches_pending_p
10244 || crtl
->uses_const_pool
10245 || (!DISP_IN_RANGE (frame_size
)
10246 && !CONST_OK_FOR_K (frame_size
));
10248 /* Decide which register to use as literal pool base. In small
10249 leaf functions, try to use an unused call-clobbered register
10250 as base register to avoid save/restore overhead. */
10252 cfun
->machine
->base_reg
= NULL_RTX
;
10258 /* Prefer r5 (most likely to be free). */
10259 for (br
= 5; br
>= 2 && df_regs_ever_live_p (br
); br
--)
10261 cfun
->machine
->base_reg
=
10262 gen_rtx_REG (Pmode
, (br
>= 2) ? br
: BASE_REGNUM
);
10265 s390_register_info ();
10266 s390_frame_info ();
10268 while (frame_size
!= cfun_frame_layout
.frame_size
);
10271 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10272 the TX is nonescaping. A transaction is considered escaping if
10273 there is at least one path from tbegin returning CC0 to the
10274 function exit block without an tend.
10276 The check so far has some limitations:
10277 - only single tbegin/tend BBs are supported
10278 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10279 - when CC is copied to a GPR and the CC0 check is done with the GPR
10280 this is not supported
10284 s390_optimize_nonescaping_tx (void)
10286 const unsigned int CC0
= 1 << 3;
10287 basic_block tbegin_bb
= NULL
;
10288 basic_block tend_bb
= NULL
;
10291 bool result
= true;
10293 rtx_insn
*tbegin_insn
= NULL
;
10295 if (!cfun
->machine
->tbegin_p
)
10298 for (bb_index
= 0; bb_index
< n_basic_blocks_for_fn (cfun
); bb_index
++)
10300 bb
= BASIC_BLOCK_FOR_FN (cfun
, bb_index
);
10305 FOR_BB_INSNS (bb
, insn
)
10307 rtx ite
, cc
, pat
, target
;
10308 unsigned HOST_WIDE_INT mask
;
10310 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
10313 pat
= PATTERN (insn
);
10315 if (GET_CODE (pat
) == PARALLEL
)
10316 pat
= XVECEXP (pat
, 0, 0);
10318 if (GET_CODE (pat
) != SET
10319 || GET_CODE (SET_SRC (pat
)) != UNSPEC_VOLATILE
)
10322 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TBEGIN
)
10326 tbegin_insn
= insn
;
10328 /* Just return if the tbegin doesn't have clobbers. */
10329 if (GET_CODE (PATTERN (insn
)) != PARALLEL
)
10332 if (tbegin_bb
!= NULL
)
10335 /* Find the next conditional jump. */
10336 for (tmp
= NEXT_INSN (insn
);
10338 tmp
= NEXT_INSN (tmp
))
10340 if (reg_set_p (gen_rtx_REG (CCmode
, CC_REGNUM
), tmp
))
10345 ite
= SET_SRC (PATTERN (tmp
));
10346 if (GET_CODE (ite
) != IF_THEN_ELSE
)
10349 cc
= XEXP (XEXP (ite
, 0), 0);
10350 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
))
10351 || GET_MODE (cc
) != CCRAWmode
10352 || GET_CODE (XEXP (XEXP (ite
, 0), 1)) != CONST_INT
)
10355 if (bb
->succs
->length () != 2)
10358 mask
= INTVAL (XEXP (XEXP (ite
, 0), 1));
10359 if (GET_CODE (XEXP (ite
, 0)) == NE
)
10363 target
= XEXP (ite
, 1);
10364 else if (mask
== (CC0
^ 0xf))
10365 target
= XEXP (ite
, 2);
10373 ei
= ei_start (bb
->succs
);
10374 e1
= ei_safe_edge (ei
);
10376 e2
= ei_safe_edge (ei
);
10378 if (e2
->flags
& EDGE_FALLTHRU
)
10381 e1
= ei_safe_edge (ei
);
10384 if (!(e1
->flags
& EDGE_FALLTHRU
))
10387 tbegin_bb
= (target
== pc_rtx
) ? e1
->dest
: e2
->dest
;
10389 if (tmp
== BB_END (bb
))
10394 if (XINT (SET_SRC (pat
), 1) == UNSPECV_TEND
)
10396 if (tend_bb
!= NULL
)
10403 /* Either we successfully remove the FPR clobbers here or we are not
10404 able to do anything for this TX. Both cases don't qualify for
10406 cfun
->machine
->tbegin_p
= false;
10408 if (tbegin_bb
== NULL
|| tend_bb
== NULL
)
10411 calculate_dominance_info (CDI_POST_DOMINATORS
);
10412 result
= dominated_by_p (CDI_POST_DOMINATORS
, tbegin_bb
, tend_bb
);
10413 free_dominance_info (CDI_POST_DOMINATORS
);
10418 PATTERN (tbegin_insn
) = gen_rtx_PARALLEL (VOIDmode
,
10420 XVECEXP (PATTERN (tbegin_insn
), 0, 0),
10421 XVECEXP (PATTERN (tbegin_insn
), 0, 1)));
10422 INSN_CODE (tbegin_insn
) = -1;
10423 df_insn_rescan (tbegin_insn
);
10428 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10429 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10431 static unsigned int
10432 s390_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
10434 return s390_class_max_nregs (REGNO_REG_CLASS (regno
), mode
);
10437 /* Implement TARGET_HARD_REGNO_MODE_OK.
10439 Integer modes <= word size fit into any GPR.
10440 Integer modes > word size fit into successive GPRs, starting with
10441 an even-numbered register.
10442 SImode and DImode fit into FPRs as well.
10444 Floating point modes <= word size fit into any FPR or GPR.
10445 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10446 into any FPR, or an even-odd GPR pair.
10447 TFmode fits only into an even-odd FPR pair.
10449 Complex floating point modes fit either into two FPRs, or into
10450 successive GPRs (again starting with an even number).
10451 TCmode fits only into two successive even-odd FPR pairs.
10453 Condition code modes fit only into the CC register. */
10456 s390_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10458 if (!TARGET_VX
&& VECTOR_NOFP_REGNO_P (regno
))
10461 switch (REGNO_REG_CLASS (regno
))
10464 return ((GET_MODE_CLASS (mode
) == MODE_INT
10465 && s390_class_max_nregs (VEC_REGS
, mode
) == 1)
10467 || (TARGET_VXE
&& mode
== SFmode
)
10468 || s390_vector_mode_supported_p (mode
));
10472 && ((GET_MODE_CLASS (mode
) == MODE_INT
10473 && s390_class_max_nregs (FP_REGS
, mode
) == 1)
10475 || s390_vector_mode_supported_p (mode
)))
10478 if (REGNO_PAIR_OK (regno
, mode
))
10480 if (mode
== SImode
|| mode
== DImode
)
10483 if (FLOAT_MODE_P (mode
) && GET_MODE_CLASS (mode
) != MODE_VECTOR_FLOAT
)
10488 if (FRAME_REGNO_P (regno
) && mode
== Pmode
)
10493 if (REGNO_PAIR_OK (regno
, mode
))
10496 || (mode
!= TFmode
&& mode
!= TCmode
&& mode
!= TDmode
))
10501 if (GET_MODE_CLASS (mode
) == MODE_CC
)
10505 if (REGNO_PAIR_OK (regno
, mode
))
10507 if (mode
== SImode
|| mode
== Pmode
)
10518 /* Implement TARGET_MODES_TIEABLE_P. */
10521 s390_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10523 return ((mode1
== SFmode
|| mode1
== DFmode
)
10524 == (mode2
== SFmode
|| mode2
== DFmode
));
10527 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10530 s390_hard_regno_rename_ok (unsigned int old_reg
, unsigned int new_reg
)
10532 /* Once we've decided upon a register to use as base register, it must
10533 no longer be used for any other purpose. */
10534 if (cfun
->machine
->base_reg
)
10535 if (REGNO (cfun
->machine
->base_reg
) == old_reg
10536 || REGNO (cfun
->machine
->base_reg
) == new_reg
)
10539 /* Prevent regrename from using call-saved regs which haven't
10540 actually been saved. This is necessary since regrename assumes
10541 the backend save/restore decisions are based on
10542 df_regs_ever_live. Since we have our own routine we have to tell
10543 regrename manually about it. */
10544 if (GENERAL_REGNO_P (new_reg
)
10545 && !call_really_used_regs
[new_reg
]
10546 && cfun_gpr_save_slot (new_reg
) == SAVE_SLOT_NONE
)
10552 /* Return nonzero if register REGNO can be used as a scratch register
10556 s390_hard_regno_scratch_ok (unsigned int regno
)
10558 /* See s390_hard_regno_rename_ok. */
10559 if (GENERAL_REGNO_P (regno
)
10560 && !call_really_used_regs
[regno
]
10561 && cfun_gpr_save_slot (regno
) == SAVE_SLOT_NONE
)
10567 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10568 code that runs in z/Architecture mode, but conforms to the 31-bit
10569 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10570 bytes are saved across calls, however. */
10573 s390_hard_regno_call_part_clobbered (unsigned int regno
, machine_mode mode
)
10577 && GET_MODE_SIZE (mode
) > 4
10578 && ((regno
>= 6 && regno
<= 15) || regno
== 32))
10582 && GET_MODE_SIZE (mode
) > 8
10583 && (((TARGET_64BIT
&& regno
>= 24 && regno
<= 31))
10584 || (!TARGET_64BIT
&& (regno
== 18 || regno
== 19))))
10590 /* Maximum number of registers to represent a value of mode MODE
10591 in a register of class RCLASS. */
10594 s390_class_max_nregs (enum reg_class rclass
, machine_mode mode
)
10597 bool reg_pair_required_p
= false;
10603 reg_size
= TARGET_VX
? 16 : 8;
10605 /* TF and TD modes would fit into a VR but we put them into a
10606 register pair since we do not have 128bit FP instructions on
10609 && SCALAR_FLOAT_MODE_P (mode
)
10610 && GET_MODE_SIZE (mode
) >= 16)
10611 reg_pair_required_p
= true;
10613 /* Even if complex types would fit into a single FPR/VR we force
10614 them into a register pair to deal with the parts more easily.
10615 (FIXME: What about complex ints?) */
10616 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
10617 reg_pair_required_p
= true;
10623 reg_size
= UNITS_PER_WORD
;
10627 if (reg_pair_required_p
)
10628 return 2 * ((GET_MODE_SIZE (mode
) / 2 + reg_size
- 1) / reg_size
);
10630 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
10633 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10636 s390_can_change_mode_class (machine_mode from_mode
,
10637 machine_mode to_mode
,
10638 reg_class_t rclass
)
10640 machine_mode small_mode
;
10641 machine_mode big_mode
;
10643 /* V1TF and TF have different representations in vector
10645 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10646 && ((from_mode
== V1TFmode
&& to_mode
== TFmode
)
10647 || (from_mode
== TFmode
&& to_mode
== V1TFmode
)))
10650 if (GET_MODE_SIZE (from_mode
) == GET_MODE_SIZE (to_mode
))
10653 if (GET_MODE_SIZE (from_mode
) < GET_MODE_SIZE (to_mode
))
10655 small_mode
= from_mode
;
10656 big_mode
= to_mode
;
10660 small_mode
= to_mode
;
10661 big_mode
= from_mode
;
10664 /* Values residing in VRs are little-endian style. All modes are
10665 placed left-aligned in an VR. This means that we cannot allow
10666 switching between modes with differing sizes. Also if the vector
10667 facility is available we still place TFmode values in VR register
10668 pairs, since the only instructions we have operating on TFmodes
10669 only deal with register pairs. Therefore we have to allow DFmode
10670 subregs of TFmodes to enable the TFmode splitters. */
10671 if (reg_classes_intersect_p (VEC_REGS
, rclass
)
10672 && (GET_MODE_SIZE (small_mode
) < 8
10673 || s390_class_max_nregs (VEC_REGS
, big_mode
) == 1))
10676 /* Likewise for access registers, since they have only half the
10677 word size on 64-bit. */
10678 if (reg_classes_intersect_p (ACCESS_REGS
, rclass
))
10684 /* Return true if we use LRA instead of reload pass. */
10688 return s390_lra_flag
;
10691 /* Return true if register FROM can be eliminated via register TO. */
10694 s390_can_eliminate (const int from
, const int to
)
10696 /* On zSeries machines, we have not marked the base register as fixed.
10697 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10698 If a function requires the base register, we say here that this
10699 elimination cannot be performed. This will cause reload to free
10700 up the base register (as if it were fixed). On the other hand,
10701 if the current function does *not* require the base register, we
10702 say here the elimination succeeds, which in turn allows reload
10703 to allocate the base register for any other purpose. */
10704 if (from
== BASE_REGNUM
&& to
== BASE_REGNUM
)
10706 if (TARGET_CPU_ZARCH
)
10708 s390_init_frame_layout ();
10709 return cfun
->machine
->base_reg
== NULL_RTX
;
10715 /* Everything else must point into the stack frame. */
10716 gcc_assert (to
== STACK_POINTER_REGNUM
10717 || to
== HARD_FRAME_POINTER_REGNUM
);
10719 gcc_assert (from
== FRAME_POINTER_REGNUM
10720 || from
== ARG_POINTER_REGNUM
10721 || from
== RETURN_ADDRESS_POINTER_REGNUM
);
10723 /* Make sure we actually saved the return address. */
10724 if (from
== RETURN_ADDRESS_POINTER_REGNUM
)
10725 if (!crtl
->calls_eh_return
10727 && !cfun_frame_layout
.save_return_addr_p
)
10733 /* Return offset between register FROM and TO initially after prolog. */
10736 s390_initial_elimination_offset (int from
, int to
)
10738 HOST_WIDE_INT offset
;
10740 /* ??? Why are we called for non-eliminable pairs? */
10741 if (!s390_can_eliminate (from
, to
))
10746 case FRAME_POINTER_REGNUM
:
10747 offset
= (get_frame_size()
10748 + STACK_POINTER_OFFSET
10749 + crtl
->outgoing_args_size
);
10752 case ARG_POINTER_REGNUM
:
10753 s390_init_frame_layout ();
10754 offset
= cfun_frame_layout
.frame_size
+ STACK_POINTER_OFFSET
;
10757 case RETURN_ADDRESS_POINTER_REGNUM
:
10758 s390_init_frame_layout ();
10760 if (cfun_frame_layout
.first_save_gpr_slot
== -1)
10762 /* If it turns out that for stdarg nothing went into the reg
10763 save area we also do not need the return address
10765 if (cfun
->stdarg
&& !cfun_save_arg_fprs_p
)
10768 gcc_unreachable ();
10771 /* In order to make the following work it is not necessary for
10772 r14 to have a save slot. It is sufficient if one other GPR
10773 got one. Since the GPRs are always stored without gaps we
10774 are able to calculate where the r14 save slot would
10776 offset
= (cfun_frame_layout
.frame_size
+ cfun_frame_layout
.gprs_offset
+
10777 (RETURN_REGNUM
- cfun_frame_layout
.first_save_gpr_slot
) *
10786 gcc_unreachable ();
10792 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10793 to register BASE. Return generated insn. */
10796 save_fpr (rtx base
, int offset
, int regnum
)
10799 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10801 if (regnum
>= 16 && regnum
<= (16 + FP_ARG_NUM_REG
))
10802 set_mem_alias_set (addr
, get_varargs_alias_set ());
10804 set_mem_alias_set (addr
, get_frame_alias_set ());
10806 return emit_move_insn (addr
, gen_rtx_REG (DFmode
, regnum
));
10809 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10810 to register BASE. Return generated insn. */
10813 restore_fpr (rtx base
, int offset
, int regnum
)
10816 addr
= gen_rtx_MEM (DFmode
, plus_constant (Pmode
, base
, offset
));
10817 set_mem_alias_set (addr
, get_frame_alias_set ());
10819 return emit_move_insn (gen_rtx_REG (DFmode
, regnum
), addr
);
10822 /* Return true if REGNO is a global register, but not one
10823 of the special ones that need to be saved/restored in anyway. */
10826 global_not_special_regno_p (int regno
)
10828 return (global_regs
[regno
]
10829 /* These registers are special and need to be
10830 restored in any case. */
10831 && !(regno
== STACK_POINTER_REGNUM
10832 || regno
== RETURN_REGNUM
10833 || regno
== BASE_REGNUM
10834 || (flag_pic
&& regno
== (int)PIC_OFFSET_TABLE_REGNUM
)));
10837 /* Generate insn to save registers FIRST to LAST into
10838 the register save area located at offset OFFSET
10839 relative to register BASE. */
10842 save_gprs (rtx base
, int offset
, int first
, int last
)
10844 rtx addr
, insn
, note
;
10847 addr
= plus_constant (Pmode
, base
, offset
);
10848 addr
= gen_rtx_MEM (Pmode
, addr
);
10850 set_mem_alias_set (addr
, get_frame_alias_set ());
10852 /* Special-case single register. */
10856 insn
= gen_movdi (addr
, gen_rtx_REG (Pmode
, first
));
10858 insn
= gen_movsi (addr
, gen_rtx_REG (Pmode
, first
));
10860 if (!global_not_special_regno_p (first
))
10861 RTX_FRAME_RELATED_P (insn
) = 1;
10866 insn
= gen_store_multiple (addr
,
10867 gen_rtx_REG (Pmode
, first
),
10868 GEN_INT (last
- first
+ 1));
10870 if (first
<= 6 && cfun
->stdarg
)
10871 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
10873 rtx mem
= XEXP (XVECEXP (PATTERN (insn
), 0, i
), 0);
10875 if (first
+ i
<= 6)
10876 set_mem_alias_set (mem
, get_varargs_alias_set ());
10879 /* We need to set the FRAME_RELATED flag on all SETs
10880 inside the store-multiple pattern.
10882 However, we must not emit DWARF records for registers 2..5
10883 if they are stored for use by variable arguments ...
10885 ??? Unfortunately, it is not enough to simply not the
10886 FRAME_RELATED flags for those SETs, because the first SET
10887 of the PARALLEL is always treated as if it had the flag
10888 set, even if it does not. Therefore we emit a new pattern
10889 without those registers as REG_FRAME_RELATED_EXPR note. */
10891 if (first
>= 6 && !global_not_special_regno_p (first
))
10893 rtx pat
= PATTERN (insn
);
10895 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
10896 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
10897 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat
,
10899 RTX_FRAME_RELATED_P (XVECEXP (pat
, 0, i
)) = 1;
10901 RTX_FRAME_RELATED_P (insn
) = 1;
10903 else if (last
>= 6)
10907 for (start
= first
>= 6 ? first
: 6; start
<= last
; start
++)
10908 if (!global_not_special_regno_p (start
))
10914 addr
= plus_constant (Pmode
, base
,
10915 offset
+ (start
- first
) * UNITS_PER_LONG
);
10920 note
= gen_movdi (gen_rtx_MEM (Pmode
, addr
),
10921 gen_rtx_REG (Pmode
, start
));
10923 note
= gen_movsi (gen_rtx_MEM (Pmode
, addr
),
10924 gen_rtx_REG (Pmode
, start
));
10925 note
= PATTERN (note
);
10927 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10928 RTX_FRAME_RELATED_P (insn
) = 1;
10933 note
= gen_store_multiple (gen_rtx_MEM (Pmode
, addr
),
10934 gen_rtx_REG (Pmode
, start
),
10935 GEN_INT (last
- start
+ 1));
10936 note
= PATTERN (note
);
10938 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
10940 for (i
= 0; i
< XVECLEN (note
, 0); i
++)
10941 if (GET_CODE (XVECEXP (note
, 0, i
)) == SET
10942 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note
,
10944 RTX_FRAME_RELATED_P (XVECEXP (note
, 0, i
)) = 1;
10946 RTX_FRAME_RELATED_P (insn
) = 1;
10952 /* Generate insn to restore registers FIRST to LAST from
10953 the register save area located at offset OFFSET
10954 relative to register BASE. */
10957 restore_gprs (rtx base
, int offset
, int first
, int last
)
10961 addr
= plus_constant (Pmode
, base
, offset
);
10962 addr
= gen_rtx_MEM (Pmode
, addr
);
10963 set_mem_alias_set (addr
, get_frame_alias_set ());
10965 /* Special-case single register. */
10969 insn
= gen_movdi (gen_rtx_REG (Pmode
, first
), addr
);
10971 insn
= gen_movsi (gen_rtx_REG (Pmode
, first
), addr
);
10973 RTX_FRAME_RELATED_P (insn
) = 1;
10977 insn
= gen_load_multiple (gen_rtx_REG (Pmode
, first
),
10979 GEN_INT (last
- first
+ 1));
10980 RTX_FRAME_RELATED_P (insn
) = 1;
10984 /* Return insn sequence to load the GOT register. */
10987 s390_load_got (void)
10991 /* We cannot use pic_offset_table_rtx here since we use this
10992 function also for non-pic if __tls_get_offset is called and in
10993 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10995 rtx got_rtx
= gen_rtx_REG (Pmode
, 12);
10999 if (TARGET_CPU_ZARCH
)
11001 emit_move_insn (got_rtx
, s390_got_symbol ());
11007 offset
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, s390_got_symbol ()),
11008 UNSPEC_LTREL_OFFSET
);
11009 offset
= gen_rtx_CONST (Pmode
, offset
);
11010 offset
= force_const_mem (Pmode
, offset
);
11012 emit_move_insn (got_rtx
, offset
);
11014 offset
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (offset
, 0)),
11015 UNSPEC_LTREL_BASE
);
11016 offset
= gen_rtx_PLUS (Pmode
, got_rtx
, offset
);
11018 emit_move_insn (got_rtx
, offset
);
11021 insns
= get_insns ();
11026 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11027 and the change to the stack pointer. */
11030 s390_emit_stack_tie (void)
11032 rtx mem
= gen_frame_mem (BLKmode
,
11033 gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
));
11035 emit_insn (gen_stack_tie (mem
));
11038 /* Copy GPRS into FPR save slots. */
11041 s390_save_gprs_to_fprs (void)
11045 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
11048 for (i
= 6; i
< 16; i
++)
11050 if (FP_REGNO_P (cfun_gpr_save_slot (i
)))
11053 emit_move_insn (gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
)),
11054 gen_rtx_REG (DImode
, i
));
11055 RTX_FRAME_RELATED_P (insn
) = 1;
11056 /* This prevents dwarf2cfi from interpreting the set. Doing
11057 so it might emit def_cfa_register infos setting an FPR as
11059 add_reg_note (insn
, REG_CFA_REGISTER
, copy_rtx (PATTERN (insn
)));
11064 /* Restore GPRs from FPR save slots. */
11067 s390_restore_gprs_from_fprs (void)
11071 if (!TARGET_Z10
|| !TARGET_HARD_FLOAT
|| !crtl
->is_leaf
)
11074 for (i
= 6; i
< 16; i
++)
11078 if (!FP_REGNO_P (cfun_gpr_save_slot (i
)))
11081 rtx fpr
= gen_rtx_REG (DImode
, cfun_gpr_save_slot (i
));
11083 if (i
== STACK_POINTER_REGNUM
)
11084 insn
= emit_insn (gen_stack_restore_from_fpr (fpr
));
11086 insn
= emit_move_insn (gen_rtx_REG (DImode
, i
), fpr
);
11088 df_set_regs_ever_live (i
, true);
11089 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, i
));
11090 if (i
== STACK_POINTER_REGNUM
)
11091 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11092 plus_constant (Pmode
, stack_pointer_rtx
,
11093 STACK_POINTER_OFFSET
));
11094 RTX_FRAME_RELATED_P (insn
) = 1;
11099 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11104 const pass_data pass_data_s390_early_mach
=
11106 RTL_PASS
, /* type */
11107 "early_mach", /* name */
11108 OPTGROUP_NONE
, /* optinfo_flags */
11109 TV_MACH_DEP
, /* tv_id */
11110 0, /* properties_required */
11111 0, /* properties_provided */
11112 0, /* properties_destroyed */
11113 0, /* todo_flags_start */
11114 ( TODO_df_verify
| TODO_df_finish
), /* todo_flags_finish */
11117 class pass_s390_early_mach
: public rtl_opt_pass
11120 pass_s390_early_mach (gcc::context
*ctxt
)
11121 : rtl_opt_pass (pass_data_s390_early_mach
, ctxt
)
11124 /* opt_pass methods: */
11125 virtual unsigned int execute (function
*);
11127 }; // class pass_s390_early_mach
11130 pass_s390_early_mach::execute (function
*fun
)
11134 /* Try to get rid of the FPR clobbers. */
11135 s390_optimize_nonescaping_tx ();
11137 /* Re-compute register info. */
11138 s390_register_info ();
11140 /* If we're using a base register, ensure that it is always valid for
11141 the first non-prologue instruction. */
11142 if (fun
->machine
->base_reg
)
11143 emit_insn_at_entry (gen_main_pool (fun
->machine
->base_reg
));
11145 /* Annotate all constant pool references to let the scheduler know
11146 they implicitly use the base register. */
11147 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
11150 annotate_constant_pool_refs (&PATTERN (insn
));
11151 df_insn_rescan (insn
);
11156 } // anon namespace
11158 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11159 - push too big immediates to the literal pool and annotate the refs
11160 - emit frame related notes for stack pointer changes. */
11163 s390_prologue_plus_offset (rtx target
, rtx reg
, rtx offset
, bool frame_related_p
)
11166 rtx orig_offset
= offset
;
11168 gcc_assert (REG_P (target
));
11169 gcc_assert (REG_P (reg
));
11170 gcc_assert (CONST_INT_P (offset
));
11172 if (offset
== const0_rtx
) /* lr/lgr */
11174 insn
= emit_move_insn (target
, reg
);
11176 else if (DISP_IN_RANGE (INTVAL (offset
))) /* la */
11178 insn
= emit_move_insn (target
, gen_rtx_PLUS (Pmode
, reg
,
11183 if (!satisfies_constraint_K (offset
) /* ahi/aghi */
11185 || (!satisfies_constraint_Op (offset
) /* alfi/algfi */
11186 && !satisfies_constraint_On (offset
)))) /* slfi/slgfi */
11187 offset
= force_const_mem (Pmode
, offset
);
11191 insn
= emit_move_insn (target
, reg
);
11192 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
11195 insn
= emit_insn (gen_add2_insn (target
, offset
));
11197 if (!CONST_INT_P (offset
))
11199 annotate_constant_pool_refs (&PATTERN (insn
));
11201 if (frame_related_p
)
11202 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
11203 gen_rtx_SET (target
,
11204 gen_rtx_PLUS (Pmode
, target
,
11209 RTX_FRAME_RELATED_P (insn
) = frame_related_p
? 1 : 0;
11211 /* If this is a stack adjustment and we are generating a stack clash
11212 prologue, then add a REG_STACK_CHECK note to signal that this insn
11213 should be left alone. */
11214 if (flag_stack_clash_protection
&& target
== stack_pointer_rtx
)
11215 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
11220 /* Emit a compare instruction with a volatile memory access as stack
11221 probe. It does not waste store tags and does not clobber any
11222 registers apart from the condition code. */
11224 s390_emit_stack_probe (rtx addr
)
11226 rtx tmp
= gen_rtx_MEM (Pmode
, addr
);
11227 MEM_VOLATILE_P (tmp
) = 1;
11228 s390_emit_compare (EQ
, gen_rtx_REG (Pmode
, 0), tmp
);
11229 emit_insn (gen_blockage ());
11232 /* Use a runtime loop if we have to emit more probes than this. */
11233 #define MIN_UNROLL_PROBES 3
11235 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11236 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11237 probe relative to the stack pointer.
11239 Note that SIZE is negative.
11241 The return value is true if TEMP_REG has been clobbered. */
11243 allocate_stack_space (rtx size
, HOST_WIDE_INT last_probe_offset
,
11246 bool temp_reg_clobbered_p
= false;
11247 HOST_WIDE_INT probe_interval
11248 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL
);
11249 HOST_WIDE_INT guard_size
11250 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE
);
11252 if (flag_stack_clash_protection
)
11254 if (last_probe_offset
+ -INTVAL (size
) < guard_size
)
11255 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
11258 rtx offset
= GEN_INT (probe_interval
- UNITS_PER_LONG
);
11259 HOST_WIDE_INT rounded_size
= -INTVAL (size
) & -probe_interval
;
11260 HOST_WIDE_INT num_probes
= rounded_size
/ probe_interval
;
11261 HOST_WIDE_INT residual
= -INTVAL (size
) - rounded_size
;
11263 if (num_probes
< MIN_UNROLL_PROBES
)
11265 /* Emit unrolled probe statements. */
11267 for (unsigned int i
= 0; i
< num_probes
; i
++)
11269 s390_prologue_plus_offset (stack_pointer_rtx
,
11271 GEN_INT (-probe_interval
), true);
11272 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11276 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
11280 /* Emit a loop probing the pages. */
11282 rtx_code_label
*loop_start_label
= gen_label_rtx ();
11284 /* From now on temp_reg will be the CFA register. */
11285 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11286 GEN_INT (-rounded_size
), true);
11287 emit_label (loop_start_label
);
11289 s390_prologue_plus_offset (stack_pointer_rtx
,
11291 GEN_INT (-probe_interval
), false);
11292 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11295 emit_cmp_and_jump_insns (stack_pointer_rtx
, temp_reg
,
11297 Pmode
, 1, loop_start_label
);
11299 /* Without this make_edges ICEes. */
11300 JUMP_LABEL (get_last_insn ()) = loop_start_label
;
11301 LABEL_NUSES (loop_start_label
) = 1;
11303 /* That's going to be a NOP since stack pointer and
11304 temp_reg are supposed to be the same here. We just
11305 emit it to set the CFA reg back to r15. */
11306 s390_prologue_plus_offset (stack_pointer_rtx
, temp_reg
,
11308 temp_reg_clobbered_p
= true;
11309 dump_stack_clash_frame_info (PROBE_LOOP
, residual
!= 0);
11312 /* Handle any residual allocation request. */
11313 s390_prologue_plus_offset (stack_pointer_rtx
,
11315 GEN_INT (-residual
), true);
11316 last_probe_offset
+= residual
;
11317 if (last_probe_offset
>= probe_interval
)
11318 s390_emit_stack_probe (gen_rtx_PLUS (Pmode
,
11321 - UNITS_PER_LONG
)));
11323 return temp_reg_clobbered_p
;
11327 /* Subtract frame size from stack pointer. */
11328 s390_prologue_plus_offset (stack_pointer_rtx
,
11332 return temp_reg_clobbered_p
;
11335 /* Expand the prologue into a bunch of separate insns. */
11338 s390_emit_prologue (void)
11346 /* Choose best register to use for temp use within prologue.
11347 TPF with profiling must avoid the register 14 - the tracing function
11348 needs the original contents of r14 to be preserved. */
11350 if (!has_hard_reg_initial_val (Pmode
, RETURN_REGNUM
)
11352 && !TARGET_TPF_PROFILING
)
11353 temp_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11354 else if (flag_split_stack
&& cfun
->stdarg
)
11355 temp_reg
= gen_rtx_REG (Pmode
, 12);
11357 temp_reg
= gen_rtx_REG (Pmode
, 1);
11359 /* When probing for stack-clash mitigation, we have to track the distance
11360 between the stack pointer and closest known reference.
11362 Most of the time we have to make a worst cast assumption. The
11363 only exception is when TARGET_BACKCHAIN is active, in which case
11364 we know *sp (offset 0) was written. */
11365 HOST_WIDE_INT probe_interval
11366 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL
);
11367 HOST_WIDE_INT last_probe_offset
11368 = (TARGET_BACKCHAIN
11369 ? (TARGET_PACKED_STACK
? STACK_POINTER_OFFSET
- UNITS_PER_LONG
: 0)
11370 : probe_interval
- (STACK_BOUNDARY
/ UNITS_PER_WORD
));
11372 s390_save_gprs_to_fprs ();
11374 /* Save call saved gprs. */
11375 if (cfun_frame_layout
.first_save_gpr
!= -1)
11377 insn
= save_gprs (stack_pointer_rtx
,
11378 cfun_frame_layout
.gprs_offset
+
11379 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11380 - cfun_frame_layout
.first_save_gpr_slot
),
11381 cfun_frame_layout
.first_save_gpr
,
11382 cfun_frame_layout
.last_save_gpr
);
11384 /* This is not 100% correct. If we have more than one register saved,
11385 then LAST_PROBE_OFFSET can move even closer to sp. */
11387 = (cfun_frame_layout
.gprs_offset
+
11388 UNITS_PER_LONG
* (cfun_frame_layout
.first_save_gpr
11389 - cfun_frame_layout
.first_save_gpr_slot
));
11394 /* Dummy insn to mark literal pool slot. */
11396 if (cfun
->machine
->base_reg
)
11397 emit_insn (gen_main_pool (cfun
->machine
->base_reg
));
11399 offset
= cfun_frame_layout
.f0_offset
;
11401 /* Save f0 and f2. */
11402 for (i
= FPR0_REGNUM
; i
<= FPR0_REGNUM
+ 1; i
++)
11404 if (cfun_fpr_save_p (i
))
11406 save_fpr (stack_pointer_rtx
, offset
, i
);
11407 if (offset
< last_probe_offset
)
11408 last_probe_offset
= offset
;
11411 else if (!TARGET_PACKED_STACK
|| cfun
->stdarg
)
11415 /* Save f4 and f6. */
11416 offset
= cfun_frame_layout
.f4_offset
;
11417 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11419 if (cfun_fpr_save_p (i
))
11421 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11422 if (offset
< last_probe_offset
)
11423 last_probe_offset
= offset
;
11426 /* If f4 and f6 are call clobbered they are saved due to
11427 stdargs and therefore are not frame related. */
11428 if (!call_really_used_regs
[i
])
11429 RTX_FRAME_RELATED_P (insn
) = 1;
11431 else if (!TARGET_PACKED_STACK
|| call_really_used_regs
[i
])
11435 if (TARGET_PACKED_STACK
11436 && cfun_save_high_fprs_p
11437 && cfun_frame_layout
.f8_offset
+ cfun_frame_layout
.high_fprs
* 8 > 0)
11439 offset
= (cfun_frame_layout
.f8_offset
11440 + (cfun_frame_layout
.high_fprs
- 1) * 8);
11442 for (i
= FPR15_REGNUM
; i
>= FPR8_REGNUM
&& offset
>= 0; i
--)
11443 if (cfun_fpr_save_p (i
))
11445 insn
= save_fpr (stack_pointer_rtx
, offset
, i
);
11446 if (offset
< last_probe_offset
)
11447 last_probe_offset
= offset
;
11449 RTX_FRAME_RELATED_P (insn
) = 1;
11452 if (offset
>= cfun_frame_layout
.f8_offset
)
11456 if (!TARGET_PACKED_STACK
)
11457 next_fpr
= cfun_save_high_fprs_p
? FPR15_REGNUM
: 0;
11459 if (flag_stack_usage_info
)
11460 current_function_static_stack_size
= cfun_frame_layout
.frame_size
;
11462 /* Decrement stack pointer. */
11464 if (cfun_frame_layout
.frame_size
> 0)
11466 rtx frame_off
= GEN_INT (-cfun_frame_layout
.frame_size
);
11467 rtx_insn
*stack_pointer_backup_loc
;
11468 bool temp_reg_clobbered_p
;
11470 if (s390_stack_size
)
11472 HOST_WIDE_INT stack_guard
;
11474 if (s390_stack_guard
)
11475 stack_guard
= s390_stack_guard
;
11478 /* If no value for stack guard is provided the smallest power of 2
11479 larger than the current frame size is chosen. */
11481 while (stack_guard
< cfun_frame_layout
.frame_size
)
11485 if (cfun_frame_layout
.frame_size
>= s390_stack_size
)
11487 warning (0, "frame size of function %qs is %wd"
11488 " bytes exceeding user provided stack limit of "
11490 "An unconditional trap is added.",
11491 current_function_name(), cfun_frame_layout
.frame_size
,
11493 emit_insn (gen_trap ());
11498 /* stack_guard has to be smaller than s390_stack_size.
11499 Otherwise we would emit an AND with zero which would
11500 not match the test under mask pattern. */
11501 if (stack_guard
>= s390_stack_size
)
11503 warning (0, "frame size of function %qs is %wd"
11504 " bytes which is more than half the stack size. "
11505 "The dynamic check would not be reliable. "
11506 "No check emitted for this function.",
11507 current_function_name(),
11508 cfun_frame_layout
.frame_size
);
11512 HOST_WIDE_INT stack_check_mask
= ((s390_stack_size
- 1)
11513 & ~(stack_guard
- 1));
11515 rtx t
= gen_rtx_AND (Pmode
, stack_pointer_rtx
,
11516 GEN_INT (stack_check_mask
));
11518 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode
,
11520 t
, const0_rtx
, const0_rtx
));
11522 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode
,
11524 t
, const0_rtx
, const0_rtx
));
11529 if (s390_warn_framesize
> 0
11530 && cfun_frame_layout
.frame_size
>= s390_warn_framesize
)
11531 warning (0, "frame size of %qs is %wd bytes",
11532 current_function_name (), cfun_frame_layout
.frame_size
);
11534 if (s390_warn_dynamicstack_p
&& cfun
->calls_alloca
)
11535 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11537 /* Save the location where we could backup the incoming stack
11539 stack_pointer_backup_loc
= get_last_insn ();
11541 temp_reg_clobbered_p
= allocate_stack_space (frame_off
, last_probe_offset
,
11544 if (TARGET_BACKCHAIN
|| next_fpr
)
11546 if (temp_reg_clobbered_p
)
11548 /* allocate_stack_space had to make use of temp_reg and
11549 we need it to hold a backup of the incoming stack
11550 pointer. Calculate back that value from the current
11552 s390_prologue_plus_offset (temp_reg
, stack_pointer_rtx
,
11553 GEN_INT (cfun_frame_layout
.frame_size
),
11558 /* allocate_stack_space didn't actually required
11559 temp_reg. Insert the stack pointer backup insn
11560 before the stack pointer decrement code - knowing now
11561 that the value will survive. */
11562 emit_insn_after (gen_move_insn (temp_reg
, stack_pointer_rtx
),
11563 stack_pointer_backup_loc
);
11567 /* Set backchain. */
11569 if (TARGET_BACKCHAIN
)
11571 if (cfun_frame_layout
.backchain_offset
)
11572 addr
= gen_rtx_MEM (Pmode
,
11573 plus_constant (Pmode
, stack_pointer_rtx
,
11574 cfun_frame_layout
.backchain_offset
));
11576 addr
= gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
11577 set_mem_alias_set (addr
, get_frame_alias_set ());
11578 insn
= emit_insn (gen_move_insn (addr
, temp_reg
));
11581 /* If we support non-call exceptions (e.g. for Java),
11582 we need to make sure the backchain pointer is set up
11583 before any possibly trapping memory access. */
11584 if (TARGET_BACKCHAIN
&& cfun
->can_throw_non_call_exceptions
)
11586 addr
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
11587 emit_clobber (addr
);
11590 else if (flag_stack_clash_protection
)
11591 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
11593 /* Save fprs 8 - 15 (64 bit ABI). */
11595 if (cfun_save_high_fprs_p
&& next_fpr
)
11597 /* If the stack might be accessed through a different register
11598 we have to make sure that the stack pointer decrement is not
11599 moved below the use of the stack slots. */
11600 s390_emit_stack_tie ();
11602 insn
= emit_insn (gen_add2_insn (temp_reg
,
11603 GEN_INT (cfun_frame_layout
.f8_offset
)));
11607 for (i
= FPR8_REGNUM
; i
<= next_fpr
; i
++)
11608 if (cfun_fpr_save_p (i
))
11610 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
,
11611 cfun_frame_layout
.frame_size
11612 + cfun_frame_layout
.f8_offset
11615 insn
= save_fpr (temp_reg
, offset
, i
);
11617 RTX_FRAME_RELATED_P (insn
) = 1;
11618 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
11619 gen_rtx_SET (gen_rtx_MEM (DFmode
, addr
),
11620 gen_rtx_REG (DFmode
, i
)));
11624 /* Set frame pointer, if needed. */
11626 if (frame_pointer_needed
)
11628 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
11629 RTX_FRAME_RELATED_P (insn
) = 1;
11632 /* Set up got pointer, if needed. */
11634 if (flag_pic
&& df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
11636 rtx_insn
*insns
= s390_load_got ();
11638 for (rtx_insn
*insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
11639 annotate_constant_pool_refs (&PATTERN (insn
));
11644 if (TARGET_TPF_PROFILING
)
11646 /* Generate a BAS instruction to serve as a function
11647 entry intercept to facilitate the use of tracing
11648 algorithms located at the branch target. */
11649 emit_insn (gen_prologue_tpf ());
11651 /* Emit a blockage here so that all code
11652 lies between the profiling mechanisms. */
11653 emit_insn (gen_blockage ());
11657 /* Expand the epilogue into a bunch of separate insns. */
11660 s390_emit_epilogue (bool sibcall
)
11662 rtx frame_pointer
, return_reg
, cfa_restores
= NULL_RTX
;
11663 int area_bottom
, area_top
, offset
= 0;
11668 if (TARGET_TPF_PROFILING
)
11671 /* Generate a BAS instruction to serve as a function
11672 entry intercept to facilitate the use of tracing
11673 algorithms located at the branch target. */
11675 /* Emit a blockage here so that all code
11676 lies between the profiling mechanisms. */
11677 emit_insn (gen_blockage ());
11679 emit_insn (gen_epilogue_tpf ());
11682 /* Check whether to use frame or stack pointer for restore. */
11684 frame_pointer
= (frame_pointer_needed
11685 ? hard_frame_pointer_rtx
: stack_pointer_rtx
);
11687 s390_frame_area (&area_bottom
, &area_top
);
11689 /* Check whether we can access the register save area.
11690 If not, increment the frame pointer as required. */
11692 if (area_top
<= area_bottom
)
11694 /* Nothing to restore. */
11696 else if (DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_bottom
)
11697 && DISP_IN_RANGE (cfun_frame_layout
.frame_size
+ area_top
- 1))
11699 /* Area is in range. */
11700 offset
= cfun_frame_layout
.frame_size
;
11704 rtx insn
, frame_off
, cfa
;
11706 offset
= area_bottom
< 0 ? -area_bottom
: 0;
11707 frame_off
= GEN_INT (cfun_frame_layout
.frame_size
- offset
);
11709 cfa
= gen_rtx_SET (frame_pointer
,
11710 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11711 if (DISP_IN_RANGE (INTVAL (frame_off
)))
11713 insn
= gen_rtx_SET (frame_pointer
,
11714 gen_rtx_PLUS (Pmode
, frame_pointer
, frame_off
));
11715 insn
= emit_insn (insn
);
11719 if (!CONST_OK_FOR_K (INTVAL (frame_off
)))
11720 frame_off
= force_const_mem (Pmode
, frame_off
);
11722 insn
= emit_insn (gen_add2_insn (frame_pointer
, frame_off
));
11723 annotate_constant_pool_refs (&PATTERN (insn
));
11725 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, cfa
);
11726 RTX_FRAME_RELATED_P (insn
) = 1;
11729 /* Restore call saved fprs. */
11733 if (cfun_save_high_fprs_p
)
11735 next_offset
= cfun_frame_layout
.f8_offset
;
11736 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
11738 if (cfun_fpr_save_p (i
))
11740 restore_fpr (frame_pointer
,
11741 offset
+ next_offset
, i
);
11743 = alloc_reg_note (REG_CFA_RESTORE
,
11744 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11753 next_offset
= cfun_frame_layout
.f4_offset
;
11755 for (i
= FPR4_REGNUM
; i
<= FPR4_REGNUM
+ 1; i
++)
11757 if (cfun_fpr_save_p (i
))
11759 restore_fpr (frame_pointer
,
11760 offset
+ next_offset
, i
);
11762 = alloc_reg_note (REG_CFA_RESTORE
,
11763 gen_rtx_REG (DFmode
, i
), cfa_restores
);
11766 else if (!TARGET_PACKED_STACK
)
11772 /* Return register. */
11774 return_reg
= gen_rtx_REG (Pmode
, RETURN_REGNUM
);
11776 /* Restore call saved gprs. */
11778 if (cfun_frame_layout
.first_restore_gpr
!= -1)
11783 /* Check for global register and save them
11784 to stack location from where they get restored. */
11786 for (i
= cfun_frame_layout
.first_restore_gpr
;
11787 i
<= cfun_frame_layout
.last_restore_gpr
;
11790 if (global_not_special_regno_p (i
))
11792 addr
= plus_constant (Pmode
, frame_pointer
,
11793 offset
+ cfun_frame_layout
.gprs_offset
11794 + (i
- cfun_frame_layout
.first_save_gpr_slot
)
11796 addr
= gen_rtx_MEM (Pmode
, addr
);
11797 set_mem_alias_set (addr
, get_frame_alias_set ());
11798 emit_move_insn (addr
, gen_rtx_REG (Pmode
, i
));
11802 = alloc_reg_note (REG_CFA_RESTORE
,
11803 gen_rtx_REG (Pmode
, i
), cfa_restores
);
11806 /* Fetch return address from stack before load multiple,
11807 this will do good for scheduling.
11809 Only do this if we already decided that r14 needs to be
11810 saved to a stack slot. (And not just because r14 happens to
11811 be in between two GPRs which need saving.) Otherwise it
11812 would be difficult to take that decision back in
11813 s390_optimize_prologue.
11815 This optimization is only helpful on in-order machines. */
11817 && cfun_gpr_save_slot (RETURN_REGNUM
) == SAVE_SLOT_STACK
11818 && s390_tune
<= PROCESSOR_2097_Z10
)
11820 int return_regnum
= find_unused_clobbered_reg();
11821 if (!return_regnum
)
11823 return_reg
= gen_rtx_REG (Pmode
, return_regnum
);
11825 addr
= plus_constant (Pmode
, frame_pointer
,
11826 offset
+ cfun_frame_layout
.gprs_offset
11828 - cfun_frame_layout
.first_save_gpr_slot
)
11830 addr
= gen_rtx_MEM (Pmode
, addr
);
11831 set_mem_alias_set (addr
, get_frame_alias_set ());
11832 emit_move_insn (return_reg
, addr
);
11834 /* Once we did that optimization we have to make sure
11835 s390_optimize_prologue does not try to remove the store
11836 of r14 since we will not be able to find the load issued
11838 cfun_frame_layout
.save_return_addr_p
= true;
11841 insn
= restore_gprs (frame_pointer
,
11842 offset
+ cfun_frame_layout
.gprs_offset
11843 + (cfun_frame_layout
.first_restore_gpr
11844 - cfun_frame_layout
.first_save_gpr_slot
)
11846 cfun_frame_layout
.first_restore_gpr
,
11847 cfun_frame_layout
.last_restore_gpr
);
11848 insn
= emit_insn (insn
);
11849 REG_NOTES (insn
) = cfa_restores
;
11850 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11851 plus_constant (Pmode
, stack_pointer_rtx
,
11852 STACK_POINTER_OFFSET
));
11853 RTX_FRAME_RELATED_P (insn
) = 1;
11856 s390_restore_gprs_from_fprs ();
11861 /* Return to caller. */
11863 p
= rtvec_alloc (2);
11865 RTVEC_ELT (p
, 0) = ret_rtx
;
11866 RTVEC_ELT (p
, 1) = gen_rtx_USE (VOIDmode
, return_reg
);
11867 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
11871 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11874 s300_set_up_by_prologue (hard_reg_set_container
*regs
)
11876 if (cfun
->machine
->base_reg
11877 && !call_really_used_regs
[REGNO (cfun
->machine
->base_reg
)])
11878 SET_HARD_REG_BIT (regs
->set
, REGNO (cfun
->machine
->base_reg
));
11881 /* -fsplit-stack support. */
11883 /* A SYMBOL_REF for __morestack. */
11884 static GTY(()) rtx morestack_ref
;
11886 /* When using -fsplit-stack, the allocation routines set a field in
11887 the TCB to the bottom of the stack plus this much space, measured
11890 #define SPLIT_STACK_AVAILABLE 1024
11892 /* Emit -fsplit-stack prologue, which goes before the regular function
11896 s390_expand_split_stack_prologue (void)
11898 rtx r1
, guard
, cc
= NULL
;
11900 /* Offset from thread pointer to __private_ss. */
11901 int psso
= TARGET_64BIT
? 0x38 : 0x20;
11902 /* Pointer size in bytes. */
11903 /* Frame size and argument size - the two parameters to __morestack. */
11904 HOST_WIDE_INT frame_size
= cfun_frame_layout
.frame_size
;
11905 /* Align argument size to 8 bytes - simplifies __morestack code. */
11906 HOST_WIDE_INT args_size
= crtl
->args
.size
>= 0
11907 ? ((crtl
->args
.size
+ 7) & ~7)
11909 /* Label to be called by __morestack. */
11910 rtx_code_label
*call_done
= NULL
;
11911 rtx_code_label
*parm_base
= NULL
;
11914 gcc_assert (flag_split_stack
&& reload_completed
);
11915 if (!TARGET_CPU_ZARCH
)
11917 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11921 r1
= gen_rtx_REG (Pmode
, 1);
11923 /* If no stack frame will be allocated, don't do anything. */
11926 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11928 /* If va_start is used, just use r15. */
11929 emit_move_insn (r1
,
11930 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11931 GEN_INT (STACK_POINTER_OFFSET
)));
11937 if (morestack_ref
== NULL_RTX
)
11939 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11940 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
11941 | SYMBOL_FLAG_FUNCTION
);
11944 if (CONST_OK_FOR_K (frame_size
) || CONST_OK_FOR_Op (frame_size
))
11946 /* If frame_size will fit in an add instruction, do a stack space
11947 check, and only call __morestack if there's not enough space. */
11949 /* Get thread pointer. r1 is the only register we can always destroy - r0
11950 could contain a static chain (and cannot be used to address memory
11951 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11952 emit_move_insn (r1
, gen_rtx_REG (Pmode
, TP_REGNUM
));
11953 /* Aim at __private_ss. */
11954 guard
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, r1
, psso
));
11956 /* If less that 1kiB used, skip addition and compare directly with
11958 if (frame_size
> SPLIT_STACK_AVAILABLE
)
11960 emit_move_insn (r1
, guard
);
11962 emit_insn (gen_adddi3 (r1
, r1
, GEN_INT (frame_size
)));
11964 emit_insn (gen_addsi3 (r1
, r1
, GEN_INT (frame_size
)));
11968 /* Compare the (maybe adjusted) guard with the stack pointer. */
11969 cc
= s390_emit_compare (LT
, stack_pointer_rtx
, guard
);
11972 call_done
= gen_label_rtx ();
11973 parm_base
= gen_label_rtx ();
11975 /* Emit the parameter block. */
11976 tmp
= gen_split_stack_data (parm_base
, call_done
,
11977 GEN_INT (frame_size
),
11978 GEN_INT (args_size
));
11979 insn
= emit_insn (tmp
);
11980 add_reg_note (insn
, REG_LABEL_OPERAND
, call_done
);
11981 LABEL_NUSES (call_done
)++;
11982 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11983 LABEL_NUSES (parm_base
)++;
11985 /* %r1 = litbase. */
11986 insn
= emit_move_insn (r1
, gen_rtx_LABEL_REF (VOIDmode
, parm_base
));
11987 add_reg_note (insn
, REG_LABEL_OPERAND
, parm_base
);
11988 LABEL_NUSES (parm_base
)++;
11990 /* Now, we need to call __morestack. It has very special calling
11991 conventions: it preserves param/return/static chain registers for
11992 calling main function body, and looks for its own parameters at %r1. */
11996 tmp
= gen_split_stack_cond_call (morestack_ref
, cc
, call_done
);
11998 insn
= emit_jump_insn (tmp
);
11999 JUMP_LABEL (insn
) = call_done
;
12000 LABEL_NUSES (call_done
)++;
12002 /* Mark the jump as very unlikely to be taken. */
12003 add_reg_br_prob_note (insn
,
12004 profile_probability::very_unlikely ());
12006 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
12008 /* If va_start is used, and __morestack was not called, just use
12010 emit_move_insn (r1
,
12011 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
12012 GEN_INT (STACK_POINTER_OFFSET
)));
12017 tmp
= gen_split_stack_call (morestack_ref
, call_done
);
12018 insn
= emit_jump_insn (tmp
);
12019 JUMP_LABEL (insn
) = call_done
;
12020 LABEL_NUSES (call_done
)++;
12024 /* __morestack will call us here. */
12026 emit_label (call_done
);
12029 /* We may have to tell the dataflow pass that the split stack prologue
12030 is initializing a register. */
12033 s390_live_on_entry (bitmap regs
)
12035 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
12037 gcc_assert (flag_split_stack
);
12038 bitmap_set_bit (regs
, 1);
12042 /* Return true if the function can use simple_return to return outside
12043 of a shrink-wrapped region. At present shrink-wrapping is supported
12047 s390_can_use_simple_return_insn (void)
12052 /* Return true if the epilogue is guaranteed to contain only a return
12053 instruction and if a direct return can therefore be used instead.
12054 One of the main advantages of using direct return instructions
12055 is that we can then use conditional returns. */
12058 s390_can_use_return_insn (void)
12062 if (!reload_completed
)
12068 if (TARGET_TPF_PROFILING
)
12071 for (i
= 0; i
< 16; i
++)
12072 if (cfun_gpr_save_slot (i
) != SAVE_SLOT_NONE
)
12075 /* For 31 bit this is not covered by the frame_size check below
12076 since f4, f6 are saved in the register save area without needing
12077 additional stack space. */
12079 && (cfun_fpr_save_p (FPR4_REGNUM
) || cfun_fpr_save_p (FPR6_REGNUM
)))
12082 if (cfun
->machine
->base_reg
12083 && !call_really_used_regs
[REGNO (cfun
->machine
->base_reg
)])
12086 return cfun_frame_layout
.frame_size
== 0;
12089 /* The VX ABI differs for vararg functions. Therefore we need the
12090 prototype of the callee to be available when passing vector type
12092 static const char *
12093 s390_invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
12095 return ((TARGET_VX_ABI
12097 && VECTOR_TYPE_P (TREE_TYPE (val
))
12098 && (funcdecl
== NULL_TREE
12099 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
12100 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
12101 ? N_("vector argument passed to unprototyped function")
12106 /* Return the size in bytes of a function argument of
12107 type TYPE and/or mode MODE. At least one of TYPE or
12108 MODE must be specified. */
12111 s390_function_arg_size (machine_mode mode
, const_tree type
)
12114 return int_size_in_bytes (type
);
12116 /* No type info available for some library calls ... */
12117 if (mode
!= BLKmode
)
12118 return GET_MODE_SIZE (mode
);
12120 /* If we have neither type nor mode, abort */
12121 gcc_unreachable ();
12124 /* Return true if a function argument of type TYPE and mode MODE
12125 is to be passed in a vector register, if available. */
12128 s390_function_arg_vector (machine_mode mode
, const_tree type
)
12130 if (!TARGET_VX_ABI
)
12133 if (s390_function_arg_size (mode
, type
) > 16)
12136 /* No type info available for some library calls ... */
12138 return VECTOR_MODE_P (mode
);
12140 /* The ABI says that record types with a single member are treated
12141 just like that member would be. */
12142 while (TREE_CODE (type
) == RECORD_TYPE
)
12144 tree field
, single
= NULL_TREE
;
12146 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
12148 if (TREE_CODE (field
) != FIELD_DECL
)
12151 if (single
== NULL_TREE
)
12152 single
= TREE_TYPE (field
);
12157 if (single
== NULL_TREE
)
12161 /* If the field declaration adds extra byte due to
12162 e.g. padding this is not accepted as vector type. */
12163 if (int_size_in_bytes (single
) <= 0
12164 || int_size_in_bytes (single
) != int_size_in_bytes (type
))
12170 return VECTOR_TYPE_P (type
);
12173 /* Return true if a function argument of type TYPE and mode MODE
12174 is to be passed in a floating-point register, if available. */
12177 s390_function_arg_float (machine_mode mode
, const_tree type
)
12179 if (s390_function_arg_size (mode
, type
) > 8)
12182 /* Soft-float changes the ABI: no floating-point registers are used. */
12183 if (TARGET_SOFT_FLOAT
)
12186 /* No type info available for some library calls ... */
12188 return mode
== SFmode
|| mode
== DFmode
|| mode
== SDmode
|| mode
== DDmode
;
12190 /* The ABI says that record types with a single member are treated
12191 just like that member would be. */
12192 while (TREE_CODE (type
) == RECORD_TYPE
)
12194 tree field
, single
= NULL_TREE
;
12196 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
12198 if (TREE_CODE (field
) != FIELD_DECL
)
12201 if (single
== NULL_TREE
)
12202 single
= TREE_TYPE (field
);
12207 if (single
== NULL_TREE
)
12213 return TREE_CODE (type
) == REAL_TYPE
;
12216 /* Return true if a function argument of type TYPE and mode MODE
12217 is to be passed in an integer register, or a pair of integer
12218 registers, if available. */
12221 s390_function_arg_integer (machine_mode mode
, const_tree type
)
12223 int size
= s390_function_arg_size (mode
, type
);
12227 /* No type info available for some library calls ... */
12229 return GET_MODE_CLASS (mode
) == MODE_INT
12230 || (TARGET_SOFT_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
));
12232 /* We accept small integral (and similar) types. */
12233 if (INTEGRAL_TYPE_P (type
)
12234 || POINTER_TYPE_P (type
)
12235 || TREE_CODE (type
) == NULLPTR_TYPE
12236 || TREE_CODE (type
) == OFFSET_TYPE
12237 || (TARGET_SOFT_FLOAT
&& TREE_CODE (type
) == REAL_TYPE
))
12240 /* We also accept structs of size 1, 2, 4, 8 that are not
12241 passed in floating-point registers. */
12242 if (AGGREGATE_TYPE_P (type
)
12243 && exact_log2 (size
) >= 0
12244 && !s390_function_arg_float (mode
, type
))
12250 /* Return 1 if a function argument of type TYPE and mode MODE
12251 is to be passed by reference. The ABI specifies that only
12252 structures of size 1, 2, 4, or 8 bytes are passed by value,
12253 all other structures (and complex numbers) are passed by
12257 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
12258 machine_mode mode
, const_tree type
,
12259 bool named ATTRIBUTE_UNUSED
)
12261 int size
= s390_function_arg_size (mode
, type
);
12263 if (s390_function_arg_vector (mode
, type
))
12271 if (AGGREGATE_TYPE_P (type
) && exact_log2 (size
) < 0)
12274 if (TREE_CODE (type
) == COMPLEX_TYPE
12275 || TREE_CODE (type
) == VECTOR_TYPE
)
12282 /* Update the data in CUM to advance over an argument of mode MODE and
12283 data type TYPE. (TYPE is null for libcalls where that information
12284 may not be available.). The boolean NAMED specifies whether the
12285 argument is a named argument (as opposed to an unnamed argument
12286 matching an ellipsis). */
12289 s390_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
12290 const_tree type
, bool named
)
12292 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12294 if (s390_function_arg_vector (mode
, type
))
12296 /* We are called for unnamed vector stdarg arguments which are
12297 passed on the stack. In this case this hook does not have to
12298 do anything since stack arguments are tracked by common
12304 else if (s390_function_arg_float (mode
, type
))
12308 else if (s390_function_arg_integer (mode
, type
))
12310 int size
= s390_function_arg_size (mode
, type
);
12311 cum
->gprs
+= ((size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
);
12314 gcc_unreachable ();
12317 /* Define where to put the arguments to a function.
12318 Value is zero to push the argument on the stack,
12319 or a hard register in which to store the argument.
12321 MODE is the argument's machine mode.
12322 TYPE is the data type of the argument (as a tree).
12323 This is null for libcalls where that information may
12325 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12326 the preceding args and about the function being called.
12327 NAMED is nonzero if this argument is a named parameter
12328 (otherwise it is an extra parameter matching an ellipsis).
12330 On S/390, we use general purpose registers 2 through 6 to
12331 pass integer, pointer, and certain structure arguments, and
12332 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12333 to pass floating point arguments. All remaining arguments
12334 are pushed to the stack. */
12337 s390_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
12338 const_tree type
, bool named
)
12340 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12343 s390_check_type_for_vector_abi (type
, true, false);
12345 if (s390_function_arg_vector (mode
, type
))
12347 /* Vector arguments being part of the ellipsis are passed on the
12349 if (!named
|| (cum
->vrs
+ 1 > VEC_ARG_NUM_REG
))
12352 return gen_rtx_REG (mode
, cum
->vrs
+ FIRST_VEC_ARG_REGNO
);
12354 else if (s390_function_arg_float (mode
, type
))
12356 if (cum
->fprs
+ 1 > FP_ARG_NUM_REG
)
12359 return gen_rtx_REG (mode
, cum
->fprs
+ 16);
12361 else if (s390_function_arg_integer (mode
, type
))
12363 int size
= s390_function_arg_size (mode
, type
);
12364 int n_gprs
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12366 if (cum
->gprs
+ n_gprs
> GP_ARG_NUM_REG
)
12368 else if (n_gprs
== 1 || UNITS_PER_WORD
== UNITS_PER_LONG
)
12369 return gen_rtx_REG (mode
, cum
->gprs
+ 2);
12370 else if (n_gprs
== 2)
12372 rtvec p
= rtvec_alloc (2);
12375 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 2),
12378 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, cum
->gprs
+ 3),
12381 return gen_rtx_PARALLEL (mode
, p
);
12385 /* After the real arguments, expand_call calls us once again
12386 with a void_type_node type. Whatever we return here is
12387 passed as operand 2 to the call expanders.
12389 We don't need this feature ... */
12390 else if (type
== void_type_node
)
12393 gcc_unreachable ();
12396 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12397 left-justified when placed on the stack during parameter passing. */
12399 static pad_direction
12400 s390_function_arg_padding (machine_mode mode
, const_tree type
)
12402 if (s390_function_arg_vector (mode
, type
))
12405 return default_function_arg_padding (mode
, type
);
12408 /* Return true if return values of type TYPE should be returned
12409 in a memory buffer whose address is passed by the caller as
12410 hidden first argument. */
12413 s390_return_in_memory (const_tree type
, const_tree fundecl ATTRIBUTE_UNUSED
)
12415 /* We accept small integral (and similar) types. */
12416 if (INTEGRAL_TYPE_P (type
)
12417 || POINTER_TYPE_P (type
)
12418 || TREE_CODE (type
) == OFFSET_TYPE
12419 || TREE_CODE (type
) == REAL_TYPE
)
12420 return int_size_in_bytes (type
) > 8;
12422 /* vector types which fit into a VR. */
12424 && VECTOR_TYPE_P (type
)
12425 && int_size_in_bytes (type
) <= 16)
12428 /* Aggregates and similar constructs are always returned
12430 if (AGGREGATE_TYPE_P (type
)
12431 || TREE_CODE (type
) == COMPLEX_TYPE
12432 || VECTOR_TYPE_P (type
))
12435 /* ??? We get called on all sorts of random stuff from
12436 aggregate_value_p. We can't abort, but it's not clear
12437 what's safe to return. Pretend it's a struct I guess. */
12441 /* Function arguments and return values are promoted to word size. */
12443 static machine_mode
12444 s390_promote_function_mode (const_tree type
, machine_mode mode
,
12446 const_tree fntype ATTRIBUTE_UNUSED
,
12447 int for_return ATTRIBUTE_UNUSED
)
12449 if (INTEGRAL_MODE_P (mode
)
12450 && GET_MODE_SIZE (mode
) < UNITS_PER_LONG
)
12452 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
12453 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
12460 /* Define where to return a (scalar) value of type RET_TYPE.
12461 If RET_TYPE is null, define where to return a (scalar)
12462 value of mode MODE from a libcall. */
12465 s390_function_and_libcall_value (machine_mode mode
,
12466 const_tree ret_type
,
12467 const_tree fntype_or_decl
,
12468 bool outgoing ATTRIBUTE_UNUSED
)
12470 /* For vector return types it is important to use the RET_TYPE
12471 argument whenever available since the middle-end might have
12472 changed the mode to a scalar mode. */
12473 bool vector_ret_type_p
= ((ret_type
&& VECTOR_TYPE_P (ret_type
))
12474 || (!ret_type
&& VECTOR_MODE_P (mode
)));
12476 /* For normal functions perform the promotion as
12477 promote_function_mode would do. */
12480 int unsignedp
= TYPE_UNSIGNED (ret_type
);
12481 mode
= promote_function_mode (ret_type
, mode
, &unsignedp
,
12482 fntype_or_decl
, 1);
12485 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
12486 || SCALAR_FLOAT_MODE_P (mode
)
12487 || (TARGET_VX_ABI
&& vector_ret_type_p
));
12488 gcc_assert (GET_MODE_SIZE (mode
) <= (TARGET_VX_ABI
? 16 : 8));
12490 if (TARGET_VX_ABI
&& vector_ret_type_p
)
12491 return gen_rtx_REG (mode
, FIRST_VEC_ARG_REGNO
);
12492 else if (TARGET_HARD_FLOAT
&& SCALAR_FLOAT_MODE_P (mode
))
12493 return gen_rtx_REG (mode
, 16);
12494 else if (GET_MODE_SIZE (mode
) <= UNITS_PER_LONG
12495 || UNITS_PER_LONG
== UNITS_PER_WORD
)
12496 return gen_rtx_REG (mode
, 2);
12497 else if (GET_MODE_SIZE (mode
) == 2 * UNITS_PER_LONG
)
12499 /* This case is triggered when returning a 64 bit value with
12500 -m31 -mzarch. Although the value would fit into a single
12501 register it has to be forced into a 32 bit register pair in
12502 order to match the ABI. */
12503 rtvec p
= rtvec_alloc (2);
12506 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 2), const0_rtx
);
12508 = gen_rtx_EXPR_LIST (SImode
, gen_rtx_REG (SImode
, 3), GEN_INT (4));
12510 return gen_rtx_PARALLEL (mode
, p
);
12513 gcc_unreachable ();
12516 /* Define where to return a scalar return value of type RET_TYPE. */
12519 s390_function_value (const_tree ret_type
, const_tree fn_decl_or_type
,
12522 return s390_function_and_libcall_value (TYPE_MODE (ret_type
), ret_type
,
12523 fn_decl_or_type
, outgoing
);
12526 /* Define where to return a scalar libcall return value of mode
12530 s390_libcall_value (machine_mode mode
, const_rtx fun ATTRIBUTE_UNUSED
)
12532 return s390_function_and_libcall_value (mode
, NULL_TREE
,
12537 /* Create and return the va_list datatype.
12539 On S/390, va_list is an array type equivalent to
12541 typedef struct __va_list_tag
12545 void *__overflow_arg_area;
12546 void *__reg_save_area;
12549 where __gpr and __fpr hold the number of general purpose
12550 or floating point arguments used up to now, respectively,
12551 __overflow_arg_area points to the stack location of the
12552 next argument passed on the stack, and __reg_save_area
12553 always points to the start of the register area in the
12554 call frame of the current function. The function prologue
12555 saves all registers used for argument passing into this
12556 area if the function uses variable arguments. */
12559 s390_build_builtin_va_list (void)
12561 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
12563 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
12566 build_decl (BUILTINS_LOCATION
,
12567 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
12569 f_gpr
= build_decl (BUILTINS_LOCATION
,
12570 FIELD_DECL
, get_identifier ("__gpr"),
12571 long_integer_type_node
);
12572 f_fpr
= build_decl (BUILTINS_LOCATION
,
12573 FIELD_DECL
, get_identifier ("__fpr"),
12574 long_integer_type_node
);
12575 f_ovf
= build_decl (BUILTINS_LOCATION
,
12576 FIELD_DECL
, get_identifier ("__overflow_arg_area"),
12578 f_sav
= build_decl (BUILTINS_LOCATION
,
12579 FIELD_DECL
, get_identifier ("__reg_save_area"),
12582 va_list_gpr_counter_field
= f_gpr
;
12583 va_list_fpr_counter_field
= f_fpr
;
12585 DECL_FIELD_CONTEXT (f_gpr
) = record
;
12586 DECL_FIELD_CONTEXT (f_fpr
) = record
;
12587 DECL_FIELD_CONTEXT (f_ovf
) = record
;
12588 DECL_FIELD_CONTEXT (f_sav
) = record
;
12590 TYPE_STUB_DECL (record
) = type_decl
;
12591 TYPE_NAME (record
) = type_decl
;
12592 TYPE_FIELDS (record
) = f_gpr
;
12593 DECL_CHAIN (f_gpr
) = f_fpr
;
12594 DECL_CHAIN (f_fpr
) = f_ovf
;
12595 DECL_CHAIN (f_ovf
) = f_sav
;
12597 layout_type (record
);
12599 /* The correct type is an array type of one element. */
12600 return build_array_type (record
, build_index_type (size_zero_node
));
12603 /* Implement va_start by filling the va_list structure VALIST.
12604 STDARG_P is always true, and ignored.
12605 NEXTARG points to the first anonymous stack argument.
12607 The following global variables are used to initialize
12608 the va_list structure:
12611 holds number of gprs and fprs used for named arguments.
12612 crtl->args.arg_offset_rtx:
12613 holds the offset of the first anonymous stack argument
12614 (relative to the virtual arg pointer). */
12617 s390_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
12619 HOST_WIDE_INT n_gpr
, n_fpr
;
12621 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12622 tree gpr
, fpr
, ovf
, sav
, t
;
12624 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12625 f_fpr
= DECL_CHAIN (f_gpr
);
12626 f_ovf
= DECL_CHAIN (f_fpr
);
12627 f_sav
= DECL_CHAIN (f_ovf
);
12629 valist
= build_simple_mem_ref (valist
);
12630 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12631 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12632 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12633 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12635 /* Count number of gp and fp argument registers used. */
12637 n_gpr
= crtl
->args
.info
.gprs
;
12638 n_fpr
= crtl
->args
.info
.fprs
;
12640 if (cfun
->va_list_gpr_size
)
12642 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
12643 build_int_cst (NULL_TREE
, n_gpr
));
12644 TREE_SIDE_EFFECTS (t
) = 1;
12645 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12648 if (cfun
->va_list_fpr_size
)
12650 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
12651 build_int_cst (NULL_TREE
, n_fpr
));
12652 TREE_SIDE_EFFECTS (t
) = 1;
12653 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12656 if (flag_split_stack
12657 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
12659 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12664 reg
= gen_reg_rtx (Pmode
);
12665 cfun
->machine
->split_stack_varargs_pointer
= reg
;
12668 emit_move_insn (reg
, gen_rtx_REG (Pmode
, 1));
12669 seq
= get_insns ();
12672 push_topmost_sequence ();
12673 emit_insn_after (seq
, entry_of_function ());
12674 pop_topmost_sequence ();
12677 /* Find the overflow area.
12678 FIXME: This currently is too pessimistic when the vector ABI is
12679 enabled. In that case we *always* set up the overflow area
12681 if (n_gpr
+ cfun
->va_list_gpr_size
> GP_ARG_NUM_REG
12682 || n_fpr
+ cfun
->va_list_fpr_size
> FP_ARG_NUM_REG
12685 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
12686 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
12688 t
= make_tree (TREE_TYPE (ovf
), cfun
->machine
->split_stack_varargs_pointer
);
12690 off
= INTVAL (crtl
->args
.arg_offset_rtx
);
12691 off
= off
< 0 ? 0 : off
;
12692 if (TARGET_DEBUG_ARG
)
12693 fprintf (stderr
, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12694 (int)n_gpr
, (int)n_fpr
, off
);
12696 t
= fold_build_pointer_plus_hwi (t
, off
);
12698 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
12699 TREE_SIDE_EFFECTS (t
) = 1;
12700 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12703 /* Find the register save area. */
12704 if ((cfun
->va_list_gpr_size
&& n_gpr
< GP_ARG_NUM_REG
)
12705 || (cfun
->va_list_fpr_size
&& n_fpr
< FP_ARG_NUM_REG
))
12707 t
= make_tree (TREE_TYPE (sav
), return_address_pointer_rtx
);
12708 t
= fold_build_pointer_plus_hwi (t
, -RETURN_REGNUM
* UNITS_PER_LONG
);
12710 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
12711 TREE_SIDE_EFFECTS (t
) = 1;
12712 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
12716 /* Implement va_arg by updating the va_list structure
12717 VALIST as required to retrieve an argument of type
12718 TYPE, and returning that argument.
12720 Generates code equivalent to:
12722 if (integral value) {
12723 if (size <= 4 && args.gpr < 5 ||
12724 size > 4 && args.gpr < 4 )
12725 ret = args.reg_save_area[args.gpr+8]
12727 ret = *args.overflow_arg_area++;
12728 } else if (vector value) {
12729 ret = *args.overflow_arg_area;
12730 args.overflow_arg_area += size / 8;
12731 } else if (float value) {
12733 ret = args.reg_save_area[args.fpr+64]
12735 ret = *args.overflow_arg_area++;
12736 } else if (aggregate value) {
12738 ret = *args.reg_save_area[args.gpr]
12740 ret = **args.overflow_arg_area++;
12744 s390_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
12745 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
12747 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
12748 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
12749 int indirect_p
, size
, n_reg
, sav_ofs
, sav_scale
, max_reg
;
12750 tree lab_false
, lab_over
= NULL_TREE
;
12751 tree addr
= create_tmp_var (ptr_type_node
, "addr");
12752 bool left_align_p
; /* How a value < UNITS_PER_LONG is aligned within
12755 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
12756 f_fpr
= DECL_CHAIN (f_gpr
);
12757 f_ovf
= DECL_CHAIN (f_fpr
);
12758 f_sav
= DECL_CHAIN (f_ovf
);
12760 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
12761 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
12762 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
12764 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12765 both appear on a lhs. */
12766 valist
= unshare_expr (valist
);
12767 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
12769 size
= int_size_in_bytes (type
);
12771 s390_check_type_for_vector_abi (type
, true, false);
12773 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
12775 if (TARGET_DEBUG_ARG
)
12777 fprintf (stderr
, "va_arg: aggregate type");
12781 /* Aggregates are passed by reference. */
12786 /* kernel stack layout on 31 bit: It is assumed here that no padding
12787 will be added by s390_frame_info because for va_args always an even
12788 number of gprs has to be saved r15-r2 = 14 regs. */
12789 sav_ofs
= 2 * UNITS_PER_LONG
;
12790 sav_scale
= UNITS_PER_LONG
;
12791 size
= UNITS_PER_LONG
;
12792 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12793 left_align_p
= false;
12795 else if (s390_function_arg_vector (TYPE_MODE (type
), type
))
12797 if (TARGET_DEBUG_ARG
)
12799 fprintf (stderr
, "va_arg: vector type");
12809 left_align_p
= true;
12811 else if (s390_function_arg_float (TYPE_MODE (type
), type
))
12813 if (TARGET_DEBUG_ARG
)
12815 fprintf (stderr
, "va_arg: float type");
12819 /* FP args go in FP registers, if present. */
12823 sav_ofs
= 16 * UNITS_PER_LONG
;
12825 max_reg
= FP_ARG_NUM_REG
- n_reg
;
12826 left_align_p
= false;
12830 if (TARGET_DEBUG_ARG
)
12832 fprintf (stderr
, "va_arg: other type");
12836 /* Otherwise into GP registers. */
12839 n_reg
= (size
+ UNITS_PER_LONG
- 1) / UNITS_PER_LONG
;
12841 /* kernel stack layout on 31 bit: It is assumed here that no padding
12842 will be added by s390_frame_info because for va_args always an even
12843 number of gprs has to be saved r15-r2 = 14 regs. */
12844 sav_ofs
= 2 * UNITS_PER_LONG
;
12846 if (size
< UNITS_PER_LONG
)
12847 sav_ofs
+= UNITS_PER_LONG
- size
;
12849 sav_scale
= UNITS_PER_LONG
;
12850 max_reg
= GP_ARG_NUM_REG
- n_reg
;
12851 left_align_p
= false;
12854 /* Pull the value out of the saved registers ... */
12856 if (reg
!= NULL_TREE
)
12859 if (reg > ((typeof (reg))max_reg))
12862 addr = sav + sav_ofs + reg * save_scale;
12869 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
12870 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
12872 t
= fold_convert (TREE_TYPE (reg
), size_int (max_reg
));
12873 t
= build2 (GT_EXPR
, boolean_type_node
, reg
, t
);
12874 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
12875 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
12876 gimplify_and_add (t
, pre_p
);
12878 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
12879 u
= build2 (MULT_EXPR
, TREE_TYPE (reg
), reg
,
12880 fold_convert (TREE_TYPE (reg
), size_int (sav_scale
)));
12881 t
= fold_build_pointer_plus (t
, u
);
12883 gimplify_assign (addr
, t
, pre_p
);
12885 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
12887 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
12890 /* ... Otherwise out of the overflow area. */
12893 if (size
< UNITS_PER_LONG
&& !left_align_p
)
12894 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
- size
);
12896 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
12898 gimplify_assign (addr
, t
, pre_p
);
12900 if (size
< UNITS_PER_LONG
&& left_align_p
)
12901 t
= fold_build_pointer_plus_hwi (t
, UNITS_PER_LONG
);
12903 t
= fold_build_pointer_plus_hwi (t
, size
);
12905 gimplify_assign (ovf
, t
, pre_p
);
12907 if (reg
!= NULL_TREE
)
12908 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
12911 /* Increment register save count. */
12915 u
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (reg
), reg
,
12916 fold_convert (TREE_TYPE (reg
), size_int (n_reg
)));
12917 gimplify_and_add (u
, pre_p
);
12922 t
= build_pointer_type_for_mode (build_pointer_type (type
),
12924 addr
= fold_convert (t
, addr
);
12925 addr
= build_va_arg_indirect_ref (addr
);
12929 t
= build_pointer_type_for_mode (type
, ptr_mode
, true);
12930 addr
= fold_convert (t
, addr
);
12933 return build_va_arg_indirect_ref (addr
);
12936 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12938 DEST - Register location where CC will be stored.
12939 TDB - Pointer to a 256 byte area where to store the transaction.
12940 diagnostic block. NULL if TDB is not needed.
12941 RETRY - Retry count value. If non-NULL a retry loop for CC2
12943 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12944 of the tbegin instruction pattern. */
12947 s390_expand_tbegin (rtx dest
, rtx tdb
, rtx retry
, bool clobber_fprs_p
)
12949 rtx retry_plus_two
= gen_reg_rtx (SImode
);
12950 rtx retry_reg
= gen_reg_rtx (SImode
);
12951 rtx_code_label
*retry_label
= NULL
;
12953 if (retry
!= NULL_RTX
)
12955 emit_move_insn (retry_reg
, retry
);
12956 emit_insn (gen_addsi3 (retry_plus_two
, retry_reg
, const2_rtx
));
12957 emit_insn (gen_addsi3 (retry_reg
, retry_reg
, const1_rtx
));
12958 retry_label
= gen_label_rtx ();
12959 emit_label (retry_label
);
12962 if (clobber_fprs_p
)
12965 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12968 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12972 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode
, TBEGIN_MASK
),
12975 emit_move_insn (dest
, gen_rtx_UNSPEC (SImode
,
12976 gen_rtvec (1, gen_rtx_REG (CCRAWmode
,
12978 UNSPEC_CC_TO_INT
));
12979 if (retry
!= NULL_RTX
)
12981 const int CC0
= 1 << 3;
12982 const int CC1
= 1 << 2;
12983 const int CC3
= 1 << 0;
12985 rtx count
= gen_reg_rtx (SImode
);
12986 rtx_code_label
*leave_label
= gen_label_rtx ();
12988 /* Exit for success and permanent failures. */
12989 jump
= s390_emit_jump (leave_label
,
12990 gen_rtx_EQ (VOIDmode
,
12991 gen_rtx_REG (CCRAWmode
, CC_REGNUM
),
12992 gen_rtx_CONST_INT (VOIDmode
, CC0
| CC1
| CC3
)));
12993 LABEL_NUSES (leave_label
) = 1;
12995 /* CC2 - transient failure. Perform retry with ppa. */
12996 emit_move_insn (count
, retry_plus_two
);
12997 emit_insn (gen_subsi3 (count
, count
, retry_reg
));
12998 emit_insn (gen_tx_assist (count
));
12999 jump
= emit_jump_insn (gen_doloop_si64 (retry_label
,
13002 JUMP_LABEL (jump
) = retry_label
;
13003 LABEL_NUSES (retry_label
) = 1;
13004 emit_label (leave_label
);
13009 /* Return the decl for the target specific builtin with the function
13013 s390_builtin_decl (unsigned fcode
, bool initialized_p ATTRIBUTE_UNUSED
)
13015 if (fcode
>= S390_BUILTIN_MAX
)
13016 return error_mark_node
;
13018 return s390_builtin_decls
[fcode
];
13021 /* We call mcount before the function prologue. So a profiled leaf
13022 function should stay a leaf function. */
13025 s390_keep_leaf_when_profiled ()
13030 /* Output assembly code for the trampoline template to
13033 On S/390, we use gpr 1 internally in the trampoline code;
13034 gpr 0 is used to hold the static chain. */
13037 s390_asm_trampoline_template (FILE *file
)
13040 op
[0] = gen_rtx_REG (Pmode
, 0);
13041 op
[1] = gen_rtx_REG (Pmode
, 1);
13045 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
13046 output_asm_insn ("lmg\t%0,%1,14(%1)", op
); /* 6 byte */
13047 output_asm_insn ("br\t%1", op
); /* 2 byte */
13048 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 10));
13052 output_asm_insn ("basr\t%1,0", op
); /* 2 byte */
13053 output_asm_insn ("lm\t%0,%1,6(%1)", op
); /* 4 byte */
13054 output_asm_insn ("br\t%1", op
); /* 2 byte */
13055 ASM_OUTPUT_SKIP (file
, (HOST_WIDE_INT
)(TRAMPOLINE_SIZE
- 8));
13059 /* Emit RTL insns to initialize the variable parts of a trampoline.
13060 FNADDR is an RTX for the address of the function's pure code.
13061 CXT is an RTX for the static chain value for the function. */
13064 s390_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
13066 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
13069 emit_block_move (m_tramp
, assemble_trampoline_template (),
13070 GEN_INT (2 * UNITS_PER_LONG
), BLOCK_OP_NORMAL
);
13072 mem
= adjust_address (m_tramp
, Pmode
, 2 * UNITS_PER_LONG
);
13073 emit_move_insn (mem
, cxt
);
13074 mem
= adjust_address (m_tramp
, Pmode
, 3 * UNITS_PER_LONG
);
13075 emit_move_insn (mem
, fnaddr
);
13078 /* Output assembler code to FILE to increment profiler label # LABELNO
13079 for profiling a function entry. */
13082 s390_function_profiler (FILE *file
, int labelno
)
13087 ASM_GENERATE_INTERNAL_LABEL (label
, "LP", labelno
);
13089 fprintf (file
, "# function profiler \n");
13091 op
[0] = gen_rtx_REG (Pmode
, RETURN_REGNUM
);
13092 op
[1] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
13093 op
[1] = gen_rtx_MEM (Pmode
, plus_constant (Pmode
, op
[1], UNITS_PER_LONG
));
13095 op
[2] = gen_rtx_REG (Pmode
, 1);
13096 op
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
13097 SYMBOL_REF_FLAGS (op
[3]) = SYMBOL_FLAG_LOCAL
;
13099 op
[4] = gen_rtx_SYMBOL_REF (Pmode
, "_mcount");
13102 op
[4] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[4]), UNSPEC_PLT
);
13103 op
[4] = gen_rtx_CONST (Pmode
, op
[4]);
13108 output_asm_insn ("stg\t%0,%1", op
);
13109 output_asm_insn ("larl\t%2,%3", op
);
13110 output_asm_insn ("brasl\t%0,%4", op
);
13111 output_asm_insn ("lg\t%0,%1", op
);
13113 else if (TARGET_CPU_ZARCH
)
13115 output_asm_insn ("st\t%0,%1", op
);
13116 output_asm_insn ("larl\t%2,%3", op
);
13117 output_asm_insn ("brasl\t%0,%4", op
);
13118 output_asm_insn ("l\t%0,%1", op
);
13120 else if (!flag_pic
)
13122 op
[6] = gen_label_rtx ();
13124 output_asm_insn ("st\t%0,%1", op
);
13125 output_asm_insn ("bras\t%2,%l6", op
);
13126 output_asm_insn (".long\t%4", op
);
13127 output_asm_insn (".long\t%3", op
);
13128 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[6]));
13129 output_asm_insn ("l\t%0,0(%2)", op
);
13130 output_asm_insn ("l\t%2,4(%2)", op
);
13131 output_asm_insn ("basr\t%0,%0", op
);
13132 output_asm_insn ("l\t%0,%1", op
);
13136 op
[5] = gen_label_rtx ();
13137 op
[6] = gen_label_rtx ();
13139 output_asm_insn ("st\t%0,%1", op
);
13140 output_asm_insn ("bras\t%2,%l6", op
);
13141 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[5]));
13142 output_asm_insn (".long\t%4-%l5", op
);
13143 output_asm_insn (".long\t%3-%l5", op
);
13144 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[6]));
13145 output_asm_insn ("lr\t%0,%2", op
);
13146 output_asm_insn ("a\t%0,0(%2)", op
);
13147 output_asm_insn ("a\t%2,4(%2)", op
);
13148 output_asm_insn ("basr\t%0,%0", op
);
13149 output_asm_insn ("l\t%0,%1", op
);
13153 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13154 into its SYMBOL_REF_FLAGS. */
13157 s390_encode_section_info (tree decl
, rtx rtl
, int first
)
13159 default_encode_section_info (decl
, rtl
, first
);
13161 if (TREE_CODE (decl
) == VAR_DECL
)
13163 /* Store the alignment to be able to check if we can use
13164 a larl/load-relative instruction. We only handle the cases
13165 that can go wrong (i.e. no FUNC_DECLs). */
13166 if (DECL_ALIGN (decl
) == 0 || DECL_ALIGN (decl
) % 16)
13167 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13168 else if (DECL_ALIGN (decl
) % 32)
13169 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13170 else if (DECL_ALIGN (decl
) % 64)
13171 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13174 /* Literal pool references don't have a decl so they are handled
13175 differently here. We rely on the information in the MEM_ALIGN
13176 entry to decide upon the alignment. */
13178 && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
13179 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl
, 0)))
13181 if (MEM_ALIGN (rtl
) == 0 || MEM_ALIGN (rtl
) % 16)
13182 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl
, 0));
13183 else if (MEM_ALIGN (rtl
) % 32)
13184 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl
, 0));
13185 else if (MEM_ALIGN (rtl
) % 64)
13186 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl
, 0));
13190 /* Output thunk to FILE that implements a C++ virtual function call (with
13191 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13192 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13193 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13194 relative to the resulting this pointer. */
13197 s390_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
13198 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
13204 /* Make sure unwind info is emitted for the thunk if needed. */
13205 final_start_function (emit_barrier (), file
, 1);
13207 /* Operand 0 is the target function. */
13208 op
[0] = XEXP (DECL_RTL (function
), 0);
13209 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (op
[0]))
13212 op
[0] = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op
[0]),
13213 TARGET_64BIT
? UNSPEC_PLT
: UNSPEC_GOT
);
13214 op
[0] = gen_rtx_CONST (Pmode
, op
[0]);
13217 /* Operand 1 is the 'this' pointer. */
13218 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
13219 op
[1] = gen_rtx_REG (Pmode
, 3);
13221 op
[1] = gen_rtx_REG (Pmode
, 2);
13223 /* Operand 2 is the delta. */
13224 op
[2] = GEN_INT (delta
);
13226 /* Operand 3 is the vcall_offset. */
13227 op
[3] = GEN_INT (vcall_offset
);
13229 /* Operand 4 is the temporary register. */
13230 op
[4] = gen_rtx_REG (Pmode
, 1);
13232 /* Operands 5 to 8 can be used as labels. */
13238 /* Operand 9 can be used for temporary register. */
13241 /* Generate code. */
13244 /* Setup literal pool pointer if required. */
13245 if ((!DISP_IN_RANGE (delta
)
13246 && !CONST_OK_FOR_K (delta
)
13247 && !CONST_OK_FOR_Os (delta
))
13248 || (!DISP_IN_RANGE (vcall_offset
)
13249 && !CONST_OK_FOR_K (vcall_offset
)
13250 && !CONST_OK_FOR_Os (vcall_offset
)))
13252 op
[5] = gen_label_rtx ();
13253 output_asm_insn ("larl\t%4,%5", op
);
13256 /* Add DELTA to this pointer. */
13259 if (CONST_OK_FOR_J (delta
))
13260 output_asm_insn ("la\t%1,%2(%1)", op
);
13261 else if (DISP_IN_RANGE (delta
))
13262 output_asm_insn ("lay\t%1,%2(%1)", op
);
13263 else if (CONST_OK_FOR_K (delta
))
13264 output_asm_insn ("aghi\t%1,%2", op
);
13265 else if (CONST_OK_FOR_Os (delta
))
13266 output_asm_insn ("agfi\t%1,%2", op
);
13269 op
[6] = gen_label_rtx ();
13270 output_asm_insn ("agf\t%1,%6-%5(%4)", op
);
13274 /* Perform vcall adjustment. */
13277 if (DISP_IN_RANGE (vcall_offset
))
13279 output_asm_insn ("lg\t%4,0(%1)", op
);
13280 output_asm_insn ("ag\t%1,%3(%4)", op
);
13282 else if (CONST_OK_FOR_K (vcall_offset
))
13284 output_asm_insn ("lghi\t%4,%3", op
);
13285 output_asm_insn ("ag\t%4,0(%1)", op
);
13286 output_asm_insn ("ag\t%1,0(%4)", op
);
13288 else if (CONST_OK_FOR_Os (vcall_offset
))
13290 output_asm_insn ("lgfi\t%4,%3", op
);
13291 output_asm_insn ("ag\t%4,0(%1)", op
);
13292 output_asm_insn ("ag\t%1,0(%4)", op
);
13296 op
[7] = gen_label_rtx ();
13297 output_asm_insn ("llgf\t%4,%7-%5(%4)", op
);
13298 output_asm_insn ("ag\t%4,0(%1)", op
);
13299 output_asm_insn ("ag\t%1,0(%4)", op
);
13303 /* Jump to target. */
13304 output_asm_insn ("jg\t%0", op
);
13306 /* Output literal pool if required. */
13309 output_asm_insn (".align\t4", op
);
13310 targetm
.asm_out
.internal_label (file
, "L",
13311 CODE_LABEL_NUMBER (op
[5]));
13315 targetm
.asm_out
.internal_label (file
, "L",
13316 CODE_LABEL_NUMBER (op
[6]));
13317 output_asm_insn (".long\t%2", op
);
13321 targetm
.asm_out
.internal_label (file
, "L",
13322 CODE_LABEL_NUMBER (op
[7]));
13323 output_asm_insn (".long\t%3", op
);
13328 /* Setup base pointer if required. */
13330 || (!DISP_IN_RANGE (delta
)
13331 && !CONST_OK_FOR_K (delta
)
13332 && !CONST_OK_FOR_Os (delta
))
13333 || (!DISP_IN_RANGE (delta
)
13334 && !CONST_OK_FOR_K (vcall_offset
)
13335 && !CONST_OK_FOR_Os (vcall_offset
)))
13337 op
[5] = gen_label_rtx ();
13338 output_asm_insn ("basr\t%4,0", op
);
13339 targetm
.asm_out
.internal_label (file
, "L",
13340 CODE_LABEL_NUMBER (op
[5]));
13343 /* Add DELTA to this pointer. */
13346 if (CONST_OK_FOR_J (delta
))
13347 output_asm_insn ("la\t%1,%2(%1)", op
);
13348 else if (DISP_IN_RANGE (delta
))
13349 output_asm_insn ("lay\t%1,%2(%1)", op
);
13350 else if (CONST_OK_FOR_K (delta
))
13351 output_asm_insn ("ahi\t%1,%2", op
);
13352 else if (CONST_OK_FOR_Os (delta
))
13353 output_asm_insn ("afi\t%1,%2", op
);
13356 op
[6] = gen_label_rtx ();
13357 output_asm_insn ("a\t%1,%6-%5(%4)", op
);
13361 /* Perform vcall adjustment. */
13364 if (CONST_OK_FOR_J (vcall_offset
))
13366 output_asm_insn ("l\t%4,0(%1)", op
);
13367 output_asm_insn ("a\t%1,%3(%4)", op
);
13369 else if (DISP_IN_RANGE (vcall_offset
))
13371 output_asm_insn ("l\t%4,0(%1)", op
);
13372 output_asm_insn ("ay\t%1,%3(%4)", op
);
13374 else if (CONST_OK_FOR_K (vcall_offset
))
13376 output_asm_insn ("lhi\t%4,%3", op
);
13377 output_asm_insn ("a\t%4,0(%1)", op
);
13378 output_asm_insn ("a\t%1,0(%4)", op
);
13380 else if (CONST_OK_FOR_Os (vcall_offset
))
13382 output_asm_insn ("iilf\t%4,%3", op
);
13383 output_asm_insn ("a\t%4,0(%1)", op
);
13384 output_asm_insn ("a\t%1,0(%4)", op
);
13388 op
[7] = gen_label_rtx ();
13389 output_asm_insn ("l\t%4,%7-%5(%4)", op
);
13390 output_asm_insn ("a\t%4,0(%1)", op
);
13391 output_asm_insn ("a\t%1,0(%4)", op
);
13394 /* We had to clobber the base pointer register.
13395 Re-setup the base pointer (with a different base). */
13396 op
[5] = gen_label_rtx ();
13397 output_asm_insn ("basr\t%4,0", op
);
13398 targetm
.asm_out
.internal_label (file
, "L",
13399 CODE_LABEL_NUMBER (op
[5]));
13402 /* Jump to target. */
13403 op
[8] = gen_label_rtx ();
13406 output_asm_insn ("l\t%4,%8-%5(%4)", op
);
13407 else if (!nonlocal
)
13408 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13409 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13410 else if (flag_pic
== 1)
13412 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13413 output_asm_insn ("l\t%4,%0(%4)", op
);
13415 else if (flag_pic
== 2)
13417 op
[9] = gen_rtx_REG (Pmode
, 0);
13418 output_asm_insn ("l\t%9,%8-4-%5(%4)", op
);
13419 output_asm_insn ("a\t%4,%8-%5(%4)", op
);
13420 output_asm_insn ("ar\t%4,%9", op
);
13421 output_asm_insn ("l\t%4,0(%4)", op
);
13424 output_asm_insn ("br\t%4", op
);
13426 /* Output literal pool. */
13427 output_asm_insn (".align\t4", op
);
13429 if (nonlocal
&& flag_pic
== 2)
13430 output_asm_insn (".long\t%0", op
);
13433 op
[0] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
13434 SYMBOL_REF_FLAGS (op
[0]) = SYMBOL_FLAG_LOCAL
;
13437 targetm
.asm_out
.internal_label (file
, "L", CODE_LABEL_NUMBER (op
[8]));
13439 output_asm_insn (".long\t%0", op
);
13441 output_asm_insn (".long\t%0-%5", op
);
13445 targetm
.asm_out
.internal_label (file
, "L",
13446 CODE_LABEL_NUMBER (op
[6]));
13447 output_asm_insn (".long\t%2", op
);
13451 targetm
.asm_out
.internal_label (file
, "L",
13452 CODE_LABEL_NUMBER (op
[7]));
13453 output_asm_insn (".long\t%3", op
);
13456 final_end_function ();
13460 s390_valid_pointer_mode (scalar_int_mode mode
)
13462 return (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
));
13465 /* Checks whether the given CALL_EXPR would use a caller
13466 saved register. This is used to decide whether sibling call
13467 optimization could be performed on the respective function
13471 s390_call_saved_register_used (tree call_expr
)
13473 CUMULATIVE_ARGS cum_v
;
13474 cumulative_args_t cum
;
13481 INIT_CUMULATIVE_ARGS (cum_v
, NULL
, NULL
, 0, 0);
13482 cum
= pack_cumulative_args (&cum_v
);
13484 for (i
= 0; i
< call_expr_nargs (call_expr
); i
++)
13486 parameter
= CALL_EXPR_ARG (call_expr
, i
);
13487 gcc_assert (parameter
);
13489 /* For an undeclared variable passed as parameter we will get
13490 an ERROR_MARK node here. */
13491 if (TREE_CODE (parameter
) == ERROR_MARK
)
13494 type
= TREE_TYPE (parameter
);
13497 mode
= TYPE_MODE (type
);
13500 /* We assume that in the target function all parameters are
13501 named. This only has an impact on vector argument register
13502 usage none of which is call-saved. */
13503 if (pass_by_reference (&cum_v
, mode
, type
, true))
13506 type
= build_pointer_type (type
);
13509 parm_rtx
= s390_function_arg (cum
, mode
, type
, true);
13511 s390_function_arg_advance (cum
, mode
, type
, true);
13516 if (REG_P (parm_rtx
))
13518 for (reg
= 0; reg
< REG_NREGS (parm_rtx
); reg
++)
13519 if (!call_used_regs
[reg
+ REGNO (parm_rtx
)])
13523 if (GET_CODE (parm_rtx
) == PARALLEL
)
13527 for (i
= 0; i
< XVECLEN (parm_rtx
, 0); i
++)
13529 rtx r
= XEXP (XVECEXP (parm_rtx
, 0, i
), 0);
13531 gcc_assert (REG_P (r
));
13533 for (reg
= 0; reg
< REG_NREGS (r
); reg
++)
13534 if (!call_used_regs
[reg
+ REGNO (r
)])
13543 /* Return true if the given call expression can be
13544 turned into a sibling call.
13545 DECL holds the declaration of the function to be called whereas
13546 EXP is the call expression itself. */
13549 s390_function_ok_for_sibcall (tree decl
, tree exp
)
13551 /* The TPF epilogue uses register 1. */
13552 if (TARGET_TPF_PROFILING
)
13555 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13556 which would have to be restored before the sibcall. */
13557 if (!TARGET_64BIT
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
13560 /* Register 6 on s390 is available as an argument register but unfortunately
13561 "caller saved". This makes functions needing this register for arguments
13562 not suitable for sibcalls. */
13563 return !s390_call_saved_register_used (exp
);
13566 /* Return the fixed registers used for condition codes. */
13569 s390_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
13572 *p2
= INVALID_REGNUM
;
13577 /* This function is used by the call expanders of the machine description.
13578 It emits the call insn itself together with the necessary operations
13579 to adjust the target address and returns the emitted insn.
13580 ADDR_LOCATION is the target address rtx
13581 TLS_CALL the location of the thread-local symbol
13582 RESULT_REG the register where the result of the call should be stored
13583 RETADDR_REG the register where the return address should be stored
13584 If this parameter is NULL_RTX the call is considered
13585 to be a sibling call. */
/* NOTE(review): this chunk is a damaged extraction of a numbered source
   listing; wrapped lines lost their numbers and several original lines are
   missing.  Code is kept byte-identical below; it will not compile as-is.  */
/* Emit the call insn for a call to ADDR_LOCATION.  TLS_CALL, if non-NULL,
   marks a TLS call; RESULT_REG, if non-NULL, receives the call result.
   A RETADDR_REG parameter is referenced in the body but its declaration
   line is missing from this extraction — presumably the fourth parameter;
   NULL_RTX apparently means "sibcall" (no return-address clobber).  */
13588 s390_emit_call (rtx addr_location
, rtx tls_call
, rtx result_reg
,
13591 bool plt_call
= false;
13597 /* Direct function calls need special treatment. */
13598 if (GET_CODE (addr_location
) == SYMBOL_REF
)
13600 /* When calling a global routine in PIC mode, we must
13601 replace the symbol itself with the PLT stub. */
13602 if (flag_pic
&& !SYMBOL_REF_LOCAL_P (addr_location
))
13604 if (TARGET_64BIT
|| retaddr_reg
!= NULL_RTX
)
/* Wrap the symbol in an UNSPEC (PLT reference) and a CONST.  */
13606 addr_location
= gen_rtx_UNSPEC (Pmode
,
13607 gen_rtvec (1, addr_location
),
13609 addr_location
= gen_rtx_CONST (Pmode
, addr_location
);
13613 /* For -fpic code the PLT entries might use r12 which is
13614 call-saved. Therefore we cannot do a sibcall when
13615 calling directly using a symbol ref. When reaching
13616 this point we decided (in s390_function_ok_for_sibcall)
13617 to do a sibcall for a function pointer but one of the
13618 optimizers was able to get rid of the function pointer
13619 by propagating the symbol ref into the call. This
13620 optimization is illegal for S/390 so we turn the direct
13621 call into a indirect call again. */
13622 addr_location
= force_reg (Pmode
, addr_location
);
13625 /* Unless we can use the bras(l) insn, force the
13626 routine address into a register. */
13627 if (!TARGET_SMALL_EXEC
&& !TARGET_CPU_ZARCH
)
13630 addr_location
= legitimize_pic_address (addr_location
, 0);
13632 addr_location
= force_reg (Pmode
, addr_location
);
13636 /* If it is already an indirect call or the code above moved the
13637 SYMBOL_REF to somewhere else make sure the address can be found in
13639 if (retaddr_reg
== NULL_RTX
13640 && GET_CODE (addr_location
) != SYMBOL_REF
/* Sibcall with a non-symbol address: route it through SIBCALL_REGNUM.  */
13643 emit_move_insn (gen_rtx_REG (Pmode
, SIBCALL_REGNUM
), addr_location
);
13644 addr_location
= gen_rtx_REG (Pmode
, SIBCALL_REGNUM
);
/* Build the CALL rtx; wrap in SET when a result register is wanted.  */
13647 addr_location
= gen_rtx_MEM (QImode
, addr_location
);
13648 call
= gen_rtx_CALL (VOIDmode
, addr_location
, const0_rtx
);
13650 if (result_reg
!= NULL_RTX
)
13651 call
= gen_rtx_SET (result_reg
, call
);
13653 if (retaddr_reg
!= NULL_RTX
)
13655 clobber
= gen_rtx_CLOBBER (VOIDmode
, retaddr_reg
);
13657 if (tls_call
!= NULL_RTX
)
13658 vec
= gen_rtvec (3, call
, clobber
,
13659 gen_rtx_USE (VOIDmode
, tls_call
));
13661 vec
= gen_rtvec (2, call
, clobber
);
13663 call
= gen_rtx_PARALLEL (VOIDmode
, vec
);
13666 insn
= emit_call_insn (call
);
13668 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13669 if ((!TARGET_64BIT
&& plt_call
) || tls_call
!= NULL_RTX
)
13671 /* s390_function_ok_for_sibcall should
13672 have denied sibcalls in this case. */
13673 gcc_assert (retaddr_reg
!= NULL_RTX
)
;
/* r12 is the GOT pointer here — record the implicit use on the call.  */
13674 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, 12));
13679 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
/* Adjust fixed_regs / call_used_regs / call_really_used_regs according to
   the selected target flags.  (Extraction-damaged listing; code kept
   byte-identical — the guarding context of some statements is missing,
   e.g. the flag_pic condition presumably guarding the PIC register.)  */
13682 s390_conditional_register_usage (void)
/* Reserve the PIC offset table register.  */
13688 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
13689 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
/* On z/Architecture the base and return registers are freely usable.  */
13691 if (TARGET_CPU_ZARCH
)
13693 fixed_regs
[BASE_REGNUM
] = 0;
13694 call_used_regs
[BASE_REGNUM
] = 0;
13695 fixed_regs
[RETURN_REGNUM
] = 0;
13696 call_used_regs
[RETURN_REGNUM
] = 0;
/* FPR8-FPR15 (and FPR4/FPR6 in another configuration — presumably 64-bit
   vs. 31-bit ABI; guard lines missing) become call-saved.  */
13700 for (i
= FPR8_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13701 call_used_regs
[i
] = call_really_used_regs
[i
] = 0;
13705 call_used_regs
[FPR4_REGNUM
] = call_really_used_regs
[FPR4_REGNUM
] = 0;
13706 call_used_regs
[FPR6_REGNUM
] = call_really_used_regs
[FPR6_REGNUM
] = 0;
/* Soft-float: no FPRs at all.  */
13709 if (TARGET_SOFT_FLOAT
)
13711 for (i
= FPR0_REGNUM
; i
<= FPR15_REGNUM
; i
++)
13712 call_used_regs
[i
] = fixed_regs
[i
] = 1;
13715 /* Disable v16 - v31 for non-vector target. */
13718 for (i
= VR16_REGNUM
; i
<= VR31_REGNUM
; i
++)
13719 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
13723 /* Corresponding function to eh_return expander. */
/* Lazily-created symbol for the TPF exception-return helper.  */
13725 static GTY(()) rtx s390_tpf_eh_return_symbol
;
/* Emit a call to __tpf_eh_return with TARGET in r2 and the original
   return address (from RETURN_REGNUM's entry value) in r3, then store
   the returned value into EH_RETURN_HANDLER_RTX.  */
13727 s390_emit_tpf_eh_return (rtx target
)
13732 if (!s390_tpf_eh_return_symbol
)
13733 s390_tpf_eh_return_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "__tpf_eh_return");
13735 reg
= gen_rtx_REG (Pmode
, 2);
13736 orig_ra
= gen_rtx_REG (Pmode
, 3);
13738 emit_move_insn (reg
, target
);
13739 emit_move_insn (orig_ra
, get_hard_reg_initial_val (Pmode
, RETURN_REGNUM
));
13740 insn
= s390_emit_call (s390_tpf_eh_return_symbol
, NULL_RTX
, reg
,
13741 gen_rtx_REG (Pmode
, RETURN_REGNUM
));
/* Both argument registers are live across the call.  */
13742 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), reg
);
13743 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), orig_ra
);
13745 emit_move_insn (EH_RETURN_HANDLER_RTX
, reg
);
13748 /* Rework the prologue/epilogue to avoid saving/restoring
13749 registers unnecessarily. */
/* NOTE(review): extraction-damaged listing — control-flow scaffolding
   (braces, `continue`s) is missing; code kept byte-identical.  The pass
   walks all insns and shrinks frame-related save/restore sequences.  */
13752 s390_optimize_prologue (void)
13754 rtx_insn
*insn
, *new_insn
, *next_insn
;
13756 /* Do a final recompute of the frame-related data. */
13757 s390_optimize_register_info ();
13759 /* If all special registers are in fact used, there's nothing we
13760 can do, so no point in walking the insn list. */
13762 if (cfun_frame_layout
.first_save_gpr
<= BASE_REGNUM
13763 && cfun_frame_layout
.last_save_gpr
>= BASE_REGNUM
13764 && (TARGET_CPU_ZARCH
13765 || (cfun_frame_layout
.first_save_gpr
<= RETURN_REGNUM
13766 && cfun_frame_layout
.last_save_gpr
>= RETURN_REGNUM
)))
13769 /* Search for prologue/epilogue insns and replace them. */
13771 for (insn
= get_insns (); insn
; insn
= next_insn
)
13773 int first
, last
, off
;
13774 rtx set
, base
, offset
;
13777 next_insn
= NEXT_INSN (insn
);
/* Only frame-related non-jump insns are candidates.  */
13779 if (! NONJUMP_INSN_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
13782 pat
= PATTERN (insn
);
13784 /* Remove ldgr/lgdr instructions used for saving and restore
13785 GPRs if possible. */
13790 if (INSN_CODE (insn
) == CODE_FOR_stack_restore_from_fpr
)
13791 tmp_pat
= XVECEXP (pat
, 0, 0);
13793 if (GET_CODE (tmp_pat
) == SET
13794 && GET_MODE (SET_SRC (tmp_pat
)) == DImode
13795 && REG_P (SET_SRC (tmp_pat
))
13796 && REG_P (SET_DEST (tmp_pat
)))
13798 int src_regno
= REGNO (SET_SRC (tmp_pat
));
13799 int dest_regno
= REGNO (SET_DEST (tmp_pat
));
/* Must be a GPR<->FPR move in one of the two directions.  */
13803 if (!((GENERAL_REGNO_P (src_regno
)
13804 && FP_REGNO_P (dest_regno
))
13805 || (FP_REGNO_P (src_regno
)
13806 && GENERAL_REGNO_P (dest_regno
))))
13809 gpr_regno
= GENERAL_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13810 fpr_regno
= FP_REGNO_P (src_regno
) ? src_regno
: dest_regno
;
13812 /* GPR must be call-saved, FPR must be call-clobbered. */
13813 if (!call_really_used_regs
[fpr_regno
]
13814 || call_really_used_regs
[gpr_regno
])
13817 /* It must not happen that what we once saved in an FPR now
13818 needs a stack slot. */
13819 gcc_assert (cfun_gpr_save_slot (gpr_regno
) != SAVE_SLOT_STACK
);
/* Save slot no longer needed: drop the whole save/restore insn.  */
13821 if (cfun_gpr_save_slot (gpr_regno
) == SAVE_SLOT_NONE
)
13823 remove_insn (insn
);
/* Shrink a store-multiple prologue insn to the registers that are
   really still saved.  */
13829 if (GET_CODE (pat
) == PARALLEL
13830 && store_multiple_operation (pat
, VOIDmode
))
13832 set
= XVECEXP (pat
, 0, 0);
13833 first
= REGNO (SET_SRC (set
));
13834 last
= first
+ XVECLEN (pat
, 0) - 1;
13835 offset
= const0_rtx
;
13836 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13837 off
= INTVAL (offset
);
13839 if (GET_CODE (base
) != REG
|| off
< 0)
13841 if (cfun_frame_layout
.first_save_gpr
!= -1
13842 && (cfun_frame_layout
.first_save_gpr
< first
13843 || cfun_frame_layout
.last_save_gpr
> last
))
13845 if (REGNO (base
) != STACK_POINTER_REGNUM
13846 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13848 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13851 if (cfun_frame_layout
.first_save_gpr
!= -1)
13853 rtx s_pat
= save_gprs (base
,
13854 off
+ (cfun_frame_layout
.first_save_gpr
13855 - first
) * UNITS_PER_LONG
,
13856 cfun_frame_layout
.first_save_gpr
,
13857 cfun_frame_layout
.last_save_gpr
);
13858 new_insn
= emit_insn_before (s_pat
, insn
);
13859 INSN_ADDRESSES_NEW (new_insn
, -1);
13862 remove_insn (insn
);
/* Single GPR store with no GPRs to save at all: delete it.  */
13866 if (cfun_frame_layout
.first_save_gpr
== -1
13867 && GET_CODE (pat
) == SET
13868 && GENERAL_REG_P (SET_SRC (pat
))
13869 && GET_CODE (SET_DEST (pat
)) == MEM
)
13872 first
= REGNO (SET_SRC (set
));
13873 offset
= const0_rtx
;
13874 base
= eliminate_constant_term (XEXP (SET_DEST (set
), 0), &offset
);
13875 off
= INTVAL (offset
);
13877 if (GET_CODE (base
) != REG
|| off
< 0)
13879 if (REGNO (base
) != STACK_POINTER_REGNUM
13880 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13883 remove_insn (insn
);
/* Shrink a load-multiple epilogue insn likewise.  */
13887 if (GET_CODE (pat
) == PARALLEL
13888 && load_multiple_operation (pat
, VOIDmode
))
13890 set
= XVECEXP (pat
, 0, 0);
13891 first
= REGNO (SET_DEST (set
));
13892 last
= first
+ XVECLEN (pat
, 0) - 1;
13893 offset
= const0_rtx
;
13894 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13895 off
= INTVAL (offset
);
13897 if (GET_CODE (base
) != REG
|| off
< 0)
13900 if (cfun_frame_layout
.first_restore_gpr
!= -1
13901 && (cfun_frame_layout
.first_restore_gpr
< first
13902 || cfun_frame_layout
.last_restore_gpr
> last
))
13904 if (REGNO (base
) != STACK_POINTER_REGNUM
13905 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13907 if (first
> BASE_REGNUM
|| last
< BASE_REGNUM
)
13910 if (cfun_frame_layout
.first_restore_gpr
!= -1)
13912 rtx rpat
= restore_gprs (base
,
13913 off
+ (cfun_frame_layout
.first_restore_gpr
13914 - first
) * UNITS_PER_LONG
,
13915 cfun_frame_layout
.first_restore_gpr
,
13916 cfun_frame_layout
.last_restore_gpr
);
13918 /* Remove REG_CFA_RESTOREs for registers that we no
13919 longer need to save. */
13920 REG_NOTES (rpat
) = REG_NOTES (insn
);
/* NOTE(review): "®" below is a mojibake of "&REG" in the original
   listing ("&REG_NOTES"); kept byte-identical.  */
13921 for (rtx
*ptr
= ®_NOTES (rpat
); *ptr
; )
13922 if (REG_NOTE_KIND (*ptr
) == REG_CFA_RESTORE
13923 && ((int) REGNO (XEXP (*ptr
, 0))
13924 < cfun_frame_layout
.first_restore_gpr
))
13925 *ptr
= XEXP (*ptr
, 1);
13927 ptr
= &XEXP (*ptr
, 1);
13928 new_insn
= emit_insn_before (rpat
, insn
);
13929 RTX_FRAME_RELATED_P (new_insn
) = 1;
13930 INSN_ADDRESSES_NEW (new_insn
, -1);
13933 remove_insn (insn
);
/* Single GPR load with no GPRs to restore: delete it.  */
13937 if (cfun_frame_layout
.first_restore_gpr
== -1
13938 && GET_CODE (pat
) == SET
13939 && GENERAL_REG_P (SET_DEST (pat
))
13940 && GET_CODE (SET_SRC (pat
)) == MEM
)
13943 first
= REGNO (SET_DEST (set
));
13944 offset
= const0_rtx
;
13945 base
= eliminate_constant_term (XEXP (SET_SRC (set
), 0), &offset
);
13946 off
= INTVAL (offset
);
13948 if (GET_CODE (base
) != REG
|| off
< 0)
13951 if (REGNO (base
) != STACK_POINTER_REGNUM
13952 && REGNO (base
) != HARD_FRAME_POINTER_REGNUM
)
13955 remove_insn (insn
);
13961 /* On z10 and later the dynamic branch prediction must see the
13962 backward jump within a certain windows. If not it falls back to
13963 the static prediction. This function rearranges the loop backward
13964 branch in a way which makes the static prediction always correct.
13965 The function returns true if it added an instruction. */
/* (Extraction-damaged listing; code kept byte-identical.)  The transform:
   invert the conditional branch so that it falls through backwards, and
   add an unconditional jump to the old target via a new local label.  */
13967 s390_fix_long_loop_prediction (rtx_insn
*insn
)
13969 rtx set
= single_set (insn
);
13970 rtx code_label
, label_ref
;
13971 rtx_insn
*uncond_jump
;
13972 rtx_insn
*cur_insn
;
13976 /* This will exclude branch on count and branch on index patterns
13977 since these are correctly statically predicted. */
13979 || SET_DEST (set
) != pc_rtx
13980 || GET_CODE (SET_SRC(set
)) != IF_THEN_ELSE
)
13983 /* Skip conditional returns. */
13984 if (ANY_RETURN_P (XEXP (SET_SRC (set
), 1))
13985 && XEXP (SET_SRC (set
), 2) == pc_rtx
)
13988 label_ref
= (GET_CODE (XEXP (SET_SRC (set
), 1)) == LABEL_REF
?
13989 XEXP (SET_SRC (set
), 1) : XEXP (SET_SRC (set
), 2));
13991 gcc_assert (GET_CODE (label_ref
) == LABEL_REF
);
13993 code_label
= XEXP (label_ref
, 0);
/* Only act on backward branches farther away than PREDICT_DISTANCE.  */
13995 if (INSN_ADDRESSES (INSN_UID (code_label
)) == -1
13996 || INSN_ADDRESSES (INSN_UID (insn
)) == -1
13997 || (INSN_ADDRESSES (INSN_UID (insn
))
13998 - INSN_ADDRESSES (INSN_UID (code_label
)) < PREDICT_DISTANCE
))
/* Make sure there is room for the new jump within the window; bail out
   (presumably) when hitting another jump or a label.  */
14001 for (distance
= 0, cur_insn
= PREV_INSN (insn
);
14002 distance
< PREDICT_DISTANCE
- 6;
14003 distance
+= get_attr_length (cur_insn
), cur_insn
= PREV_INSN (cur_insn
))
14004 if (!cur_insn
|| JUMP_P (cur_insn
) || LABEL_P (cur_insn
))
14007 rtx_code_label
*new_label
= gen_label_rtx ();
14008 uncond_jump
= emit_jump_insn_after (
14009 gen_rtx_SET (pc_rtx
,
14010 gen_rtx_LABEL_REF (VOIDmode
, code_label
)),
14012 emit_label_after (new_label
, uncond_jump
);
/* Invert the condition by swapping the IF_THEN_ELSE arms.  */
14014 tmp
= XEXP (SET_SRC (set
), 1);
14015 XEXP (SET_SRC (set
), 1) = XEXP (SET_SRC (set
), 2);
14016 XEXP (SET_SRC (set
), 2) = tmp
;
14017 INSN_CODE (insn
) = -1;
14019 XEXP (label_ref
, 0) = new_label
;
14020 JUMP_LABEL (insn
) = new_label
;
14021 JUMP_LABEL (uncond_jump
) = code_label
;
14026 /* Returns 1 if INSN reads the value of REG for purposes not related
14027 to addressing of memory, and 0 otherwise. */
14029 s390_non_addr_reg_read_p (rtx reg
, rtx_insn
*insn
)
/* Referenced anywhere in the pattern, but not solely inside a MEM
   address.  */
14031 return reg_referenced_p (reg
, PATTERN (insn
))
14032 && !reg_used_in_mem_p (REGNO (reg
), PATTERN (insn
));
14035 /* Starting from INSN find_cond_jump looks downwards in the insn
14036 stream for a single jump insn which is the last user of the
14037 condition code set in INSN. */
/* (Extraction-damaged listing; the early-return bodies of the guards
   below are missing; code kept byte-identical.)  */
14039 find_cond_jump (rtx_insn
*insn
)
14041 for (; insn
; insn
= NEXT_INSN (insn
))
/* Stop at labels; skip non-jumps unless they mention the CC reg.  */
14045 if (LABEL_P (insn
))
14048 if (!JUMP_P (insn
))
14050 if (reg_mentioned_p (gen_rtx_REG (CCmode
, CC_REGNUM
), insn
))
14055 /* This will be triggered by a return. */
14056 if (GET_CODE (PATTERN (insn
)) != SET
)
14059 gcc_assert (SET_DEST (PATTERN (insn
)) == pc_rtx
);
14060 ite
= SET_SRC (PATTERN (insn
));
14062 if (GET_CODE (ite
) != IF_THEN_ELSE
)
/* The jump must test the CC register, and the CC must die here
   (REG_DEAD) for this to be its last user.  */
14065 cc
= XEXP (XEXP (ite
, 0), 0);
14066 if (!REG_P (cc
) || !CC_REGNO_P (REGNO (cc
)))
14069 if (find_reg_note (insn
, REG_DEAD
, cc
))
14077 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14078 the semantics does not change. If NULL_RTX is passed as COND the
14079 function tries to find the conditional jump starting with INSN. */
14081 s390_swap_cmp (rtx cond
, rtx
*op0
, rtx
*op1
, rtx_insn
*insn
)
/* Locate the condition from the consuming jump if not supplied.  */
14085 if (cond
== NULL_RTX
)
14087 rtx_insn
*jump
= find_cond_jump (NEXT_INSN (insn
));
14088 rtx set
= jump
? single_set (jump
) : NULL_RTX
;
14090 if (set
== NULL_RTX
)
14093 cond
= XEXP (SET_SRC (set
), 0);
/* Replace the comparison code with its operand-swapped equivalent;
   the actual *op0/*op1 swap lines are missing from this extraction.  */
14098 PUT_CODE (cond
, swap_condition (GET_CODE (cond
)));
14101 /* On z10, instructions of the compare-and-branch family have the
14102 property to access the register occurring as second operand with
14103 its bits complemented. If such a compare is grouped with a second
14104 instruction that accesses the same register non-complemented, and
14105 if that register's value is delivered via a bypass, then the
14106 pipeline recycles, thereby causing significant performance decline.
14107 This function locates such situations and exchanges the two
14108 operands of the compare. The function return true whenever it
14111 s390_z10_optimize_cmp (rtx_insn
*insn
)
14113 rtx_insn
*prev_insn
, *next_insn
;
14114 bool insn_added_p
= false;
14115 rtx cond
, *op0
, *op1
;
/* Case 1: compare-and-branch (PARALLEL) — pick the operands out of the
   IF_THEN_ELSE condition.  */
14117 if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
14119 /* Handle compare and branch and branch on count
14121 rtx pattern
= single_set (insn
);
14124 || SET_DEST (pattern
) != pc_rtx
14125 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
)
14128 cond
= XEXP (SET_SRC (pattern
), 0);
14129 op0
= &XEXP (cond
, 0);
14130 op1
= &XEXP (cond
, 1);
/* Case 2: a plain COMPARE setting the CC register.  */
14132 else if (GET_CODE (PATTERN (insn
)) == SET
)
14136 /* Handle normal compare instructions. */
14137 src
= SET_SRC (PATTERN (insn
));
14138 dest
= SET_DEST (PATTERN (insn
));
14141 || !CC_REGNO_P (REGNO (dest
))
14142 || GET_CODE (src
) != COMPARE
)
14145 /* s390_swap_cmp will try to find the conditional
14146 jump when passing NULL_RTX as condition. */
14148 op0
= &XEXP (src
, 0);
14149 op1
= &XEXP (src
, 1);
/* Only integer register-register compares are affected.  */
14154 if (!REG_P (*op0
) || !REG_P (*op1
))
14157 if (GET_MODE_CLASS (GET_MODE (*op0
)) != MODE_INT
)
14160 /* Swap the COMPARE arguments and its mask if there is a
14161 conflicting access in the previous insn. */
14162 prev_insn
= prev_active_insn (insn
);
14163 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14164 && reg_referenced_p (*op1
, PATTERN (prev_insn
)))
14165 s390_swap_cmp (cond
, op0
, op1
, insn
);
14167 /* Check if there is a conflict with the next insn. If there
14168 was no conflict with the previous insn, then swap the
14169 COMPARE arguments and its mask. If we already swapped
14170 the operands, or if swapping them would cause a conflict
14171 with the previous insn, issue a NOP after the COMPARE in
14172 order to separate the two instuctions. */
14173 next_insn
= next_active_insn (insn
);
14174 if (next_insn
!= NULL_RTX
&& INSN_P (next_insn
)
14175 && s390_non_addr_reg_read_p (*op1
, next_insn
))
14177 if (prev_insn
!= NULL_RTX
&& INSN_P (prev_insn
)
14178 && s390_non_addr_reg_read_p (*op0
, prev_insn
))
/* gen_nop1 vs. gen_nop: a different NOP form when op1 is r0 —
   presumably because the plain NOP pattern uses r0; confirm in the
   machine description.  */
14180 if (REGNO (*op1
) == 0)
14181 emit_insn_after (gen_nop1 (), insn
);
14183 emit_insn_after (gen_nop (), insn
);
14184 insn_added_p
= true;
14187 s390_swap_cmp (cond
, op0
, op1
, insn
);
14189 return insn_added_p
;
14192 /* Number of INSNs to be scanned backward in the last BB of the loop
14193 and forward in the first BB of the loop. This usually should be a
14194 bit more than the number of INSNs which could go into one
14196 #define S390_OSC_SCAN_INSN_NUM 5
14198 /* Scan LOOP for static OSC collisions and return true if a osc_break
14199 should be issued for this loop. */
/* OSC = operand-store-compare hazard: a store in the latch followed by a
   load of the same address in the header.  (Extraction-damaged listing;
   code kept byte-identical.)  */
14201 s390_adjust_loop_scan_osc (struct loop
* loop
)
14204 HARD_REG_SET modregs
, newregs
;
14205 rtx_insn
*insn
, *store_insn
= NULL
;
14207 struct s390_address addr_store
, addr_load
;
14208 subrtx_iterator::array_type array
;
14211 CLEAR_HARD_REG_SET (modregs
);
/* Phase 1: walk the latch backwards looking for the store; record all
   hard regs modified on the way.  */
14214 FOR_BB_INSNS_REVERSE (loop
->latch
, insn
)
14216 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14220 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14223 find_all_hard_reg_sets (insn
, &newregs
, true);
14224 IOR_HARD_REG_SET (modregs
, newregs
);
14226 set
= single_set (insn
);
14230 if (MEM_P (SET_DEST (set
))
14231 && s390_decompose_address (XEXP (SET_DEST (set
), 0), &addr_store
))
14238 if (store_insn
== NULL_RTX
)
/* Phase 2: walk the header forwards looking for a load of the same
   decomposed address (base/index/disp all equal).  */
14242 FOR_BB_INSNS (loop
->header
, insn
)
14244 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14247 if (insn
== store_insn
)
14251 if (insn_count
> S390_OSC_SCAN_INSN_NUM
)
14254 find_all_hard_reg_sets (insn
, &newregs
, true);
14255 IOR_HARD_REG_SET (modregs
, newregs
);
14257 set
= single_set (insn
);
14261 /* An intermediate store disrupts static OSC checking
14263 if (MEM_P (SET_DEST (set
))
14264 && s390_decompose_address (XEXP (SET_DEST (set
), 0), NULL
))
14267 FOR_EACH_SUBRTX (iter
, array
, SET_SRC (set
), NONCONST
)
14269 && s390_decompose_address (XEXP (*iter
, 0), &addr_load
)
14270 && rtx_equal_p (addr_load
.base
, addr_store
.base
)
14271 && rtx_equal_p (addr_load
.indx
, addr_store
.indx
)
14272 && rtx_equal_p (addr_load
.disp
, addr_store
.disp
))
/* Address registers modified in between invalidate the match.  */
14274 if ((addr_load
.base
!= NULL_RTX
14275 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.base
)))
14276 || (addr_load
.indx
!= NULL_RTX
14277 && TEST_HARD_REG_BIT (modregs
, REGNO (addr_load
.indx
))))
14284 /* Look for adjustments which can be done on simple innermost
/* Runs the OSC scan on every innermost loop and inserts an osc_break
   insn before the latch-ending jump where a hazard was found.  */
14287 s390_adjust_loops ()
14289 struct loop
*loop
= NULL
;
14292 compute_bb_for_insn ();
14294 /* Find the loops. */
14295 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
14297 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
14301 flow_loop_dump (loop
, dump_file
, NULL
, 0);
14302 fprintf (dump_file
, ";; OSC loop scan Loop: ");
14304 if (loop
->latch
== NULL
14305 || pc_set (BB_END (loop
->latch
)) == NULL_RTX
14306 || !s390_adjust_loop_scan_osc (loop
))
14310 if (loop
->latch
== NULL
)
/* NOTE(review): "muliple" is a typo for "multiple" in this runtime
   dump string; left untouched here (a string change is a behavior
   change, out of scope for a comment-only edit).  */
14311 fprintf (dump_file
, " muliple backward jumps\n");
14314 fprintf (dump_file
, " header insn: %d latch insn: %d ",
14315 INSN_UID (BB_HEAD (loop
->header
)),
14316 INSN_UID (BB_END (loop
->latch
)));
14317 if (pc_set (BB_END (loop
->latch
)) == NULL_RTX
)
14318 fprintf (dump_file
, " loop does not end with jump\n");
14320 fprintf (dump_file
, " not instrumented\n");
/* Hazard found: break the OSC chain with an osc_break insn.  */
14326 rtx_insn
*new_insn
;
14329 fprintf (dump_file
, " adding OSC break insn: ");
14330 new_insn
= emit_insn_before (gen_osc_break (),
14331 BB_END (loop
->latch
));
14332 INSN_ADDRESSES_NEW (new_insn
, -1);
14336 loop_optimizer_finalize ();
14338 df_finish_pass (false);
14341 /* Perform machine-dependent processing. */
/* NOTE(review): the function-name line is missing from this extraction —
   presumably the TARGET_MACHINE_DEPENDENT_REORG hook (s390_reorg);
   confirm against the full source.  Code kept byte-identical.  */
14346 bool pool_overflow
= false;
14347 int hw_before
, hw_after
;
/* z13: loop adjustments (OSC breaks) first.  */
14349 if (s390_tune
== PROCESSOR_2964_Z13
)
14350 s390_adjust_loops ();
14352 /* Make sure all splits have been performed; splits after
14353 machine_dependent_reorg might confuse insn length counts. */
14354 split_all_insns_noflow ();
14356 /* Install the main literal pool and the associated base
14357 register load insns.
14359 In addition, there are two problematic situations we need
14362 - the literal pool might be > 4096 bytes in size, so that
14363 some of its elements cannot be directly accessed
14365 - a branch target might be > 64K away from the branch, so that
14366 it is not possible to use a PC-relative instruction.
14368 To fix those, we split the single literal pool into multiple
14369 pool chunks, reloading the pool base register at various
14370 points throughout the function to ensure it always points to
14371 the pool chunk the following code expects, and / or replace
14372 PC-relative branches by absolute branches.
14374 However, the two problems are interdependent: splitting the
14375 literal pool can move a branch further away from its target,
14376 causing the 64K limit to overflow, and on the other hand,
14377 replacing a PC-relative branch by an absolute branch means
14378 we need to put the branch target address into the literal
14379 pool, possibly causing it to overflow.
14381 So, we loop trying to fix up both problems until we manage
14382 to satisfy both conditions at the same time. Note that the
14383 loop is guaranteed to terminate as every pass of the loop
14384 strictly decreases the total number of PC-relative branches
14385 in the function. (This is not completely true as there
14386 might be branch-over-pool insns introduced by chunkify_start.
14387 Those never need to be split however.) */
14391 struct constant_pool
*pool
= NULL
;
14393 /* Collect the literal pool. */
14394 if (!pool_overflow
)
14396 pool
= s390_mainpool_start ();
14398 pool_overflow
= true;
14401 /* If literal pool overflowed, start to chunkify it. */
14403 pool
= s390_chunkify_start ();
14405 /* Split out-of-range branches. If this has created new
14406 literal pool entries, cancel current chunk list and
14407 recompute it. zSeries machines have large branch
14408 instructions, so we never need to split a branch. */
14409 if (!TARGET_CPU_ZARCH
&& s390_split_branches ())
14412 s390_chunkify_cancel (pool
);
14414 s390_mainpool_cancel (pool
);
14419 /* If we made it up to here, both conditions are satisfied.
14420 Finish up literal pool related changes. */
14422 s390_chunkify_finish (pool
);
14424 s390_mainpool_finish (pool
);
14426 /* We're done splitting branches. */
14427 cfun
->machine
->split_branches_pending_p
= false;
14431 /* Generate out-of-pool execute target insns. */
14432 if (TARGET_CPU_ZARCH
)
14434 rtx_insn
*insn
, *target
;
14437 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14439 label
= s390_execute_label (insn
);
14443 gcc_assert (label
!= const0_rtx
);
14445 target
= emit_label (XEXP (label
, 0));
14446 INSN_ADDRESSES_NEW (target
, -1);
14448 target
= emit_insn (s390_execute_target (insn
));
14449 INSN_ADDRESSES_NEW (target
, -1);
14453 /* Try to optimize prologue and epilogue further. */
14454 s390_optimize_prologue ();
14456 /* Walk over the insns and do some >=z10 specific changes. */
14457 if (s390_tune
>= PROCESSOR_2097_Z10
)
14460 bool insn_added_p
= false;
14462 /* The insn lengths and addresses have to be up to date for the
14463 following manipulations. */
14464 shorten_branches (get_insns ());
14466 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14468 if (!INSN_P (insn
) || INSN_CODE (insn
) <= 0)
14472 insn_added_p
|= s390_fix_long_loop_prediction (insn
);
/* The compare-operand swap is a z10-only workaround.  */
14474 if ((GET_CODE (PATTERN (insn
)) == PARALLEL
14475 || GET_CODE (PATTERN (insn
)) == SET
)
14476 && s390_tune
== PROCESSOR_2097_Z10
)
14477 insn_added_p
|= s390_z10_optimize_cmp (insn
);
14480 /* Adjust branches if we added new instructions. */
14482 shorten_branches (get_insns ());
14485 s390_function_num_hotpatch_hw (current_function_decl
, &hw_before
, &hw_after
);
14490 /* Insert NOPs for hotpatching. */
14491 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14493 1. inside the area covered by debug information to allow setting
14494 breakpoints at the NOPs,
14495 2. before any insn which results in an asm instruction,
14496 3. before in-function labels to avoid jumping to the NOPs, for
14497 example as part of a loop,
14498 4. before any barrier in case the function is completely empty
14499 (__builtin_unreachable ()) and has neither internal labels nor
14502 if (active_insn_p (insn
) || BARRIER_P (insn
) || LABEL_P (insn
))
14504 /* Output a series of NOPs before the first active insn. */
/* Emit the largest NOP that still fits the remaining halfword budget.  */
14505 while (insn
&& hw_after
> 0)
14507 if (hw_after
>= 3 && TARGET_CPU_ZARCH
)
14509 emit_insn_before (gen_nop_6_byte (), insn
);
14512 else if (hw_after
>= 2)
14514 emit_insn_before (gen_nop_4_byte (), insn
);
14519 emit_insn_before (gen_nop_2_byte (), insn
);
14526 /* Return true if INSN is a fp load insn writing register REGNO. */
14528 s390_fpload_toreg (rtx_insn
*insn
, unsigned int regno
)
/* Must carry the floadsf/floaddf insn attribute...  */
14531 enum attr_type flag
= s390_safe_attr_type (insn
);
14533 if (flag
!= TYPE_FLOADSF
&& flag
!= TYPE_FLOADDF
)
/* ...and be a single reg = MEM set targeting REGNO.  */
14536 set
= single_set (insn
);
14538 if (set
== NULL_RTX
)
14541 if (!REG_P (SET_DEST (set
)) || !MEM_P (SET_SRC (set
)))
14544 if (REGNO (SET_DEST (set
)) != regno
)
14550 /* This value describes the distance to be avoided between an
14551 arithmetic fp instruction and an fp load writing the same register.
14552 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14553 fine but the exact value has to be avoided. Otherwise the FP
14554 pipeline will throw an exception causing a major penalty. */
14555 #define Z10_EARLYLOAD_DISTANCE 7
14557 /* Rearrange the ready list in order to avoid the situation described
14558 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14559 moved to the very end of the ready list. */
/* (Extraction-damaged listing; code kept byte-identical.)  */
14561 s390_z10_prevent_earlyload_conflicts (rtx_insn
**ready
, int *nready_p
)
14563 unsigned int regno
;
14564 int nready
= *nready_p
;
14569 enum attr_type flag
;
14572 /* Skip DISTANCE - 1 active insns. */
14573 for (insn
= last_scheduled_insn
, distance
= Z10_EARLYLOAD_DISTANCE
- 1;
14574 distance
> 0 && insn
!= NULL_RTX
;
14575 distance
--, insn
= prev_active_insn (insn
))
/* Calls/jumps break the pipeline pattern — nothing to do.  */
14576 if (CALL_P (insn
) || JUMP_P (insn
))
14579 if (insn
== NULL_RTX
)
/* The insn at the critical distance must be an FP-reg write by an
   fp load for the hazard to exist.  */
14582 set
= single_set (insn
);
14584 if (set
== NULL_RTX
|| !REG_P (SET_DEST (set
))
14585 || GET_MODE_CLASS (GET_MODE (SET_DEST (set
))) != MODE_FLOAT
)
14588 flag
= s390_safe_attr_type (insn
);
14590 if (flag
== TYPE_FLOADSF
|| flag
== TYPE_FLOADDF
)
14593 regno
= REGNO (SET_DEST (set
));
/* Find the conflicting fp load in the ready list and rotate it to
   the front (index 0 = issued last).  */
14596 while (!s390_fpload_toreg (ready
[i
], regno
) && i
> 0)
14603 memmove (&ready
[1], &ready
[0], sizeof (rtx_insn
*) * i
);
14608 /* The s390_sched_state variable tracks the state of the current or
14609 the last instruction group.
14611 0,1,2 number of instructions scheduled in the current group
14612 3 the last group is complete - normal insns
14613 4 the last group was a cracked/expanded insn */
14615 static int s390_sched_state
;
/* Symbolic names for the two "group finished" states above.  */
14617 #define S390_SCHED_STATE_NORMAL 3
14618 #define S390_SCHED_STATE_CRACKED 4
/* Bit masks for the per-insn OOO scheduling attributes returned by
   s390_get_sched_attrmask.  */
14620 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14621 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14622 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14623 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
/* Collect INSN's cracked/expanded/endgroup/groupalone attributes into a
   bit mask, dispatching on s390_tune (switch header line missing from
   this extraction).  Unknown tunings hit gcc_unreachable.  */
14625 static unsigned int
14626 s390_get_sched_attrmask (rtx_insn
*insn
)
14628 unsigned int mask
= 0;
14632 case PROCESSOR_2827_ZEC12
:
14633 if (get_attr_zEC12_cracked (insn
))
14634 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14635 if (get_attr_zEC12_expanded (insn
))
14636 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14637 if (get_attr_zEC12_endgroup (insn
))
14638 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14639 if (get_attr_zEC12_groupalone (insn
))
14640 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
/* z13 and z14 share the z13 attribute set.  */
14642 case PROCESSOR_2964_Z13
:
14643 case PROCESSOR_3906_Z14
:
14644 if (get_attr_z13_cracked (insn
))
14645 mask
|= S390_SCHED_ATTR_MASK_CRACKED
;
14646 if (get_attr_z13_expanded (insn
))
14647 mask
|= S390_SCHED_ATTR_MASK_EXPANDED
;
14648 if (get_attr_z13_endgroup (insn
))
14649 mask
|= S390_SCHED_ATTR_MASK_ENDGROUP
;
14650 if (get_attr_z13_groupalone (insn
))
14651 mask
|= S390_SCHED_ATTR_MASK_GROUPALONE
;
14654 gcc_unreachable ();
/* Return a bit mask of the execution units (LSU/FXU/VFU on z13/z14)
   INSN can run on, storing the unit count in *UNITS.  The mask-setting
   statement bodies are missing from this extraction.  */
14659 static unsigned int
14660 s390_get_unit_mask (rtx_insn
*insn
, int *units
)
14662 unsigned int mask
= 0;
14666 case PROCESSOR_2964_Z13
:
14667 case PROCESSOR_3906_Z14
:
14669 if (get_attr_z13_unit_lsu (insn
))
14671 if (get_attr_z13_unit_fxu (insn
))
14673 if (get_attr_z13_unit_vfu (insn
))
14677 gcc_unreachable ();
14682 /* Return the scheduling score for INSN. The higher the score the
14683 better. The score is calculated from the OOO scheduling attributes
14684 of INSN and the scheduling state s390_sched_state. */
/* (Extraction-damaged listing: the score variable declaration, the case
   labels 0/1/2 and the actual score increments are missing; code kept
   byte-identical.)  */
14686 s390_sched_score (rtx_insn
*insn
)
14688 unsigned int mask
= s390_get_sched_attrmask (insn
)
;
14691 switch (s390_sched_state
)
/* State 0 (presumably): first slot of a new group.  */
14694 /* Try to put insns into the first slot which would otherwise
14696 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14697 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
14699 if ((mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
/* Middle slot: keep the group intact.  */
14703 /* Prefer not cracked insns while trying to put together a
14705 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14706 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14707 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14709 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) == 0)
/* Last slot: endgroup insns fit naturally here.  */
14713 /* Prefer not cracked insns while trying to put together a
14715 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14716 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0
14717 && (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) == 0)
14719 /* Prefer endgroup insns in the last slot. */
14720 if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0)
14723 case S390_SCHED_STATE_NORMAL
:
14724 /* Prefer not cracked insns if the last was not cracked. */
14725 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) == 0
14726 && (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) == 0)
14728 if ((mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
14731 case S390_SCHED_STATE_CRACKED
:
14732 /* Try to keep cracked insns together to prevent them from
14733 interrupting groups. */
14734 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14735 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
/* z13+: additionally reward a good execution-unit mix.  */
14740 if (s390_tune
>= PROCESSOR_2964_Z13
)
14743 unsigned unit_mask
, m
= 1;
14745 unit_mask
= s390_get_unit_mask (insn
, &units
);
14746 gcc_assert (units
<= MAX_SCHED_UNITS
);
14748 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14749 ago the last insn of this unit type got scheduled. This is
14750 supposed to help providing a proper instruction mix to the
14752 for (i
= 0; i
< units
; i
++, m
<<= 1)
14754 score
+= (last_scheduled_unit_distance
[i
] * MAX_SCHED_MIX_SCORE
/
14755 MAX_SCHED_MIX_DISTANCE
);
14760 /* This function is called via hook TARGET_SCHED_REORDER before
14761 issuing one insn from list READY which contains *NREADYP entries.
14762 For target z10 it reorders load instructions to avoid early load
14763 conflicts in the floating point pipeline */
/* (Extraction-damaged listing; code kept byte-identical.)  Returns the
   issue rate.  For zEC12+ it moves the highest-scoring insn to the top
   of the ready list and, under high verbosity, dumps per-insn OOO
   attributes.  */
14765 s390_sched_reorder (FILE *file
, int verbose
,
14766 rtx_insn
**ready
, int *nreadyp
, int clock ATTRIBUTE_UNUSED
)
/* z10-only early-load workaround.  */
14768 if (s390_tune
== PROCESSOR_2097_Z10
14769 && reload_completed
14771 s390_z10_prevent_earlyload_conflicts (ready
, nreadyp
);
14773 if (s390_tune
>= PROCESSOR_2827_ZEC12
14774 && reload_completed
14778 int last_index
= *nreadyp
- 1;
14779 int max_index
= -1;
14780 int max_score
= -1;
14783 /* Just move the insn with the highest score to the top (the
14784 end) of the list. A full sort is not needed since a conflict
14785 in the hazard recognition cannot happen. So the top insn in
14786 the ready list will always be taken. */
14787 for (i
= last_index
; i
>= 0; i
--)
14791 if (recog_memoized (ready
[i
]) < 0)
14794 score
= s390_sched_score (ready
[i
]);
14795 if (score
> max_score
)
14802 if (max_index
!= -1)
14804 if (max_index
!= last_index
)
/* Swap best insn into the last (= first issued) slot.  */
14806 tmp
= ready
[max_index
];
14807 ready
[max_index
] = ready
[last_index
];
14808 ready
[last_index
] = tmp
;
14812 ";;\t\tBACKEND: move insn %d to the top of list\n",
14813 INSN_UID (ready
[last_index
]));
14815 else if (verbose
> 5)
14817 ";;\t\tBACKEND: best insn %d already on top\n",
14818 INSN_UID (ready
[last_index
]));
/* Verbose dump of the whole ready list with scheduling attributes.  */
14823 fprintf (file
, "ready list ooo attributes - sched state: %d\n",
14826 for (i
= last_index
; i
>= 0; i
--)
14828 unsigned int sched_mask
;
14829 rtx_insn
*insn
= ready
[i
];
14831 if (recog_memoized (insn
) < 0)
14834 sched_mask
= s390_get_sched_attrmask (insn
);
14835 fprintf (file
, ";;\t\tBACKEND: insn %d score: %d: ",
14837 s390_sched_score (insn
));
14838 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14839 ((M) & sched_mask) ? #ATTR : "");
14840 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
14841 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
14842 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
14843 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
14844 #undef PRINT_SCHED_ATTR
14845 if (s390_tune
>= PROCESSOR_2964_Z13
)
14847 unsigned int unit_mask
, m
= 1;
14850 unit_mask
= s390_get_unit_mask (insn
, &units
);
14851 fprintf (file
, "(units:");
14852 for (j
= 0; j
< units
; j
++, m
<<= 1)
14854 fprintf (file
, " u%d", j
);
14855 fprintf (file
, ")");
14857 fprintf (file
, "\n");
14862 return s390_issue_rate ();
14866 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14867 the scheduler has issued INSN. It stores the last issued insn into
14868 last_scheduled_insn in order to make it available for
14869 s390_sched_reorder. */
14871 s390_sched_variable_issue (FILE *file
, int verbose
, rtx_insn
*insn
, int more
)
14873 last_scheduled_insn
= insn
;
14875 if (s390_tune
>= PROCESSOR_2827_ZEC12
14876 && reload_completed
14877 && recog_memoized (insn
) >= 0)
14879 unsigned int mask
= s390_get_sched_attrmask (insn
);
14881 if ((mask
& S390_SCHED_ATTR_MASK_CRACKED
) != 0
14882 || (mask
& S390_SCHED_ATTR_MASK_EXPANDED
) != 0)
14883 s390_sched_state
= S390_SCHED_STATE_CRACKED
;
14884 else if ((mask
& S390_SCHED_ATTR_MASK_ENDGROUP
) != 0
14885 || (mask
& S390_SCHED_ATTR_MASK_GROUPALONE
) != 0)
14886 s390_sched_state
= S390_SCHED_STATE_NORMAL
;
14889 /* Only normal insns are left (mask == 0). */
14890 switch (s390_sched_state
)
14895 case S390_SCHED_STATE_NORMAL
:
14896 if (s390_sched_state
== S390_SCHED_STATE_NORMAL
)
14897 s390_sched_state
= 1;
14899 s390_sched_state
++;
14902 case S390_SCHED_STATE_CRACKED
:
14903 s390_sched_state
= S390_SCHED_STATE_NORMAL
;
14908 if (s390_tune
>= PROCESSOR_2964_Z13
)
14911 unsigned unit_mask
, m
= 1;
14913 unit_mask
= s390_get_unit_mask (insn
, &units
);
14914 gcc_assert (units
<= MAX_SCHED_UNITS
);
14916 for (i
= 0; i
< units
; i
++, m
<<= 1)
14918 last_scheduled_unit_distance
[i
] = 0;
14919 else if (last_scheduled_unit_distance
[i
] < MAX_SCHED_MIX_DISTANCE
)
14920 last_scheduled_unit_distance
[i
]++;
14925 unsigned int sched_mask
;
14927 sched_mask
= s390_get_sched_attrmask (insn
);
14929 fprintf (file
, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn
));
14930 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14931 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED
, cracked
);
14932 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED
, expanded
);
14933 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP
, endgroup
);
14934 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE
, groupalone
);
14935 #undef PRINT_SCHED_ATTR
14937 if (s390_tune
>= PROCESSOR_2964_Z13
)
14939 unsigned int unit_mask
, m
= 1;
14942 unit_mask
= s390_get_unit_mask (insn
, &units
);
14943 fprintf (file
, "(units:");
14944 for (j
= 0; j
< units
; j
++, m
<<= 1)
14946 fprintf (file
, " %d", j
);
14947 fprintf (file
, ")");
14949 fprintf (file
, " sched state: %d\n", s390_sched_state
);
14951 if (s390_tune
>= PROCESSOR_2964_Z13
)
14955 s390_get_unit_mask (insn
, &units
);
14957 fprintf (file
, ";;\t\tBACKEND: units unused for: ");
14958 for (j
= 0; j
< units
; j
++)
14959 fprintf (file
, "%d:%d ", j
, last_scheduled_unit_distance
[j
]);
14960 fprintf (file
, "\n");
14965 if (GET_CODE (PATTERN (insn
)) != USE
14966 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
14973 s390_sched_init (FILE *file ATTRIBUTE_UNUSED
,
14974 int verbose ATTRIBUTE_UNUSED
,
14975 int max_ready ATTRIBUTE_UNUSED
)
14977 last_scheduled_insn
= NULL
;
14978 memset (last_scheduled_unit_distance
, 0, MAX_SCHED_UNITS
* sizeof (int));
14979 s390_sched_state
= 0;
14982 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14983 a new number struct loop *loop should be unrolled if tuned for cpus with
14984 a built-in stride prefetcher.
14985 The loop is analyzed for memory accesses by calling check_dpu for
14986 each rtx of the loop. Depending on the loop_depth and the amount of
14987 memory accesses a new number <=nunroll is returned to improve the
14988 behavior of the hardware prefetch unit. */
14990 s390_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
14995 unsigned mem_count
= 0;
14997 if (s390_tune
< PROCESSOR_2097_Z10
)
15000 /* Count the number of memory references within the loop body. */
15001 bbs
= get_loop_body (loop
);
15002 subrtx_iterator::array_type array
;
15003 for (i
= 0; i
< loop
->num_nodes
; i
++)
15004 FOR_BB_INSNS (bbs
[i
], insn
)
15005 if (INSN_P (insn
) && INSN_CODE (insn
) != -1)
15006 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
15011 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15012 if (mem_count
== 0)
15015 switch (loop_depth(loop
))
15018 return MIN (nunroll
, 28 / mem_count
);
15020 return MIN (nunroll
, 22 / mem_count
);
15022 return MIN (nunroll
, 16 / mem_count
);
15026 /* Restore the current options. This is a hook function and also called
15030 s390_function_specific_restore (struct gcc_options
*opts
,
15031 struct cl_target_option
*ptr ATTRIBUTE_UNUSED
)
15033 opts
->x_s390_cost_pointer
= (long)processor_table
[opts
->x_s390_tune
].cost
;
15037 s390_option_override_internal (bool main_args_p
,
15038 struct gcc_options
*opts
,
15039 const struct gcc_options
*opts_set
)
15041 const char *prefix
;
15042 const char *suffix
;
15044 /* Set up prefix/suffix so the error messages refer to either the command
15045 line argument, or the attribute(target). */
15053 prefix
= "option(\"";
15058 /* Architecture mode defaults according to ABI. */
15059 if (!(opts_set
->x_target_flags
& MASK_ZARCH
))
15062 opts
->x_target_flags
|= MASK_ZARCH
;
15064 opts
->x_target_flags
&= ~MASK_ZARCH
;
15067 /* Set the march default in case it hasn't been specified on cmdline. */
15068 if (!opts_set
->x_s390_arch
)
15069 opts
->x_s390_arch
= PROCESSOR_2064_Z900
;
15070 else if (opts
->x_s390_arch
== PROCESSOR_9672_G5
15071 || opts
->x_s390_arch
== PROCESSOR_9672_G6
)
15072 warning (OPT_Wdeprecated
, "%sarch=%s%s is deprecated and will be removed "
15073 "in future releases; use at least %sarch=z900%s",
15074 prefix
, opts
->x_s390_arch
== PROCESSOR_9672_G5
? "g5" : "g6",
15075 suffix
, prefix
, suffix
);
15077 opts
->x_s390_arch_flags
= processor_flags_table
[(int) opts
->x_s390_arch
];
15079 /* Determine processor to tune for. */
15080 if (!opts_set
->x_s390_tune
)
15081 opts
->x_s390_tune
= opts
->x_s390_arch
;
15082 else if (opts
->x_s390_tune
== PROCESSOR_9672_G5
15083 || opts
->x_s390_tune
== PROCESSOR_9672_G6
)
15084 warning (OPT_Wdeprecated
, "%stune=%s%s is deprecated and will be removed "
15085 "in future releases; use at least %stune=z900%s",
15086 prefix
, opts
->x_s390_tune
== PROCESSOR_9672_G5
? "g5" : "g6",
15087 suffix
, prefix
, suffix
);
15089 opts
->x_s390_tune_flags
= processor_flags_table
[opts
->x_s390_tune
];
15091 /* Sanity checks. */
15092 if (opts
->x_s390_arch
== PROCESSOR_NATIVE
15093 || opts
->x_s390_tune
== PROCESSOR_NATIVE
)
15094 gcc_unreachable ();
15095 if (TARGET_ZARCH_P (opts
->x_target_flags
) && !TARGET_CPU_ZARCH_P (opts
))
15096 error ("z/Architecture mode not supported on %s",
15097 processor_table
[(int)opts
->x_s390_arch
].name
);
15098 if (TARGET_64BIT
&& !TARGET_ZARCH_P (opts
->x_target_flags
))
15099 error ("64-bit ABI not supported in ESA/390 mode");
15101 /* Enable hardware transactions if available and not explicitly
15102 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15103 if (!TARGET_OPT_HTM_P (opts_set
->x_target_flags
))
15105 if (TARGET_CPU_HTM_P (opts
) && TARGET_ZARCH_P (opts
->x_target_flags
))
15106 opts
->x_target_flags
|= MASK_OPT_HTM
;
15108 opts
->x_target_flags
&= ~MASK_OPT_HTM
;
15111 if (TARGET_OPT_VX_P (opts_set
->x_target_flags
))
15113 if (TARGET_OPT_VX_P (opts
->x_target_flags
))
15115 if (!TARGET_CPU_VX_P (opts
))
15116 error ("hardware vector support not available on %s",
15117 processor_table
[(int)opts
->x_s390_arch
].name
);
15118 if (TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15119 error ("hardware vector support not available with -msoft-float");
15124 if (TARGET_CPU_VX_P (opts
))
15125 /* Enable vector support if available and not explicitly disabled
15126 by user. E.g. with -m31 -march=z13 -mzarch */
15127 opts
->x_target_flags
|= MASK_OPT_VX
;
15129 opts
->x_target_flags
&= ~MASK_OPT_VX
;
15132 /* Use hardware DFP if available and not explicitly disabled by
15133 user. E.g. with -m31 -march=z10 -mzarch */
15134 if (!TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15136 if (TARGET_DFP_P (opts
))
15137 opts
->x_target_flags
|= MASK_HARD_DFP
;
15139 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15142 if (TARGET_HARD_DFP_P (opts
->x_target_flags
) && !TARGET_DFP_P (opts
))
15144 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
))
15146 if (!TARGET_CPU_DFP_P (opts
))
15147 error ("hardware decimal floating point instructions"
15148 " not available on %s",
15149 processor_table
[(int)opts
->x_s390_arch
].name
);
15150 if (!TARGET_ZARCH_P (opts
->x_target_flags
))
15151 error ("hardware decimal floating point instructions"
15152 " not available in ESA/390 mode");
15155 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15158 if (TARGET_SOFT_FLOAT_P (opts_set
->x_target_flags
)
15159 && TARGET_SOFT_FLOAT_P (opts
->x_target_flags
))
15161 if (TARGET_HARD_DFP_P (opts_set
->x_target_flags
)
15162 && TARGET_HARD_DFP_P (opts
->x_target_flags
))
15163 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
15165 opts
->x_target_flags
&= ~MASK_HARD_DFP
;
15168 if (TARGET_BACKCHAIN_P (opts
->x_target_flags
)
15169 && TARGET_PACKED_STACK_P (opts
->x_target_flags
)
15170 && TARGET_HARD_FLOAT_P (opts
->x_target_flags
))
15171 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
15174 if (opts
->x_s390_stack_size
)
15176 if (opts
->x_s390_stack_guard
>= opts
->x_s390_stack_size
)
15177 error ("stack size must be greater than the stack guard value");
15178 else if (opts
->x_s390_stack_size
> 1 << 16)
15179 error ("stack size must not be greater than 64k");
15181 else if (opts
->x_s390_stack_guard
)
15182 error ("-mstack-guard implies use of -mstack-size");
15184 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15185 if (!TARGET_LONG_DOUBLE_128_P (opts_set
->x_target_flags
))
15186 opts
->x_target_flags
|= MASK_LONG_DOUBLE_128
;
15189 if (opts
->x_s390_tune
>= PROCESSOR_2097_Z10
)
15191 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS
, 100,
15192 opts
->x_param_values
,
15193 opts_set
->x_param_values
);
15194 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES
, 32,
15195 opts
->x_param_values
,
15196 opts_set
->x_param_values
);
15197 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS
, 2000,
15198 opts
->x_param_values
,
15199 opts_set
->x_param_values
);
15200 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES
, 64,
15201 opts
->x_param_values
,
15202 opts_set
->x_param_values
);
15205 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH
, 256,
15206 opts
->x_param_values
,
15207 opts_set
->x_param_values
);
15208 /* values for loop prefetching */
15209 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
, 256,
15210 opts
->x_param_values
,
15211 opts_set
->x_param_values
);
15212 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, 128,
15213 opts
->x_param_values
,
15214 opts_set
->x_param_values
);
15215 /* s390 has more than 2 levels and the size is much larger. Since
15216 we are always running virtualized assume that we only get a small
15217 part of the caches above l1. */
15218 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, 1500,
15219 opts
->x_param_values
,
15220 opts_set
->x_param_values
);
15221 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO
, 2,
15222 opts
->x_param_values
,
15223 opts_set
->x_param_values
);
15224 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
, 6,
15225 opts
->x_param_values
,
15226 opts_set
->x_param_values
);
15228 /* Use the alternative scheduling-pressure algorithm by default. */
15229 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
15230 opts
->x_param_values
,
15231 opts_set
->x_param_values
);
15233 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND
, 2,
15234 opts
->x_param_values
,
15235 opts_set
->x_param_values
);
15237 /* Call target specific restore function to do post-init work. At the moment,
15238 this just sets opts->x_s390_cost_pointer. */
15239 s390_function_specific_restore (opts
, NULL
);
15243 s390_option_override (void)
15246 cl_deferred_option
*opt
;
15247 vec
<cl_deferred_option
> *v
=
15248 (vec
<cl_deferred_option
> *) s390_deferred_options
;
15251 FOR_EACH_VEC_ELT (*v
, i
, opt
)
15253 switch (opt
->opt_index
)
15255 case OPT_mhotpatch_
:
15262 strncpy (s
, opt
->arg
, 256);
15264 t
= strchr (s
, ',');
15269 val1
= integral_argument (s
);
15270 val2
= integral_argument (t
);
15277 if (val1
== -1 || val2
== -1)
15279 /* argument is not a plain number */
15280 error ("arguments to %qs should be non-negative integers",
15284 else if (val1
> s390_hotpatch_hw_max
15285 || val2
> s390_hotpatch_hw_max
)
15287 error ("argument to %qs is too large (max. %d)",
15288 "-mhotpatch=n,m", s390_hotpatch_hw_max
);
15291 s390_hotpatch_hw_before_label
= val1
;
15292 s390_hotpatch_hw_after_label
= val2
;
15296 gcc_unreachable ();
15300 /* Set up function hooks. */
15301 init_machine_status
= s390_init_machine_status
;
15303 s390_option_override_internal (true, &global_options
, &global_options_set
);
15305 /* Save the initial options in case the user does function specific
15307 target_option_default_node
= build_target_option_node (&global_options
);
15308 target_option_current_node
= target_option_default_node
;
15310 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15311 requires the arch flags to be evaluated already. Since prefetching
15312 is beneficial on s390, we enable it if available. */
15313 if (flag_prefetch_loop_arrays
< 0 && HAVE_prefetch
&& optimize
>= 3)
15314 flag_prefetch_loop_arrays
= 1;
15316 if (!s390_pic_data_is_text_relative
&& !flag_pic
)
15317 error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15321 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15322 debuggers do not yet support DWARF 3/4. */
15323 if (!global_options_set
.x_dwarf_strict
)
15325 if (!global_options_set
.x_dwarf_version
)
15329 /* Register a target-specific optimization-and-lowering pass
15330 to run immediately before prologue and epilogue generation.
15332 Registering the pass must be done at start up. It's
15333 convenient to do it here. */
15334 opt_pass
*new_pass
= new pass_s390_early_mach (g
);
15335 struct register_pass_info insert_pass_s390_early_mach
=
15337 new_pass
, /* pass */
15338 "pro_and_epilogue", /* reference_pass_name */
15339 1, /* ref_pass_instance_number */
15340 PASS_POS_INSERT_BEFORE
/* po_op */
15342 register_pass (&insert_pass_s390_early_mach
);
15345 #if S390_USE_TARGET_ATTRIBUTE
15346 /* Inner function to process the attribute((target(...))), take an argument and
15347 set the current options from the argument. If we have a list, recursively go
15351 s390_valid_target_attribute_inner_p (tree args
,
15352 struct gcc_options
*opts
,
15353 struct gcc_options
*new_opts_set
,
15359 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15360 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15361 static const struct
15363 const char *string
;
15367 int only_as_pragma
;
15370 S390_ATTRIB ("arch=", OPT_march_
, 1),
15371 S390_ATTRIB ("tune=", OPT_mtune_
, 1),
15372 /* uinteger options */
15373 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_
, 1),
15374 S390_ATTRIB ("stack-size=", OPT_mstack_size_
, 1),
15375 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_
, 1),
15376 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_
, 1),
15378 S390_ATTRIB ("backchain", OPT_mbackchain
, 0),
15379 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp
, 0),
15380 S390_ATTRIB ("hard-float", OPT_mhard_float
, 0),
15381 S390_ATTRIB ("htm", OPT_mhtm
, 0),
15382 S390_ATTRIB ("vx", OPT_mvx
, 0),
15383 S390_ATTRIB ("packed-stack", OPT_mpacked_stack
, 0),
15384 S390_ATTRIB ("small-exec", OPT_msmall_exec
, 0),
15385 S390_ATTRIB ("soft-float", OPT_msoft_float
, 0),
15386 S390_ATTRIB ("mvcle", OPT_mmvcle
, 0),
15387 S390_PRAGMA ("zvector", OPT_mzvector
, 0),
15388 /* boolean options */
15389 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack
, 0),
15394 /* If this is a list, recurse to get the options. */
15395 if (TREE_CODE (args
) == TREE_LIST
)
15398 int num_pragma_values
;
15401 /* Note: attribs.c:decl_attributes prepends the values from
15402 current_target_pragma to the list of target attributes. To determine
15403 whether we're looking at a value of the attribute or the pragma we
15404 assume that the first [list_length (current_target_pragma)] values in
15405 the list are the values from the pragma. */
15406 num_pragma_values
= (!force_pragma
&& current_target_pragma
!= NULL
)
15407 ? list_length (current_target_pragma
) : 0;
15408 for (i
= 0; args
; args
= TREE_CHAIN (args
), i
++)
15412 is_pragma
= (force_pragma
|| i
< num_pragma_values
);
15413 if (TREE_VALUE (args
)
15414 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args
),
15415 opts
, new_opts_set
,
15424 else if (TREE_CODE (args
) != STRING_CST
)
15426 error ("attribute %<target%> argument not a string");
15430 /* Handle multiple arguments separated by commas. */
15431 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
15433 while (next_optstr
&& *next_optstr
!= '\0')
15435 char *p
= next_optstr
;
15437 char *comma
= strchr (next_optstr
, ',');
15438 size_t len
, opt_len
;
15444 enum cl_var_type var_type
;
15450 len
= comma
- next_optstr
;
15451 next_optstr
= comma
+ 1;
15456 next_optstr
= NULL
;
15459 /* Recognize no-xxx. */
15460 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
15469 /* Find the option. */
15472 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
15474 opt_len
= attrs
[i
].len
;
15475 if (ch
== attrs
[i
].string
[0]
15476 && ((attrs
[i
].has_arg
) ? len
> opt_len
: len
== opt_len
)
15477 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
15479 opt
= attrs
[i
].opt
;
15480 if (!opt_set_p
&& cl_options
[opt
].cl_reject_negative
)
15482 mask
= cl_options
[opt
].var_value
;
15483 var_type
= cl_options
[opt
].var_type
;
15489 /* Process the option. */
15492 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15495 else if (attrs
[i
].only_as_pragma
&& !force_pragma
)
15497 /* Value is not allowed for the target attribute. */
15498 error ("value %qs is not supported by attribute %<target%>",
15503 else if (var_type
== CLVC_BIT_SET
|| var_type
== CLVC_BIT_CLEAR
)
15505 if (var_type
== CLVC_BIT_CLEAR
)
15506 opt_set_p
= !opt_set_p
;
15509 opts
->x_target_flags
|= mask
;
15511 opts
->x_target_flags
&= ~mask
;
15512 new_opts_set
->x_target_flags
|= mask
;
15515 else if (cl_options
[opt
].var_type
== CLVC_BOOLEAN
)
15519 if (cl_options
[opt
].cl_uinteger
)
15521 /* Unsigned integer argument. Code based on the function
15522 decode_cmdline_option () in opts-common.c. */
15523 value
= integral_argument (p
+ opt_len
);
15526 value
= (opt_set_p
) ? 1 : 0;
15530 struct cl_decoded_option decoded
;
15532 /* Value range check; only implemented for numeric and boolean
15533 options at the moment. */
15534 generate_option (opt
, NULL
, value
, CL_TARGET
, &decoded
);
15535 s390_handle_option (opts
, new_opts_set
, &decoded
, input_location
);
15536 set_option (opts
, new_opts_set
, opt
, value
,
15537 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15542 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15547 else if (cl_options
[opt
].var_type
== CLVC_ENUM
)
15552 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
15554 set_option (opts
, new_opts_set
, opt
, value
,
15555 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
15559 error ("attribute(target(\"%s\")) is unknown", orig_p
);
15565 gcc_unreachable ();
15570 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15573 s390_valid_target_attribute_tree (tree args
,
15574 struct gcc_options
*opts
,
15575 const struct gcc_options
*opts_set
,
15578 tree t
= NULL_TREE
;
15579 struct gcc_options new_opts_set
;
15581 memset (&new_opts_set
, 0, sizeof (new_opts_set
));
15583 /* Process each of the options on the chain. */
15584 if (! s390_valid_target_attribute_inner_p (args
, opts
, &new_opts_set
,
15586 return error_mark_node
;
15588 /* If some option was set (even if it has not changed), rerun
15589 s390_option_override_internal, and then save the options away. */
15590 if (new_opts_set
.x_target_flags
15591 || new_opts_set
.x_s390_arch
15592 || new_opts_set
.x_s390_tune
15593 || new_opts_set
.x_s390_stack_guard
15594 || new_opts_set
.x_s390_stack_size
15595 || new_opts_set
.x_s390_branch_cost
15596 || new_opts_set
.x_s390_warn_framesize
15597 || new_opts_set
.x_s390_warn_dynamicstack_p
)
15599 const unsigned char *src
= (const unsigned char *)opts_set
;
15600 unsigned char *dest
= (unsigned char *)&new_opts_set
;
15603 /* Merge the original option flags into the new ones. */
15604 for (i
= 0; i
< sizeof(*opts_set
); i
++)
15607 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15608 s390_option_override_internal (false, opts
, &new_opts_set
);
15609 /* Save the current options unless we are validating options for
15611 t
= build_target_option_node (opts
);
15616 /* Hook to validate attribute((target("string"))). */
15619 s390_valid_target_attribute_p (tree fndecl
,
15620 tree
ARG_UNUSED (name
),
15622 int ARG_UNUSED (flags
))
15624 struct gcc_options func_options
;
15625 tree new_target
, new_optimize
;
15628 /* attribute((target("default"))) does nothing, beyond
15629 affecting multi-versioning. */
15630 if (TREE_VALUE (args
)
15631 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
15632 && TREE_CHAIN (args
) == NULL_TREE
15633 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
15636 tree old_optimize
= build_optimization_node (&global_options
);
15638 /* Get the optimization options of the current function. */
15639 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
15641 if (!func_optimize
)
15642 func_optimize
= old_optimize
;
15644 /* Init func_options. */
15645 memset (&func_options
, 0, sizeof (func_options
));
15646 init_options_struct (&func_options
, NULL
);
15647 lang_hooks
.init_options_struct (&func_options
);
15649 cl_optimization_restore (&func_options
, TREE_OPTIMIZATION (func_optimize
));
15651 /* Initialize func_options to the default before its target options can
15653 cl_target_option_restore (&func_options
,
15654 TREE_TARGET_OPTION (target_option_default_node
));
15656 new_target
= s390_valid_target_attribute_tree (args
, &func_options
,
15657 &global_options_set
,
15659 current_target_pragma
));
15660 new_optimize
= build_optimization_node (&func_options
);
15661 if (new_target
== error_mark_node
)
15663 else if (fndecl
&& new_target
)
15665 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
15666 if (old_optimize
!= new_optimize
)
15667 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
15672 /* Hook to determine if one function can safely inline another. */
15675 s390_can_inline_p (tree caller
, tree callee
)
15677 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
15678 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
15681 callee_tree
= target_option_default_node
;
15683 caller_tree
= target_option_default_node
;
15684 if (callee_tree
== caller_tree
)
15687 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
15688 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
15691 if ((caller_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
))
15692 != (callee_opts
->x_target_flags
& ~(MASK_SOFT_FLOAT
| MASK_HARD_DFP
)))
15695 /* Don't inline functions to be compiled for a more recent arch into a
15696 function for an older arch. */
15697 else if (caller_opts
->x_s390_arch
< callee_opts
->x_s390_arch
)
15700 /* Inlining a hard float function into a soft float function is only
15701 allowed if the hard float function doesn't actually make use of
15704 We are called from FEs for multi-versioning call optimization, so
15705 beware of ipa_fn_summaries not available. */
15706 else if (((TARGET_SOFT_FLOAT_P (caller_opts
->x_target_flags
)
15707 && !TARGET_SOFT_FLOAT_P (callee_opts
->x_target_flags
))
15708 || (!TARGET_HARD_DFP_P (caller_opts
->x_target_flags
)
15709 && TARGET_HARD_DFP_P (callee_opts
->x_target_flags
)))
15710 && (! ipa_fn_summaries
15711 || ipa_fn_summaries
->get
15712 (cgraph_node::get (callee
))->fp_expressions
))
15718 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15722 s390_activate_target_options (tree new_tree
)
15724 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
15725 if (TREE_TARGET_GLOBALS (new_tree
))
15726 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
15727 else if (new_tree
== target_option_default_node
)
15728 restore_target_globals (&default_target_globals
);
15730 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
15731 s390_previous_fndecl
= NULL_TREE
;
15734 /* Establish appropriate back-end context for processing the function
15735 FNDECL. The argument might be NULL to indicate processing at top
15736 level, outside of any function scope. */
15738 s390_set_current_function (tree fndecl
)
15740 /* Only change the context if the function changes. This hook is called
15741 several times in the course of compiling a function, and we don't want to
15742 slow things down too much or call target_reinit when it isn't safe. */
15743 if (fndecl
== s390_previous_fndecl
)
15747 if (s390_previous_fndecl
== NULL_TREE
)
15748 old_tree
= target_option_current_node
;
15749 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
))
15750 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl
);
15752 old_tree
= target_option_default_node
;
15754 if (fndecl
== NULL_TREE
)
15756 if (old_tree
!= target_option_current_node
)
15757 s390_activate_target_options (target_option_current_node
);
15761 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
15762 if (new_tree
== NULL_TREE
)
15763 new_tree
= target_option_default_node
;
15765 if (old_tree
!= new_tree
)
15766 s390_activate_target_options (new_tree
);
15767 s390_previous_fndecl
= fndecl
;
15771 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15774 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
15775 unsigned int align ATTRIBUTE_UNUSED
,
15776 enum by_pieces_operation op ATTRIBUTE_UNUSED
,
15777 bool speed_p ATTRIBUTE_UNUSED
)
15779 return (size
== 1 || size
== 2
15780 || size
== 4 || (TARGET_ZARCH
&& size
== 8));
15783 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15786 s390_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
15788 tree sfpc
= s390_builtin_decls
[S390_BUILTIN_s390_sfpc
];
15789 tree efpc
= s390_builtin_decls
[S390_BUILTIN_s390_efpc
];
15790 tree call_efpc
= build_call_expr (efpc
, 0);
15791 tree fenv_var
= create_tmp_var_raw (unsigned_type_node
);
15793 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15794 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15795 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15796 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15797 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15798 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15800 /* Generates the equivalent of feholdexcept (&fenv_var)
15802 fenv_var = __builtin_s390_efpc ();
15803 __builtin_s390_sfpc (fenv_var & mask) */
15804 tree old_fpc
= build2 (MODIFY_EXPR
, unsigned_type_node
, fenv_var
, call_efpc
);
15806 build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
,
15807 build_int_cst (unsigned_type_node
,
15808 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
|
15809 FPC_EXCEPTION_MASK
)));
15810 tree set_new_fpc
= build_call_expr (sfpc
, 1, new_fpc
);
15811 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, old_fpc
, set_new_fpc
);
15813 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15815 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15816 new_fpc
= build2 (BIT_AND_EXPR
, unsigned_type_node
, call_efpc
,
15817 build_int_cst (unsigned_type_node
,
15818 ~(FPC_DXC_MASK
| FPC_FLAGS_MASK
)));
15819 *clear
= build_call_expr (sfpc
, 1, new_fpc
);
15821 /* Generates the equivalent of feupdateenv (fenv_var)
15823 old_fpc = __builtin_s390_efpc ();
15824 __builtin_s390_sfpc (fenv_var);
15825 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15827 old_fpc
= create_tmp_var_raw (unsigned_type_node
);
15828 tree store_old_fpc
= build2 (MODIFY_EXPR
, void_type_node
,
15829 old_fpc
, call_efpc
);
15831 set_new_fpc
= build_call_expr (sfpc
, 1, fenv_var
);
15833 tree raise_old_except
= build2 (BIT_AND_EXPR
, unsigned_type_node
, old_fpc
,
15834 build_int_cst (unsigned_type_node
,
15836 raise_old_except
= build2 (RSHIFT_EXPR
, unsigned_type_node
, raise_old_except
,
15837 build_int_cst (unsigned_type_node
,
15839 tree atomic_feraiseexcept
15840 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
15841 raise_old_except
= build_call_expr (atomic_feraiseexcept
,
15842 1, raise_old_except
);
15844 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
15845 build2 (COMPOUND_EXPR
, void_type_node
,
15846 store_old_fpc
, set_new_fpc
),
15849 #undef FPC_EXCEPTION_MASK
15850 #undef FPC_FLAGS_MASK
15851 #undef FPC_DXC_MASK
15852 #undef FPC_EXCEPTION_MASK_SHIFT
15853 #undef FPC_FLAGS_SHIFT
15854 #undef FPC_DXC_SHIFT
15857 /* Return the vector mode to be used for inner mode MODE when doing
15859 static machine_mode
15860 s390_preferred_simd_mode (scalar_mode mode
)
15888 /* Our hardware does not require vectors to be strictly aligned. */
15890 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED
,
15891 const_tree type ATTRIBUTE_UNUSED
,
15892 int misalignment ATTRIBUTE_UNUSED
,
15893 bool is_packed ATTRIBUTE_UNUSED
)
15898 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
15902 /* The vector ABI requires vector types to be aligned on an 8 byte
15903 boundary (our stack alignment). However, we allow this to be
15904 overriden by the user, while this definitely breaks the ABI. */
15905 static HOST_WIDE_INT
15906 s390_vector_alignment (const_tree type
)
15908 if (!TARGET_VX_ABI
)
15909 return default_vector_alignment (type
);
15911 if (TYPE_USER_ALIGN (type
))
15912 return TYPE_ALIGN (type
);
15914 return MIN (64, tree_to_shwi (TYPE_SIZE (type
)));
15917 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
15918 LARL instruction. */
15920 static HOST_WIDE_INT
15921 s390_constant_alignment (const_tree
, HOST_WIDE_INT align
)
15923 return MAX (align
, 16);
15926 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15927 /* Implement TARGET_ASM_FILE_START. */
15929 s390_asm_file_start (void)
15931 default_file_start ();
15932 s390_asm_output_machine_for_arch (asm_out_file
);
15936 /* Implement TARGET_ASM_FILE_END. */
15938 s390_asm_file_end (void)
15940 #ifdef HAVE_AS_GNU_ATTRIBUTE
15941 varpool_node
*vnode
;
15942 cgraph_node
*cnode
;
15944 FOR_EACH_VARIABLE (vnode
)
15945 if (TREE_PUBLIC (vnode
->decl
))
15946 s390_check_type_for_vector_abi (TREE_TYPE (vnode
->decl
), false, false);
15948 FOR_EACH_FUNCTION (cnode
)
15949 if (TREE_PUBLIC (cnode
->decl
))
15950 s390_check_type_for_vector_abi (TREE_TYPE (cnode
->decl
), false, false);
15953 if (s390_vector_abi
!= 0)
15954 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
15957 file_end_indicate_exec_stack ();
15959 if (flag_split_stack
)
15960 file_end_indicate_split_stack ();
15963 /* Return true if TYPE is a vector bool type. */
15965 s390_vector_bool_type_p (const_tree type
)
15967 return TYPE_VECTOR_OPAQUE (type
);
15970 /* Return the diagnostic message string if the binary operation OP is
15971 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15973 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
15975 bool bool1_p
, bool2_p
;
15979 machine_mode mode1
, mode2
;
15981 if (!TARGET_ZVECTOR
)
15984 if (!VECTOR_TYPE_P (type1
) || !VECTOR_TYPE_P (type2
))
15987 bool1_p
= s390_vector_bool_type_p (type1
);
15988 bool2_p
= s390_vector_bool_type_p (type2
);
15990 /* Mixing signed and unsigned types is forbidden for all
15992 if (!bool1_p
&& !bool2_p
15993 && TYPE_UNSIGNED (type1
) != TYPE_UNSIGNED (type2
))
15994 return N_("types differ in signedness");
15996 plusminus_p
= (op
== PLUS_EXPR
|| op
== MINUS_EXPR
);
15997 muldiv_p
= (op
== MULT_EXPR
|| op
== RDIV_EXPR
|| op
== TRUNC_DIV_EXPR
15998 || op
== CEIL_DIV_EXPR
|| op
== FLOOR_DIV_EXPR
15999 || op
== ROUND_DIV_EXPR
);
16000 compare_p
= (op
== LT_EXPR
|| op
== LE_EXPR
|| op
== GT_EXPR
|| op
== GE_EXPR
16001 || op
== EQ_EXPR
|| op
== NE_EXPR
);
16003 if (bool1_p
&& bool2_p
&& (plusminus_p
|| muldiv_p
))
16004 return N_("binary operator does not support two vector bool operands");
16006 if (bool1_p
!= bool2_p
&& (muldiv_p
|| compare_p
))
16007 return N_("binary operator does not support vector bool operand");
16009 mode1
= TYPE_MODE (type1
);
16010 mode2
= TYPE_MODE (type2
);
16012 if (bool1_p
!= bool2_p
&& plusminus_p
16013 && (GET_MODE_CLASS (mode1
) == MODE_VECTOR_FLOAT
16014 || GET_MODE_CLASS (mode2
) == MODE_VECTOR_FLOAT
))
16015 return N_("binary operator does not support mixing vector "
16016 "bool with floating point vector operands");
16021 /* Implement TARGET_C_EXCESS_PRECISION.
16023 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16024 double on s390, causing operations on float_t to operate in a higher
16025 precision than is necessary. However, it is not the case that SFmode
16026 operations have implicit excess precision, and we generate more optimal
16027 code if we let the compiler know no implicit extra precision is added.
16029 That means when we are compiling with -fexcess-precision=fast, the value
16030 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16031 float_t (though they would be correct for -fexcess-precision=standard).
16033 A complete fix would modify glibc to remove the unnecessary typedef
16034 of float_t to double. */
16036 static enum flt_eval_method
16037 s390_excess_precision (enum excess_precision_type type
)
16041 case EXCESS_PRECISION_TYPE_IMPLICIT
:
16042 case EXCESS_PRECISION_TYPE_FAST
:
16043 /* The fastest type to promote to will always be the native type,
16044 whether that occurs with implicit excess precision or
16046 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
16047 case EXCESS_PRECISION_TYPE_STANDARD
:
16048 /* Otherwise, when we are in a standards compliant mode, to
16049 ensure consistency with the implementation in glibc, report that
16050 float is evaluated to the range and precision of double. */
16051 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE
;
16053 gcc_unreachable ();
16055 return FLT_EVAL_METHOD_UNPREDICTABLE
;
16058 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16060 static unsigned HOST_WIDE_INT
16061 s390_asan_shadow_offset (void)
16063 return TARGET_64BIT
? HOST_WIDE_INT_1U
<< 52 : HOST_WIDE_INT_UC (0x20000000);
/* Initialize GCC target structure.  Each pair below redirects one
   target hook in TARGET_INITIALIZER to the s390 implementation.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421; -fno-trampoline descriptors are 2 byte aligned.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

/* NOTE(review): "s300" below looks like a typo for "s390", but it must
   match the actual name of the hook function defined earlier in this
   file -- verify before renaming.  */
#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
/* The actual target hook vector: TARGET_INITIALIZER expands to an
   initializer built from the TARGET_* macros defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"