gcc/config/s390/s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "print-tree.h"
46 #include "stor-layout.h"
47 #include "varasm.h"
48 #include "calls.h"
49 #include "conditions.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "except.h"
54 #include "dojump.h"
55 #include "explow.h"
56 #include "stmt.h"
57 #include "expr.h"
58 #include "reload.h"
59 #include "cfgrtl.h"
60 #include "cfganal.h"
61 #include "lcm.h"
62 #include "cfgbuild.h"
63 #include "cfgcleanup.h"
64 #include "debug.h"
65 #include "langhooks.h"
66 #include "internal-fn.h"
67 #include "gimple-fold.h"
68 #include "tree-eh.h"
69 #include "gimplify.h"
70 #include "params.h"
71 #include "opts.h"
72 #include "tree-pass.h"
73 #include "context.h"
74 #include "builtins.h"
75 #include "rtl-iter.h"
76 #include "intl.h"
78 /* This file should be included last. */
79 #include "target-def.h"
81 /* Define the specific costs for a given cpu. */
83 struct processor_costs
85 /* multiplication */
86 const int m; /* cost of an M instruction. */
87 const int mghi; /* cost of an MGHI instruction. */
88 const int mh; /* cost of an MH instruction. */
89 const int mhi; /* cost of an MHI instruction. */
90 const int ml; /* cost of an ML instruction. */
91 const int mr; /* cost of an MR instruction. */
92 const int ms; /* cost of an MS instruction. */
93 const int msg; /* cost of an MSG instruction. */
94 const int msgf; /* cost of an MSGF instruction. */
95 const int msgfr; /* cost of an MSGFR instruction. */
96 const int msgr; /* cost of an MSGR instruction. */
97 const int msr; /* cost of an MSR instruction. */
98 const int mult_df; /* cost of multiplication in DFmode. */
99 const int mxbr; /* cost of an MXBR instruction. */
100 /* square root */
101 const int sqxbr; /* cost of square root in TFmode. */
102 const int sqdbr; /* cost of square root in DFmode. */
103 const int sqebr; /* cost of square root in SFmode. */
104 /* multiply and add */
105 const int madbr; /* cost of multiply and add in DFmode. */
106 const int maebr; /* cost of multiply and add in SFmode. */
107 /* division */
108 const int dxbr; /* cost of a DXBR instruction. */
109 const int ddbr; /* cost of a DDBR instruction. */
110 const int debr; /* cost of a DEBR instruction. */
111 const int dlgr; /* cost of a DLGR instruction. */
112 const int dlr; /* cost of a DLR instruction. */
113 const int dr; /* cost of a DR instruction. */
114 const int dsgfr; /* cost of a DSGFR instruction. */
115 const int dsgr; /* cost of a DSGR instruction. */
118 const struct processor_costs *s390_cost;
120 static const
121 struct processor_costs z900_cost =
123 COSTS_N_INSNS (5), /* M */
124 COSTS_N_INSNS (10), /* MGHI */
125 COSTS_N_INSNS (5), /* MH */
126 COSTS_N_INSNS (4), /* MHI */
127 COSTS_N_INSNS (5), /* ML */
128 COSTS_N_INSNS (5), /* MR */
129 COSTS_N_INSNS (4), /* MS */
130 COSTS_N_INSNS (15), /* MSG */
131 COSTS_N_INSNS (7), /* MSGF */
132 COSTS_N_INSNS (7), /* MSGFR */
133 COSTS_N_INSNS (10), /* MSGR */
134 COSTS_N_INSNS (4), /* MSR */
135 COSTS_N_INSNS (7), /* multiplication in DFmode */
136 COSTS_N_INSNS (13), /* MXBR */
137 COSTS_N_INSNS (136), /* SQXBR */
138 COSTS_N_INSNS (44), /* SQDBR */
139 COSTS_N_INSNS (35), /* SQEBR */
140 COSTS_N_INSNS (18), /* MADBR */
141 COSTS_N_INSNS (13), /* MAEBR */
142 COSTS_N_INSNS (134), /* DXBR */
143 COSTS_N_INSNS (30), /* DDBR */
144 COSTS_N_INSNS (27), /* DEBR */
145 COSTS_N_INSNS (220), /* DLGR */
146 COSTS_N_INSNS (34), /* DLR */
147 COSTS_N_INSNS (34), /* DR */
148 COSTS_N_INSNS (32), /* DSGFR */
149 COSTS_N_INSNS (32), /* DSGR */
152 static const
153 struct processor_costs z990_cost =
155 COSTS_N_INSNS (4), /* M */
156 COSTS_N_INSNS (2), /* MGHI */
157 COSTS_N_INSNS (2), /* MH */
158 COSTS_N_INSNS (2), /* MHI */
159 COSTS_N_INSNS (4), /* ML */
160 COSTS_N_INSNS (4), /* MR */
161 COSTS_N_INSNS (5), /* MS */
162 COSTS_N_INSNS (6), /* MSG */
163 COSTS_N_INSNS (4), /* MSGF */
164 COSTS_N_INSNS (4), /* MSGFR */
165 COSTS_N_INSNS (4), /* MSGR */
166 COSTS_N_INSNS (4), /* MSR */
167 COSTS_N_INSNS (1), /* multiplication in DFmode */
168 COSTS_N_INSNS (28), /* MXBR */
169 COSTS_N_INSNS (130), /* SQXBR */
170 COSTS_N_INSNS (66), /* SQDBR */
171 COSTS_N_INSNS (38), /* SQEBR */
172 COSTS_N_INSNS (1), /* MADBR */
173 COSTS_N_INSNS (1), /* MAEBR */
174 COSTS_N_INSNS (60), /* DXBR */
175 COSTS_N_INSNS (40), /* DDBR */
176 COSTS_N_INSNS (26), /* DEBR */
177 COSTS_N_INSNS (176), /* DLGR */
178 COSTS_N_INSNS (31), /* DLR */
179 COSTS_N_INSNS (31), /* DR */
180 COSTS_N_INSNS (31), /* DSGFR */
181 COSTS_N_INSNS (31), /* DSGR */
184 static const
185 struct processor_costs z9_109_cost =
187 COSTS_N_INSNS (4), /* M */
188 COSTS_N_INSNS (2), /* MGHI */
189 COSTS_N_INSNS (2), /* MH */
190 COSTS_N_INSNS (2), /* MHI */
191 COSTS_N_INSNS (4), /* ML */
192 COSTS_N_INSNS (4), /* MR */
193 COSTS_N_INSNS (5), /* MS */
194 COSTS_N_INSNS (6), /* MSG */
195 COSTS_N_INSNS (4), /* MSGF */
196 COSTS_N_INSNS (4), /* MSGFR */
197 COSTS_N_INSNS (4), /* MSGR */
198 COSTS_N_INSNS (4), /* MSR */
199 COSTS_N_INSNS (1), /* multiplication in DFmode */
200 COSTS_N_INSNS (28), /* MXBR */
201 COSTS_N_INSNS (130), /* SQXBR */
202 COSTS_N_INSNS (66), /* SQDBR */
203 COSTS_N_INSNS (38), /* SQEBR */
204 COSTS_N_INSNS (1), /* MADBR */
205 COSTS_N_INSNS (1), /* MAEBR */
206 COSTS_N_INSNS (60), /* DXBR */
207 COSTS_N_INSNS (40), /* DDBR */
208 COSTS_N_INSNS (26), /* DEBR */
209 COSTS_N_INSNS (30), /* DLGR */
210 COSTS_N_INSNS (23), /* DLR */
211 COSTS_N_INSNS (23), /* DR */
212 COSTS_N_INSNS (24), /* DSGFR */
213 COSTS_N_INSNS (24), /* DSGR */
216 static const
217 struct processor_costs z10_cost =
219 COSTS_N_INSNS (10), /* M */
220 COSTS_N_INSNS (10), /* MGHI */
221 COSTS_N_INSNS (10), /* MH */
222 COSTS_N_INSNS (10), /* MHI */
223 COSTS_N_INSNS (10), /* ML */
224 COSTS_N_INSNS (10), /* MR */
225 COSTS_N_INSNS (10), /* MS */
226 COSTS_N_INSNS (10), /* MSG */
227 COSTS_N_INSNS (10), /* MSGF */
228 COSTS_N_INSNS (10), /* MSGFR */
229 COSTS_N_INSNS (10), /* MSGR */
230 COSTS_N_INSNS (10), /* MSR */
231 COSTS_N_INSNS (1) , /* multiplication in DFmode */
232 COSTS_N_INSNS (50), /* MXBR */
233 COSTS_N_INSNS (120), /* SQXBR */
234 COSTS_N_INSNS (52), /* SQDBR */
235 COSTS_N_INSNS (38), /* SQEBR */
236 COSTS_N_INSNS (1), /* MADBR */
237 COSTS_N_INSNS (1), /* MAEBR */
238 COSTS_N_INSNS (111), /* DXBR */
239 COSTS_N_INSNS (39), /* DDBR */
240 COSTS_N_INSNS (32), /* DEBR */
241 COSTS_N_INSNS (160), /* DLGR */
242 COSTS_N_INSNS (71), /* DLR */
243 COSTS_N_INSNS (71), /* DR */
244 COSTS_N_INSNS (71), /* DSGFR */
245 COSTS_N_INSNS (71), /* DSGR */
248 static const
249 struct processor_costs z196_cost =
251 COSTS_N_INSNS (7), /* M */
252 COSTS_N_INSNS (5), /* MGHI */
253 COSTS_N_INSNS (5), /* MH */
254 COSTS_N_INSNS (5), /* MHI */
255 COSTS_N_INSNS (7), /* ML */
256 COSTS_N_INSNS (7), /* MR */
257 COSTS_N_INSNS (6), /* MS */
258 COSTS_N_INSNS (8), /* MSG */
259 COSTS_N_INSNS (6), /* MSGF */
260 COSTS_N_INSNS (6), /* MSGFR */
261 COSTS_N_INSNS (8), /* MSGR */
262 COSTS_N_INSNS (6), /* MSR */
263 COSTS_N_INSNS (1) , /* multiplication in DFmode */
264 COSTS_N_INSNS (40), /* MXBR B+40 */
265 COSTS_N_INSNS (100), /* SQXBR B+100 */
266 COSTS_N_INSNS (42), /* SQDBR B+42 */
267 COSTS_N_INSNS (28), /* SQEBR B+28 */
268 COSTS_N_INSNS (1), /* MADBR B */
269 COSTS_N_INSNS (1), /* MAEBR B */
270 COSTS_N_INSNS (101), /* DXBR B+101 */
271 COSTS_N_INSNS (29), /* DDBR */
272 COSTS_N_INSNS (22), /* DEBR */
273 COSTS_N_INSNS (160), /* DLGR cracked */
274 COSTS_N_INSNS (160), /* DLR cracked */
275 COSTS_N_INSNS (160), /* DR expanded */
276 COSTS_N_INSNS (160), /* DSGFR cracked */
277 COSTS_N_INSNS (160), /* DSGR cracked */
280 static const
281 struct processor_costs zEC12_cost =
283 COSTS_N_INSNS (7), /* M */
284 COSTS_N_INSNS (5), /* MGHI */
285 COSTS_N_INSNS (5), /* MH */
286 COSTS_N_INSNS (5), /* MHI */
287 COSTS_N_INSNS (7), /* ML */
288 COSTS_N_INSNS (7), /* MR */
289 COSTS_N_INSNS (6), /* MS */
290 COSTS_N_INSNS (8), /* MSG */
291 COSTS_N_INSNS (6), /* MSGF */
292 COSTS_N_INSNS (6), /* MSGFR */
293 COSTS_N_INSNS (8), /* MSGR */
294 COSTS_N_INSNS (6), /* MSR */
295 COSTS_N_INSNS (1) , /* multiplication in DFmode */
296 COSTS_N_INSNS (40), /* MXBR B+40 */
297 COSTS_N_INSNS (100), /* SQXBR B+100 */
298 COSTS_N_INSNS (42), /* SQDBR B+42 */
299 COSTS_N_INSNS (28), /* SQEBR B+28 */
300 COSTS_N_INSNS (1), /* MADBR B */
301 COSTS_N_INSNS (1), /* MAEBR B */
302 COSTS_N_INSNS (131), /* DXBR B+131 */
303 COSTS_N_INSNS (29), /* DDBR */
304 COSTS_N_INSNS (22), /* DEBR */
305 COSTS_N_INSNS (160), /* DLGR cracked */
306 COSTS_N_INSNS (160), /* DLR cracked */
307 COSTS_N_INSNS (160), /* DR expanded */
308 COSTS_N_INSNS (160), /* DSGFR cracked */
309 COSTS_N_INSNS (160), /* DSGR cracked */
312 extern int reload_completed;
314 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
315 static rtx_insn *last_scheduled_insn;
317 /* Structure used to hold the components of an S/390 memory
318 address. A legitimate address on S/390 is of the general
319 form
320 base + index + displacement
321 where any of the components is optional.
323 base and index are registers of the class ADDR_REGS,
324 displacement is an unsigned 12-bit immediate constant. */
326 struct s390_address
328 rtx base;
329 rtx indx;
330 rtx disp;
331 bool pointer;
332 bool literal_pool;
335 /* The following structure is embedded in the machine
336 specific part of struct function. */
338 struct GTY (()) s390_frame_layout
340 /* Offset within stack frame. */
341 HOST_WIDE_INT gprs_offset;
342 HOST_WIDE_INT f0_offset;
343 HOST_WIDE_INT f4_offset;
344 HOST_WIDE_INT f8_offset;
345 HOST_WIDE_INT backchain_offset;
347 /* Numbers of the first and last GPRs for which slots in the
348 register save area are reserved. */
349 int first_save_gpr_slot;
350 int last_save_gpr_slot;
352 /* Location (FP register number) where GPRs (r0-r15) should
353 be saved to.
354 0 - does not need to be saved at all
355 -1 - stack slot */
356 signed char gpr_save_slots[16];
358 /* Number of first and last gpr to be saved, restored. */
359 int first_save_gpr;
360 int first_restore_gpr;
361 int last_save_gpr;
362 int last_restore_gpr;
364 /* Bits standing for floating point registers. Set, if the
365 respective register has to be saved. Starting with reg 16 (f0)
366 at the rightmost bit.
367 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
368 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
369 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
370 unsigned int fpr_bitmap;
372 /* Number of floating point registers f8-f15 which must be saved. */
373 int high_fprs;
375 /* Set if return address needs to be saved.
376 This flag is set by s390_return_addr_rtx if it could not use
377 the initial value of r14 and therefore depends on r14 being saved
378 to the stack. */
379 bool save_return_addr_p;
381 /* Size of stack frame. */
382 HOST_WIDE_INT frame_size;
385 /* Define the structure for the machine field in struct function. */
387 struct GTY(()) machine_function
389 struct s390_frame_layout frame_layout;
391 /* Literal pool base register. */
392 rtx base_reg;
394 /* True if we may need to perform branch splitting. */
395 bool split_branches_pending_p;
397 bool has_landing_pad_p;
399 /* True if the current function may contain a tbegin clobbering
400 FPRs. */
401 bool tbegin_p;
404 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
406 #define cfun_frame_layout (cfun->machine->frame_layout)
407 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
408 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
409 ? cfun_frame_layout.fpr_bitmap & 0x0f \
410 : cfun_frame_layout.fpr_bitmap & 0x03))
411 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
412 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
413 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
414 (1 << (REGNO - FPR0_REGNUM)))
415 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
416 (1 << (REGNO - FPR0_REGNUM))))
417 #define cfun_gpr_save_slot(REGNO) \
418 cfun->machine->frame_layout.gpr_save_slots[REGNO]
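/* Illustrative use of the accessors above (a sketch, not part of the
   original code).  Marking f0 (hard register FPR0_REGNUM) as needing a
   save sets bit 0 of fpr_bitmap:

     cfun_set_fpr_save (FPR0_REGNUM);
     if (cfun_fpr_save_p (FPR0_REGNUM))
       ... reserve a save slot for f0 in the frame layout ...  */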
420 /* Number of GPRs and FPRs used for argument passing. */
421 #define GP_ARG_NUM_REG 5
422 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
423 #define VEC_ARG_NUM_REG 8
425 /* A couple of shortcuts. */
426 #define CONST_OK_FOR_J(x) \
427 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
428 #define CONST_OK_FOR_K(x) \
429 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
430 #define CONST_OK_FOR_Os(x) \
431 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
432 #define CONST_OK_FOR_Op(x) \
433 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
434 #define CONST_OK_FOR_On(x) \
435 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
437 #define REGNO_PAIR_OK(REGNO, MODE) \
438 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
440 /* That's the read-ahead of the dynamic branch prediction unit in
441 bytes on a z10 (or higher) CPU. */
442 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
445 /* Indicate which ABI has been used for passing vector args.
446 0 - no vector type arguments have been passed where the ABI is relevant
447 1 - the old ABI has been used
448 2 - a vector type argument has been passed either in a vector register
449 or on the stack by value */
450 static int s390_vector_abi = 0;
452 /* Set the vector ABI marker if TYPE is subject to the vector ABI
453 switch. The vector ABI affects only vector data types. There are
454 two aspects of the vector ABI relevant here:
456 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
457 ABI and natural alignment with the old.
459 2. vectors <= 16 bytes are passed in VRs or by value on the stack
460 with the new ABI but by reference on the stack with the old.
462 If ARG_P is true TYPE is used for a function argument or return
463 value. The ABI marker then is set for all vector data types. If
464 ARG_P is false only type 1 vectors are being checked. */
466 static void
467 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
469 static hash_set<const_tree> visited_types_hash;
471 if (s390_vector_abi)
472 return;
474 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
475 return;
477 if (visited_types_hash.contains (type))
478 return;
480 visited_types_hash.add (type);
482 if (VECTOR_TYPE_P (type))
484 int type_size = int_size_in_bytes (type);
486 /* Outside arguments only the alignment is changing and this
487 only happens for vector types >= 16 bytes. */
488 if (!arg_p && type_size < 16)
489 return;
491 /* In arguments, vector types > 16 bytes are passed as before (GCC
492 never enforced the bigger alignment for arguments which was
493 required by the old vector ABI). However, it might still be
494 ABI relevant due to the changed alignment if it is a struct
495 member. */
496 if (arg_p && type_size > 16 && !in_struct_p)
497 return;
499 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
501 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
503 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
504 natural alignment there will never be ABI dependent padding
505 in an array type. That's why we do not set in_struct_p to
506 true here. */
507 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
509 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
511 tree arg_chain;
513 /* Check the return type. */
514 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
516 for (arg_chain = TYPE_ARG_TYPES (type);
517 arg_chain;
518 arg_chain = TREE_CHAIN (arg_chain))
519 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
521 else if (RECORD_OR_UNION_TYPE_P (type))
523 tree field;
525 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
527 if (TREE_CODE (field) != FIELD_DECL)
528 continue;
530 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
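/* Illustration (not from the original source): a 16-byte vector
   argument such as "vector double" reaches the VECTOR_TYPE_P branch
   above and sets s390_vector_abi to 2 when the new vector ABI is in
   effect (TARGET_VX_ABI) and to 1 otherwise.  */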
536 /* System z builtins. */
538 #include "s390-builtins.h"
540 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
542 #undef B_DEF
543 #undef OB_DEF
544 #undef OB_DEF_VAR
545 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
546 #define OB_DEF(...)
547 #define OB_DEF_VAR(...)
548 #include "s390-builtins.def"
552 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
554 #undef B_DEF
555 #undef OB_DEF
556 #undef OB_DEF_VAR
557 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
558 #define OB_DEF(...)
559 #define OB_DEF_VAR(...)
560 #include "s390-builtins.def"
564 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
566 #undef B_DEF
567 #undef OB_DEF
568 #undef OB_DEF_VAR
569 #define B_DEF(...)
570 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
571 #define OB_DEF_VAR(...)
572 #include "s390-builtins.def"
576 const unsigned int
577 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
579 #undef B_DEF
580 #undef OB_DEF
581 #undef OB_DEF_VAR
582 #define B_DEF(...)
583 #define OB_DEF(...)
584 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
585 #include "s390-builtins.def"
589 tree s390_builtin_types[BT_MAX];
590 tree s390_builtin_fn_types[BT_FN_MAX];
591 tree s390_builtin_decls[S390_BUILTIN_MAX +
592 S390_OVERLOADED_BUILTIN_MAX +
593 S390_OVERLOADED_BUILTIN_VAR_MAX];
595 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
596 #undef B_DEF
597 #undef OB_DEF
598 #undef OB_DEF_VAR
599 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
600 #define OB_DEF(...)
601 #define OB_DEF_VAR(...)
603 #include "s390-builtins.def"
604 CODE_FOR_nothing
607 static void
608 s390_init_builtins (void)
610 /* These definitions are being used in s390-builtins.def. */
611 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
612 NULL, NULL);
613 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
614 tree c_uint64_type_node;
615 unsigned int bflags_mask = (BFLAGS_MASK_INIT);
617 bflags_mask |= (TARGET_VX) ? B_VX : 0;
618 bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
620 /* The uint64_type_node from tree.c is not compatible with the C99
621 uint64_t data type. What we want is c_uint64_type_node from
622 c-common.c. But since backend code is not supposed to interface
623 with the frontend we recreate it here. */
624 if (TARGET_64BIT)
625 c_uint64_type_node = long_unsigned_type_node;
626 else
627 c_uint64_type_node = long_long_unsigned_type_node;
629 #undef DEF_TYPE
630 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
631 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
632 s390_builtin_types[INDEX] = (!CONST_P) ? \
633 (NODE) : build_type_variant ((NODE), 1, 0);
635 #undef DEF_POINTER_TYPE
636 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
637 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
638 s390_builtin_types[INDEX] = \
639 build_pointer_type (s390_builtin_types[INDEX_BASE]);
641 #undef DEF_DISTINCT_TYPE
642 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
643 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
644 s390_builtin_types[INDEX] = \
645 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
647 #undef DEF_VECTOR_TYPE
648 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
649 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
650 s390_builtin_types[INDEX] = \
651 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
653 #undef DEF_OPAQUE_VECTOR_TYPE
654 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
655 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
656 s390_builtin_types[INDEX] = \
657 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
659 #undef DEF_FN_TYPE
660 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
661 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
662 s390_builtin_fn_types[INDEX] = \
663 build_function_type_list (args, NULL_TREE);
664 #undef DEF_OV_TYPE
665 #define DEF_OV_TYPE(...)
666 #include "s390-builtin-types.def"
668 #undef B_DEF
669 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
670 if (((BFLAGS) & ~bflags_mask) == 0) \
671 s390_builtin_decls[S390_BUILTIN_##NAME] = \
672 add_builtin_function ("__builtin_" #NAME, \
673 s390_builtin_fn_types[FNTYPE], \
674 S390_BUILTIN_##NAME, \
675 BUILT_IN_MD, \
676 NULL, \
677 ATTRS);
678 #undef OB_DEF
679 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
680 if (((BFLAGS) & ~bflags_mask) == 0) \
681 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
682 add_builtin_function ("__builtin_" #NAME, \
683 s390_builtin_fn_types[FNTYPE], \
684 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
685 BUILT_IN_MD, \
686 NULL, \
688 #undef OB_DEF_VAR
689 #define OB_DEF_VAR(...)
690 #include "s390-builtins.def"
694 /* Return true if ARG is appropriate as argument number ARGNUM of
695 builtin DECL. The operand flags from s390-builtins.def have to
696 be passed as OP_FLAGS. */
697 bool
698 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
700 if (O_UIMM_P (op_flags))
702 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
703 int bitwidth = bitwidths[op_flags - O_U1];
705 if (!tree_fits_uhwi_p (arg)
706 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
708 error("constant argument %d for builtin %qF is out of range (0.."
709 HOST_WIDE_INT_PRINT_UNSIGNED ")",
710 argnum, decl,
711 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
712 return false;
716 if (O_SIMM_P (op_flags))
718 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
719 int bitwidth = bitwidths[op_flags - O_S2];
721 if (!tree_fits_shwi_p (arg)
722 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
723 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
725 error("constant argument %d for builtin %qF is out of range ("
726 HOST_WIDE_INT_PRINT_DEC ".."
727 HOST_WIDE_INT_PRINT_DEC ")",
728 argnum, decl,
729 -((HOST_WIDE_INT)1 << (bitwidth - 1)),
730 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
731 return false;
734 return true;
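/* Example (illustration only): for an operand flagged as a 4-bit
   unsigned immediate the check above accepts the values 0..15 and
   emits the "out of range (0..15)" error for anything else.  */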
737 /* Expand an expression EXP that calls a built-in function,
738 with result going to TARGET if that's convenient
739 (and in mode MODE if that's convenient).
740 SUBTARGET may be used as the target for computing one of EXP's operands.
741 IGNORE is nonzero if the value is to be ignored. */
743 static rtx
744 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
745 machine_mode mode ATTRIBUTE_UNUSED,
746 int ignore ATTRIBUTE_UNUSED)
748 #define MAX_ARGS 5
750 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
751 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
752 enum insn_code icode;
753 rtx op[MAX_ARGS], pat;
754 int arity;
755 bool nonvoid;
756 tree arg;
757 call_expr_arg_iterator iter;
758 unsigned int all_op_flags = opflags_for_builtin (fcode);
759 machine_mode last_vec_mode = VOIDmode;
761 if (TARGET_DEBUG_ARG)
763 fprintf (stderr,
764 "s390_expand_builtin, code = %4d, %s\n",
765 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
768 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
769 && fcode < S390_ALL_BUILTIN_MAX)
771 gcc_unreachable ();
773 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
775 icode = code_for_builtin[fcode];
776 /* Set a flag in the machine specific cfun part in order to support
777 saving/restoring of FPRs. */
778 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
779 cfun->machine->tbegin_p = true;
781 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
783 error ("Unresolved overloaded builtin");
784 return const0_rtx;
786 else
787 internal_error ("bad builtin fcode");
789 if (icode == 0)
790 internal_error ("bad builtin icode");
792 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
794 if (nonvoid)
796 machine_mode tmode = insn_data[icode].operand[0].mode;
797 if (!target
798 || GET_MODE (target) != tmode
799 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
800 target = gen_reg_rtx (tmode);
802 /* There are builtins (e.g. vec_promote) with no vector
803 arguments but an element selector. So we have to also look
804 at the vector return type when emitting the modulo
805 operation. */
806 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
807 last_vec_mode = insn_data[icode].operand[0].mode;
810 arity = 0;
811 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
813 const struct insn_operand_data *insn_op;
814 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
816 all_op_flags = all_op_flags >> O_SHIFT;
818 if (arg == error_mark_node)
819 return NULL_RTX;
820 if (arity >= MAX_ARGS)
821 return NULL_RTX;
823 if (O_IMM_P (op_flags)
824 && TREE_CODE (arg) != INTEGER_CST)
826 error ("constant value required for builtin %qF argument %d",
827 fndecl, arity + 1);
828 return const0_rtx;
831 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
832 return const0_rtx;
834 insn_op = &insn_data[icode].operand[arity + nonvoid];
835 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
837 /* Wrap the expanded RTX for pointer types into a MEM expr with
838 the proper mode. This allows us to use e.g. (match_operand
839 "memory_operand"..) in the insn patterns instead of (mem
840 (match_operand "address_operand")). This is helpful for
841 patterns not just accepting MEMs. */
842 if (POINTER_TYPE_P (TREE_TYPE (arg))
843 && insn_op->predicate != address_operand)
844 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
846 /* Expand the modulo operation required on element selectors. */
847 if (op_flags == O_ELEM)
849 gcc_assert (last_vec_mode != VOIDmode);
850 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
851 op[arity],
852 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
853 NULL_RTX, 1, OPTAB_DIRECT);
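/* E.g. for a V4SI operand the element selector is ANDed with
   GET_MODE_NUNITS (V4SImode) - 1 == 3 (illustration only).  */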
856 /* Record the vector mode used for an element selector. This assumes:
857 1. There is no builtin with two different vector modes and an element selector
858 2. The element selector comes after the vector type it is referring to.
859 This is currently true for all the builtins, but FIXME: we
860 should check for that. */
861 if (VECTOR_MODE_P (insn_op->mode))
862 last_vec_mode = insn_op->mode;
864 if (insn_op->predicate (op[arity], insn_op->mode))
866 arity++;
867 continue;
870 if (MEM_P (op[arity])
871 && insn_op->predicate == memory_operand
872 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
873 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
875 op[arity] = replace_equiv_address (op[arity],
876 copy_to_mode_reg (Pmode,
877 XEXP (op[arity], 0)));
879 else if (GET_MODE (op[arity]) == insn_op->mode
880 || GET_MODE (op[arity]) == VOIDmode
881 || (insn_op->predicate == address_operand
882 && GET_MODE (op[arity]) == Pmode))
884 /* An address_operand usually has VOIDmode in the expander
885 so we cannot use this. */
886 machine_mode target_mode =
887 (insn_op->predicate == address_operand
888 ? Pmode : insn_op->mode);
889 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
892 if (!insn_op->predicate (op[arity], insn_op->mode))
894 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
895 return const0_rtx;
897 arity++;
900 if (last_vec_mode != VOIDmode && !TARGET_VX)
902 error ("Vector type builtin %qF is not supported without -mvx "
903 "(default with -march=z13).",
904 fndecl);
905 return const0_rtx;
908 switch (arity)
910 case 0:
911 pat = GEN_FCN (icode) (target);
912 break;
913 case 1:
914 if (nonvoid)
915 pat = GEN_FCN (icode) (target, op[0]);
916 else
917 pat = GEN_FCN (icode) (op[0]);
918 break;
919 case 2:
920 if (nonvoid)
921 pat = GEN_FCN (icode) (target, op[0], op[1]);
922 else
923 pat = GEN_FCN (icode) (op[0], op[1]);
924 break;
925 case 3:
926 if (nonvoid)
927 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
928 else
929 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
930 break;
931 case 4:
932 if (nonvoid)
933 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
934 else
935 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
936 break;
937 case 5:
938 if (nonvoid)
939 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
940 else
941 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
942 break;
943 case 6:
944 if (nonvoid)
945 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
946 else
947 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
948 break;
949 default:
950 gcc_unreachable ();
952 if (!pat)
953 return NULL_RTX;
954 emit_insn (pat);
956 if (nonvoid)
957 return target;
958 else
959 return const0_rtx;
963 static const int s390_hotpatch_hw_max = 1000000;
964 static int s390_hotpatch_hw_before_label = 0;
965 static int s390_hotpatch_hw_after_label = 0;
967 /* Check whether the hotpatch attribute is applied to a function and,
968 if it has arguments, whether the arguments are valid. */
970 static tree
971 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
972 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
974 tree expr;
975 tree expr2;
976 int err;
978 if (TREE_CODE (*node) != FUNCTION_DECL)
980 warning (OPT_Wattributes, "%qE attribute only applies to functions",
981 name);
982 *no_add_attrs = true;
984 if (args != NULL && TREE_CHAIN (args) != NULL)
986 expr = TREE_VALUE (args);
987 expr2 = TREE_VALUE (TREE_CHAIN (args));
989 if (args == NULL || TREE_CHAIN (args) == NULL)
990 err = 1;
991 else if (TREE_CODE (expr) != INTEGER_CST
992 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
993 || wi::gtu_p (expr, s390_hotpatch_hw_max))
994 err = 1;
995 else if (TREE_CODE (expr2) != INTEGER_CST
996 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
997 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
998 err = 1;
999 else
1000 err = 0;
1001 if (err)
1003 error ("requested %qE attribute is not a comma separated pair of"
1004 " non-negative integer constants or too large (max. %d)", name,
1005 s390_hotpatch_hw_max);
1006 *no_add_attrs = true;
1009 return NULL_TREE;
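/* A declaration using this attribute could look like this (an
   illustration, not part of the original file):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   i.e. two non-negative halfword counts, each checked against
   s390_hotpatch_hw_max by the handler above.  */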
1012 /* Expand the s390_vector_bool type attribute. */
1014 static tree
1015 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1016 tree args ATTRIBUTE_UNUSED,
1017 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1019 tree type = *node, result = NULL_TREE;
1020 machine_mode mode;
1022 while (POINTER_TYPE_P (type)
1023 || TREE_CODE (type) == FUNCTION_TYPE
1024 || TREE_CODE (type) == METHOD_TYPE
1025 || TREE_CODE (type) == ARRAY_TYPE)
1026 type = TREE_TYPE (type);
1028 mode = TYPE_MODE (type);
1029 switch (mode)
1031 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1032 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1033 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1034 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
1035 default: break;
1038 *no_add_attrs = true; /* No need to hang on to the attribute. */
1040 if (result)
1041 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1043 return NULL_TREE;
1046 static const struct attribute_spec s390_attribute_table[] = {
1047 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1048 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1049 /* End element. */
1050 { NULL, 0, 0, false, false, false, NULL, false }
1053 /* Return the alignment for LABEL. We default to the -falign-labels
1054 value except for the literal pool base label. */
1055 int
1056 s390_label_align (rtx label)
1058 rtx_insn *prev_insn = prev_active_insn (label);
1059 rtx set, src;
1061 if (prev_insn == NULL_RTX)
1062 goto old;
1064 set = single_set (prev_insn);
1066 if (set == NULL_RTX)
1067 goto old;
1069 src = SET_SRC (set);
1071 /* Don't align literal pool base labels. */
1072 if (GET_CODE (src) == UNSPEC
1073 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1074 return 0;
1076 old:
1077 return align_labels_log;
1080 static machine_mode
1081 s390_libgcc_cmp_return_mode (void)
1083 return TARGET_64BIT ? DImode : SImode;
1086 static machine_mode
1087 s390_libgcc_shift_count_mode (void)
1089 return TARGET_64BIT ? DImode : SImode;
1092 static machine_mode
1093 s390_unwind_word_mode (void)
1095 return TARGET_64BIT ? DImode : SImode;
1098 /* Return true if the back end supports mode MODE. */
1099 static bool
1100 s390_scalar_mode_supported_p (machine_mode mode)
1102 /* In contrast to the default implementation, reject TImode constants
1103 on 31-bit TARGET_ZARCH for ABI compliance. */
1104 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1105 return false;
1107 if (DECIMAL_FLOAT_MODE_P (mode))
1108 return default_decimal_float_supported_p ();
1110 return default_scalar_mode_supported_p (mode);
1113 /* Return true if the back end supports vector mode MODE. */
1114 static bool
1115 s390_vector_mode_supported_p (machine_mode mode)
1117 machine_mode inner;
1119 if (!VECTOR_MODE_P (mode)
1120 || !TARGET_VX
1121 || GET_MODE_SIZE (mode) > 16)
1122 return false;
1124 inner = GET_MODE_INNER (mode);
1126 switch (inner)
1128 case QImode:
1129 case HImode:
1130 case SImode:
1131 case DImode:
1132 case TImode:
1133 case SFmode:
1134 case DFmode:
1135 case TFmode:
1136 return true;
1137 default:
1138 return false;
1142 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1144 void
1145 s390_set_has_landing_pad_p (bool value)
1147 cfun->machine->has_landing_pad_p = value;
1150 /* If two condition code modes are compatible, return a condition code
1151 mode which is compatible with both. Otherwise, return
1152 VOIDmode. */
1154 static machine_mode
1155 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1157 if (m1 == m2)
1158 return m1;
1160 switch (m1)
1162 case CCZmode:
1163 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1164 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1165 return m2;
1166 return VOIDmode;
1168 case CCSmode:
1169 case CCUmode:
1170 case CCTmode:
1171 case CCSRmode:
1172 case CCURmode:
1173 case CCZ1mode:
1174 if (m2 == CCZmode)
1175 return m1;
1177 return VOIDmode;
1179 default:
1180 return VOIDmode;
1182 return VOIDmode;
1185 /* Return true if SET either doesn't set the CC register, or else
1186 the source and destination have matching CC modes and that
1187 CC mode is at least as constrained as REQ_MODE. */
1189 static bool
1190 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1192 machine_mode set_mode;
1194 gcc_assert (GET_CODE (set) == SET);
1196 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1197 return 1;
1199 set_mode = GET_MODE (SET_DEST (set));
1200 switch (set_mode)
1202 case CCSmode:
1203 case CCSRmode:
1204 case CCUmode:
1205 case CCURmode:
1206 case CCLmode:
1207 case CCL1mode:
1208 case CCL2mode:
1209 case CCL3mode:
1210 case CCT1mode:
1211 case CCT2mode:
1212 case CCT3mode:
1213 case CCVEQmode:
1214 case CCVHmode:
1215 case CCVHUmode:
1216 case CCVFHmode:
1217 case CCVFHEmode:
1218 if (req_mode != set_mode)
1219 return 0;
1220 break;
1222 case CCZmode:
1223 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1224 && req_mode != CCSRmode && req_mode != CCURmode)
1225 return 0;
1226 break;
1228 case CCAPmode:
1229 case CCANmode:
1230 if (req_mode != CCAmode)
1231 return 0;
1232 break;
1234 default:
1235 gcc_unreachable ();
1238 return (GET_MODE (SET_SRC (set)) == set_mode);
1241 /* Return true if every SET in INSN that sets the CC register
1242 has source and destination with matching CC modes and that
1243 CC mode is at least as constrained as REQ_MODE.
1244 If REQ_MODE is VOIDmode, always return false. */
1246 bool
1247 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1249 int i;
1251 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1252 if (req_mode == VOIDmode)
1253 return false;
1255 if (GET_CODE (PATTERN (insn)) == SET)
1256 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1258 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1259 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1261 rtx set = XVECEXP (PATTERN (insn), 0, i);
1262 if (GET_CODE (set) == SET)
1263 if (!s390_match_ccmode_set (set, req_mode))
1264 return false;
1267 return true;
1270 /* If a test-under-mask instruction can be used to implement
1271 (compare (and ... OP1) OP2), return the CC mode required
1272 to do that. Otherwise, return VOIDmode.
1273 MIXED is true if the instruction can distinguish between
1274 CC1 and CC2 for mixed selected bits (TMxx); it is false
1275 if the instruction cannot (TM). */
1277 machine_mode
1278 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1280 int bit0, bit1;
1282 /* ??? Fixme: should work on CONST_DOUBLE as well. */
1283 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1284 return VOIDmode;
1286 /* Selected bits all zero: CC0.
1287 e.g.: int a; if ((a & (16 + 128)) == 0) */
1288 if (INTVAL (op2) == 0)
1289 return CCTmode;
1291 /* Selected bits all one: CC3.
1292 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1293 if (INTVAL (op2) == INTVAL (op1))
1294 return CCT3mode;
1296 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1297 int a;
1298 if ((a & (16 + 128)) == 16) -> CCT1
1299 if ((a & (16 + 128)) == 128) -> CCT2 */
1300 if (mixed)
1302 bit1 = exact_log2 (INTVAL (op2));
1303 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1304 if (bit0 != -1 && bit1 != -1)
1305 return bit0 > bit1 ? CCT1mode : CCT2mode;
1308 return VOIDmode;
1311 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1312 OP0 and OP1 of a COMPARE, return the mode to be used for the
1313 comparison. */
1315 machine_mode
1316 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1318 if (TARGET_VX
1319 && register_operand (op0, DFmode)
1320 && register_operand (op1, DFmode))
1322 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1323 s390_emit_compare or s390_canonicalize_comparison will take
1324 care of it. */
1325 switch (code)
1327 case EQ:
1328 case NE:
1329 return CCVEQmode;
1330 case GT:
1331 case UNLE:
1332 return CCVFHmode;
1333 case GE:
1334 case UNLT:
1335 return CCVFHEmode;
1336 default:
1341 switch (code)
1343 case EQ:
1344 case NE:
1345 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1346 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1347 return CCAPmode;
1348 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1349 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1350 return CCAPmode;
1351 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1352 || GET_CODE (op1) == NEG)
1353 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1354 return CCLmode;
1356 if (GET_CODE (op0) == AND)
1358 /* Check whether we can potentially do it via TM. */
1359 machine_mode ccmode;
1360 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1361 if (ccmode != VOIDmode)
1363 /* Relax CCTmode to CCZmode to allow fall-back to AND
1364 if that turns out to be beneficial. */
1365 return ccmode == CCTmode ? CCZmode : ccmode;
1369 if (register_operand (op0, HImode)
1370 && GET_CODE (op1) == CONST_INT
1371 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1372 return CCT3mode;
1373 if (register_operand (op0, QImode)
1374 && GET_CODE (op1) == CONST_INT
1375 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1376 return CCT3mode;
1378 return CCZmode;
1380 case LE:
1381 case LT:
1382 case GE:
1383 case GT:
1384 /* The only overflow condition of NEG and ABS happens when
1385 INT_MIN is used as the operand; the result stays negative. So
1386 we have an overflow from a positive value to a negative.
1387 Using CCAP mode the resulting cc can be used for comparisons. */
1388 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1389 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1390 return CCAPmode;
1392 /* If constants are involved in an add instruction it is possible to use
1393 the resulting cc for comparisons with zero. Knowing the sign of the
1394 constant the overflow behavior gets predictable. e.g.:
1395 int a, b; if ((b = a + c) > 0)
1396 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1397 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1398 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1399 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1400 /* Avoid INT32_MIN on 32 bit. */
1401 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1403 if (INTVAL (XEXP((op0), 1)) < 0)
1404 return CCANmode;
1405 else
1406 return CCAPmode;
1408 /* Fall through. */
1409 case UNORDERED:
1410 case ORDERED:
1411 case UNEQ:
1412 case UNLE:
1413 case UNLT:
1414 case UNGE:
1415 case UNGT:
1416 case LTGT:
1417 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1418 && GET_CODE (op1) != CONST_INT)
1419 return CCSRmode;
1420 return CCSmode;
1422 case LTU:
1423 case GEU:
1424 if (GET_CODE (op0) == PLUS
1425 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1426 return CCL1mode;
1428 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1429 && GET_CODE (op1) != CONST_INT)
1430 return CCURmode;
1431 return CCUmode;
1433 case LEU:
1434 case GTU:
1435 if (GET_CODE (op0) == MINUS
1436 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1437 return CCL2mode;
1439 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1440 && GET_CODE (op1) != CONST_INT)
1441 return CCURmode;
1442 return CCUmode;
1444 default:
1445 gcc_unreachable ();
1449 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1450 that we can implement more efficiently. */
1452 static void
1453 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1454 bool op0_preserve_value)
1456 if (op0_preserve_value)
1457 return;
1459 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1460 if ((*code == EQ || *code == NE)
1461 && *op1 == const0_rtx
1462 && GET_CODE (*op0) == ZERO_EXTRACT
1463 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1464 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1465 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1467 rtx inner = XEXP (*op0, 0);
1468 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1469 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1470 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1472 if (len > 0 && len < modesize
1473 && pos >= 0 && pos + len <= modesize
1474 && modesize <= HOST_BITS_PER_WIDE_INT)
1476 unsigned HOST_WIDE_INT block;
1477 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1478 block <<= modesize - pos - len;
1480 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1481 gen_int_mode (block, GET_MODE (inner)));
1485 /* Narrow AND of memory against immediate to enable TM. */
1486 if ((*code == EQ || *code == NE)
1487 && *op1 == const0_rtx
1488 && GET_CODE (*op0) == AND
1489 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1490 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1492 rtx inner = XEXP (*op0, 0);
1493 rtx mask = XEXP (*op0, 1);
1495 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1496 if (GET_CODE (inner) == SUBREG
1497 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1498 && (GET_MODE_SIZE (GET_MODE (inner))
1499 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1500 && ((INTVAL (mask)
1501 & GET_MODE_MASK (GET_MODE (inner))
1502 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1503 == 0))
1504 inner = SUBREG_REG (inner);
1506 /* Do not change volatile MEMs. */
1507 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1509 int part = s390_single_part (XEXP (*op0, 1),
1510 GET_MODE (inner), QImode, 0);
1511 if (part >= 0)
1513 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1514 inner = adjust_address_nv (inner, QImode, part);
1515 *op0 = gen_rtx_AND (QImode, inner, mask);
1520 /* Narrow comparisons against 0xffff to HImode if possible. */
1521 if ((*code == EQ || *code == NE)
1522 && GET_CODE (*op1) == CONST_INT
1523 && INTVAL (*op1) == 0xffff
1524 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1525 && (nonzero_bits (*op0, GET_MODE (*op0))
1526 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1528 *op0 = gen_lowpart (HImode, *op0);
1529 *op1 = constm1_rtx;
1532 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1533 if (GET_CODE (*op0) == UNSPEC
1534 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1535 && XVECLEN (*op0, 0) == 1
1536 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1537 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1538 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1539 && *op1 == const0_rtx)
1541 enum rtx_code new_code = UNKNOWN;
1542 switch (*code)
1544 case EQ: new_code = EQ; break;
1545 case NE: new_code = NE; break;
1546 case LT: new_code = GTU; break;
1547 case GT: new_code = LTU; break;
1548 case LE: new_code = GEU; break;
1549 case GE: new_code = LEU; break;
1550 default: break;
1553 if (new_code != UNKNOWN)
1555 *op0 = XVECEXP (*op0, 0, 0);
1556 *code = new_code;
1560 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1561 if (GET_CODE (*op0) == UNSPEC
1562 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1563 && XVECLEN (*op0, 0) == 1
1564 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1565 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1566 && CONST_INT_P (*op1))
1568 enum rtx_code new_code = UNKNOWN;
1569 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1571 case CCZmode:
1572 case CCRAWmode:
1573 switch (*code)
1575 case EQ: new_code = EQ; break;
1576 case NE: new_code = NE; break;
1577 default: break;
1579 break;
1580 default: break;
1583 if (new_code != UNKNOWN)
1585 /* For CCRAWmode put the required cc mask into the second
1586 operand. */
1587 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1588 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1589 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1590 *op0 = XVECEXP (*op0, 0, 0);
1591 *code = new_code;
1595 /* Simplify cascaded EQ, NE with const0_rtx. */
1596 if ((*code == NE || *code == EQ)
1597 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1598 && GET_MODE (*op0) == SImode
1599 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1600 && REG_P (XEXP (*op0, 0))
1601 && XEXP (*op0, 1) == const0_rtx
1602 && *op1 == const0_rtx)
1604 if ((*code == EQ && GET_CODE (*op0) == NE)
1605 || (*code == NE && GET_CODE (*op0) == EQ))
1606 *code = EQ;
1607 else
1608 *code = NE;
1609 *op0 = XEXP (*op0, 0);
1612 /* Prefer register over memory as first operand. */
1613 if (MEM_P (*op0) && REG_P (*op1))
1615 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1616 *code = (int)swap_condition ((enum rtx_code)*code);
1619 /* Using the scalar variants of vector instructions for 64 bit FP
1620 comparisons might require swapping the operands. */
1621 if (TARGET_VX
1622 && register_operand (*op0, DFmode)
1623 && register_operand (*op1, DFmode)
1624 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1626 rtx tmp;
1628 switch (*code)
1630 case LT: *code = GT; break;
1631 case LE: *code = GE; break;
1632 case UNGT: *code = UNLE; break;
1633 case UNGE: *code = UNLT; break;
1634 default: ;
1636 tmp = *op0; *op0 = *op1; *op1 = tmp;
1640 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1641 FP compare using the single element variant of vector instructions.
1642 Replace CODE with the comparison code to be used in the CC reg
1643 compare and return the condition code register RTX in CC. */
1645 static bool
1646 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1647 rtx *cc)
1649 machine_mode cmp_mode;
1650 bool swap_p = false;
1652 switch (*code)
1654 case EQ: cmp_mode = CCVEQmode; break;
1655 case NE: cmp_mode = CCVEQmode; break;
1656 case GT: cmp_mode = CCVFHmode; break;
1657 case GE: cmp_mode = CCVFHEmode; break;
1658 case UNLE: cmp_mode = CCVFHmode; break;
1659 case UNLT: cmp_mode = CCVFHEmode; break;
1660 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1661 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1662 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1663 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1664 default: return false;
1667 if (swap_p)
1669 rtx tmp = cmp2;
1670 cmp2 = cmp1;
1671 cmp1 = tmp;
1673 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1674 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1675 gen_rtvec (2,
1676 gen_rtx_SET (*cc,
1677 gen_rtx_COMPARE (cmp_mode, cmp1,
1678 cmp2)),
1679 gen_rtx_CLOBBER (VOIDmode,
1680 gen_rtx_SCRATCH (V2DImode)))));
1681 return true;
1685 /* Emit a compare instruction suitable to implement the comparison
1686 OP0 CODE OP1. Return the correct condition RTL to be placed in
1687 the IF_THEN_ELSE of the conditional branch testing the result. */
1689 rtx
1690 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1692 machine_mode mode = s390_select_ccmode (code, op0, op1);
1693 rtx cc;
1695 if (TARGET_VX
1696 && register_operand (op0, DFmode)
1697 && register_operand (op1, DFmode)
1698 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1700 /* Work has been done by s390_expand_vec_compare_scalar already. */
1702 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1704 /* Do not output a redundant compare instruction if a
1705 compare_and_swap pattern already computed the result and the
1706 machine modes are compatible. */
1707 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1708 == GET_MODE (op0));
1709 cc = op0;
1711 else
1713 cc = gen_rtx_REG (mode, CC_REGNUM);
1714 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1717 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1720 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1721 matches CMP.
1722 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1723 conditional branch testing the result. */
1725 static rtx
1726 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1727 rtx cmp, rtx new_rtx)
1729 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1730 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1731 const0_rtx);
1734 /* Emit a jump instruction to TARGET and return it. If COND is
1735 NULL_RTX, emit an unconditional jump, else a conditional jump under
1736 condition COND. */
1738 rtx_insn *
1739 s390_emit_jump (rtx target, rtx cond)
1741 rtx insn;
1743 target = gen_rtx_LABEL_REF (VOIDmode, target);
1744 if (cond)
1745 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1747 insn = gen_rtx_SET (pc_rtx, target);
1748 return emit_jump_insn (insn);
1751 /* Return branch condition mask to implement a branch
1752 specified by CODE. Return -1 for invalid comparisons. */
1754 static int
1755 s390_branch_condition_mask (rtx code)
1757 const int CC0 = 1 << 3;
1758 const int CC1 = 1 << 2;
1759 const int CC2 = 1 << 1;
1760 const int CC3 = 1 << 0;
1762 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1763 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1764 gcc_assert (XEXP (code, 1) == const0_rtx
1765 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1766 && CONST_INT_P (XEXP (code, 1))));
1769 switch (GET_MODE (XEXP (code, 0)))
1771 case CCZmode:
1772 case CCZ1mode:
1773 switch (GET_CODE (code))
1775 case EQ: return CC0;
1776 case NE: return CC1 | CC2 | CC3;
1777 default: return -1;
1779 break;
1781 case CCT1mode:
1782 switch (GET_CODE (code))
1784 case EQ: return CC1;
1785 case NE: return CC0 | CC2 | CC3;
1786 default: return -1;
1788 break;
1790 case CCT2mode:
1791 switch (GET_CODE (code))
1793 case EQ: return CC2;
1794 case NE: return CC0 | CC1 | CC3;
1795 default: return -1;
1797 break;
1799 case CCT3mode:
1800 switch (GET_CODE (code))
1802 case EQ: return CC3;
1803 case NE: return CC0 | CC1 | CC2;
1804 default: return -1;
1806 break;
1808 case CCLmode:
1809 switch (GET_CODE (code))
1811 case EQ: return CC0 | CC2;
1812 case NE: return CC1 | CC3;
1813 default: return -1;
1815 break;
1817 case CCL1mode:
1818 switch (GET_CODE (code))
1820 case LTU: return CC2 | CC3; /* carry */
1821 case GEU: return CC0 | CC1; /* no carry */
1822 default: return -1;
1824 break;
1826 case CCL2mode:
1827 switch (GET_CODE (code))
1829 case GTU: return CC0 | CC1; /* borrow */
1830 case LEU: return CC2 | CC3; /* no borrow */
1831 default: return -1;
1833 break;
1835 case CCL3mode:
1836 switch (GET_CODE (code))
1838 case EQ: return CC0 | CC2;
1839 case NE: return CC1 | CC3;
1840 case LTU: return CC1;
1841 case GTU: return CC3;
1842 case LEU: return CC1 | CC2;
1843 case GEU: return CC2 | CC3;
1844 default: return -1;
1847 case CCUmode:
1848 switch (GET_CODE (code))
1850 case EQ: return CC0;
1851 case NE: return CC1 | CC2 | CC3;
1852 case LTU: return CC1;
1853 case GTU: return CC2;
1854 case LEU: return CC0 | CC1;
1855 case GEU: return CC0 | CC2;
1856 default: return -1;
1858 break;
1860 case CCURmode:
1861 switch (GET_CODE (code))
1863 case EQ: return CC0;
1864 case NE: return CC2 | CC1 | CC3;
1865 case LTU: return CC2;
1866 case GTU: return CC1;
1867 case LEU: return CC0 | CC2;
1868 case GEU: return CC0 | CC1;
1869 default: return -1;
1871 break;
1873 case CCAPmode:
1874 switch (GET_CODE (code))
1876 case EQ: return CC0;
1877 case NE: return CC1 | CC2 | CC3;
1878 case LT: return CC1 | CC3;
1879 case GT: return CC2;
1880 case LE: return CC0 | CC1 | CC3;
1881 case GE: return CC0 | CC2;
1882 default: return -1;
1884 break;
1886 case CCANmode:
1887 switch (GET_CODE (code))
1889 case EQ: return CC0;
1890 case NE: return CC1 | CC2 | CC3;
1891 case LT: return CC1;
1892 case GT: return CC2 | CC3;
1893 case LE: return CC0 | CC1;
1894 case GE: return CC0 | CC2 | CC3;
1895 default: return -1;
1897 break;
1899 case CCSmode:
1900 switch (GET_CODE (code))
1902 case EQ: return CC0;
1903 case NE: return CC1 | CC2 | CC3;
1904 case LT: return CC1;
1905 case GT: return CC2;
1906 case LE: return CC0 | CC1;
1907 case GE: return CC0 | CC2;
1908 case UNORDERED: return CC3;
1909 case ORDERED: return CC0 | CC1 | CC2;
1910 case UNEQ: return CC0 | CC3;
1911 case UNLT: return CC1 | CC3;
1912 case UNGT: return CC2 | CC3;
1913 case UNLE: return CC0 | CC1 | CC3;
1914 case UNGE: return CC0 | CC2 | CC3;
1915 case LTGT: return CC1 | CC2;
1916 default: return -1;
1918 break;
1920 case CCSRmode:
1921 switch (GET_CODE (code))
1923 case EQ: return CC0;
1924 case NE: return CC2 | CC1 | CC3;
1925 case LT: return CC2;
1926 case GT: return CC1;
1927 case LE: return CC0 | CC2;
1928 case GE: return CC0 | CC1;
1929 case UNORDERED: return CC3;
1930 case ORDERED: return CC0 | CC2 | CC1;
1931 case UNEQ: return CC0 | CC3;
1932 case UNLT: return CC2 | CC3;
1933 case UNGT: return CC1 | CC3;
1934 case UNLE: return CC0 | CC2 | CC3;
1935 case UNGE: return CC0 | CC1 | CC3;
1936 case LTGT: return CC2 | CC1;
1937 default: return -1;
1939 break;
1941 /* Vector comparison modes. */
1943 case CCVEQmode:
1944 switch (GET_CODE (code))
1946 case EQ: return CC0;
1947 case NE: return CC3;
1948 default: return -1;
1951 case CCVEQANYmode:
1952 switch (GET_CODE (code))
1954 case EQ: return CC0 | CC1;
1955 case NE: return CC3 | CC1;
1956 default: return -1;
1959 /* Integer vector compare modes. */
1961 case CCVHmode:
1962 switch (GET_CODE (code))
1964 case GT: return CC0;
1965 case LE: return CC3;
1966 default: return -1;
1969 case CCVHANYmode:
1970 switch (GET_CODE (code))
1972 case GT: return CC0 | CC1;
1973 case LE: return CC3 | CC1;
1974 default: return -1;
1977 case CCVHUmode:
1978 switch (GET_CODE (code))
1980 case GTU: return CC0;
1981 case LEU: return CC3;
1982 default: return -1;
1985 case CCVHUANYmode:
1986 switch (GET_CODE (code))
1988 case GTU: return CC0 | CC1;
1989 case LEU: return CC3 | CC1;
1990 default: return -1;
1993 /* FP vector compare modes. */
1995 case CCVFHmode:
1996 switch (GET_CODE (code))
1998 case GT: return CC0;
1999 case UNLE: return CC3;
2000 default: return -1;
2003 case CCVFHANYmode:
2004 switch (GET_CODE (code))
2006 case GT: return CC0 | CC1;
2007 case UNLE: return CC3 | CC1;
2008 default: return -1;
2011 case CCVFHEmode:
2012 switch (GET_CODE (code))
2014 case GE: return CC0;
2015 case UNLT: return CC3;
2016 default: return -1;
2019 case CCVFHEANYmode:
2020 switch (GET_CODE (code))
2022 case GE: return CC0 | CC1;
2023 case UNLT: return CC3 | CC1;
2024 default: return -1;
2028 case CCRAWmode:
2029 switch (GET_CODE (code))
2031 case EQ:
2032 return INTVAL (XEXP (code, 1));
2033 case NE:
2034 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2035 default:
2036 gcc_unreachable ();
2039 default:
2040 return -1;
2045 /* Return branch condition mask to implement a compare and branch
2046 specified by CODE. Return -1 for invalid comparisons. */
2049 s390_compare_and_branch_condition_mask (rtx code)
2051 const int CC0 = 1 << 3;
2052 const int CC1 = 1 << 2;
2053 const int CC2 = 1 << 1;
2055 switch (GET_CODE (code))
2057 case EQ:
2058 return CC0;
2059 case NE:
2060 return CC1 | CC2;
2061 case LT:
2062 case LTU:
2063 return CC1;
2064 case GT:
2065 case GTU:
2066 return CC2;
2067 case LE:
2068 case LEU:
2069 return CC0 | CC1;
2070 case GE:
2071 case GEU:
2072 return CC0 | CC2;
2073 default:
2074 gcc_unreachable ();
2076 return -1;
2079 /* If INV is false, return assembler mnemonic string to implement
2080 a branch specified by CODE. If INV is true, return mnemonic
2081 for the corresponding inverted branch. */
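/* For instance, an EQ test against the CC register in CCSmode maps to
   CC0 only; with CC0 == 1 << 3 that is mask 8, so mnemonic[8] ("e") is
   emitted, or mnemonic[7] ("ne") when INV requests the inverted branch.  */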
2083 static const char *
2084 s390_branch_condition_mnemonic (rtx code, int inv)
2086 int mask;
2088 static const char *const mnemonic[16] =
2090 NULL, "o", "h", "nle",
2091 "l", "nhe", "lh", "ne",
2092 "e", "nlh", "he", "nl",
2093 "le", "nh", "no", NULL
2096 if (GET_CODE (XEXP (code, 0)) == REG
2097 && REGNO (XEXP (code, 0)) == CC_REGNUM
2098 && (XEXP (code, 1) == const0_rtx
2099 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2100 && CONST_INT_P (XEXP (code, 1)))))
2101 mask = s390_branch_condition_mask (code);
2102 else
2103 mask = s390_compare_and_branch_condition_mask (code);
2105 gcc_assert (mask >= 0);
2107 if (inv)
2108 mask ^= 15;
2110 gcc_assert (mask >= 1 && mask <= 14);
2112 return mnemonic[mask];
2115 /* Return the part of op which has a value different from def.
2116 The size of the part is determined by mode.
2117 Use this function only if you already know that op really
2118 contains such a part. */
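/* For instance, with OP == (const_int 0x12340000), MODE == HImode and
   DEF == 0 the lowest part 0x0000 equals DEF, so the next part, 0x1234,
   is returned.  */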
2120 unsigned HOST_WIDE_INT
2121 s390_extract_part (rtx op, machine_mode mode, int def)
2123 unsigned HOST_WIDE_INT value = 0;
2124 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2125 int part_bits = GET_MODE_BITSIZE (mode);
2126 unsigned HOST_WIDE_INT part_mask
2127 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2128 int i;
2130 for (i = 0; i < max_parts; i++)
2132 if (i == 0)
2133 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2134 else
2135 value >>= part_bits;
2137 if ((value & part_mask) != (def & part_mask))
2138 return value & part_mask;
2141 gcc_unreachable ();
2144 /* If OP is an integer constant of mode MODE with exactly one
2145 part of mode PART_MODE unequal to DEF, return the number of that
2146 part. Otherwise, return -1. */
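/* For instance, OP == (const_int 0x1234) with MODE == DImode,
   PART_MODE == HImode and DEF == 0 has exactly one HImode part differing
   from DEF -- the lowest one -- and 3 is returned, i.e. the part number
   counted from the most significant part.  */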
2149 s390_single_part (rtx op,
2150 machine_mode mode,
2151 machine_mode part_mode,
2152 int def)
2154 unsigned HOST_WIDE_INT value = 0;
2155 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2156 unsigned HOST_WIDE_INT part_mask
2157 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2158 int i, part = -1;
2160 if (GET_CODE (op) != CONST_INT)
2161 return -1;
2163 for (i = 0; i < n_parts; i++)
2165 if (i == 0)
2166 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2167 else
2168 value >>= GET_MODE_BITSIZE (part_mode);
2170 if ((value & part_mask) != (def & part_mask))
2172 if (part != -1)
2173 return -1;
2174 else
2175 part = i;
2178 return part == -1 ? -1 : n_parts - 1 - part;
2181 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2182 bits and no other bits are set in IN. POS and LENGTH can be used
2183 to obtain the start position and the length of the bitfield.
2185 POS gives the position of the first bit of the bitfield counting
2186 from the lowest order bit starting with zero. In order to use this
2187 value for S/390 instructions this has to be converted to "bits big
2188 endian" style. */
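/* For instance, IN == 0xff0 with SIZE == 32 is accepted with *POS == 4
   and *LENGTH == 8, whereas IN == 0xf0f is rejected because its set bits
   are not contiguous.  */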
2190 bool
2191 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2192 int *pos, int *length)
2194 int tmp_pos = 0;
2195 int tmp_length = 0;
2196 int i;
2197 unsigned HOST_WIDE_INT mask = 1ULL;
2198 bool contiguous = false;
2200 for (i = 0; i < size; mask <<= 1, i++)
2202 if (contiguous)
2204 if (mask & in)
2205 tmp_length++;
2206 else
2207 break;
2209 else
2211 if (mask & in)
2213 contiguous = true;
2214 tmp_length++;
2216 else
2217 tmp_pos++;
2221 if (!tmp_length)
2222 return false;
2224 /* Calculate a mask for all bits beyond the contiguous bits. */
2225 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2227 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2228 mask &= (HOST_WIDE_INT_1U << size) - 1;
2230 if (mask & in)
2231 return false;
2233 if (tmp_length + tmp_pos - 1 > size)
2234 return false;
2236 if (length)
2237 *length = tmp_length;
2239 if (pos)
2240 *pos = tmp_pos;
2242 return true;
2245 /* Return true if OP contains the same contiguous bitfield in *all*
2246 its elements. START and END can be used to obtain the start and
2247 end position of the bitfield.
2249 START/END give the position of the first/last bit of the bitfield
2250 counting from the lowest order bit starting with zero. In order to
2251 use these values for S/390 instructions this has to be converted to
2252 "bits big endian" style. */
2254 bool
2255 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2257 unsigned HOST_WIDE_INT mask;
2258 int length, size;
2259 rtx elt;
2261 if (!const_vec_duplicate_p (op, &elt)
2262 || !CONST_INT_P (elt))
2263 return false;
2265 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2267 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2268 if (size > 64)
2269 return false;
2271 mask = UINTVAL (elt);
2272 if (s390_contiguous_bitmask_p (mask, size, start,
2273 end != NULL ? &length : NULL))
2275 if (end != NULL)
2276 *end = *start + length - 1;
2277 return true;
2279 /* 0xff00000f style immediates can be covered by swapping start and
2280 end indices in vgm. */
2281 if (s390_contiguous_bitmask_p (~mask, size, start,
2282 end != NULL ? &length : NULL))
2284 if (end != NULL)
2285 *end = *start - 1;
2286 if (start != NULL)
2287 *start = *start + length;
2288 return true;
2290 return false;
2293 /* Return true if OP consists only of byte chunks being either 0 or
2294 0xff. If MASK is non-NULL a byte mask is generated which is
2295 appropriate for the vector generate byte mask instruction. */
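/* For instance, a V16QI constant whose first element is 0xff and whose
   remaining elements are 0 yields *MASK == 0x8000, suitable as immediate
   for the vector generate byte mask instruction.  */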
2297 bool
2298 s390_bytemask_vector_p (rtx op, unsigned *mask)
2300 int i;
2301 unsigned tmp_mask = 0;
2302 int nunit, unit_size;
2304 if (!VECTOR_MODE_P (GET_MODE (op))
2305 || GET_CODE (op) != CONST_VECTOR
2306 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2307 return false;
2309 nunit = GET_MODE_NUNITS (GET_MODE (op));
2310 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2312 for (i = 0; i < nunit; i++)
2314 unsigned HOST_WIDE_INT c;
2315 int j;
2317 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2318 return false;
2320 c = UINTVAL (XVECEXP (op, 0, i));
2321 for (j = 0; j < unit_size; j++)
2323 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2324 return false;
2325 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2326 c = c >> BITS_PER_UNIT;
2330 if (mask != NULL)
2331 *mask = tmp_mask;
2333 return true;
2336 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2337 equivalent to a shift followed by the AND. In particular, CONTIG
2338 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2339 for ROTL indicate a rotate to the right. */
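/* For instance, CONTIG == 0xff00 within BITSIZE == 64 has POS == 8 and
   LEN == 8, so left rotates of 0..8 bits are accepted, while a rotate by
   12 would make the field overlap the bit 0/bit 63 gap and is rejected.  */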
2341 bool
2342 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2344 int pos, len;
2345 bool ok;
2347 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2348 gcc_assert (ok);
2350 return ((rotl >= 0 && rotl <= pos)
2351 || (rotl < 0 && -rotl <= bitsize - len - pos));
2354 /* Check whether we can (and want to) split a double-word
2355 move in mode MODE from SRC to DST into two single-word
2356 moves, moving the subword FIRST_SUBWORD first. */
2358 bool
2359 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2361 /* Floating point and vector registers cannot be split. */
2362 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2363 return false;
2365 /* We don't need to split if operands are directly accessible. */
2366 if (s_operand (src, mode) || s_operand (dst, mode))
2367 return false;
2369 /* Non-offsettable memory references cannot be split. */
2370 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2371 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2372 return false;
2374 /* Moving the first subword must not clobber a register
2375 needed to move the second subword. */
2376 if (register_operand (dst, mode))
2378 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2379 if (reg_overlap_mentioned_p (subreg, src))
2380 return false;
2383 return true;
2386 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2387 and [MEM2, MEM2 + SIZE] do overlap and false
2388 otherwise. */
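/* For instance, with MEM1 at (reg) and MEM2 at (plus (reg) (const_int 8))
   and SIZE == 16 the address delta simplifies to 8, which is smaller than
   SIZE, so the blocks provably overlap and true is returned; with a delta
   of 16 false is returned.  */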
2390 bool
2391 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2393 rtx addr1, addr2, addr_delta;
2394 HOST_WIDE_INT delta;
2396 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2397 return true;
2399 if (size == 0)
2400 return false;
2402 addr1 = XEXP (mem1, 0);
2403 addr2 = XEXP (mem2, 0);
2405 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2407 /* This overlapping check is used by peepholes merging memory block operations.
2408 Overlapping operations would otherwise be recognized by the S/390 hardware
2409 and would fall back to a slower implementation. Allowing overlapping
2410 operations would lead to slow code but not to wrong code. Therefore we
2411 are somewhat optimistic when we cannot prove that the memory blocks
2412 overlap:
2413 that is why we return false here, although this may allow operations on
2414 memory areas that do in fact overlap. */
2415 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2416 return false;
2418 delta = INTVAL (addr_delta);
2420 if (delta == 0
2421 || (delta > 0 && delta < size)
2422 || (delta < 0 && -delta < size))
2423 return true;
2425 return false;
2428 /* Check whether the address of memory reference MEM2 equals exactly
2429 the address of memory reference MEM1 plus DELTA. Return true if
2430 we can prove this to be the case, false otherwise. */
2432 bool
2433 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2435 rtx addr1, addr2, addr_delta;
2437 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2438 return false;
2440 addr1 = XEXP (mem1, 0);
2441 addr2 = XEXP (mem2, 0);
2443 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2444 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2445 return false;
2447 return true;
2450 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2452 void
2453 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2454 rtx *operands)
2456 machine_mode wmode = mode;
2457 rtx dst = operands[0];
2458 rtx src1 = operands[1];
2459 rtx src2 = operands[2];
2460 rtx op, clob, tem;
2462 /* If we cannot handle the operation directly, use a temp register. */
2463 if (!s390_logical_operator_ok_p (operands))
2464 dst = gen_reg_rtx (mode);
2466 /* QImode and HImode patterns make sense only if we have a destination
2467 in memory. Otherwise perform the operation in SImode. */
2468 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2469 wmode = SImode;
2471 /* Widen operands if required. */
2472 if (mode != wmode)
2474 if (GET_CODE (dst) == SUBREG
2475 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2476 dst = tem;
2477 else if (REG_P (dst))
2478 dst = gen_rtx_SUBREG (wmode, dst, 0);
2479 else
2480 dst = gen_reg_rtx (wmode);
2482 if (GET_CODE (src1) == SUBREG
2483 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2484 src1 = tem;
2485 else if (GET_MODE (src1) != VOIDmode)
2486 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2488 if (GET_CODE (src2) == SUBREG
2489 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2490 src2 = tem;
2491 else if (GET_MODE (src2) != VOIDmode)
2492 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2495 /* Emit the instruction. */
2496 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2497 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2498 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2500 /* Fix up the destination if needed. */
2501 if (dst != operands[0])
2502 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2505 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2507 bool
2508 s390_logical_operator_ok_p (rtx *operands)
2510 /* If the destination operand is in memory, it needs to coincide
2511 with one of the source operands. After reload, it has to be
2512 the first source operand. */
2513 if (GET_CODE (operands[0]) == MEM)
2514 return rtx_equal_p (operands[0], operands[1])
2515 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2517 return true;
2520 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2521 operand IMMOP to switch from SS to SI type instructions. */
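/* For instance, an SImode AND of a MEM with (const_int 0xffffff00) has
   DEF == -1 and exactly one QImode part -- the lowest byte -- differing
   from 0xff, so *MEMOP is narrowed to a QImode access at byte offset 3
   and *IMMOP becomes 0, allowing an SI-type instruction like NI.  */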
2523 void
2524 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2526 int def = code == AND ? -1 : 0;
2527 HOST_WIDE_INT mask;
2528 int part;
2530 gcc_assert (GET_CODE (*memop) == MEM);
2531 gcc_assert (!MEM_VOLATILE_P (*memop));
2533 mask = s390_extract_part (*immop, QImode, def);
2534 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2535 gcc_assert (part >= 0);
2537 *memop = adjust_address (*memop, QImode, part);
2538 *immop = gen_int_mode (mask, QImode);
2542 /* How to allocate a 'struct machine_function'. */
2544 static struct machine_function *
2545 s390_init_machine_status (void)
2547 return ggc_cleared_alloc<machine_function> ();
2550 /* Map for smallest class containing reg regno. */
2552 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2553 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2554 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2555 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2556 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2557 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2558 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2559 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2560 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2561 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2562 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2563 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2564 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2565 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2566 VEC_REGS, VEC_REGS /* 52 */
2569 /* Return attribute type of insn. */
2571 static enum attr_type
2572 s390_safe_attr_type (rtx_insn *insn)
2574 if (recog_memoized (insn) >= 0)
2575 return get_attr_type (insn);
2576 else
2577 return TYPE_NONE;
2580 /* Return true if DISP is a valid short displacement. */
2582 static bool
2583 s390_short_displacement (rtx disp)
2585 /* No displacement is OK. */
2586 if (!disp)
2587 return true;
2589 /* Without the long displacement facility we don't need to
2590 distinguish between long and short displacement.
2591 if (!TARGET_LONG_DISPLACEMENT)
2592 return true;
2594 /* Integer displacement in range. */
2595 if (GET_CODE (disp) == CONST_INT)
2596 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2598 /* GOT offset is not OK, the GOT can be large. */
2599 if (GET_CODE (disp) == CONST
2600 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2601 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2602 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2603 return false;
2605 /* All other symbolic constants are literal pool references,
2606 which are OK as the literal pool must be small. */
2607 if (GET_CODE (disp) == CONST)
2608 return true;
2610 return false;
2613 /* Decompose a RTL expression ADDR for a memory address into
2614 its components, returned in OUT.
2616 Returns false if ADDR is not a valid memory address, true
2617 otherwise. If OUT is NULL, don't return the components,
2618 but check for validity only.
2620 Note: Only addresses in canonical form are recognized.
2621 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2622 canonical form so that they will be recognized. */
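/* For instance, (plus (reg %r2) (const_int 4000)) decomposes into
   base == %r2, no index and an integer displacement of 4000, while
   (plus (plus (reg %r1) (reg %r2)) (const_int 8)) yields index %r1,
   base %r2 and displacement 8.  */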
2624 static int
2625 s390_decompose_address (rtx addr, struct s390_address *out)
2627 HOST_WIDE_INT offset = 0;
2628 rtx base = NULL_RTX;
2629 rtx indx = NULL_RTX;
2630 rtx disp = NULL_RTX;
2631 rtx orig_disp;
2632 bool pointer = false;
2633 bool base_ptr = false;
2634 bool indx_ptr = false;
2635 bool literal_pool = false;
2637 /* We may need to substitute the literal pool base register into the address
2638 below. However, at this point we do not know which register is going to
2639 be used as base, so we substitute the arg pointer register. This is going
2640 to be treated as holding a pointer below -- it shouldn't be used for any
2641 other purpose. */
2642 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2644 /* Decompose address into base + index + displacement. */
2646 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2647 base = addr;
2649 else if (GET_CODE (addr) == PLUS)
2651 rtx op0 = XEXP (addr, 0);
2652 rtx op1 = XEXP (addr, 1);
2653 enum rtx_code code0 = GET_CODE (op0);
2654 enum rtx_code code1 = GET_CODE (op1);
2656 if (code0 == REG || code0 == UNSPEC)
2658 if (code1 == REG || code1 == UNSPEC)
2660 indx = op0; /* index + base */
2661 base = op1;
2664 else
2666 base = op0; /* base + displacement */
2667 disp = op1;
2671 else if (code0 == PLUS)
2673 indx = XEXP (op0, 0); /* index + base + disp */
2674 base = XEXP (op0, 1);
2675 disp = op1;
2678 else
2680 return false;
2684 else
2685 disp = addr; /* displacement */
2687 /* Extract integer part of displacement. */
2688 orig_disp = disp;
2689 if (disp)
2691 if (GET_CODE (disp) == CONST_INT)
2693 offset = INTVAL (disp);
2694 disp = NULL_RTX;
2696 else if (GET_CODE (disp) == CONST
2697 && GET_CODE (XEXP (disp, 0)) == PLUS
2698 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2700 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2701 disp = XEXP (XEXP (disp, 0), 0);
2705 /* Strip off CONST here to avoid special case tests later. */
2706 if (disp && GET_CODE (disp) == CONST)
2707 disp = XEXP (disp, 0);
2709 /* We can convert literal pool addresses to
2710 displacements by basing them off the base register. */
2711 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2713 /* Either base or index must be free to hold the base register. */
2714 if (!base)
2715 base = fake_pool_base, literal_pool = true;
2716 else if (!indx)
2717 indx = fake_pool_base, literal_pool = true;
2718 else
2719 return false;
2721 /* Mark up the displacement. */
2722 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2723 UNSPEC_LTREL_OFFSET);
2726 /* Validate base register. */
2727 if (base)
2729 if (GET_CODE (base) == UNSPEC)
2730 switch (XINT (base, 1))
2732 case UNSPEC_LTREF:
2733 if (!disp)
2734 disp = gen_rtx_UNSPEC (Pmode,
2735 gen_rtvec (1, XVECEXP (base, 0, 0)),
2736 UNSPEC_LTREL_OFFSET);
2737 else
2738 return false;
2740 base = XVECEXP (base, 0, 1);
2741 break;
2743 case UNSPEC_LTREL_BASE:
2744 if (XVECLEN (base, 0) == 1)
2745 base = fake_pool_base, literal_pool = true;
2746 else
2747 base = XVECEXP (base, 0, 1);
2748 break;
2750 default:
2751 return false;
2754 if (!REG_P (base)
2755 || (GET_MODE (base) != SImode
2756 && GET_MODE (base) != Pmode))
2757 return false;
2759 if (REGNO (base) == STACK_POINTER_REGNUM
2760 || REGNO (base) == FRAME_POINTER_REGNUM
2761 || ((reload_completed || reload_in_progress)
2762 && frame_pointer_needed
2763 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2764 || REGNO (base) == ARG_POINTER_REGNUM
2765 || (flag_pic
2766 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2767 pointer = base_ptr = true;
2769 if ((reload_completed || reload_in_progress)
2770 && base == cfun->machine->base_reg)
2771 pointer = base_ptr = literal_pool = true;
2774 /* Validate index register. */
2775 if (indx)
2777 if (GET_CODE (indx) == UNSPEC)
2778 switch (XINT (indx, 1))
2780 case UNSPEC_LTREF:
2781 if (!disp)
2782 disp = gen_rtx_UNSPEC (Pmode,
2783 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2784 UNSPEC_LTREL_OFFSET);
2785 else
2786 return false;
2788 indx = XVECEXP (indx, 0, 1);
2789 break;
2791 case UNSPEC_LTREL_BASE:
2792 if (XVECLEN (indx, 0) == 1)
2793 indx = fake_pool_base, literal_pool = true;
2794 else
2795 indx = XVECEXP (indx, 0, 1);
2796 break;
2798 default:
2799 return false;
2802 if (!REG_P (indx)
2803 || (GET_MODE (indx) != SImode
2804 && GET_MODE (indx) != Pmode))
2805 return false;
2807 if (REGNO (indx) == STACK_POINTER_REGNUM
2808 || REGNO (indx) == FRAME_POINTER_REGNUM
2809 || ((reload_completed || reload_in_progress)
2810 && frame_pointer_needed
2811 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2812 || REGNO (indx) == ARG_POINTER_REGNUM
2813 || (flag_pic
2814 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2815 pointer = indx_ptr = true;
2817 if ((reload_completed || reload_in_progress)
2818 && indx == cfun->machine->base_reg)
2819 pointer = indx_ptr = literal_pool = true;
2822 /* Prefer to use pointer as base, not index. */
2823 if (base && indx && !base_ptr
2824 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2826 rtx tmp = base;
2827 base = indx;
2828 indx = tmp;
2831 /* Validate displacement. */
2832 if (!disp)
2834 /* If virtual registers are involved, the displacement will change later
2835 anyway as the virtual registers get eliminated. This could make a
2836 valid displacement invalid, but it is more likely to make an invalid
2837 displacement valid, because we sometimes access the register save area
2838 via negative offsets to one of those registers.
2839 Thus we don't check the displacement for validity here. If after
2840 elimination the displacement turns out to be invalid after all,
2841 this is fixed up by reload in any case. */
2842 /* LRA always keeps displacements up to date and we need to know
2843 that the displacement is valid throughout LRA, not only at the
2844 final elimination. */
2845 if (lra_in_progress
2846 || (base != arg_pointer_rtx
2847 && indx != arg_pointer_rtx
2848 && base != return_address_pointer_rtx
2849 && indx != return_address_pointer_rtx
2850 && base != frame_pointer_rtx
2851 && indx != frame_pointer_rtx
2852 && base != virtual_stack_vars_rtx
2853 && indx != virtual_stack_vars_rtx))
2854 if (!DISP_IN_RANGE (offset))
2855 return false;
2857 else
2859 /* All the special cases are pointers. */
2860 pointer = true;
2862 /* In the small-PIC case, the linker converts @GOT
2863 and @GOTNTPOFF offsets to possible displacements. */
2864 if (GET_CODE (disp) == UNSPEC
2865 && (XINT (disp, 1) == UNSPEC_GOT
2866 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2867 && flag_pic == 1)
2872 /* Accept pool label offsets. */
2873 else if (GET_CODE (disp) == UNSPEC
2874 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2877 /* Accept literal pool references. */
2878 else if (GET_CODE (disp) == UNSPEC
2879 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2881 /* In case CSE pulled a non literal pool reference out of
2882 the pool we have to reject the address. This is
2883 especially important when loading the GOT pointer on non
2884 zarch CPUs. In this case the literal pool contains an lt
2885 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2886 will most likely exceed the displacement. */
2887 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2888 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2889 return false;
2891 orig_disp = gen_rtx_CONST (Pmode, disp);
2892 if (offset)
2894 /* If we have an offset, make sure it does not
2895 exceed the size of the constant pool entry. */
2896 rtx sym = XVECEXP (disp, 0, 0);
2897 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2898 return false;
2900 orig_disp = plus_constant (Pmode, orig_disp, offset);
2904 else
2905 return false;
2908 if (!base && !indx)
2909 pointer = true;
2911 if (out)
2913 out->base = base;
2914 out->indx = indx;
2915 out->disp = orig_disp;
2916 out->pointer = pointer;
2917 out->literal_pool = literal_pool;
2920 return true;
2923 /* Decompose a RTL expression OP for a shift count into its components,
2924 and return the base register in BASE and the offset in OFFSET.
2926 Return true if OP is a valid shift count, false if not. */
2928 bool
2929 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2931 HOST_WIDE_INT off = 0;
2933 /* We can have an integer constant, an address register,
2934 or a sum of the two. */
2935 if (GET_CODE (op) == CONST_INT)
2937 off = INTVAL (op);
2938 op = NULL_RTX;
2940 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2942 off = INTVAL (XEXP (op, 1));
2943 op = XEXP (op, 0);
2945 while (op && GET_CODE (op) == SUBREG)
2946 op = SUBREG_REG (op);
2948 if (op && GET_CODE (op) != REG)
2949 return false;
2951 if (offset)
2952 *offset = off;
2953 if (base)
2954 *base = op;
2956 return true;
2960 /* Return true if OP is a valid address without index. */
2962 bool
2963 s390_legitimate_address_without_index_p (rtx op)
2965 struct s390_address addr;
2967 if (!s390_decompose_address (XEXP (op, 0), &addr))
2968 return false;
2969 if (addr.indx)
2970 return false;
2972 return true;
2976 /* Return TRUE if ADDR is an operand valid for a load/store relative
2977 instruction. Be aware that the alignment of the operand needs to
2978 be checked separately.
2979 Valid addresses are single references or a sum of a reference and a
2980 constant integer. Return these parts in SYMREF and ADDEND. You can
2981 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2982 values. Literal pool references are *not* considered symbol
2983 references. */
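/* For instance, (const (plus (symbol_ref "foo") (const_int 8))) is
   accepted with *SYMREF == (symbol_ref "foo") and *ADDEND == 8, provided
   "foo" is not a literal pool entry.  */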
2985 static bool
2986 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2988 HOST_WIDE_INT tmpaddend = 0;
2990 if (GET_CODE (addr) == CONST)
2991 addr = XEXP (addr, 0);
2993 if (GET_CODE (addr) == PLUS)
2995 if (!CONST_INT_P (XEXP (addr, 1)))
2996 return false;
2998 tmpaddend = INTVAL (XEXP (addr, 1));
2999 addr = XEXP (addr, 0);
3002 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3003 || (GET_CODE (addr) == UNSPEC
3004 && (XINT (addr, 1) == UNSPEC_GOTENT
3005 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3007 if (symref)
3008 *symref = addr;
3009 if (addend)
3010 *addend = tmpaddend;
3012 return true;
3014 return false;
3017 /* Return true if the address in OP is valid for constraint letter C
3018 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3019 pool MEMs should be accepted. Only the Q, R, S, T constraint
3020 letters are allowed for C. */
3022 static int
3023 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3025 struct s390_address addr;
3026 bool decomposed = false;
3028 /* This check makes sure that no symbolic address (except literal
3029 pool references) are accepted by the R or T constraints. */
3030 if (s390_loadrelative_operand_p (op, NULL, NULL))
3031 return 0;
3033 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3034 if (!lit_pool_ok)
3036 if (!s390_decompose_address (op, &addr))
3037 return 0;
3038 if (addr.literal_pool)
3039 return 0;
3040 decomposed = true;
3043 switch (c)
3045 case 'Q': /* no index short displacement */
3046 if (!decomposed && !s390_decompose_address (op, &addr))
3047 return 0;
3048 if (addr.indx)
3049 return 0;
3050 if (!s390_short_displacement (addr.disp))
3051 return 0;
3052 break;
3054 case 'R': /* with index short displacement */
3055 if (TARGET_LONG_DISPLACEMENT)
3057 if (!decomposed && !s390_decompose_address (op, &addr))
3058 return 0;
3059 if (!s390_short_displacement (addr.disp))
3060 return 0;
3062 /* Any invalid address here will be fixed up by reload,
3063 so accept it for the most generic constraint. */
3064 break;
3066 case 'S': /* no index long displacement */
3067 if (!TARGET_LONG_DISPLACEMENT)
3068 return 0;
3069 if (!decomposed && !s390_decompose_address (op, &addr))
3070 return 0;
3071 if (addr.indx)
3072 return 0;
3073 if (s390_short_displacement (addr.disp))
3074 return 0;
3075 break;
3077 case 'T': /* with index long displacement */
3078 if (!TARGET_LONG_DISPLACEMENT)
3079 return 0;
3080 /* Any invalid address here will be fixed up by reload,
3081 so accept it for the most generic constraint. */
3082 if ((decomposed || s390_decompose_address (op, &addr))
3083 && s390_short_displacement (addr.disp))
3084 return 0;
3085 break;
3086 default:
3087 return 0;
3089 return 1;
3093 /* Evaluates constraint strings described by the regular expression
3094 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
3095 the constraint given in STR, or 0 else. */
3098 s390_mem_constraint (const char *str, rtx op)
3100 char c = str[0];
3102 switch (c)
3104 case 'A':
3105 /* Check for offsettable variants of memory constraints. */
3106 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3107 return 0;
3108 if ((reload_completed || reload_in_progress)
3109 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3110 return 0;
3111 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3112 case 'B':
3113 /* Check for non-literal-pool variants of memory constraints. */
3114 if (!MEM_P (op))
3115 return 0;
3116 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3117 case 'Q':
3118 case 'R':
3119 case 'S':
3120 case 'T':
3121 if (GET_CODE (op) != MEM)
3122 return 0;
3123 return s390_check_qrst_address (c, XEXP (op, 0), true);
3124 case 'U':
3125 return (s390_check_qrst_address ('Q', op, true)
3126 || s390_check_qrst_address ('R', op, true));
3127 case 'W':
3128 return (s390_check_qrst_address ('S', op, true)
3129 || s390_check_qrst_address ('T', op, true));
3130 case 'Y':
3131 /* Simply check for the basic form of a shift count. Reload will
3132 take care of making sure we have a proper base register. */
3133 if (!s390_decompose_shift_count (op, NULL, NULL))
3134 return 0;
3135 break;
3136 case 'Z':
3137 return s390_check_qrst_address (str[1], op, true);
3138 default:
3139 return 0;
3141 return 1;
3145 /* Evaluates constraint strings starting with letter O. Input
3146 parameter C is the second letter following the "O" in the constraint
3147 string. Returns 1 if VALUE meets the respective constraint and 0
3148 otherwise. */
3151 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3153 if (!TARGET_EXTIMM)
3154 return 0;
3156 switch (c)
3158 case 's':
3159 return trunc_int_for_mode (value, SImode) == value;
3161 case 'p':
3162 return value == 0
3163 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3165 case 'n':
3166 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3168 default:
3169 gcc_unreachable ();
3174 /* Evaluates constraint strings starting with letter N. Parameter STR
3175 contains the letters following letter "N" in the constraint string.
3176 Returns true if VALUE matches the constraint. */
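/* For instance, STR == "xQD0" (the part following the leading 'N')
   requests that any (part_goal == -1) QImode part of a DImode value be
   the only part differing from 0, i.e. the constraint matches values
   with exactly one non-zero byte.  */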
3179 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3181 machine_mode mode, part_mode;
3182 int def;
3183 int part, part_goal;
3186 if (str[0] == 'x')
3187 part_goal = -1;
3188 else
3189 part_goal = str[0] - '0';
3191 switch (str[1])
3193 case 'Q':
3194 part_mode = QImode;
3195 break;
3196 case 'H':
3197 part_mode = HImode;
3198 break;
3199 case 'S':
3200 part_mode = SImode;
3201 break;
3202 default:
3203 return 0;
3206 switch (str[2])
3208 case 'H':
3209 mode = HImode;
3210 break;
3211 case 'S':
3212 mode = SImode;
3213 break;
3214 case 'D':
3215 mode = DImode;
3216 break;
3217 default:
3218 return 0;
3221 switch (str[3])
3223 case '0':
3224 def = 0;
3225 break;
3226 case 'F':
3227 def = -1;
3228 break;
3229 default:
3230 return 0;
3233 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3234 return 0;
3236 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3237 if (part < 0)
3238 return 0;
3239 if (part_goal != -1 && part_goal != part)
3240 return 0;
3242 return 1;
3246 /* Returns true if the input parameter VALUE is a float zero. */
3249 s390_float_const_zero_p (rtx value)
3251 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3252 && value == CONST0_RTX (GET_MODE (value)));
3255 /* Implement TARGET_REGISTER_MOVE_COST. */
3257 static int
3258 s390_register_move_cost (machine_mode mode,
3259 reg_class_t from, reg_class_t to)
3261 /* On s390, copy between fprs and gprs is expensive. */
3263 /* It becomes somewhat faster having ldgr/lgdr. */
3264 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3266 /* ldgr is single cycle. */
3267 if (reg_classes_intersect_p (from, GENERAL_REGS)
3268 && reg_classes_intersect_p (to, FP_REGS))
3269 return 1;
3270 /* lgdr needs 3 cycles. */
3271 if (reg_classes_intersect_p (to, GENERAL_REGS)
3272 && reg_classes_intersect_p (from, FP_REGS))
3273 return 3;
3276 /* Otherwise copying is done via memory. */
3277 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3278 && reg_classes_intersect_p (to, FP_REGS))
3279 || (reg_classes_intersect_p (from, FP_REGS)
3280 && reg_classes_intersect_p (to, GENERAL_REGS)))
3281 return 10;
3283 return 1;
3286 /* Implement TARGET_MEMORY_MOVE_COST. */
3288 static int
3289 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3290 reg_class_t rclass ATTRIBUTE_UNUSED,
3291 bool in ATTRIBUTE_UNUSED)
3293 return 2;
3296 /* Compute a (partial) cost for rtx X. Return true if the complete
3297 cost has been computed, and false if subexpressions should be
3298 scanned. In either case, *TOTAL contains the cost result.
3299 OUTER_CODE contains the code of the superexpression of x. */
3301 static bool
3302 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3303 int opno ATTRIBUTE_UNUSED,
3304 int *total, bool speed ATTRIBUTE_UNUSED)
3306 int code = GET_CODE (x);
3307 switch (code)
3309 case CONST:
3310 case CONST_INT:
3311 case LABEL_REF:
3312 case SYMBOL_REF:
3313 case CONST_DOUBLE:
3314 case MEM:
3315 *total = 0;
3316 return true;
3318 case IOR:
3319 /* risbg */
3320 if (GET_CODE (XEXP (x, 0)) == AND
3321 && GET_CODE (XEXP (x, 1)) == ASHIFT
3322 && REG_P (XEXP (XEXP (x, 0), 0))
3323 && REG_P (XEXP (XEXP (x, 1), 0))
3324 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3325 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3326 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3327 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3329 *total = COSTS_N_INSNS (2);
3330 return true;
3332 case ASHIFT:
3333 case ASHIFTRT:
3334 case LSHIFTRT:
3335 case ROTATE:
3336 case ROTATERT:
3337 case AND:
3338 case XOR:
3339 case NEG:
3340 case NOT:
3341 *total = COSTS_N_INSNS (1);
3342 return false;
3344 case PLUS:
3345 case MINUS:
3346 *total = COSTS_N_INSNS (1);
3347 return false;
3349 case MULT:
3350 switch (mode)
3352 case SImode:
3354 rtx left = XEXP (x, 0);
3355 rtx right = XEXP (x, 1);
3356 if (GET_CODE (right) == CONST_INT
3357 && CONST_OK_FOR_K (INTVAL (right)))
3358 *total = s390_cost->mhi;
3359 else if (GET_CODE (left) == SIGN_EXTEND)
3360 *total = s390_cost->mh;
3361 else
3362 *total = s390_cost->ms; /* msr, ms, msy */
3363 break;
3365 case DImode:
3367 rtx left = XEXP (x, 0);
3368 rtx right = XEXP (x, 1);
3369 if (TARGET_ZARCH)
3371 if (GET_CODE (right) == CONST_INT
3372 && CONST_OK_FOR_K (INTVAL (right)))
3373 *total = s390_cost->mghi;
3374 else if (GET_CODE (left) == SIGN_EXTEND)
3375 *total = s390_cost->msgf;
3376 else
3377 *total = s390_cost->msg; /* msgr, msg */
3379 else /* TARGET_31BIT */
3381 if (GET_CODE (left) == SIGN_EXTEND
3382 && GET_CODE (right) == SIGN_EXTEND)
3383 /* mulsidi case: mr, m */
3384 *total = s390_cost->m;
3385 else if (GET_CODE (left) == ZERO_EXTEND
3386 && GET_CODE (right) == ZERO_EXTEND
3387 && TARGET_CPU_ZARCH)
3388 /* umulsidi case: ml, mlr */
3389 *total = s390_cost->ml;
3390 else
3391 /* Complex calculation is required. */
3392 *total = COSTS_N_INSNS (40);
3394 break;
3396 case SFmode:
3397 case DFmode:
3398 *total = s390_cost->mult_df;
3399 break;
3400 case TFmode:
3401 *total = s390_cost->mxbr;
3402 break;
3403 default:
3404 return false;
3406 return false;
3408 case FMA:
3409 switch (mode)
3411 case DFmode:
3412 *total = s390_cost->madbr;
3413 break;
3414 case SFmode:
3415 *total = s390_cost->maebr;
3416 break;
3417 default:
3418 return false;
3420 /* Negate in the third argument is free: FMSUB. */
3421 if (GET_CODE (XEXP (x, 2)) == NEG)
3423 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3424 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3425 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3426 return true;
3428 return false;
3430 case UDIV:
3431 case UMOD:
3432 if (mode == TImode) /* 128 bit division */
3433 *total = s390_cost->dlgr;
3434 else if (mode == DImode)
3436 rtx right = XEXP (x, 1);
3437 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3438 *total = s390_cost->dlr;
3439 else /* 64 by 64 bit division */
3440 *total = s390_cost->dlgr;
3442 else if (mode == SImode) /* 32 bit division */
3443 *total = s390_cost->dlr;
3444 return false;
3446 case DIV:
3447 case MOD:
3448 if (mode == DImode)
3450 rtx right = XEXP (x, 1);
3451 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3452 if (TARGET_ZARCH)
3453 *total = s390_cost->dsgfr;
3454 else
3455 *total = s390_cost->dr;
3456 else /* 64 by 64 bit division */
3457 *total = s390_cost->dsgr;
3459 else if (mode == SImode) /* 32 bit division */
3460 *total = s390_cost->dlr;
3461 else if (mode == SFmode)
3463 *total = s390_cost->debr;
3465 else if (mode == DFmode)
3467 *total = s390_cost->ddbr;
3469 else if (mode == TFmode)
3471 *total = s390_cost->dxbr;
3473 return false;
3475 case SQRT:
3476 if (mode == SFmode)
3477 *total = s390_cost->sqebr;
3478 else if (mode == DFmode)
3479 *total = s390_cost->sqdbr;
3480 else /* TFmode */
3481 *total = s390_cost->sqxbr;
3482 return false;
3484 case SIGN_EXTEND:
3485 case ZERO_EXTEND:
3486 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3487 || outer_code == PLUS || outer_code == MINUS
3488 || outer_code == COMPARE)
3489 *total = 0;
3490 return false;
3492 case COMPARE:
3493 *total = COSTS_N_INSNS (1);
3494 if (GET_CODE (XEXP (x, 0)) == AND
3495 && GET_CODE (XEXP (x, 1)) == CONST_INT
3496 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3498 rtx op0 = XEXP (XEXP (x, 0), 0);
3499 rtx op1 = XEXP (XEXP (x, 0), 1);
3500 rtx op2 = XEXP (x, 1);
3502 if (memory_operand (op0, GET_MODE (op0))
3503 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3504 return true;
3505 if (register_operand (op0, GET_MODE (op0))
3506 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3507 return true;
3509 return false;
3511 default:
3512 return false;
3516 /* Return the cost of an address rtx ADDR. */
3518 static int
3519 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3520 addr_space_t as ATTRIBUTE_UNUSED,
3521 bool speed ATTRIBUTE_UNUSED)
3523 struct s390_address ad;
3524 if (!s390_decompose_address (addr, &ad))
3525 return 1000;
3527 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3530 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3531 otherwise return 0. */
3534 tls_symbolic_operand (rtx op)
3536 if (GET_CODE (op) != SYMBOL_REF)
3537 return 0;
3538 return SYMBOL_REF_TLS_MODEL (op);
3541 /* Split DImode access register reference REG (on 64-bit) into its constituent
3542 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3543 gen_highpart cannot be used as they assume all registers are word-sized,
3544 while our access registers have only half that size. */
3546 void
3547 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3549 gcc_assert (TARGET_64BIT);
3550 gcc_assert (ACCESS_REG_P (reg));
3551 gcc_assert (GET_MODE (reg) == DImode);
3552 gcc_assert (!(REGNO (reg) & 1));
3554 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3555 *hi = gen_rtx_REG (SImode, REGNO (reg));
3558 /* Return true if OP contains a symbol reference */
3560 bool
3561 symbolic_reference_mentioned_p (rtx op)
3563 const char *fmt;
3564 int i;
3566 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3567 return 1;
3569 fmt = GET_RTX_FORMAT (GET_CODE (op));
3570 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3572 if (fmt[i] == 'E')
3574 int j;
3576 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3577 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3578 return 1;
3581 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3582 return 1;
3585 return 0;
3588 /* Return true if OP contains a reference to a thread-local symbol. */
3590 bool
3591 tls_symbolic_reference_mentioned_p (rtx op)
3593 const char *fmt;
3594 int i;
3596 if (GET_CODE (op) == SYMBOL_REF)
3597 return tls_symbolic_operand (op);
3599 fmt = GET_RTX_FORMAT (GET_CODE (op));
3600 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3602 if (fmt[i] == 'E')
3604 int j;
3606 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3607 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3608 return true;
3611 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3612 return true;
3615 return false;
3619 /* Return true if OP is a legitimate general operand when
3620 generating PIC code. It is given that flag_pic is on
3621 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3624 legitimate_pic_operand_p (rtx op)
3626 /* Accept all non-symbolic constants. */
3627 if (!SYMBOLIC_CONST (op))
3628 return 1;
3630 /* Reject everything else; must be handled
3631 via emit_symbolic_move. */
3632 return 0;
3635 /* Returns true if the constant value OP is a legitimate general operand.
3636 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3638 static bool
3639 s390_legitimate_constant_p (machine_mode mode, rtx op)
3641 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3643 if (GET_MODE_SIZE (mode) != 16)
3644 return 0;
3646 if (!const0_operand (op, mode)
3647 && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3648 && !s390_bytemask_vector_p (op, NULL))
3649 return 0;
3652 /* Accept all non-symbolic constants. */
3653 if (!SYMBOLIC_CONST (op))
3654 return 1;
3656 /* Accept immediate LARL operands. */
3657 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3658 return 1;
3660 /* Thread-local symbols are never legal constants. This is
3661 so that emit_call knows that computing such addresses
3662 might require a function call. */
3663 if (TLS_SYMBOLIC_CONST (op))
3664 return 0;
3666 /* In the PIC case, symbolic constants must *not* be
3667 forced into the literal pool. We accept them here,
3668 so that they will be handled by emit_symbolic_move. */
3669 if (flag_pic)
3670 return 1;
3672 /* All remaining non-PIC symbolic constants are
3673 forced into the literal pool. */
3674 return 0;
3677 /* Determine if it's legal to put X into the constant pool. This
3678 is not possible if X contains the address of a symbol that is
3679 not constant (TLS) or not known at final link time (PIC). */
3681 static bool
3682 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3684 switch (GET_CODE (x))
3686 case CONST_INT:
3687 case CONST_DOUBLE:
3688 case CONST_VECTOR:
3689 /* Accept all non-symbolic constants. */
3690 return false;
3692 case LABEL_REF:
3693 /* Labels are OK iff we are non-PIC. */
3694 return flag_pic != 0;
3696 case SYMBOL_REF:
3697 /* 'Naked' TLS symbol references are never OK,
3698 non-TLS symbols are OK iff we are non-PIC. */
3699 if (tls_symbolic_operand (x))
3700 return true;
3701 else
3702 return flag_pic != 0;
3704 case CONST:
3705 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3706 case PLUS:
3707 case MINUS:
3708 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3709 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3711 case UNSPEC:
3712 switch (XINT (x, 1))
3714 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3715 case UNSPEC_LTREL_OFFSET:
3716 case UNSPEC_GOT:
3717 case UNSPEC_GOTOFF:
3718 case UNSPEC_PLTOFF:
3719 case UNSPEC_TLSGD:
3720 case UNSPEC_TLSLDM:
3721 case UNSPEC_NTPOFF:
3722 case UNSPEC_DTPOFF:
3723 case UNSPEC_GOTNTPOFF:
3724 case UNSPEC_INDNTPOFF:
3725 return false;
3727 /* If the literal pool shares the code section, execute template
3728 placeholders may be put into the pool as well. */
3729 case UNSPEC_INSN:
3730 return TARGET_CPU_ZARCH;
3732 default:
3733 return true;
3735 break;
3737 default:
3738 gcc_unreachable ();
3742 /* Returns true if the constant value OP is a legitimate general
3743 operand during and after reload. The difference to
3744 legitimate_constant_p is that this function will not accept
3745 a constant that would need to be forced to the literal pool
3746 before it can be used as operand.
3747 This function accepts all constants which can be loaded directly
3748 into a GPR. */
3750 bool
3751 legitimate_reload_constant_p (rtx op)
3753 /* Accept la(y) operands. */
3754 if (GET_CODE (op) == CONST_INT
3755 && DISP_IN_RANGE (INTVAL (op)))
3756 return true;
3758 /* Accept l(g)hi/l(g)fi operands. */
3759 if (GET_CODE (op) == CONST_INT
3760 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3761 return true;
3763 /* Accept lliXX operands. */
3764 if (TARGET_ZARCH
3765 && GET_CODE (op) == CONST_INT
3766 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3767 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3768 return true;
3770 if (TARGET_EXTIMM
3771 && GET_CODE (op) == CONST_INT
3772 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3773 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3774 return true;
3776 /* Accept larl operands. */
3777 if (TARGET_CPU_ZARCH
3778 && larl_operand (op, VOIDmode))
3779 return true;
3781 /* Accept floating-point zero operands that fit into a single GPR. */
3782 if (GET_CODE (op) == CONST_DOUBLE
3783 && s390_float_const_zero_p (op)
3784 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3785 return true;
3787 /* Accept double-word operands that can be split. */
3788 if (GET_CODE (op) == CONST_INT
3789 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3791 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3792 rtx hi = operand_subword (op, 0, 0, dword_mode);
3793 rtx lo = operand_subword (op, 1, 0, dword_mode);
3794 return legitimate_reload_constant_p (hi)
3795 && legitimate_reload_constant_p (lo);
3798 /* Everything else cannot be handled without reload. */
3799 return false;
3802 /* Returns true if the constant value OP is a legitimate fp operand
3803 during and after reload.
3804 This function accepts all constants which can be loaded directly
3805 into an FPR. */
3807 static bool
3808 legitimate_reload_fp_constant_p (rtx op)
3810 /* Accept floating-point zero operands if the load zero instruction
3811 can be used. Prior to z196 the load fp zero instruction caused a
3812 performance penalty if the result is used as a BFP number. */
3813 if (TARGET_Z196
3814 && GET_CODE (op) == CONST_DOUBLE
3815 && s390_float_const_zero_p (op))
3816 return true;
3818 return false;
3821 /* Returns true if the constant value OP is a legitimate vector operand
3822 during and after reload.
3823 This function accepts all constants which can be loaded directly
3824 into a VR. */
3826 static bool
3827 legitimate_reload_vector_constant_p (rtx op)
3829 /* FIXME: Support constant vectors with all the same 16 bit unsigned
3830 operands. These can be loaded with vrepi. */
3832 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3833 && (const0_operand (op, GET_MODE (op))
3834 || constm1_operand (op, GET_MODE (op))
3835 || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3836 || s390_bytemask_vector_p (op, NULL)))
3837 return true;
3839 return false;
3842 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3843 return the class of reg to actually use. */
3845 static reg_class_t
3846 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3848 switch (GET_CODE (op))
3850 /* Constants we cannot reload into general registers
3851 must be forced into the literal pool. */
3852 case CONST_VECTOR:
3853 case CONST_DOUBLE:
3854 case CONST_INT:
3855 if (reg_class_subset_p (GENERAL_REGS, rclass)
3856 && legitimate_reload_constant_p (op))
3857 return GENERAL_REGS;
3858 else if (reg_class_subset_p (ADDR_REGS, rclass)
3859 && legitimate_reload_constant_p (op))
3860 return ADDR_REGS;
3861 else if (reg_class_subset_p (FP_REGS, rclass)
3862 && legitimate_reload_fp_constant_p (op))
3863 return FP_REGS;
3864 else if (reg_class_subset_p (VEC_REGS, rclass)
3865 && legitimate_reload_vector_constant_p (op))
3866 return VEC_REGS;
3868 return NO_REGS;
3870 /* If a symbolic constant or a PLUS is reloaded,
3871 it is most likely being used as an address, so
3872 prefer ADDR_REGS. If 'class' is not a superset
3873 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3874 case CONST:
3875 /* Symrefs cannot be pushed into the literal pool with -fPIC
3876 so we *MUST NOT* return NO_REGS for these cases
3877 (s390_cannot_force_const_mem will return true).
3879 On the other hand we MUST return NO_REGS for symrefs with
3880 invalid addend which might have been pushed to the literal
3881 pool (no -fPIC). Usually we would expect them to be
3882 handled via secondary reload but this does not happen if
3883 they are used as literal pool slot replacement in reload
3884 inheritance (see emit_input_reload_insns). */
3885 if (TARGET_CPU_ZARCH
3886 && GET_CODE (XEXP (op, 0)) == PLUS
3887 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3888 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3890 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3891 return ADDR_REGS;
3892 else
3893 return NO_REGS;
3895 /* fallthrough */
3896 case LABEL_REF:
3897 case SYMBOL_REF:
3898 if (!legitimate_reload_constant_p (op))
3899 return NO_REGS;
3900 /* fallthrough */
3901 case PLUS:
3902 /* load address will be used. */
3903 if (reg_class_subset_p (ADDR_REGS, rclass))
3904 return ADDR_REGS;
3905 else
3906 return NO_REGS;
3908 default:
3909 break;
3912 return rclass;
3915 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3916 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3917 aligned. */
3919 bool
3920 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3922 HOST_WIDE_INT addend;
3923 rtx symref;
3925 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3926 return false;
3928 if (addend & (alignment - 1))
3929 return false;
3931 if (GET_CODE (symref) == SYMBOL_REF
3932 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3933 return true;
3935 if (GET_CODE (symref) == UNSPEC
3936 && alignment <= UNITS_PER_LONG)
3937 return true;
3939 return false;
3942 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3943 operand, SCRATCH is used to load the even part of the address and
3944 one is added afterwards. */
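/* For instance, for ADDR == foo + 5 the even address foo + 4 is loaded
   into SCRATCH with larl, and the final value is formed with
   la REG,1(SCRATCH) so that the condition code is left untouched.  */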
3946 void
3947 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3949 HOST_WIDE_INT addend;
3950 rtx symref;
3952 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3953 gcc_unreachable ();
3955 if (!(addend & 1))
3956 /* Easy case. The addend is even so larl will do fine. */
3957 emit_move_insn (reg, addr);
3958 else
3960 /* We can leave the scratch register untouched if the target
3961 register is a valid base register. */
3962 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3963 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3964 scratch = reg;
3966 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3967 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3969 if (addend != 1)
3970 emit_move_insn (scratch,
3971 gen_rtx_CONST (Pmode,
3972 gen_rtx_PLUS (Pmode, symref,
3973 GEN_INT (addend - 1))));
3974 else
3975 emit_move_insn (scratch, symref);
3977 /* Increment the address using la in order to avoid clobbering cc. */
3978 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3982 /* Generate what is necessary to move between REG and MEM using
3983 SCRATCH. The direction is given by TOMEM. */
3985 void
3986 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3988 /* Reload might have pulled a constant out of the literal pool.
3989 Force it back in. */
3990 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3991 || GET_CODE (mem) == CONST_VECTOR
3992 || GET_CODE (mem) == CONST)
3993 mem = force_const_mem (GET_MODE (reg), mem);
3995 gcc_assert (MEM_P (mem));
3997 /* For a load from memory we can leave the scratch register
3998 untouched if the target register is a valid base register. */
3999 if (!tomem
4000 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4001 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4002 && GET_MODE (reg) == GET_MODE (scratch))
4003 scratch = reg;
4005 /* Load address into scratch register. Since we can't have a
4006 secondary reload for a secondary reload we have to cover the case
4007 where larl would need a secondary reload here as well. */
4008 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4010 /* Now we can use a standard load/store to do the move. */
4011 if (tomem)
4012 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4013 else
4014 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4017 /* Inform reload about cases where moving X with a mode MODE to a register in
4018 RCLASS requires an extra scratch or immediate register. Return the class
4019 needed for the immediate register. */
4021 static reg_class_t
4022 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4023 machine_mode mode, secondary_reload_info *sri)
4025 enum reg_class rclass = (enum reg_class) rclass_i;
4027 /* Intermediate register needed. */
4028 if (reg_classes_intersect_p (CC_REGS, rclass))
4029 return GENERAL_REGS;
4031 if (TARGET_VX)
4033 /* The vst/vl vector move instructions allow only for short
4034 displacements. */
4035 if (MEM_P (x)
4036 && GET_CODE (XEXP (x, 0)) == PLUS
4037 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4038 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4039 && reg_class_subset_p (rclass, VEC_REGS)
4040 && (!reg_class_subset_p (rclass, FP_REGS)
4041 || (GET_MODE_SIZE (mode) > 8
4042 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4044 if (in_p)
4045 sri->icode = (TARGET_64BIT ?
4046 CODE_FOR_reloaddi_la_in :
4047 CODE_FOR_reloadsi_la_in);
4048 else
4049 sri->icode = (TARGET_64BIT ?
4050 CODE_FOR_reloaddi_la_out :
4051 CODE_FOR_reloadsi_la_out);
4055 if (TARGET_Z10)
4057 HOST_WIDE_INT offset;
4058 rtx symref;
4060 /* On z10 several optimizer steps may generate larl operands with
4061 an odd addend. */
4062 if (in_p
4063 && s390_loadrelative_operand_p (x, &symref, &offset)
4064 && mode == Pmode
4065 && !SYMBOL_REF_ALIGN1_P (symref)
4066 && (offset & 1) == 1)
4067 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4068 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4070 /* Handle all the (mem (symref)) accesses we cannot use the z10
4071 instructions for. */
4072 if (MEM_P (x)
4073 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4074 && (mode == QImode
4075 || !reg_class_subset_p (rclass, GENERAL_REGS)
4076 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4077 || !s390_check_symref_alignment (XEXP (x, 0),
4078 GET_MODE_SIZE (mode))))
4080 #define __SECONDARY_RELOAD_CASE(M,m) \
4081 case M##mode: \
4082 if (TARGET_64BIT) \
4083 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4084 CODE_FOR_reload##m##di_tomem_z10; \
4085 else \
4086 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4087 CODE_FOR_reload##m##si_tomem_z10; \
4088 break;
4090 switch (GET_MODE (x))
4092 __SECONDARY_RELOAD_CASE (QI, qi);
4093 __SECONDARY_RELOAD_CASE (HI, hi);
4094 __SECONDARY_RELOAD_CASE (SI, si);
4095 __SECONDARY_RELOAD_CASE (DI, di);
4096 __SECONDARY_RELOAD_CASE (TI, ti);
4097 __SECONDARY_RELOAD_CASE (SF, sf);
4098 __SECONDARY_RELOAD_CASE (DF, df);
4099 __SECONDARY_RELOAD_CASE (TF, tf);
4100 __SECONDARY_RELOAD_CASE (SD, sd);
4101 __SECONDARY_RELOAD_CASE (DD, dd);
4102 __SECONDARY_RELOAD_CASE (TD, td);
4103 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4104 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4105 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4106 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4107 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4108 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4109 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4110 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4111 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4112 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4113 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4114 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4115 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4116 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4117 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4118 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4119 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4120 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4121 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4122 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4123 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4124 default:
4125 gcc_unreachable ();
4127 #undef __SECONDARY_RELOAD_CASE
4131 /* We need a scratch register when loading a PLUS expression which
4132 is not a legitimate operand of the LOAD ADDRESS instruction. */
 4133   /* LRA can deal with the transformation of a PLUS operand very well -- so
 4134      we don't need to prompt LRA in this case.  */
4135 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4136 sri->icode = (TARGET_64BIT ?
4137 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
 4139   /* When performing a multiword move from or to memory we have to make sure
 4140      the second chunk in memory is addressable without causing a displacement
 4141      overflow.  If that would be the case we calculate the address in
 4142      a scratch register.  */
4143 if (MEM_P (x)
4144 && GET_CODE (XEXP (x, 0)) == PLUS
4145 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4146 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4147 + GET_MODE_SIZE (mode) - 1))
 4149       /* For GENERAL_REGS a displacement overflow is no problem if occurring
 4150          in an s_operand address since we may fall back to lm/stm.  So we only
 4151          have to care about overflows in the b+i+d case.  */
4152 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4153 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4155 /* For FP_REGS no lm/stm is available so this check is triggered
4156 for displacement overflows in b+i+d and b+d like addresses. */
4157 || (reg_classes_intersect_p (FP_REGS, rclass)
4158 && s390_class_max_nregs (FP_REGS, mode) > 1))
4160 if (in_p)
4161 sri->icode = (TARGET_64BIT ?
4162 CODE_FOR_reloaddi_la_in :
4163 CODE_FOR_reloadsi_la_in);
4164 else
4165 sri->icode = (TARGET_64BIT ?
4166 CODE_FOR_reloaddi_la_out :
4167 CODE_FOR_reloadsi_la_out);
4171 /* A scratch address register is needed when a symbolic constant is
 4172      copied to r0 when compiling with -fPIC.  In other cases the target
 4173      register might be used as a temporary (see legitimize_pic_address).  */
4174 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4175 sri->icode = (TARGET_64BIT ?
4176 CODE_FOR_reloaddi_PIC_addr :
4177 CODE_FOR_reloadsi_PIC_addr);
4179 /* Either scratch or no register needed. */
4180 return NO_REGS;
4183 /* Generate code to load SRC, which is PLUS that is not a
4184 legitimate operand for the LA instruction, into TARGET.
4185 SCRATCH may be used as scratch register. */
4187 void
4188 s390_expand_plus_operand (rtx target, rtx src,
4189 rtx scratch)
4191 rtx sum1, sum2;
4192 struct s390_address ad;
4194 /* src must be a PLUS; get its two operands. */
4195 gcc_assert (GET_CODE (src) == PLUS);
4196 gcc_assert (GET_MODE (src) == Pmode);
 4198   /* Check if either of the two operands is already scheduled
4199 for replacement by reload. This can happen e.g. when
4200 float registers occur in an address. */
4201 sum1 = find_replacement (&XEXP (src, 0));
4202 sum2 = find_replacement (&XEXP (src, 1));
4203 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4205 /* If the address is already strictly valid, there's nothing to do. */
4206 if (!s390_decompose_address (src, &ad)
4207 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4208 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4210 /* Otherwise, one of the operands cannot be an address register;
4211 we reload its value into the scratch register. */
4212 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4214 emit_move_insn (scratch, sum1);
4215 sum1 = scratch;
4217 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4219 emit_move_insn (scratch, sum2);
4220 sum2 = scratch;
4223 /* According to the way these invalid addresses are generated
4224 in reload.c, it should never happen (at least on s390) that
4225 *neither* of the PLUS components, after find_replacements
4226 was applied, is an address register. */
4227 if (sum1 == scratch && sum2 == scratch)
4229 debug_rtx (src);
4230 gcc_unreachable ();
4233 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4236 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4237 is only ever performed on addresses, so we can mark the
4238 sum as legitimate for LA in any case. */
4239 s390_load_address (target, src);
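/* Worked example (illustrative only; the register numbers are made up):
   reload may hand this function an address such as
     (plus:DI (reg:DI %f4) (reg:DI %r6))
   which LA cannot use, since a floating-point register is neither a valid
   base nor a valid index.  The code above copies the offending operand
   into SCRATCH and rebuilds the sum as
     (plus:DI (reg:DI <scratch>) (reg:DI %r6))
   before emitting the forced LOAD ADDRESS.  */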
4243 /* Return true if ADDR is a valid memory address.
4244 STRICT specifies whether strict register checking applies. */
4246 static bool
4247 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4249 struct s390_address ad;
4251 if (TARGET_Z10
4252 && larl_operand (addr, VOIDmode)
4253 && (mode == VOIDmode
4254 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4255 return true;
4257 if (!s390_decompose_address (addr, &ad))
4258 return false;
4260 if (strict)
4262 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4263 return false;
4265 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4266 return false;
4268 else
4270 if (ad.base
4271 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4272 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4273 return false;
4275 if (ad.indx
4276 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4277 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4278 return false;
4280 return true;
4283 /* Return true if OP is a valid operand for the LA instruction.
4284 In 31-bit, we need to prove that the result is used as an
4285 address, as LA performs only a 31-bit addition. */
4287 bool
4288 legitimate_la_operand_p (rtx op)
4290 struct s390_address addr;
4291 if (!s390_decompose_address (op, &addr))
4292 return false;
4294 return (TARGET_64BIT || addr.pointer);
4297 /* Return true if it is valid *and* preferable to use LA to
4298 compute the sum of OP1 and OP2. */
4300 bool
4301 preferred_la_operand_p (rtx op1, rtx op2)
4303 struct s390_address addr;
4305 if (op2 != const0_rtx)
4306 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4308 if (!s390_decompose_address (op1, &addr))
4309 return false;
4310 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4311 return false;
4312 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4313 return false;
4315 /* Avoid LA instructions with index register on z196; it is
4316 preferable to use regular add instructions when possible.
4317 Starting with zEC12 the la with index register is "uncracked"
4318 again. */
4319 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4320 return false;
4322 if (!TARGET_64BIT && !addr.pointer)
4323 return false;
4325 if (addr.pointer)
4326 return true;
4328 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4329 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4330 return true;
4332 return false;
4335 /* Emit a forced load-address operation to load SRC into DST.
4336 This will use the LOAD ADDRESS instruction even in situations
4337 where legitimate_la_operand_p (SRC) returns false. */
4339 void
4340 s390_load_address (rtx dst, rtx src)
4342 if (TARGET_64BIT)
4343 emit_move_insn (dst, src);
4344 else
4345 emit_insn (gen_force_la_31 (dst, src));
4348 /* Return a legitimate reference for ORIG (an address) using the
4349 register REG. If REG is 0, a new pseudo is generated.
4351 There are two types of references that must be handled:
4353 1. Global data references must load the address from the GOT, via
4354 the PIC reg. An insn is emitted to do this load, and the reg is
4355 returned.
4357 2. Static data references, constant pool addresses, and code labels
4358 compute the address as an offset from the GOT, whose base is in
4359 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4360 differentiate them from global data objects. The returned
4361 address is the PIC reg + an unspec constant.
4363 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4364 reg also appears in the address. */
4367 legitimize_pic_address (rtx orig, rtx reg)
4369 rtx addr = orig;
4370 rtx addend = const0_rtx;
4371 rtx new_rtx = orig;
4373 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4375 if (GET_CODE (addr) == CONST)
4376 addr = XEXP (addr, 0);
4378 if (GET_CODE (addr) == PLUS)
4380 addend = XEXP (addr, 1);
4381 addr = XEXP (addr, 0);
4384 if ((GET_CODE (addr) == LABEL_REF
4385 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4386 || (GET_CODE (addr) == UNSPEC &&
4387 (XINT (addr, 1) == UNSPEC_GOTENT
4388 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4389 && GET_CODE (addend) == CONST_INT)
4391 /* This can be locally addressed. */
4393 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4394 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4395 gen_rtx_CONST (Pmode, addr) : addr);
4397 if (TARGET_CPU_ZARCH
4398 && larl_operand (const_addr, VOIDmode)
4399 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4400 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4402 if (INTVAL (addend) & 1)
4404 /* LARL can't handle odd offsets, so emit a pair of LARL
4405 and LA. */
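		  /* Illustrative sketch of the two sub-cases handled below:
		     an in-range odd addend such as sym+5 becomes
		       larl <temp>, sym
		       la   <reg>, 5(<temp>)
		     while an out-of-range odd addend is first rewritten as
		     sym+(addend-1) with a remaining addend of 1.  */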
4406 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4408 if (!DISP_IN_RANGE (INTVAL (addend)))
4410 HOST_WIDE_INT even = INTVAL (addend) - 1;
4411 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4412 addr = gen_rtx_CONST (Pmode, addr);
4413 addend = const1_rtx;
4416 emit_move_insn (temp, addr);
4417 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4419 if (reg != 0)
4421 s390_load_address (reg, new_rtx);
4422 new_rtx = reg;
4425 else
4427 /* If the offset is even, we can just use LARL. This
4428 will happen automatically. */
4431 else
 4433 	  /* No larl available - access local symbols relative to the GOT.  */
4435 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4437 if (reload_in_progress || reload_completed)
4438 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4440 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4441 if (addend != const0_rtx)
4442 addr = gen_rtx_PLUS (Pmode, addr, addend);
4443 addr = gen_rtx_CONST (Pmode, addr);
4444 addr = force_const_mem (Pmode, addr);
4445 emit_move_insn (temp, addr);
4447 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4448 if (reg != 0)
4450 s390_load_address (reg, new_rtx);
4451 new_rtx = reg;
4455 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4457 /* A non-local symbol reference without addend.
4459 The symbol ref is wrapped into an UNSPEC to make sure the
4460 proper operand modifier (@GOT or @GOTENT) will be emitted.
4461 This will tell the linker to put the symbol into the GOT.
4463 Additionally the code dereferencing the GOT slot is emitted here.
4465 An addend to the symref needs to be added afterwards.
4466 legitimize_pic_address calls itself recursively to handle
4467 that case. So no need to do it here. */
4469 if (reg == 0)
4470 reg = gen_reg_rtx (Pmode);
4472 if (TARGET_Z10)
4474 /* Use load relative if possible.
4475 lgrl <target>, sym@GOTENT */
4476 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4477 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4478 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4480 emit_move_insn (reg, new_rtx);
4481 new_rtx = reg;
4483 else if (flag_pic == 1)
4485 /* Assume GOT offset is a valid displacement operand (< 4k
4486 or < 512k with z990). This is handled the same way in
4487 both 31- and 64-bit code (@GOT).
4488 lg <target>, sym@GOT(r12) */
4490 if (reload_in_progress || reload_completed)
4491 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4493 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4494 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4495 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4496 new_rtx = gen_const_mem (Pmode, new_rtx);
4497 emit_move_insn (reg, new_rtx);
4498 new_rtx = reg;
4500 else if (TARGET_CPU_ZARCH)
4502 /* If the GOT offset might be >= 4k, we determine the position
4503 of the GOT entry via a PC-relative LARL (@GOTENT).
4504 larl temp, sym@GOTENT
4505 lg <target>, 0(temp) */
4507 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4509 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4510 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4512 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4513 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4514 emit_move_insn (temp, new_rtx);
4516 new_rtx = gen_const_mem (Pmode, temp);
4517 emit_move_insn (reg, new_rtx);
4519 new_rtx = reg;
4521 else
4523 /* If the GOT offset might be >= 4k, we have to load it
4524 from the literal pool (@GOT).
4526 lg temp, lit-litbase(r13)
4527 lg <target>, 0(temp)
4528 lit: .long sym@GOT */
4530 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4532 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4533 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4535 if (reload_in_progress || reload_completed)
4536 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4538 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4539 addr = gen_rtx_CONST (Pmode, addr);
4540 addr = force_const_mem (Pmode, addr);
4541 emit_move_insn (temp, addr);
4543 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4544 new_rtx = gen_const_mem (Pmode, new_rtx);
4545 emit_move_insn (reg, new_rtx);
4546 new_rtx = reg;
4549 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4551 gcc_assert (XVECLEN (addr, 0) == 1);
4552 switch (XINT (addr, 1))
 4554 	  /* These UNSPECs address symbols (or PLT slots) relative to the GOT
 4555 	     (not GOT slots!).  In general this will exceed the
 4556 	     displacement range so these values belong in the literal
 4557 	     pool.  */
4558 case UNSPEC_GOTOFF:
4559 case UNSPEC_PLTOFF:
4560 new_rtx = force_const_mem (Pmode, orig);
4561 break;
4563 /* For -fPIC the GOT size might exceed the displacement
4564 range so make sure the value is in the literal pool. */
4565 case UNSPEC_GOT:
4566 if (flag_pic == 2)
4567 new_rtx = force_const_mem (Pmode, orig);
4568 break;
4570 /* For @GOTENT larl is used. This is handled like local
4571 symbol refs. */
4572 case UNSPEC_GOTENT:
4573 gcc_unreachable ();
4574 break;
 4576 	  /* @PLT is OK as is on 64-bit, but must be converted to
 4577 	     GOT-relative @PLTOFF on 31-bit.  */
4578 case UNSPEC_PLT:
4579 if (!TARGET_CPU_ZARCH)
4581 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4583 if (reload_in_progress || reload_completed)
4584 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4586 addr = XVECEXP (addr, 0, 0);
4587 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4588 UNSPEC_PLTOFF);
4589 if (addend != const0_rtx)
4590 addr = gen_rtx_PLUS (Pmode, addr, addend);
4591 addr = gen_rtx_CONST (Pmode, addr);
4592 addr = force_const_mem (Pmode, addr);
4593 emit_move_insn (temp, addr);
4595 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4596 if (reg != 0)
4598 s390_load_address (reg, new_rtx);
4599 new_rtx = reg;
4602 else
4603 /* On 64 bit larl can be used. This case is handled like
4604 local symbol refs. */
4605 gcc_unreachable ();
4606 break;
4608 /* Everything else cannot happen. */
4609 default:
4610 gcc_unreachable ();
4613 else if (addend != const0_rtx)
4615 /* Otherwise, compute the sum. */
4617 rtx base = legitimize_pic_address (addr, reg);
4618 new_rtx = legitimize_pic_address (addend,
4619 base == reg ? NULL_RTX : reg);
4620 if (GET_CODE (new_rtx) == CONST_INT)
4621 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4622 else
4624 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4626 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4627 new_rtx = XEXP (new_rtx, 1);
4629 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4632 if (GET_CODE (new_rtx) == CONST)
4633 new_rtx = XEXP (new_rtx, 0);
4634 new_rtx = force_operand (new_rtx, 0);
4637 return new_rtx;
4640 /* Load the thread pointer into a register. */
4643 s390_get_thread_pointer (void)
4645 rtx tp = gen_reg_rtx (Pmode);
4647 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4648 mark_reg_pointer (tp, BITS_PER_WORD);
4650 return tp;
4653 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4654 in s390_tls_symbol which always refers to __tls_get_offset.
 4655    The returned offset is written to RESULT_REG and a USE rtx is
4656 generated for TLS_CALL. */
4658 static GTY(()) rtx s390_tls_symbol;
4660 static void
4661 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4663 rtx insn;
4665 if (!flag_pic)
4666 emit_insn (s390_load_got ());
4668 if (!s390_tls_symbol)
4669 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4671 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4672 gen_rtx_REG (Pmode, RETURN_REGNUM));
4674 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4675 RTL_CONST_CALL_P (insn) = 1;
4678 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4679 this (thread-local) address. REG may be used as temporary. */
4681 static rtx
4682 legitimize_tls_address (rtx addr, rtx reg)
4684 rtx new_rtx, tls_call, temp, base, r2, insn;
4686 if (GET_CODE (addr) == SYMBOL_REF)
4687 switch (tls_symbolic_operand (addr))
4689 case TLS_MODEL_GLOBAL_DYNAMIC:
4690 start_sequence ();
4691 r2 = gen_rtx_REG (Pmode, 2);
4692 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4693 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4694 new_rtx = force_const_mem (Pmode, new_rtx);
4695 emit_move_insn (r2, new_rtx);
4696 s390_emit_tls_call_insn (r2, tls_call);
4697 insn = get_insns ();
4698 end_sequence ();
4700 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4701 temp = gen_reg_rtx (Pmode);
4702 emit_libcall_block (insn, temp, r2, new_rtx);
4704 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4705 if (reg != 0)
4707 s390_load_address (reg, new_rtx);
4708 new_rtx = reg;
4710 break;
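      /* Rough shape of the sequence built above for the global-dynamic
	 model (illustrative summary; the exact insns depend on the target
	 flags): the @TLSGD constant is loaded into %r2, __tls_get_offset
	 is called, and the returned offset is added to the thread pointer
	 obtained via s390_get_thread_pointer ().  */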
4712 case TLS_MODEL_LOCAL_DYNAMIC:
4713 start_sequence ();
4714 r2 = gen_rtx_REG (Pmode, 2);
4715 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4716 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4717 new_rtx = force_const_mem (Pmode, new_rtx);
4718 emit_move_insn (r2, new_rtx);
4719 s390_emit_tls_call_insn (r2, tls_call);
4720 insn = get_insns ();
4721 end_sequence ();
4723 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4724 temp = gen_reg_rtx (Pmode);
4725 emit_libcall_block (insn, temp, r2, new_rtx);
4727 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4728 base = gen_reg_rtx (Pmode);
4729 s390_load_address (base, new_rtx);
4731 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4732 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4733 new_rtx = force_const_mem (Pmode, new_rtx);
4734 temp = gen_reg_rtx (Pmode);
4735 emit_move_insn (temp, new_rtx);
4737 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4738 if (reg != 0)
4740 s390_load_address (reg, new_rtx);
4741 new_rtx = reg;
4743 break;
4745 case TLS_MODEL_INITIAL_EXEC:
4746 if (flag_pic == 1)
4748 /* Assume GOT offset < 4k. This is handled the same way
4749 in both 31- and 64-bit code. */
4751 if (reload_in_progress || reload_completed)
4752 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4754 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4755 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4756 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4757 new_rtx = gen_const_mem (Pmode, new_rtx);
4758 temp = gen_reg_rtx (Pmode);
4759 emit_move_insn (temp, new_rtx);
4761 else if (TARGET_CPU_ZARCH)
4763 /* If the GOT offset might be >= 4k, we determine the position
4764 of the GOT entry via a PC-relative LARL. */
4766 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4767 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4768 temp = gen_reg_rtx (Pmode);
4769 emit_move_insn (temp, new_rtx);
4771 new_rtx = gen_const_mem (Pmode, temp);
4772 temp = gen_reg_rtx (Pmode);
4773 emit_move_insn (temp, new_rtx);
4775 else if (flag_pic)
4777 /* If the GOT offset might be >= 4k, we have to load it
4778 from the literal pool. */
4780 if (reload_in_progress || reload_completed)
4781 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4784 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4785 new_rtx = force_const_mem (Pmode, new_rtx);
4786 temp = gen_reg_rtx (Pmode);
4787 emit_move_insn (temp, new_rtx);
4789 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4790 new_rtx = gen_const_mem (Pmode, new_rtx);
4792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4793 temp = gen_reg_rtx (Pmode);
4794 emit_insn (gen_rtx_SET (temp, new_rtx));
4796 else
4798 /* In position-dependent code, load the absolute address of
4799 the GOT entry from the literal pool. */
4801 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4802 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4803 new_rtx = force_const_mem (Pmode, new_rtx);
4804 temp = gen_reg_rtx (Pmode);
4805 emit_move_insn (temp, new_rtx);
4807 new_rtx = temp;
4808 new_rtx = gen_const_mem (Pmode, new_rtx);
4809 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4810 temp = gen_reg_rtx (Pmode);
4811 emit_insn (gen_rtx_SET (temp, new_rtx));
4814 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4815 if (reg != 0)
4817 s390_load_address (reg, new_rtx);
4818 new_rtx = reg;
4820 break;
4822 case TLS_MODEL_LOCAL_EXEC:
4823 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4824 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4825 new_rtx = force_const_mem (Pmode, new_rtx);
4826 temp = gen_reg_rtx (Pmode);
4827 emit_move_insn (temp, new_rtx);
4829 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4830 if (reg != 0)
4832 s390_load_address (reg, new_rtx);
4833 new_rtx = reg;
4835 break;
4837 default:
4838 gcc_unreachable ();
4841 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4843 switch (XINT (XEXP (addr, 0), 1))
4845 case UNSPEC_INDNTPOFF:
4846 gcc_assert (TARGET_CPU_ZARCH);
4847 new_rtx = addr;
4848 break;
4850 default:
4851 gcc_unreachable ();
4855 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4856 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4858 new_rtx = XEXP (XEXP (addr, 0), 0);
4859 if (GET_CODE (new_rtx) != SYMBOL_REF)
4860 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4862 new_rtx = legitimize_tls_address (new_rtx, reg);
4863 new_rtx = plus_constant (Pmode, new_rtx,
4864 INTVAL (XEXP (XEXP (addr, 0), 1)));
4865 new_rtx = force_operand (new_rtx, 0);
4868 else
4869 gcc_unreachable (); /* for now ... */
4871 return new_rtx;
4874 /* Emit insns making the address in operands[1] valid for a standard
4875 move to operands[0]. operands[1] is replaced by an address which
4876 should be used instead of the former RTX to emit the move
4877 pattern. */
4879 void
4880 emit_symbolic_move (rtx *operands)
4882 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4884 if (GET_CODE (operands[0]) == MEM)
4885 operands[1] = force_reg (Pmode, operands[1]);
4886 else if (TLS_SYMBOLIC_CONST (operands[1]))
4887 operands[1] = legitimize_tls_address (operands[1], temp);
4888 else if (flag_pic)
4889 operands[1] = legitimize_pic_address (operands[1], temp);
4892 /* Try machine-dependent ways of modifying an illegitimate address X
4893 to be legitimate. If we find one, return the new, valid address.
4895 OLDX is the address as it was before break_out_memory_refs was called.
4896 In some cases it is useful to look at this to decide what needs to be done.
4898 MODE is the mode of the operand pointed to by X.
4900 When -fpic is used, special handling is needed for symbolic references.
4901 See comments by legitimize_pic_address for details. */
4903 static rtx
4904 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4905 machine_mode mode ATTRIBUTE_UNUSED)
4907 rtx constant_term = const0_rtx;
4909 if (TLS_SYMBOLIC_CONST (x))
4911 x = legitimize_tls_address (x, 0);
4913 if (s390_legitimate_address_p (mode, x, FALSE))
4914 return x;
4916 else if (GET_CODE (x) == PLUS
4917 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4918 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4920 return x;
4922 else if (flag_pic)
4924 if (SYMBOLIC_CONST (x)
4925 || (GET_CODE (x) == PLUS
4926 && (SYMBOLIC_CONST (XEXP (x, 0))
4927 || SYMBOLIC_CONST (XEXP (x, 1)))))
4928 x = legitimize_pic_address (x, 0);
4930 if (s390_legitimate_address_p (mode, x, FALSE))
4931 return x;
4934 x = eliminate_constant_term (x, &constant_term);
4936 /* Optimize loading of large displacements by splitting them
4937 into the multiple of 4K and the rest; this allows the
4938 former to be CSE'd if possible.
4940 Don't do this if the displacement is added to a register
4941 pointing into the stack frame, as the offsets will
4942 change later anyway. */
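  /* Illustrative example: a constant term of 0x12345 would be split into
     0x12000 (forced into a register and therefore CSE-able) plus the
     in-range displacement 0x345.  */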
4944 if (GET_CODE (constant_term) == CONST_INT
4945 && !TARGET_LONG_DISPLACEMENT
4946 && !DISP_IN_RANGE (INTVAL (constant_term))
4947 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4949 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4950 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4952 rtx temp = gen_reg_rtx (Pmode);
4953 rtx val = force_operand (GEN_INT (upper), temp);
4954 if (val != temp)
4955 emit_move_insn (temp, val);
4957 x = gen_rtx_PLUS (Pmode, x, temp);
4958 constant_term = GEN_INT (lower);
4961 if (GET_CODE (x) == PLUS)
4963 if (GET_CODE (XEXP (x, 0)) == REG)
4965 rtx temp = gen_reg_rtx (Pmode);
4966 rtx val = force_operand (XEXP (x, 1), temp);
4967 if (val != temp)
4968 emit_move_insn (temp, val);
4970 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4973 else if (GET_CODE (XEXP (x, 1)) == REG)
4975 rtx temp = gen_reg_rtx (Pmode);
4976 rtx val = force_operand (XEXP (x, 0), temp);
4977 if (val != temp)
4978 emit_move_insn (temp, val);
4980 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4984 if (constant_term != const0_rtx)
4985 x = gen_rtx_PLUS (Pmode, x, constant_term);
4987 return x;
4990 /* Try a machine-dependent way of reloading an illegitimate address AD
4991 operand. If we find one, push the reload and return the new address.
4993 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4994 and TYPE is the reload type of the current reload. */
4997 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
4998 int opnum, int type)
5000 if (!optimize || TARGET_LONG_DISPLACEMENT)
5001 return NULL_RTX;
5003 if (GET_CODE (ad) == PLUS)
5005 rtx tem = simplify_binary_operation (PLUS, Pmode,
5006 XEXP (ad, 0), XEXP (ad, 1));
5007 if (tem)
5008 ad = tem;
5011 if (GET_CODE (ad) == PLUS
5012 && GET_CODE (XEXP (ad, 0)) == REG
5013 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5014 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5016 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5017 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5018 rtx cst, tem, new_rtx;
5020 cst = GEN_INT (upper);
5021 if (!legitimate_reload_constant_p (cst))
5022 cst = force_const_mem (Pmode, cst);
5024 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5025 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5027 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5028 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5029 opnum, (enum reload_type) type);
5030 return new_rtx;
5033 return NULL_RTX;
 5036 /* Emit code to move LEN bytes from SRC to DST.  */
5038 bool
5039 s390_expand_movmem (rtx dst, rtx src, rtx len)
5041 /* When tuning for z10 or higher we rely on the Glibc functions to
 5042      do the right thing.  Only for constant lengths below 64k will we
5043 generate inline code. */
5044 if (s390_tune >= PROCESSOR_2097_Z10
5045 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5046 return false;
5048 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5050 if (INTVAL (len) > 0)
5051 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5054 else if (TARGET_MVCLE)
5056 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5059 else
5061 rtx dst_addr, src_addr, count, blocks, temp;
5062 rtx_code_label *loop_start_label = gen_label_rtx ();
5063 rtx_code_label *loop_end_label = gen_label_rtx ();
5064 rtx_code_label *end_label = gen_label_rtx ();
5065 machine_mode mode;
5067 mode = GET_MODE (len);
5068 if (mode == VOIDmode)
5069 mode = Pmode;
5071 dst_addr = gen_reg_rtx (Pmode);
5072 src_addr = gen_reg_rtx (Pmode);
5073 count = gen_reg_rtx (mode);
5074 blocks = gen_reg_rtx (mode);
5076 convert_move (count, len, 1);
5077 emit_cmp_and_jump_insns (count, const0_rtx,
5078 EQ, NULL_RTX, mode, 1, end_label);
5080 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5081 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5082 dst = change_address (dst, VOIDmode, dst_addr);
5083 src = change_address (src, VOIDmode, src_addr);
5085 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5086 OPTAB_DIRECT);
5087 if (temp != count)
5088 emit_move_insn (count, temp);
5090 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5091 OPTAB_DIRECT);
5092 if (temp != blocks)
5093 emit_move_insn (blocks, temp);
5095 emit_cmp_and_jump_insns (blocks, const0_rtx,
5096 EQ, NULL_RTX, mode, 1, loop_end_label);
5098 emit_label (loop_start_label);
5100 if (TARGET_Z10
5101 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5103 rtx prefetch;
5105 /* Issue a read prefetch for the +3 cache line. */
5106 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5107 const0_rtx, const0_rtx);
5108 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5109 emit_insn (prefetch);
5111 /* Issue a write prefetch for the +3 cache line. */
5112 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5113 const1_rtx, const0_rtx);
5114 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5115 emit_insn (prefetch);
5118 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5119 s390_load_address (dst_addr,
5120 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5121 s390_load_address (src_addr,
5122 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5124 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5125 OPTAB_DIRECT);
5126 if (temp != blocks)
5127 emit_move_insn (blocks, temp);
5129 emit_cmp_and_jump_insns (blocks, const0_rtx,
5130 EQ, NULL_RTX, mode, 1, loop_end_label);
5132 emit_jump (loop_start_label);
5133 emit_label (loop_end_label);
5135 emit_insn (gen_movmem_short (dst, src,
5136 convert_to_mode (Pmode, count, 1)));
5137 emit_label (end_label);
5139 return true;
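/* Illustrative summary (not from the original sources): the loop emitted
   above moves the data in 256-byte chunks -- gen_movmem_short with a
   length operand of 255 corresponds to one maximal MVC -- while BLOCKS
   counts (LEN - 1) >> 8 iterations and the leftover bytes are copied by
   the final movmem_short after the loop.  */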
5142 /* Emit code to set LEN bytes at DST to VAL.
5143 Make use of clrmem if VAL is zero. */
5145 void
5146 s390_expand_setmem (rtx dst, rtx len, rtx val)
5148 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5149 return;
5151 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5153 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5155 if (val == const0_rtx && INTVAL (len) <= 256)
5156 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5157 else
5159 /* Initialize memory by storing the first byte. */
5160 emit_move_insn (adjust_address (dst, QImode, 0), val);
5162 if (INTVAL (len) > 1)
5164 /* Initiate 1 byte overlap move.
5165 The first byte of DST is propagated through DSTP1.
5166 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5167 DST is set to size 1 so the rest of the memory location
 5168 	     does not count as a source operand.  */
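	  /* E.g. (illustrative) for LEN == 4: after the first byte is
	     stored, the movmem below copies 3 bytes from DST to DST+1,
	     propagating VAL into DST[1..3].  */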
5169 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5170 set_mem_size (dst, 1);
5172 emit_insn (gen_movmem_short (dstp1, dst,
5173 GEN_INT (INTVAL (len) - 2)));
5178 else if (TARGET_MVCLE)
5180 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5181 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
5184 else
5186 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5187 rtx_code_label *loop_start_label = gen_label_rtx ();
5188 rtx_code_label *loop_end_label = gen_label_rtx ();
5189 rtx_code_label *end_label = gen_label_rtx ();
5190 machine_mode mode;
5192 mode = GET_MODE (len);
5193 if (mode == VOIDmode)
5194 mode = Pmode;
5196 dst_addr = gen_reg_rtx (Pmode);
5197 count = gen_reg_rtx (mode);
5198 blocks = gen_reg_rtx (mode);
5200 convert_move (count, len, 1);
5201 emit_cmp_and_jump_insns (count, const0_rtx,
5202 EQ, NULL_RTX, mode, 1, end_label);
5204 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5205 dst = change_address (dst, VOIDmode, dst_addr);
5207 if (val == const0_rtx)
5208 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5209 OPTAB_DIRECT);
5210 else
5212 dstp1 = adjust_address (dst, VOIDmode, 1);
5213 set_mem_size (dst, 1);
5215 /* Initialize memory by storing the first byte. */
5216 emit_move_insn (adjust_address (dst, QImode, 0), val);
5218 /* If count is 1 we are done. */
5219 emit_cmp_and_jump_insns (count, const1_rtx,
5220 EQ, NULL_RTX, mode, 1, end_label);
5222 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5223 OPTAB_DIRECT);
5225 if (temp != count)
5226 emit_move_insn (count, temp);
5228 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5229 OPTAB_DIRECT);
5230 if (temp != blocks)
5231 emit_move_insn (blocks, temp);
5233 emit_cmp_and_jump_insns (blocks, const0_rtx,
5234 EQ, NULL_RTX, mode, 1, loop_end_label);
5236 emit_label (loop_start_label);
5238 if (TARGET_Z10
5239 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5241 /* Issue a write prefetch for the +4 cache line. */
5242 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5243 GEN_INT (1024)),
5244 const1_rtx, const0_rtx);
5245 emit_insn (prefetch);
5246 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5249 if (val == const0_rtx)
5250 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5251 else
5252 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5253 s390_load_address (dst_addr,
5254 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5256 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5257 OPTAB_DIRECT);
5258 if (temp != blocks)
5259 emit_move_insn (blocks, temp);
5261 emit_cmp_and_jump_insns (blocks, const0_rtx,
5262 EQ, NULL_RTX, mode, 1, loop_end_label);
5264 emit_jump (loop_start_label);
5265 emit_label (loop_end_label);
5267 if (val == const0_rtx)
5268 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5269 else
5270 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5271 emit_label (end_label);
5275 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5276 and return the result in TARGET. */
5278 bool
5279 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5281 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5282 rtx tmp;
5284 /* When tuning for z10 or higher we rely on the Glibc functions to
 5285      do the right thing.  Only for constant lengths below 64k will we
5286 generate inline code. */
5287 if (s390_tune >= PROCESSOR_2097_Z10
5288 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5289 return false;
5291 /* As the result of CMPINT is inverted compared to what we need,
5292 we have to swap the operands. */
5293 tmp = op0; op0 = op1; op1 = tmp;
5295 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5297 if (INTVAL (len) > 0)
5299 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5300 emit_insn (gen_cmpint (target, ccreg));
5302 else
5303 emit_move_insn (target, const0_rtx);
5305 else if (TARGET_MVCLE)
5307 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5308 emit_insn (gen_cmpint (target, ccreg));
5310 else
5312 rtx addr0, addr1, count, blocks, temp;
5313 rtx_code_label *loop_start_label = gen_label_rtx ();
5314 rtx_code_label *loop_end_label = gen_label_rtx ();
5315 rtx_code_label *end_label = gen_label_rtx ();
5316 machine_mode mode;
5318 mode = GET_MODE (len);
5319 if (mode == VOIDmode)
5320 mode = Pmode;
5322 addr0 = gen_reg_rtx (Pmode);
5323 addr1 = gen_reg_rtx (Pmode);
5324 count = gen_reg_rtx (mode);
5325 blocks = gen_reg_rtx (mode);
5327 convert_move (count, len, 1);
5328 emit_cmp_and_jump_insns (count, const0_rtx,
5329 EQ, NULL_RTX, mode, 1, end_label);
5331 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5332 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5333 op0 = change_address (op0, VOIDmode, addr0);
5334 op1 = change_address (op1, VOIDmode, addr1);
5336 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5337 OPTAB_DIRECT);
5338 if (temp != count)
5339 emit_move_insn (count, temp);
5341 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5342 OPTAB_DIRECT);
5343 if (temp != blocks)
5344 emit_move_insn (blocks, temp);
5346 emit_cmp_and_jump_insns (blocks, const0_rtx,
5347 EQ, NULL_RTX, mode, 1, loop_end_label);
5349 emit_label (loop_start_label);
5351 if (TARGET_Z10
5352 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5354 rtx prefetch;
5356 /* Issue a read prefetch for the +2 cache line of operand 1. */
5357 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5358 const0_rtx, const0_rtx);
5359 emit_insn (prefetch);
5360 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5362 /* Issue a read prefetch for the +2 cache line of operand 2. */
5363 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5364 const0_rtx, const0_rtx);
5365 emit_insn (prefetch);
5366 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5369 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5370 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5371 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5372 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5373 temp = gen_rtx_SET (pc_rtx, temp);
5374 emit_jump_insn (temp);
5376 s390_load_address (addr0,
5377 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5378 s390_load_address (addr1,
5379 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5381 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5382 OPTAB_DIRECT);
5383 if (temp != blocks)
5384 emit_move_insn (blocks, temp);
5386 emit_cmp_and_jump_insns (blocks, const0_rtx,
5387 EQ, NULL_RTX, mode, 1, loop_end_label);
5389 emit_jump (loop_start_label);
5390 emit_label (loop_end_label);
5392 emit_insn (gen_cmpmem_short (op0, op1,
5393 convert_to_mode (Pmode, count, 1)));
5394 emit_label (end_label);
5396 emit_insn (gen_cmpint (target, ccreg));
5398 return true;
5401 /* Emit a conditional jump to LABEL for condition code mask MASK using
 5402    comparison operator COMPARISON.  Return the emitted jump insn.  */
5404 static rtx
5405 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5407 rtx temp;
5409 gcc_assert (comparison == EQ || comparison == NE);
5410 gcc_assert (mask > 0 && mask < 15);
5412 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5413 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5414 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5415 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5416 temp = gen_rtx_SET (pc_rtx, temp);
5417 return emit_jump_insn (temp);
5420 /* Emit the instructions to implement strlen of STRING and store the
5421 result in TARGET. The string has the known ALIGNMENT. This
5422 version uses vector instructions and is therefore not appropriate
5423 for targets prior to z13. */
5425 void
5426 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5428 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5429 int very_likely = REG_BR_PROB_BASE - 1;
5430 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5431 rtx str_reg = gen_reg_rtx (V16QImode);
5432 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5433 rtx str_idx_reg = gen_reg_rtx (Pmode);
5434 rtx result_reg = gen_reg_rtx (V16QImode);
5435 rtx is_aligned_label = gen_label_rtx ();
5436 rtx into_loop_label = NULL_RTX;
5437 rtx loop_start_label = gen_label_rtx ();
5438 rtx temp;
5439 rtx len = gen_reg_rtx (QImode);
5440 rtx cond;
5442 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5443 emit_move_insn (str_idx_reg, const0_rtx);
5445 if (INTVAL (alignment) < 16)
 5447      /* Check whether the address happens to be aligned properly and,
 5448 	 if so, jump directly to the aligned loop.  */
5449 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5450 str_addr_base_reg, GEN_INT (15)),
5451 const0_rtx, EQ, NULL_RTX,
5452 Pmode, 1, is_aligned_label);
5454 temp = gen_reg_rtx (Pmode);
5455 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5456 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5457 gcc_assert (REG_P (temp));
5458 highest_index_to_load_reg =
5459 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5460 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5461 gcc_assert (REG_P (highest_index_to_load_reg));
5462 emit_insn (gen_vllv16qi (str_reg,
5463 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5464 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5466 into_loop_label = gen_label_rtx ();
5467 s390_emit_jump (into_loop_label, NULL_RTX);
5468 emit_barrier ();
5471 emit_label (is_aligned_label);
5472 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
 5474    /* Reaching this point we are only performing 16-byte aligned
5475 loads. */
5476 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5478 emit_label (loop_start_label);
5479 LABEL_NUSES (loop_start_label) = 1;
5481 /* Load 16 bytes of the string into VR. */
5482 emit_move_insn (str_reg,
5483 gen_rtx_MEM (V16QImode,
5484 gen_rtx_PLUS (Pmode, str_idx_reg,
5485 str_addr_base_reg)));
5486 if (into_loop_label != NULL_RTX)
5488 emit_label (into_loop_label);
5489 LABEL_NUSES (into_loop_label) = 1;
5492 /* Increment string index by 16 bytes. */
5493 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5494 str_idx_reg, 1, OPTAB_DIRECT);
5496 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5497 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5499 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5500 REG_BR_PROB, very_likely);
5501 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
 5503    /* If the string pointer wasn't aligned we have loaded less than 16
5504 bytes and the remaining bytes got filled with zeros (by vll).
5505 Now we have to check whether the resulting index lies within the
5506 bytes actually part of the string. */
5508 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5509 highest_index_to_load_reg);
5510 s390_load_address (highest_index_to_load_reg,
5511 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5512 const1_rtx));
5513 if (TARGET_64BIT)
5514 emit_insn (gen_movdicc (str_idx_reg, cond,
5515 highest_index_to_load_reg, str_idx_reg));
5516 else
5517 emit_insn (gen_movsicc (str_idx_reg, cond,
5518 highest_index_to_load_reg, str_idx_reg));
5520 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5521 very_unlikely);
5523 expand_binop (Pmode, add_optab, str_idx_reg,
5524 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5525 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5526 here. */
5527 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5528 convert_to_mode (Pmode, len, 1),
5529 target, 1, OPTAB_DIRECT);
5530 if (temp != target)
5531 emit_move_insn (target, temp);
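/* Illustrative outline of the code generated above (a summary under
   assumptions, not from the original sources): each loop iteration loads
   16 bytes into a vector register, vfenez leaves the index of the first
   zero byte in element 7 of RESULT_REG, and the loop repeats while no
   zero byte was found; the unaligned first chunk is loaded with vll so
   the trailing bytes are zero, and the resulting index is then checked
   against the number of bytes actually loaded.  */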
5534 /* Expand conditional increment or decrement using alc/slb instructions.
5535 Should generate code setting DST to either SRC or SRC + INCREMENT,
5536 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5537 Returns true if successful, false otherwise.
5539 That makes it possible to implement some if-constructs without jumps e.g.:
5540 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5541 unsigned int a, b, c;
5542 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5543 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5544 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5545 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5547 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5548 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5549 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5550 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5551 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
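/* A C-level illustration (the generated code is sketched under the
   mapping above, not guaranteed): for

     unsigned int a, b, c;
     ...
     if (a < b) c++;

   the expander emits an unsigned compare that leaves the carry in the
   condition code, followed by an add-logical-with-carry into C, so no
   branch is needed.  */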
5553 bool
5554 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5555 rtx dst, rtx src, rtx increment)
5557 machine_mode cmp_mode;
5558 machine_mode cc_mode;
5559 rtx op_res;
5560 rtx insn;
5561 rtvec p;
5562 int ret;
5564 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5565 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5566 cmp_mode = SImode;
5567 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5568 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5569 cmp_mode = DImode;
5570 else
5571 return false;
5573 /* Try ADD LOGICAL WITH CARRY. */
5574 if (increment == const1_rtx)
5576 /* Determine CC mode to use. */
5577 if (cmp_code == EQ || cmp_code == NE)
5579 if (cmp_op1 != const0_rtx)
5581 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5582 NULL_RTX, 0, OPTAB_WIDEN);
5583 cmp_op1 = const0_rtx;
5586 cmp_code = cmp_code == EQ ? LEU : GTU;
5589 if (cmp_code == LTU || cmp_code == LEU)
5591 rtx tem = cmp_op0;
5592 cmp_op0 = cmp_op1;
5593 cmp_op1 = tem;
5594 cmp_code = swap_condition (cmp_code);
5597 switch (cmp_code)
5599 case GTU:
5600 cc_mode = CCUmode;
5601 break;
5603 case GEU:
5604 cc_mode = CCL3mode;
5605 break;
5607 default:
5608 return false;
5611 /* Emit comparison instruction pattern. */
5612 if (!register_operand (cmp_op0, cmp_mode))
5613 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5615 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5616 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5617 /* We use insn_invalid_p here to add clobbers if required. */
5618 ret = insn_invalid_p (emit_insn (insn), false);
5619 gcc_assert (!ret);
5621 /* Emit ALC instruction pattern. */
5622 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5623 gen_rtx_REG (cc_mode, CC_REGNUM),
5624 const0_rtx);
5626 if (src != const0_rtx)
5628 if (!register_operand (src, GET_MODE (dst)))
5629 src = force_reg (GET_MODE (dst), src);
5631 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5632 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5635 p = rtvec_alloc (2);
5636 RTVEC_ELT (p, 0) =
5637 gen_rtx_SET (dst, op_res);
5638 RTVEC_ELT (p, 1) =
5639 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5640 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5642 return true;
5645 /* Try SUBTRACT LOGICAL WITH BORROW. */
5646 if (increment == constm1_rtx)
5648 /* Determine CC mode to use. */
5649 if (cmp_code == EQ || cmp_code == NE)
5651 if (cmp_op1 != const0_rtx)
5653 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5654 NULL_RTX, 0, OPTAB_WIDEN);
5655 cmp_op1 = const0_rtx;
5658 cmp_code = cmp_code == EQ ? LEU : GTU;
5661 if (cmp_code == GTU || cmp_code == GEU)
5663 rtx tem = cmp_op0;
5664 cmp_op0 = cmp_op1;
5665 cmp_op1 = tem;
5666 cmp_code = swap_condition (cmp_code);
5669 switch (cmp_code)
5671 case LEU:
5672 cc_mode = CCUmode;
5673 break;
5675 case LTU:
5676 cc_mode = CCL3mode;
5677 break;
5679 default:
5680 return false;
5683 /* Emit comparison instruction pattern. */
5684 if (!register_operand (cmp_op0, cmp_mode))
5685 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5687 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5688 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5689 /* We use insn_invalid_p here to add clobbers if required. */
5690 ret = insn_invalid_p (emit_insn (insn), false);
5691 gcc_assert (!ret);
5693 /* Emit SLB instruction pattern. */
5694 if (!register_operand (src, GET_MODE (dst)))
5695 src = force_reg (GET_MODE (dst), src);
5697 op_res = gen_rtx_MINUS (GET_MODE (dst),
5698 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5699 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5700 gen_rtx_REG (cc_mode, CC_REGNUM),
5701 const0_rtx));
5702 p = rtvec_alloc (2);
5703 RTVEC_ELT (p, 0) =
5704 gen_rtx_SET (dst, op_res);
5705 RTVEC_ELT (p, 1) =
5706 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5707 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5709 return true;
5712 return false;
5715 /* Expand code for the insv template. Return true if successful. */
5717 bool
5718 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5720 int bitsize = INTVAL (op1);
5721 int bitpos = INTVAL (op2);
5722 machine_mode mode = GET_MODE (dest);
5723 machine_mode smode;
5724 int smode_bsize, mode_bsize;
5725 rtx op, clobber;
5727 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5728 return false;
5730 /* Generate INSERT IMMEDIATE (IILL et al). */
5731 /* (set (ze (reg)) (const_int)). */
5732 if (TARGET_ZARCH
5733 && register_operand (dest, word_mode)
5734 && (bitpos % 16) == 0
5735 && (bitsize % 16) == 0
5736 && const_int_operand (src, VOIDmode))
5738 HOST_WIDE_INT val = INTVAL (src);
5739 int regpos = bitpos + bitsize;
5741 while (regpos > bitpos)
5743 machine_mode putmode;
5744 int putsize;
5746 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5747 putmode = SImode;
5748 else
5749 putmode = HImode;
5751 putsize = GET_MODE_BITSIZE (putmode);
5752 regpos -= putsize;
5753 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5754 GEN_INT (putsize),
5755 GEN_INT (regpos)),
5756 gen_int_mode (val, putmode));
5757 val >>= putsize;
5759 gcc_assert (regpos == bitpos);
5760 return true;
5763 smode = smallest_mode_for_size (bitsize, MODE_INT);
5764 smode_bsize = GET_MODE_BITSIZE (smode);
5765 mode_bsize = GET_MODE_BITSIZE (mode);
5767 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5768 if (bitpos == 0
5769 && (bitsize % BITS_PER_UNIT) == 0
5770 && MEM_P (dest)
5771 && (register_operand (src, word_mode)
5772 || const_int_operand (src, VOIDmode)))
5774 /* Emit standard pattern if possible. */
5775 if (smode_bsize == bitsize)
5777 emit_move_insn (adjust_address (dest, smode, 0),
5778 gen_lowpart (smode, src));
5779 return true;
5782 /* (set (ze (mem)) (const_int)). */
5783 else if (const_int_operand (src, VOIDmode))
5785 int size = bitsize / BITS_PER_UNIT;
5786 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5787 BLKmode,
5788 UNITS_PER_WORD - size);
5790 dest = adjust_address (dest, BLKmode, 0);
5791 set_mem_size (dest, size);
5792 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5793 return true;
5796 /* (set (ze (mem)) (reg)). */
5797 else if (register_operand (src, word_mode))
5799 if (bitsize <= 32)
5800 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5801 const0_rtx), src);
5802 else
5804 /* Emit st,stcmh sequence. */
5805 int stcmh_width = bitsize - 32;
5806 int size = stcmh_width / BITS_PER_UNIT;
5808 emit_move_insn (adjust_address (dest, SImode, size),
5809 gen_lowpart (SImode, src));
5810 set_mem_size (dest, size);
5811 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5812 GEN_INT (stcmh_width),
5813 const0_rtx),
5814 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5816 return true;
5820 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5821 if ((bitpos % BITS_PER_UNIT) == 0
5822 && (bitsize % BITS_PER_UNIT) == 0
5823 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5824 && MEM_P (src)
5825 && (mode == DImode || mode == SImode)
5826 && register_operand (dest, mode))
5828 /* Emit a strict_low_part pattern if possible. */
5829 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5831 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5832 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5833 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5834 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5835 return true;
5838 /* ??? There are more powerful versions of ICM that are not
5839 completely represented in the md file. */
5842 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5843 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5845 machine_mode mode_s = GET_MODE (src);
5847 if (mode_s == VOIDmode)
5849 /* For constant zero values the representation with AND
5850 appears to be folded in more situations than the (set
5851 (zero_extract) ...).
5852 We only do this when the start and end of the bitfield
5853 remain in the same SImode chunk. That way nihf or nilf
5854 can be used.
5855 The AND patterns might still generate a risbg for this. */
5856 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
5857 return false;
5858 else
5859 src = force_reg (mode, src);
5861 else if (mode_s != mode)
5863 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5864 src = force_reg (mode_s, src);
5865 src = gen_lowpart (mode, src);
5868 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
5869 op = gen_rtx_SET (op, src);
5871 if (!TARGET_ZEC12)
5873 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5874 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5876 emit_insn (op);
5878 return true;
5881 return false;
5884 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5885 register that holds VAL of mode MODE shifted by COUNT bits. */
5887 static inline rtx
5888 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5890 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5891 NULL_RTX, 1, OPTAB_DIRECT);
5892 return expand_simple_binop (SImode, ASHIFT, val, count,
5893 NULL_RTX, 1, OPTAB_DIRECT);
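/* E.g. (illustrative): for MODE == QImode and COUNT == (const_int 16)
   this computes (VAL & 0xff) << 16 in SImode, i.e. the byte is masked
   and shifted to the bit position it occupies within the containing
   SImode word.  */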
5896 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5897 the result in TARGET. */
5899 void
5900 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5901 rtx cmp_op1, rtx cmp_op2)
5903 machine_mode mode = GET_MODE (target);
5904 bool neg_p = false, swap_p = false;
5905 rtx tmp;
5907 if (GET_MODE (cmp_op1) == V2DFmode)
5909 switch (cond)
5911 /* NE a != b -> !(a == b) */
5912 case NE: cond = EQ; neg_p = true; break;
5913 /* UNGT a u> b -> !(b >= a) */
5914 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5915 /* UNGE a u>= b -> !(b > a) */
5916 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5917 /* LE: a <= b -> b >= a */
5918 case LE: cond = GE; swap_p = true; break;
5919 /* UNLE: a u<= b -> !(a > b) */
5920 case UNLE: cond = GT; neg_p = true; break;
5921 /* LT: a < b -> b > a */
5922 case LT: cond = GT; swap_p = true; break;
5923 /* UNLT: a u< b -> !(a >= b) */
5924 case UNLT: cond = GE; neg_p = true; break;
5925 case UNEQ:
5926 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
5927 return;
5928 case LTGT:
5929 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
5930 return;
5931 case ORDERED:
5932 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
5933 return;
5934 case UNORDERED:
5935 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
5936 return;
5937 default: break;
5940 else
5942 switch (cond)
5944 /* NE: a != b -> !(a == b) */
5945 case NE: cond = EQ; neg_p = true; break;
5946 /* GE: a >= b -> !(b > a) */
5947 case GE: cond = GT; neg_p = true; swap_p = true; break;
5948 /* GEU: a >= b -> !(b > a) */
5949 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
5950 /* LE: a <= b -> !(a > b) */
5951 case LE: cond = GT; neg_p = true; break;
5952 /* LEU: a <= b -> !(a > b) */
5953 case LEU: cond = GTU; neg_p = true; break;
5954 /* LT: a < b -> b > a */
5955 case LT: cond = GT; swap_p = true; break;
5956 /* LTU: a < b -> b > a */
5957 case LTU: cond = GTU; swap_p = true; break;
5958 default: break;
5962 if (swap_p)
5964 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
5967 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
5968 mode,
5969 cmp_op1, cmp_op2)));
5970 if (neg_p)
5971 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
5974 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
5975 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
5976 elements in CMP1 and CMP2 fulfill the comparison. */
5977 void
5978 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
5979 rtx cmp1, rtx cmp2, bool all_p)
5981 enum rtx_code new_code = code;
5982 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
5983 rtx tmp_reg = gen_reg_rtx (SImode);
5984 bool swap_p = false;
5986 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
5988 switch (code)
5990 case EQ: cmp_mode = CCVEQmode; break;
5991 case NE: cmp_mode = CCVEQmode; break;
5992 case GT: cmp_mode = CCVHmode; break;
5993 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
5994 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
5995 case LE: cmp_mode = CCVHmode; new_code = LE; break;
5996 case GTU: cmp_mode = CCVHUmode; break;
5997 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
5998 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
5999 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6000 default: gcc_unreachable ();
6002 scratch_mode = GET_MODE (cmp1);
6004 else if (GET_MODE (cmp1) == V2DFmode)
6006 switch (code)
6008 case EQ: cmp_mode = CCVEQmode; break;
6009 case NE: cmp_mode = CCVEQmode; break;
6010 case GT: cmp_mode = CCVFHmode; break;
6011 case GE: cmp_mode = CCVFHEmode; break;
6012 case UNLE: cmp_mode = CCVFHmode; break;
6013 case UNLT: cmp_mode = CCVFHEmode; break;
6014 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6015 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6016 default: gcc_unreachable ();
6018 scratch_mode = V2DImode;
6020 else
6021 gcc_unreachable ();
6023 if (!all_p)
6024 switch (cmp_mode)
6026 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6027 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6028 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6029 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6030 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6031 default: gcc_unreachable ();
6033 else
6034 /* The modes without ANY match the ALL modes. */
6035 full_cmp_mode = cmp_mode;
6037 if (swap_p)
6039 rtx tmp = cmp2;
6040 cmp2 = cmp1;
6041 cmp1 = tmp;
6044 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6045 gen_rtvec (2, gen_rtx_SET (
6046 gen_rtx_REG (cmp_mode, CC_REGNUM),
6047 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6048 gen_rtx_CLOBBER (VOIDmode,
6049 gen_rtx_SCRATCH (scratch_mode)))));
6050 emit_move_insn (target, const0_rtx);
6051 emit_move_insn (tmp_reg, const1_rtx);
6053 emit_move_insn (target,
6054 gen_rtx_IF_THEN_ELSE (SImode,
6055 gen_rtx_fmt_ee (new_code, VOIDmode,
6056 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6057 const0_rtx),
6058 target, tmp_reg));
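/* For illustration: the integer paths above only use the EQ, H (high)
   and HU (high unsigned) vector CC modes, so e.g. GEU a >= b is handled
   as the swapped LEU b <= a in CCVHUmode.  When ALL_P is false the
   corresponding ...ANY CC mode is chosen instead, and the final
   conditional move then tests whether any element, rather than all of
   them, fulfilled the comparison.  */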
6061 /* Generate a vector comparison expression loading either elements of
6062 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6063 and CMP_OP2. */
6065 void
6066 s390_expand_vcond (rtx target, rtx then, rtx els,
6067 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6069 rtx tmp;
6070 machine_mode result_mode;
6071 rtx result_target;
6073 /* We always use an integral type vector to hold the comparison
6074 result. */
6075 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
6076 result_target = gen_reg_rtx (result_mode);
6078 /* Alternatively this could be done by reload by lowering the cmp*
6079 predicates. But it appears to be better for scheduling etc. to
6080 have this done early. */
6081 if (!REG_P (cmp_op1))
6082 cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
6084 if (!REG_P (cmp_op2))
6085 cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
6087 s390_expand_vec_compare (result_target, cond,
6088 cmp_op1, cmp_op2);
6090 /* If the results are supposed to be either -1 or 0 we are done
6091 since this is what our compare instructions generate anyway. */
6092 if (constm1_operand (then, GET_MODE (then))
6093 && const0_operand (els, GET_MODE (els)))
6095 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
6096 result_target, 0));
6097 return;
6100 /* Otherwise we will do a vsel afterwards. */
6101 /* This gets triggered e.g.
6102 with gcc.c-torture/compile/pr53410-1.c */
6103 if (!REG_P (then))
6104 then = force_reg (GET_MODE (target), then);
6106 if (!REG_P (els))
6107 els = force_reg (GET_MODE (target), els);
6109 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6110 result_target,
6111 CONST0_RTX (result_mode));
6113 /* We compared the result against zero above so we have to swap then
6114 and els here. */
6115 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
6117 gcc_assert (GET_MODE (target) == GET_MODE (then));
6118 emit_insn (gen_rtx_SET (target, tmp));
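/* For illustration, a loop such as

     for (i = 0; i < 4; i++)
       r[i] = a[i] > b[i] ? x[i] : y[i];

   is expanded by first building mask = (a > b) as a -1/0 vector and
   then emitting r = (mask == 0) ? y : x, which is why THEN and ELS
   appear swapped in the IF_THEN_ELSE above.  */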
6121 /* Emit the RTX necessary to initialize the vector TARGET with values
6122 in VALS. */
6123 void
6124 s390_expand_vec_init (rtx target, rtx vals)
6126 machine_mode mode = GET_MODE (target);
6127 machine_mode inner_mode = GET_MODE_INNER (mode);
6128 int n_elts = GET_MODE_NUNITS (mode);
6129 bool all_same = true, all_regs = true, all_const_int = true;
6130 rtx x;
6131 int i;
6133 for (i = 0; i < n_elts; ++i)
6135 x = XVECEXP (vals, 0, i);
6137 if (!CONST_INT_P (x))
6138 all_const_int = false;
6140 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6141 all_same = false;
6143 if (!REG_P (x))
6144 all_regs = false;
6147 /* Use vector gen mask or vector gen byte mask if possible. */
6148 if (all_same && all_const_int
6149 && (XVECEXP (vals, 0, 0) == const0_rtx
6150 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6151 NULL, NULL)
6152 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6154 emit_insn (gen_rtx_SET (target,
6155 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6156 return;
6159 if (all_same)
6161 emit_insn (gen_rtx_SET (target,
6162 gen_rtx_VEC_DUPLICATE (mode,
6163 XVECEXP (vals, 0, 0))));
6164 return;
6167 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6169 /* Use vector load pair. */
6170 emit_insn (gen_rtx_SET (target,
6171 gen_rtx_VEC_CONCAT (mode,
6172 XVECEXP (vals, 0, 0),
6173 XVECEXP (vals, 0, 1))));
6174 return;
6177 /* We are about to set the vector elements one by one. Zero out the
6178 full register first in order to help the data flow framework to
6179 detect it as a full VR set. */
6180 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6182 /* Unfortunately the vec_init expander is not allowed to fail. So
6183 we have to implement the fallback ourselves. */
6184 for (i = 0; i < n_elts; i++)
6185 emit_insn (gen_rtx_SET (target,
6186 gen_rtx_UNSPEC (mode,
6187 gen_rtvec (3, XVECEXP (vals, 0, i),
6188 GEN_INT (i), target),
6189 UNSPEC_VEC_SET)));
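/* For illustration, the strategies above are tried in decreasing order
   of efficiency (element values are examples only):

     { 0, 0, 0, 0 }          -> vector gen mask / gen byte mask
     { x, x, x, x }          -> VEC_DUPLICATE of a single element
     { r1, r2 } in V2DImode  -> VEC_CONCAT (vector load pair)
     anything else           -> clear the register, then insert the
                                elements one by one (UNSPEC_VEC_SET).  */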
6192 /* Structure to hold the initial parameters for a compare_and_swap operation
6193 in HImode and QImode. */
6195 struct alignment_context
6197 rtx memsi; /* SI aligned memory location. */
6198 rtx shift; /* Bit offset with regard to lsb. */
6199 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6200 rtx modemaski; /* ~modemask */
6201 bool aligned; /* True if memory is aligned, false otherwise. */
6204 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6205 structure AC for transparent simplifying, if the memory alignment is known
6206 to be at least 32 bits. MEM is the memory location for the actual operation
6207 and MODE its mode. */
6209 static void
6210 init_alignment_context (struct alignment_context *ac, rtx mem,
6211 machine_mode mode)
6213 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6214 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6216 if (ac->aligned)
6217 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6218 else
6220 /* Alignment is unknown. */
6221 rtx byteoffset, addr, align;
6223 /* Force the address into a register. */
6224 addr = force_reg (Pmode, XEXP (mem, 0));
6226 /* Align it to SImode. */
6227 align = expand_simple_binop (Pmode, AND, addr,
6228 GEN_INT (-GET_MODE_SIZE (SImode)),
6229 NULL_RTX, 1, OPTAB_DIRECT);
6230 /* Generate MEM. */
6231 ac->memsi = gen_rtx_MEM (SImode, align);
6232 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6233 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6234 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6236 /* Calculate shiftcount. */
6237 byteoffset = expand_simple_binop (Pmode, AND, addr,
6238 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6239 NULL_RTX, 1, OPTAB_DIRECT);
6240 /* As we already have some offset, evaluate the remaining distance. */
6241 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6242 NULL_RTX, 1, OPTAB_DIRECT);
6245 /* Shift is the byte count, but we need the bitcount. */
6246 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6247 NULL_RTX, 1, OPTAB_DIRECT);
6249 /* Calculate masks. */
6250 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6251 GEN_INT (GET_MODE_MASK (mode)),
6252 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6253 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6254 NULL_RTX, 1);
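/* Worked example, assuming HImode and the unaligned path: for a
   halfword at byte offset 2 within its naturally aligned word we get
   byteoffset = 2, shift = (4 - 2 - 2) * 8 = 0 bits and
   modemask = 0x0000ffff; for byte offset 0 the numbers are shift = 16
   and modemask = 0xffff0000.  MODEMASK thus selects exactly the bits of
   the HQImode field within the SImode word, MODEMASKI the bits around
   it.  */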
6257 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6258 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6259 perform the merge in SEQ2. */
6261 static rtx
6262 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6263 machine_mode mode, rtx val, rtx ins)
6265 rtx tmp;
6267 if (ac->aligned)
6269 start_sequence ();
6270 tmp = copy_to_mode_reg (SImode, val);
6271 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6272 const0_rtx, ins))
6274 *seq1 = NULL;
6275 *seq2 = get_insns ();
6276 end_sequence ();
6277 return tmp;
6279 end_sequence ();
6282 /* Failed to use insv. Generate a two part shift and mask. */
6283 start_sequence ();
6284 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6285 *seq1 = get_insns ();
6286 end_sequence ();
6288 start_sequence ();
6289 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6290 *seq2 = get_insns ();
6291 end_sequence ();
6293 return tmp;
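/* For illustration: on an aligned location the whole insertion is done
   by a single insv into a copy of VAL and ends up in SEQ2, i.e. inside
   the CS loop of the caller.  Otherwise SEQ1 receives the loop-invariant
   shift/mask of INS while SEQ2 only contains the final IOR with VAL, so
   the expensive part is hoisted out of the compare-and-swap loop.  */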
6296 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6297 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6298 value to set if CMP == MEM. */
6300 void
6301 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6302 rtx cmp, rtx new_rtx, bool is_weak)
6304 struct alignment_context ac;
6305 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6306 rtx res = gen_reg_rtx (SImode);
6307 rtx_code_label *csloop = NULL, *csend = NULL;
6309 gcc_assert (MEM_P (mem));
6311 init_alignment_context (&ac, mem, mode);
6313 /* Load full word. Subsequent loads are performed by CS. */
6314 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6315 NULL_RTX, 1, OPTAB_DIRECT);
6317 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6318 possible, we try to use insv to make this happen efficiently. If
6319 that fails we'll generate code both inside and outside the loop. */
6320 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6321 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6323 if (seq0)
6324 emit_insn (seq0);
6325 if (seq1)
6326 emit_insn (seq1);
6328 /* Start CS loop. */
6329 if (!is_weak)
6331 /* Begin assuming success. */
6332 emit_move_insn (btarget, const1_rtx);
6334 csloop = gen_label_rtx ();
6335 csend = gen_label_rtx ();
6336 emit_label (csloop);
6339 /* val = "<mem>00..0<mem>"
6340 * cmp = "00..0<cmp>00..0"
6341 * new = "00..0<new>00..0"  */
6344 emit_insn (seq2);
6345 emit_insn (seq3);
6347 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6348 if (is_weak)
6349 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6350 else
6352 rtx tmp;
6354 /* Jump to end if we're done (likely?). */
6355 s390_emit_jump (csend, cc);
6357 /* Check for changes outside mode, and loop internal if so.
6358 Arrange the moves so that the compare is adjacent to the
6359 branch so that we can generate CRJ. */
6360 tmp = copy_to_reg (val);
6361 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6362 1, OPTAB_DIRECT);
6363 cc = s390_emit_compare (NE, val, tmp);
6364 s390_emit_jump (csloop, cc);
6366 /* Failed. */
6367 emit_move_insn (btarget, const0_rtx);
6368 emit_label (csend);
6371 /* Return the correct part of the bitfield. */
6372 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6373 NULL_RTX, 1, OPTAB_DIRECT), 1);
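/* Rough sketch of the strong (!IS_WEAK) sequence emitted above, in
   pseudo C with the local variable names:

     btarget = 1;
     val = *memsi & modemaski;           bits around the HQI field
   loop:
     cmpv = val | (cmp << shift);        seq2
     newv = val | (new << shift);        seq3
     cc = CS (memsi, cmpv, newv);        res receives the old *memsi
     if (cc == equal) goto end;          swap succeeded
     tmp = val; val = res & modemaski;
     if (val != tmp) goto loop;          only bits outside MODE changed
     btarget = 0;                        the field itself differed
   end:
     vtarget = res >> shift;             truncated by convert_move  */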
6376 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6377 and VAL the value to play with. If AFTER is true then store the value
6378 MEM holds after the operation, if AFTER is false then store the value MEM
6379 holds before the operation. If TARGET is zero then discard that value, else
6380 store it to TARGET. */
6382 void
6383 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6384 rtx target, rtx mem, rtx val, bool after)
6386 struct alignment_context ac;
6387 rtx cmp;
6388 rtx new_rtx = gen_reg_rtx (SImode);
6389 rtx orig = gen_reg_rtx (SImode);
6390 rtx_code_label *csloop = gen_label_rtx ();
6392 gcc_assert (!target || register_operand (target, VOIDmode));
6393 gcc_assert (MEM_P (mem));
6395 init_alignment_context (&ac, mem, mode);
6397 /* Shift val to the correct bit positions.
6398 Preserve "icm", but prevent "ex icm". */
6399 if (!(ac.aligned && code == SET && MEM_P (val)))
6400 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6402 /* Further preparation insns. */
6403 if (code == PLUS || code == MINUS)
6404 emit_move_insn (orig, val);
6405 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6406 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6407 NULL_RTX, 1, OPTAB_DIRECT);
6409 /* Load full word. Subsequent loads are performed by CS. */
6410 cmp = force_reg (SImode, ac.memsi);
6412 /* Start CS loop. */
6413 emit_label (csloop);
6414 emit_move_insn (new_rtx, cmp);
6416 /* Patch new with val at correct position. */
6417 switch (code)
6419 case PLUS:
6420 case MINUS:
6421 val = expand_simple_binop (SImode, code, new_rtx, orig,
6422 NULL_RTX, 1, OPTAB_DIRECT);
6423 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6424 NULL_RTX, 1, OPTAB_DIRECT);
6425 /* FALLTHRU */
6426 case SET:
6427 if (ac.aligned && MEM_P (val))
6428 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6429 0, 0, SImode, val, false);
6430 else
6432 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6433 NULL_RTX, 1, OPTAB_DIRECT);
6434 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6435 NULL_RTX, 1, OPTAB_DIRECT);
6437 break;
6438 case AND:
6439 case IOR:
6440 case XOR:
6441 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6442 NULL_RTX, 1, OPTAB_DIRECT);
6443 break;
6444 case MULT: /* NAND */
6445 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6446 NULL_RTX, 1, OPTAB_DIRECT);
6447 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6448 NULL_RTX, 1, OPTAB_DIRECT);
6449 break;
6450 default:
6451 gcc_unreachable ();
6454 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6455 ac.memsi, cmp, new_rtx));
6457 /* Return the correct part of the bitfield. */
6458 if (target)
6459 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6460 after ? new_rtx : cmp, ac.shift,
6461 NULL_RTX, 1, OPTAB_DIRECT), 1);
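/* For illustration: the "patch new with val" step only changes the bits
   selected by MODEMASK.  E.g. in the NAND case (MULT) the sequence
   new = (new & val) ^ modemask flips exactly the field bits, and for
   AND the earlier val ^= modemaski sets every bit outside the field to
   one so that the plain SImode AND leaves those bits untouched.  */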
6464 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6465 We need to emit DTP-relative relocations. */
6467 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6469 static void
6470 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6472 switch (size)
6474 case 4:
6475 fputs ("\t.long\t", file);
6476 break;
6477 case 8:
6478 fputs ("\t.quad\t", file);
6479 break;
6480 default:
6481 gcc_unreachable ();
6483 output_addr_const (file, x);
6484 fputs ("@DTPOFF", file);
6487 /* Return the proper mode for REGNO being represented in the dwarf
6488 unwind table. */
6489 machine_mode
6490 s390_dwarf_frame_reg_mode (int regno)
6492 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6494 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6495 if (GENERAL_REGNO_P (regno))
6496 save_mode = Pmode;
6498 /* The rightmost 64 bits of vector registers are call-clobbered. */
6499 if (GET_MODE_SIZE (save_mode) > 8)
6500 save_mode = DImode;
6502 return save_mode;
6505 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6506 /* Implement TARGET_MANGLE_TYPE. */
6508 static const char *
6509 s390_mangle_type (const_tree type)
6511 type = TYPE_MAIN_VARIANT (type);
6513 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6514 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6515 return NULL;
6517 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6518 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6519 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6520 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6522 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6523 && TARGET_LONG_DOUBLE_128)
6524 return "g";
6526 /* For all other types, use normal C++ mangling. */
6527 return NULL;
6529 #endif
6531 /* In the name of slightly smaller debug output, and to cater to
6532 general assembler lossage, recognize various UNSPEC sequences
6533 and turn them back into a direct symbol reference. */
6535 static rtx
6536 s390_delegitimize_address (rtx orig_x)
6538 rtx x, y;
6540 orig_x = delegitimize_mem_from_attrs (orig_x);
6541 x = orig_x;
6543 /* Extract the symbol ref from:
6544 (plus:SI (reg:SI 12 %r12)
6545 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6546 UNSPEC_GOTOFF/PLTOFF)))
6548 (plus:SI (reg:SI 12 %r12)
6549 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6550 UNSPEC_GOTOFF/PLTOFF)
6551 (const_int 4 [0x4])))) */
6552 if (GET_CODE (x) == PLUS
6553 && REG_P (XEXP (x, 0))
6554 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6555 && GET_CODE (XEXP (x, 1)) == CONST)
6557 HOST_WIDE_INT offset = 0;
6559 /* The const operand. */
6560 y = XEXP (XEXP (x, 1), 0);
6562 if (GET_CODE (y) == PLUS
6563 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6565 offset = INTVAL (XEXP (y, 1));
6566 y = XEXP (y, 0);
6569 if (GET_CODE (y) == UNSPEC
6570 && (XINT (y, 1) == UNSPEC_GOTOFF
6571 || XINT (y, 1) == UNSPEC_PLTOFF))
6572 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6575 if (GET_CODE (x) != MEM)
6576 return orig_x;
6578 x = XEXP (x, 0);
6579 if (GET_CODE (x) == PLUS
6580 && GET_CODE (XEXP (x, 1)) == CONST
6581 && GET_CODE (XEXP (x, 0)) == REG
6582 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6584 y = XEXP (XEXP (x, 1), 0);
6585 if (GET_CODE (y) == UNSPEC
6586 && XINT (y, 1) == UNSPEC_GOT)
6587 y = XVECEXP (y, 0, 0);
6588 else
6589 return orig_x;
6591 else if (GET_CODE (x) == CONST)
6593 /* Extract the symbol ref from:
6594 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6595 UNSPEC_PLT/GOTENT))) */
6597 y = XEXP (x, 0);
6598 if (GET_CODE (y) == UNSPEC
6599 && (XINT (y, 1) == UNSPEC_GOTENT
6600 || XINT (y, 1) == UNSPEC_PLT))
6601 y = XVECEXP (y, 0, 0);
6602 else
6603 return orig_x;
6605 else
6606 return orig_x;
6608 if (GET_MODE (orig_x) != Pmode)
6610 if (GET_MODE (orig_x) == BLKmode)
6611 return orig_x;
6612 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6613 if (y == NULL_RTX)
6614 return orig_x;
6616 return y;
6619 /* Output operand OP to stdio stream FILE.
6620 OP is an address (register + offset) which is not used to address data;
6621 instead the rightmost bits are interpreted as the value. */
6623 static void
6624 print_shift_count_operand (FILE *file, rtx op)
6626 HOST_WIDE_INT offset;
6627 rtx base;
6629 /* Extract base register and offset. */
6630 if (!s390_decompose_shift_count (op, &base, &offset))
6631 gcc_unreachable ();
6633 /* Sanity check. */
6634 if (base)
6636 gcc_assert (GET_CODE (base) == REG);
6637 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6638 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6641 /* Offsets are restricted to twelve bits. */
6642 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6643 if (base)
6644 fprintf (file, "(%s)", reg_names[REGNO (base)]);
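/* Example: for OP = (plus (reg %r1) (const_int 46)) this prints
   "46(%r1)".  Since only the low twelve bits of the offset are used, an
   offset of 4142 (4096 + 46) would print the same way.  */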
6647 /* Assigns the number of NOP halfwords to be emitted before and after the
6648 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
6649 If hotpatching is disabled for the function, the values are set to zero.  */
6652 static void
6653 s390_function_num_hotpatch_hw (tree decl,
6654 int *hw_before,
6655 int *hw_after)
6657 tree attr;
6659 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6661 /* Handle the arguments of the hotpatch attribute. The values
6662 specified via attribute might override the cmdline argument
6663 values. */
6664 if (attr)
6666 tree args = TREE_VALUE (attr);
6668 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6669 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6671 else
6673 /* Use the values specified by the cmdline arguments. */
6674 *hw_before = s390_hotpatch_hw_before_label;
6675 *hw_after = s390_hotpatch_hw_after_label;
6679 /* Write the extra assembler code needed to declare a function properly. */
6681 void
6682 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6683 tree decl)
6685 int hw_before, hw_after;
6687 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6688 if (hw_before > 0)
6690 unsigned int function_alignment;
6691 int i;
6693 /* Add a trampoline code area before the function label and initialize it
6694 with two-byte nop instructions. This area can be overwritten with code
6695 that jumps to a patched version of the function. */
6696 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6697 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6698 hw_before);
6699 for (i = 1; i < hw_before; i++)
6700 fputs ("\tnopr\t%r7\n", asm_out_file);
6702 /* Note: The function label must be aligned so that (a) the bytes of the
6703 following nop do not cross a cacheline boundary, and (b) a jump address
6704 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
6705 stored directly before the label without crossing a cacheline
6706 boundary. All this is necessary to make sure the trampoline code can
6707 be changed atomically.
6708 This alignment is done automatically using the FUNCTION_BOUNDARY, but
6709 if there are NOPs before the function label, the alignment is placed
6710 before them. So it is necessary to duplicate the alignment after the
6711 NOPs. */
6712 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6713 if (! DECL_USER_ALIGN (decl))
6714 function_alignment = MAX (function_alignment,
6715 (unsigned int) align_functions);
6716 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6717 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6720 ASM_OUTPUT_LABEL (asm_out_file, fname);
6721 if (hw_after > 0)
6722 asm_fprintf (asm_out_file,
6723 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6724 hw_after);
6727 /* Output machine-dependent UNSPECs occurring in address constant X
6728 in assembler syntax to stdio stream FILE. Returns true if the
6729 constant X could be recognized, false otherwise. */
6731 static bool
6732 s390_output_addr_const_extra (FILE *file, rtx x)
6734 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6735 switch (XINT (x, 1))
6737 case UNSPEC_GOTENT:
6738 output_addr_const (file, XVECEXP (x, 0, 0));
6739 fprintf (file, "@GOTENT");
6740 return true;
6741 case UNSPEC_GOT:
6742 output_addr_const (file, XVECEXP (x, 0, 0));
6743 fprintf (file, "@GOT");
6744 return true;
6745 case UNSPEC_GOTOFF:
6746 output_addr_const (file, XVECEXP (x, 0, 0));
6747 fprintf (file, "@GOTOFF");
6748 return true;
6749 case UNSPEC_PLT:
6750 output_addr_const (file, XVECEXP (x, 0, 0));
6751 fprintf (file, "@PLT");
6752 return true;
6753 case UNSPEC_PLTOFF:
6754 output_addr_const (file, XVECEXP (x, 0, 0));
6755 fprintf (file, "@PLTOFF");
6756 return true;
6757 case UNSPEC_TLSGD:
6758 output_addr_const (file, XVECEXP (x, 0, 0));
6759 fprintf (file, "@TLSGD");
6760 return true;
6761 case UNSPEC_TLSLDM:
6762 assemble_name (file, get_some_local_dynamic_name ());
6763 fprintf (file, "@TLSLDM");
6764 return true;
6765 case UNSPEC_DTPOFF:
6766 output_addr_const (file, XVECEXP (x, 0, 0));
6767 fprintf (file, "@DTPOFF");
6768 return true;
6769 case UNSPEC_NTPOFF:
6770 output_addr_const (file, XVECEXP (x, 0, 0));
6771 fprintf (file, "@NTPOFF");
6772 return true;
6773 case UNSPEC_GOTNTPOFF:
6774 output_addr_const (file, XVECEXP (x, 0, 0));
6775 fprintf (file, "@GOTNTPOFF");
6776 return true;
6777 case UNSPEC_INDNTPOFF:
6778 output_addr_const (file, XVECEXP (x, 0, 0));
6779 fprintf (file, "@INDNTPOFF");
6780 return true;
6783 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6784 switch (XINT (x, 1))
6786 case UNSPEC_POOL_OFFSET:
6787 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6788 output_addr_const (file, x);
6789 return true;
6791 return false;
6794 /* Output address operand ADDR in assembler syntax to
6795 stdio stream FILE. */
6797 void
6798 print_operand_address (FILE *file, rtx addr)
6800 struct s390_address ad;
6802 if (s390_loadrelative_operand_p (addr, NULL, NULL))
6804 if (!TARGET_Z10)
6806 output_operand_lossage ("symbolic memory references are "
6807 "only supported on z10 or later");
6808 return;
6810 output_addr_const (file, addr);
6811 return;
6814 if (!s390_decompose_address (addr, &ad)
6815 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6816 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6817 output_operand_lossage ("cannot decompose address");
6819 if (ad.disp)
6820 output_addr_const (file, ad.disp);
6821 else
6822 fprintf (file, "0");
6824 if (ad.base && ad.indx)
6825 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6826 reg_names[REGNO (ad.base)]);
6827 else if (ad.base)
6828 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6831 /* Output operand X in assembler syntax to stdio stream FILE.
6832 CODE specifies the format flag. The following format flags
6833 are recognized:
6835 'C': print opcode suffix for branch condition.
6836 'D': print opcode suffix for inverse branch condition.
6837 'E': print opcode suffix for branch on index instruction.
6838 'G': print the size of the operand in bytes.
6839 'J': print tls_load/tls_gdcall/tls_ldcall suffix
6840 'M': print the second word of a TImode operand.
6841 'N': print the second word of a DImode operand.
6842 'O': print only the displacement of a memory reference or address.
6843 'R': print only the base register of a memory reference or address.
6844 'S': print S-type memory reference (base+displacement).
6845 'Y': print shift count operand.
6847 'b': print integer X as if it's an unsigned byte.
6848 'c': print integer X as if it's a signed byte.
6849 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
6850 'f': "end" contiguous bitmask X in SImode.
6851 'h': print integer X as if it's a signed halfword.
6852 'i': print the first nonzero HImode part of X.
6853 'j': print the first HImode part unequal to -1 of X.
6854 'k': print the first nonzero SImode part of X.
6855 'm': print the first SImode part unequal to -1 of X.
6856 'o': print integer X as if it's an unsigned 32-bit word.
6857 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
6858 't': CONST_INT: "start" of contiguous bitmask X in SImode.
6859 CONST_VECTOR: Generate a bitmask for vgbm instruction.
6860 'x': print integer X as if it's an unsigned halfword.
6861 'v': print register number as vector register (v1 instead of f1).  */
6864 void
6865 print_operand (FILE *file, rtx x, int code)
6867 HOST_WIDE_INT ival;
6869 switch (code)
6871 case 'C':
6872 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
6873 return;
6875 case 'D':
6876 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
6877 return;
6879 case 'E':
6880 if (GET_CODE (x) == LE)
6881 fprintf (file, "l");
6882 else if (GET_CODE (x) == GT)
6883 fprintf (file, "h");
6884 else
6885 output_operand_lossage ("invalid comparison operator "
6886 "for 'E' output modifier");
6887 return;
6889 case 'J':
6890 if (GET_CODE (x) == SYMBOL_REF)
6892 fprintf (file, "%s", ":tls_load:");
6893 output_addr_const (file, x);
6895 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
6897 fprintf (file, "%s", ":tls_gdcall:");
6898 output_addr_const (file, XVECEXP (x, 0, 0));
6900 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
6902 fprintf (file, "%s", ":tls_ldcall:");
6903 const char *name = get_some_local_dynamic_name ();
6904 gcc_assert (name);
6905 assemble_name (file, name);
6907 else
6908 output_operand_lossage ("invalid reference for 'J' output modifier");
6909 return;
6911 case 'G':
6912 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
6913 return;
6915 case 'O':
6917 struct s390_address ad;
6918 int ret;
6920 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6922 if (!ret
6923 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6924 || ad.indx)
6926 output_operand_lossage ("invalid address for 'O' output modifier");
6927 return;
6930 if (ad.disp)
6931 output_addr_const (file, ad.disp);
6932 else
6933 fprintf (file, "0");
6935 return;
6937 case 'R':
6939 struct s390_address ad;
6940 int ret;
6942 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6944 if (!ret
6945 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6946 || ad.indx)
6948 output_operand_lossage ("invalid address for 'R' output modifier");
6949 return;
6952 if (ad.base)
6953 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
6954 else
6955 fprintf (file, "0");
6957 return;
6959 case 'S':
6961 struct s390_address ad;
6962 int ret;
6964 if (!MEM_P (x))
6966 output_operand_lossage ("memory reference expected for "
6967 "'S' output modifier");
6968 return;
6970 ret = s390_decompose_address (XEXP (x, 0), &ad);
6972 if (!ret
6973 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6974 || ad.indx)
6976 output_operand_lossage ("invalid address for 'S' output modifier");
6977 return;
6980 if (ad.disp)
6981 output_addr_const (file, ad.disp);
6982 else
6983 fprintf (file, "0");
6985 if (ad.base)
6986 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6988 return;
6990 case 'N':
6991 if (GET_CODE (x) == REG)
6992 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6993 else if (GET_CODE (x) == MEM)
6994 x = change_address (x, VOIDmode,
6995 plus_constant (Pmode, XEXP (x, 0), 4));
6996 else
6997 output_operand_lossage ("register or memory expression expected "
6998 "for 'N' output modifier");
6999 break;
7001 case 'M':
7002 if (GET_CODE (x) == REG)
7003 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7004 else if (GET_CODE (x) == MEM)
7005 x = change_address (x, VOIDmode,
7006 plus_constant (Pmode, XEXP (x, 0), 8));
7007 else
7008 output_operand_lossage ("register or memory expression expected "
7009 "for 'M' output modifier");
7010 break;
7012 case 'Y':
7013 print_shift_count_operand (file, x);
7014 return;
7017 switch (GET_CODE (x))
7019 case REG:
7020 /* Print FP regs as fx instead of vx when they are accessed
7021 through non-vector mode. */
7022 if (code == 'v'
7023 || VECTOR_NOFP_REG_P (x)
7024 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7025 || (VECTOR_REG_P (x)
7026 && (GET_MODE_SIZE (GET_MODE (x)) /
7027 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7028 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7029 else
7030 fprintf (file, "%s", reg_names[REGNO (x)]);
7031 break;
7033 case MEM:
7034 output_address (XEXP (x, 0));
7035 break;
7037 case CONST:
7038 case CODE_LABEL:
7039 case LABEL_REF:
7040 case SYMBOL_REF:
7041 output_addr_const (file, x);
7042 break;
7044 case CONST_INT:
7045 ival = INTVAL (x);
7046 switch (code)
7048 case 0:
7049 break;
7050 case 'b':
7051 ival &= 0xff;
7052 break;
7053 case 'c':
7054 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7055 break;
7056 case 'x':
7057 ival &= 0xffff;
7058 break;
7059 case 'h':
7060 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7061 break;
7062 case 'i':
7063 ival = s390_extract_part (x, HImode, 0);
7064 break;
7065 case 'j':
7066 ival = s390_extract_part (x, HImode, -1);
7067 break;
7068 case 'k':
7069 ival = s390_extract_part (x, SImode, 0);
7070 break;
7071 case 'm':
7072 ival = s390_extract_part (x, SImode, -1);
7073 break;
7074 case 'o':
7075 ival &= 0xffffffff;
7076 break;
7077 case 'e': case 'f':
7078 case 's': case 't':
7080 int pos, len;
7081 bool ok;
7083 len = (code == 's' || code == 'e' ? 64 : 32);
7084 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7085 gcc_assert (ok);
7086 if (code == 's' || code == 't')
7087 ival = 64 - pos - len;
7088 else
7089 ival = 64 - 1 - pos;
7091 break;
7092 default:
7093 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7095 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7096 break;
7098 case CONST_DOUBLE:
7099 gcc_assert (GET_MODE (x) == VOIDmode);
7100 if (code == 'b')
7101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
7102 else if (code == 'x')
7103 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
7104 else if (code == 'h')
7105 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7106 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
7107 else
7109 if (code == 0)
7110 output_operand_lossage ("invalid constant - try using "
7111 "an output modifier");
7112 else
7113 output_operand_lossage ("invalid constant for output modifier '%c'",
7114 code);
7116 break;
7117 case CONST_VECTOR:
7118 switch (code)
7120 case 'e':
7121 case 's':
7123 int start, stop, inner_len;
7124 bool ok;
7126 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7127 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7128 gcc_assert (ok);
7129 if (code == 's' || code == 't')
7130 ival = inner_len - stop - 1;
7131 else
7132 ival = inner_len - start - 1;
7133 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7135 break;
7136 case 't':
7138 unsigned mask;
7139 bool ok = s390_bytemask_vector_p (x, &mask);
7140 gcc_assert (ok);
7141 fprintf (file, "%u", mask);
7143 break;
7145 default:
7146 output_operand_lossage ("invalid constant vector for output "
7147 "modifier '%c'", code);
7149 break;
7151 default:
7152 if (code == 0)
7153 output_operand_lossage ("invalid expression - try using "
7154 "an output modifier");
7155 else
7156 output_operand_lossage ("invalid expression for output "
7157 "modifier '%c'", code);
7158 break;
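/* Examples for the CONST_INT modifiers handled above, taking
   x = (const_int -1):

     'b' -> 255         'c' -> -1
     'x' -> 65535       'h' -> -1
     'o' -> 4294967295

   i.e. 'b', 'x' and 'o' print the low 8/16/32 bits as unsigned values
   while 'c' and 'h' sign-extend the low byte and halfword.  */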
7162 /* Target hook for assembling integer objects. We need to define it
7163 here to work around a bug in some versions of GAS, which couldn't
7164 handle values smaller than INT_MIN when printed in decimal. */
7166 static bool
7167 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7169 if (size == 8 && aligned_p
7170 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7172 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7173 INTVAL (x));
7174 return true;
7176 return default_assemble_integer (x, size, aligned_p);
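/* Example: for an aligned 8-byte integer with the value -2147483649
   (one below INT_MIN) this emits

     .quad 0xffffffff7fffffff

   i.e. the 64-bit two's complement representation in hex, instead of
   the decimal form that the affected GAS versions mishandle.  */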
7179 /* Returns true if register REGNO is used for forming
7180 a memory address in expression X. */
7182 static bool
7183 reg_used_in_mem_p (int regno, rtx x)
7185 enum rtx_code code = GET_CODE (x);
7186 int i, j;
7187 const char *fmt;
7189 if (code == MEM)
7191 if (refers_to_regno_p (regno, XEXP (x, 0)))
7192 return true;
7194 else if (code == SET
7195 && GET_CODE (SET_DEST (x)) == PC)
7197 if (refers_to_regno_p (regno, SET_SRC (x)))
7198 return true;
7201 fmt = GET_RTX_FORMAT (code);
7202 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7204 if (fmt[i] == 'e'
7205 && reg_used_in_mem_p (regno, XEXP (x, i)))
7206 return true;
7208 else if (fmt[i] == 'E')
7209 for (j = 0; j < XVECLEN (x, i); j++)
7210 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7211 return true;
7213 return false;
7216 /* Returns true if expression DEP_RTX sets an address register
7217 used by instruction INSN to address memory. */
7219 static bool
7220 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7222 rtx target, pat;
7224 if (NONJUMP_INSN_P (dep_rtx))
7225 dep_rtx = PATTERN (dep_rtx);
7227 if (GET_CODE (dep_rtx) == SET)
7229 target = SET_DEST (dep_rtx);
7230 if (GET_CODE (target) == STRICT_LOW_PART)
7231 target = XEXP (target, 0);
7232 while (GET_CODE (target) == SUBREG)
7233 target = SUBREG_REG (target);
7235 if (GET_CODE (target) == REG)
7237 int regno = REGNO (target);
7239 if (s390_safe_attr_type (insn) == TYPE_LA)
7241 pat = PATTERN (insn);
7242 if (GET_CODE (pat) == PARALLEL)
7244 gcc_assert (XVECLEN (pat, 0) == 2);
7245 pat = XVECEXP (pat, 0, 0);
7247 gcc_assert (GET_CODE (pat) == SET);
7248 return refers_to_regno_p (regno, SET_SRC (pat));
7250 else if (get_attr_atype (insn) == ATYPE_AGEN)
7251 return reg_used_in_mem_p (regno, PATTERN (insn));
7254 return false;
7257 /* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit. */
7260 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7262 rtx dep_rtx = PATTERN (dep_insn);
7263 int i;
7265 if (GET_CODE (dep_rtx) == SET
7266 && addr_generation_dependency_p (dep_rtx, insn))
7267 return 1;
7268 else if (GET_CODE (dep_rtx) == PARALLEL)
7270 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7272 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7273 return 1;
7276 return 0;
7280 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
7281 Increase the priority to execute INSN earlier, reduce the priority
7282 to execute INSN later.
7285 A STD instruction should be scheduled earlier,
7286 in order to use the bypass. */
7287 static int
7288 s390_adjust_priority (rtx_insn *insn, int priority)
7290 if (! INSN_P (insn))
7291 return priority;
7293 if (s390_tune <= PROCESSOR_2064_Z900)
7294 return priority;
7296 switch (s390_safe_attr_type (insn))
7298 case TYPE_FSTOREDF:
7299 case TYPE_FSTORESF:
7300 priority = priority << 3;
7301 break;
7302 case TYPE_STORE:
7303 case TYPE_STM:
7304 priority = priority << 1;
7305 break;
7306 default:
7307 break;
7309 return priority;
7313 /* The number of instructions that can be issued per cycle. */
7315 static int
7316 s390_issue_rate (void)
7318 switch (s390_tune)
7320 case PROCESSOR_2084_Z990:
7321 case PROCESSOR_2094_Z9_109:
7322 case PROCESSOR_2094_Z9_EC:
7323 case PROCESSOR_2817_Z196:
7324 return 3;
7325 case PROCESSOR_2097_Z10:
7326 return 2;
7327 case PROCESSOR_9672_G5:
7328 case PROCESSOR_9672_G6:
7329 case PROCESSOR_2064_Z900:
7330 /* Starting with EC12 we use the sched_reorder hook to take care
7331 of instruction dispatch constraints. The algorithm only
7332 picks the best instruction and assumes only a single
7333 instruction gets issued per cycle. */
7334 case PROCESSOR_2827_ZEC12:
7335 case PROCESSOR_2964_Z13:
7336 default:
7337 return 1;
7341 static int
7342 s390_first_cycle_multipass_dfa_lookahead (void)
7344 return 4;
7347 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7348 Fix up MEMs as required. */
7350 static void
7351 annotate_constant_pool_refs (rtx *x)
7353 int i, j;
7354 const char *fmt;
7356 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7357 || !CONSTANT_POOL_ADDRESS_P (*x));
7359 /* Literal pool references can only occur inside a MEM ... */
7360 if (GET_CODE (*x) == MEM)
7362 rtx memref = XEXP (*x, 0);
7364 if (GET_CODE (memref) == SYMBOL_REF
7365 && CONSTANT_POOL_ADDRESS_P (memref))
7367 rtx base = cfun->machine->base_reg;
7368 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7369 UNSPEC_LTREF);
7371 *x = replace_equiv_address (*x, addr);
7372 return;
7375 if (GET_CODE (memref) == CONST
7376 && GET_CODE (XEXP (memref, 0)) == PLUS
7377 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7378 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7379 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7381 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7382 rtx sym = XEXP (XEXP (memref, 0), 0);
7383 rtx base = cfun->machine->base_reg;
7384 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7385 UNSPEC_LTREF);
7387 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7388 return;
7392 /* ... or a load-address type pattern. */
7393 if (GET_CODE (*x) == SET)
7395 rtx addrref = SET_SRC (*x);
7397 if (GET_CODE (addrref) == SYMBOL_REF
7398 && CONSTANT_POOL_ADDRESS_P (addrref))
7400 rtx base = cfun->machine->base_reg;
7401 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7402 UNSPEC_LTREF);
7404 SET_SRC (*x) = addr;
7405 return;
7408 if (GET_CODE (addrref) == CONST
7409 && GET_CODE (XEXP (addrref, 0)) == PLUS
7410 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7411 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7412 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7414 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7415 rtx sym = XEXP (XEXP (addrref, 0), 0);
7416 rtx base = cfun->machine->base_reg;
7417 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7418 UNSPEC_LTREF);
7420 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7421 return;
7425 /* Annotate LTREL_BASE as well. */
7426 if (GET_CODE (*x) == UNSPEC
7427 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7429 rtx base = cfun->machine->base_reg;
7430 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7431 UNSPEC_LTREL_BASE);
7432 return;
7435 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7436 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7438 if (fmt[i] == 'e')
7440 annotate_constant_pool_refs (&XEXP (*x, i));
7442 else if (fmt[i] == 'E')
7444 for (j = 0; j < XVECLEN (*x, i); j++)
7445 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
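/* For illustration: a literal pool access such as

     (mem (symbol_ref ".LC0"))              pool address

   is rewritten here into

     (mem (unspec [(symbol_ref ".LC0") (base_reg)] UNSPEC_LTREF))

   where base_reg is the function's literal pool base register
   (cfun->machine->base_reg), so that replace_constant_pool_ref can
   later substitute base plus offset for the symbol.  The same wrapping
   is applied to load-address patterns and to UNSPEC_LTREL_BASE.  */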
7450 /* Split all branches that exceed the maximum distance.
7451 Returns true if this created a new literal pool entry. */
7453 static int
7454 s390_split_branches (void)
7456 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7457 int new_literal = 0, ret;
7458 rtx_insn *insn;
7459 rtx pat, target;
7460 rtx *label;
7462 /* We need correct insn addresses. */
7464 shorten_branches (get_insns ());
7466 /* Find all branches that exceed 64KB, and split them. */
7468 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7470 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7471 continue;
7473 pat = PATTERN (insn);
7474 if (GET_CODE (pat) == PARALLEL)
7475 pat = XVECEXP (pat, 0, 0);
7476 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7477 continue;
7479 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7481 label = &SET_SRC (pat);
7483 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7485 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7486 label = &XEXP (SET_SRC (pat), 1);
7487 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7488 label = &XEXP (SET_SRC (pat), 2);
7489 else
7490 continue;
7492 else
7493 continue;
7495 if (get_attr_length (insn) <= 4)
7496 continue;
7498 /* We are going to use the return register as scratch register,
7499 make sure it will be saved/restored by the prologue/epilogue. */
7500 cfun_frame_layout.save_return_addr_p = 1;
7502 if (!flag_pic)
7504 new_literal = 1;
7505 rtx mem = force_const_mem (Pmode, *label);
7506 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7507 insn);
7508 INSN_ADDRESSES_NEW (set_insn, -1);
7509 annotate_constant_pool_refs (&PATTERN (set_insn));
7511 target = temp_reg;
7513 else
7515 new_literal = 1;
7516 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7517 UNSPEC_LTREL_OFFSET);
7518 target = gen_rtx_CONST (Pmode, target);
7519 target = force_const_mem (Pmode, target);
7520 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7521 insn);
7522 INSN_ADDRESSES_NEW (set_insn, -1);
7523 annotate_constant_pool_refs (&PATTERN (set_insn));
7525 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7526 cfun->machine->base_reg),
7527 UNSPEC_LTREL_BASE);
7528 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7531 ret = validate_change (insn, label, target, 0);
7532 gcc_assert (ret);
7535 return new_literal;
7539 /* Find an annotated literal pool symbol referenced in RTX X,
7540 and store it at REF. Will abort if X contains references to
7541 more than one such pool symbol; multiple references to the same
7542 symbol are allowed, however.
7544 The rtx pointed to by REF must be initialized to NULL_RTX
7545 by the caller before calling this routine. */
7547 static void
7548 find_constant_pool_ref (rtx x, rtx *ref)
7550 int i, j;
7551 const char *fmt;
7553 /* Ignore LTREL_BASE references. */
7554 if (GET_CODE (x) == UNSPEC
7555 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7556 return;
7557 /* Likewise POOL_ENTRY insns. */
7558 if (GET_CODE (x) == UNSPEC_VOLATILE
7559 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7560 return;
7562 gcc_assert (GET_CODE (x) != SYMBOL_REF
7563 || !CONSTANT_POOL_ADDRESS_P (x));
7565 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7567 rtx sym = XVECEXP (x, 0, 0);
7568 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7569 && CONSTANT_POOL_ADDRESS_P (sym));
7571 if (*ref == NULL_RTX)
7572 *ref = sym;
7573 else
7574 gcc_assert (*ref == sym);
7576 return;
7579 fmt = GET_RTX_FORMAT (GET_CODE (x));
7580 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7582 if (fmt[i] == 'e')
7584 find_constant_pool_ref (XEXP (x, i), ref);
7586 else if (fmt[i] == 'E')
7588 for (j = 0; j < XVECLEN (x, i); j++)
7589 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7594 /* Replace every reference to the annotated literal pool
7595 symbol REF in X by its base plus OFFSET. */
7597 static void
7598 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7600 int i, j;
7601 const char *fmt;
7603 gcc_assert (*x != ref);
7605 if (GET_CODE (*x) == UNSPEC
7606 && XINT (*x, 1) == UNSPEC_LTREF
7607 && XVECEXP (*x, 0, 0) == ref)
7609 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7610 return;
7613 if (GET_CODE (*x) == PLUS
7614 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7615 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7616 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7617 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7619 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7620 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7621 return;
7624 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7625 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7627 if (fmt[i] == 'e')
7629 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7631 else if (fmt[i] == 'E')
7633 for (j = 0; j < XVECLEN (*x, i); j++)
7634 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7639 /* Check whether X contains an UNSPEC_LTREL_BASE.
7640 Return its constant pool symbol if found, NULL_RTX otherwise. */
7642 static rtx
7643 find_ltrel_base (rtx x)
7645 int i, j;
7646 const char *fmt;
7648 if (GET_CODE (x) == UNSPEC
7649 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7650 return XVECEXP (x, 0, 0);
7652 fmt = GET_RTX_FORMAT (GET_CODE (x));
7653 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7655 if (fmt[i] == 'e')
7657 rtx fnd = find_ltrel_base (XEXP (x, i));
7658 if (fnd)
7659 return fnd;
7661 else if (fmt[i] == 'E')
7663 for (j = 0; j < XVECLEN (x, i); j++)
7665 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7666 if (fnd)
7667 return fnd;
7672 return NULL_RTX;
7675 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
7677 static void
7678 replace_ltrel_base (rtx *x)
7680 int i, j;
7681 const char *fmt;
7683 if (GET_CODE (*x) == UNSPEC
7684 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7686 *x = XVECEXP (*x, 0, 1);
7687 return;
7690 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7691 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7693 if (fmt[i] == 'e')
7695 replace_ltrel_base (&XEXP (*x, i));
7697 else if (fmt[i] == 'E')
7699 for (j = 0; j < XVECLEN (*x, i); j++)
7700 replace_ltrel_base (&XVECEXP (*x, i, j));
7706 /* We keep a list of constants which we have to add to internal
7707 constant tables in the middle of large functions. */
7709 #define NR_C_MODES 32
7710 machine_mode constant_modes[NR_C_MODES] =
7712 TFmode, TImode, TDmode,
7713 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
7714 V4SFmode, V2DFmode, V1TFmode,
7715 DFmode, DImode, DDmode,
7716 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7717 SFmode, SImode, SDmode,
7718 V4QImode, V2HImode, V1SImode, V1SFmode,
7719 HImode,
7720 V2QImode, V1HImode,
7721 QImode,
7722 V1QImode
7725 struct constant
7727 struct constant *next;
7728 rtx value;
7729 rtx_code_label *label;
7732 struct constant_pool
7734 struct constant_pool *next;
7735 rtx_insn *first_insn;
7736 rtx_insn *pool_insn;
7737 bitmap insns;
7738 rtx_insn *emit_pool_after;
7740 struct constant *constants[NR_C_MODES];
7741 struct constant *execute;
7742 rtx_code_label *label;
7743 int size;
7746 /* Allocate new constant_pool structure. */
7748 static struct constant_pool *
7749 s390_alloc_pool (void)
7751 struct constant_pool *pool;
7752 int i;
7754 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7755 pool->next = NULL;
7756 for (i = 0; i < NR_C_MODES; i++)
7757 pool->constants[i] = NULL;
7759 pool->execute = NULL;
7760 pool->label = gen_label_rtx ();
7761 pool->first_insn = NULL;
7762 pool->pool_insn = NULL;
7763 pool->insns = BITMAP_ALLOC (NULL);
7764 pool->size = 0;
7765 pool->emit_pool_after = NULL;
7767 return pool;
7770 /* Create new constant pool covering instructions starting at INSN
7771 and chain it to the end of POOL_LIST. */
7773 static struct constant_pool *
7774 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7776 struct constant_pool *pool, **prev;
7778 pool = s390_alloc_pool ();
7779 pool->first_insn = insn;
7781 for (prev = pool_list; *prev; prev = &(*prev)->next)
7783 *prev = pool;
7785 return pool;
7788 /* End range of instructions covered by POOL at INSN and emit
7789 placeholder insn representing the pool. */
7791 static void
7792 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7794 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7796 if (!insn)
7797 insn = get_last_insn ();
7799 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7800 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7803 /* Add INSN to the list of insns covered by POOL. */
7805 static void
7806 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7808 bitmap_set_bit (pool->insns, INSN_UID (insn));
7811 /* Return pool out of POOL_LIST that covers INSN. */
7813 static struct constant_pool *
7814 s390_find_pool (struct constant_pool *pool_list, rtx insn)
7816 struct constant_pool *pool;
7818 for (pool = pool_list; pool; pool = pool->next)
7819 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7820 break;
7822 return pool;
7825 /* Add constant VAL of mode MODE to the constant pool POOL. */
7827 static void
7828 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7830 struct constant *c;
7831 int i;
7833 for (i = 0; i < NR_C_MODES; i++)
7834 if (constant_modes[i] == mode)
7835 break;
7836 gcc_assert (i != NR_C_MODES);
7838 for (c = pool->constants[i]; c != NULL; c = c->next)
7839 if (rtx_equal_p (val, c->value))
7840 break;
7842 if (c == NULL)
7844 c = (struct constant *) xmalloc (sizeof *c);
7845 c->value = val;
7846 c->label = gen_label_rtx ();
7847 c->next = pool->constants[i];
7848 pool->constants[i] = c;
7849 pool->size += GET_MODE_SIZE (mode);
7853 /* Return an rtx that represents the offset of X from the start of
7854 pool POOL. */
7856 static rtx
7857 s390_pool_offset (struct constant_pool *pool, rtx x)
7859 rtx label;
7861 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
7862 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
7863 UNSPEC_POOL_OFFSET);
7864 return gen_rtx_CONST (GET_MODE (x), x);
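/* Note: the UNSPEC_POOL_OFFSET wrapper built here is later printed by
   s390_output_addr_const_extra as the difference X - LABEL, so pool
   offsets assemble to a simple difference of two local labels.  */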
7867 /* Find constant VAL of mode MODE in the constant pool POOL.
7868 Return an RTX describing the distance from the start of
7869 the pool to the location of the new constant. */
7871 static rtx
7872 s390_find_constant (struct constant_pool *pool, rtx val,
7873 machine_mode mode)
7875 struct constant *c;
7876 int i;
7878 for (i = 0; i < NR_C_MODES; i++)
7879 if (constant_modes[i] == mode)
7880 break;
7881 gcc_assert (i != NR_C_MODES);
7883 for (c = pool->constants[i]; c != NULL; c = c->next)
7884 if (rtx_equal_p (val, c->value))
7885 break;
7887 gcc_assert (c);
7889 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7892 /* Check whether INSN is an execute. Return the label_ref to its
7893 execute target template if so, NULL_RTX otherwise. */
7895 static rtx
7896 s390_execute_label (rtx insn)
7898 if (NONJUMP_INSN_P (insn)
7899 && GET_CODE (PATTERN (insn)) == PARALLEL
7900 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
7901 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
7902 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
7904 return NULL_RTX;
7907 /* Add execute target for INSN to the constant pool POOL. */
7909 static void
7910 s390_add_execute (struct constant_pool *pool, rtx insn)
7912 struct constant *c;
7914 for (c = pool->execute; c != NULL; c = c->next)
7915 if (INSN_UID (insn) == INSN_UID (c->value))
7916 break;
7918 if (c == NULL)
7920 c = (struct constant *) xmalloc (sizeof *c);
7921 c->value = insn;
7922 c->label = gen_label_rtx ();
7923 c->next = pool->execute;
7924 pool->execute = c;
7925 pool->size += 6;
7929 /* Find execute target for INSN in the constant pool POOL.
7930 Return an RTX describing the distance from the start of
7931 the pool to the location of the execute target. */
7933 static rtx
7934 s390_find_execute (struct constant_pool *pool, rtx insn)
7936 struct constant *c;
7938 for (c = pool->execute; c != NULL; c = c->next)
7939 if (INSN_UID (insn) == INSN_UID (c->value))
7940 break;
7942 gcc_assert (c);
7944 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7947 /* For an execute INSN, extract the execute target template. */
7949 static rtx
7950 s390_execute_target (rtx insn)
7952 rtx pattern = PATTERN (insn);
7953 gcc_assert (s390_execute_label (insn));
7955 if (XVECLEN (pattern, 0) == 2)
7957 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
7959 else
7961 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
7962 int i;
7964 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
7965 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
7967 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
7970 return pattern;
7973 /* Indicate that INSN cannot be duplicated. This is the case for
7974 execute insns that carry a unique label. */
7976 static bool
7977 s390_cannot_copy_insn_p (rtx_insn *insn)
7979 rtx label = s390_execute_label (insn);
7980 return label && label != const0_rtx;
7983 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
7984 do not emit the pool base label. */
7986 static void
7987 s390_dump_pool (struct constant_pool *pool, bool remote_label)
7989 struct constant *c;
7990 rtx_insn *insn = pool->pool_insn;
7991 int i;
7993 /* Switch to rodata section. */
7994 if (TARGET_CPU_ZARCH)
7996 insn = emit_insn_after (gen_pool_section_start (), insn);
7997 INSN_ADDRESSES_NEW (insn, -1);
8000 /* Ensure minimum pool alignment. */
8001 if (TARGET_CPU_ZARCH)
8002 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8003 else
8004 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8005 INSN_ADDRESSES_NEW (insn, -1);
8007 /* Emit pool base label. */
8008 if (!remote_label)
8010 insn = emit_label_after (pool->label, insn);
8011 INSN_ADDRESSES_NEW (insn, -1);
8014 /* Dump constants in descending alignment requirement order,
8015 ensuring proper alignment for every constant. */
8016 for (i = 0; i < NR_C_MODES; i++)
8017 for (c = pool->constants[i]; c; c = c->next)
8019 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8020 rtx value = copy_rtx (c->value);
8021 if (GET_CODE (value) == CONST
8022 && GET_CODE (XEXP (value, 0)) == UNSPEC
8023 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8024 && XVECLEN (XEXP (value, 0), 0) == 1)
8025 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8027 insn = emit_label_after (c->label, insn);
8028 INSN_ADDRESSES_NEW (insn, -1);
8030 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8031 gen_rtvec (1, value),
8032 UNSPECV_POOL_ENTRY);
8033 insn = emit_insn_after (value, insn);
8034 INSN_ADDRESSES_NEW (insn, -1);
8037 /* Ensure minimum alignment for instructions. */
8038 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8039 INSN_ADDRESSES_NEW (insn, -1);
8041 /* Output in-pool execute template insns. */
8042 for (c = pool->execute; c; c = c->next)
8044 insn = emit_label_after (c->label, insn);
8045 INSN_ADDRESSES_NEW (insn, -1);
8047 insn = emit_insn_after (s390_execute_target (c->value), insn);
8048 INSN_ADDRESSES_NEW (insn, -1);
8051 /* Switch back to previous section. */
8052 if (TARGET_CPU_ZARCH)
8054 insn = emit_insn_after (gen_pool_section_end (), insn);
8055 INSN_ADDRESSES_NEW (insn, -1);
8058 insn = emit_barrier_after (insn);
8059 INSN_ADDRESSES_NEW (insn, -1);
8061 /* Remove placeholder insn. */
8062 remove_insn (pool->pool_insn);
8065 /* Free all memory used by POOL. */
8067 static void
8068 s390_free_pool (struct constant_pool *pool)
8070 struct constant *c, *next;
8071 int i;
8073 for (i = 0; i < NR_C_MODES; i++)
8074 for (c = pool->constants[i]; c; c = next)
8076 next = c->next;
8077 free (c);
8080 for (c = pool->execute; c; c = next)
8082 next = c->next;
8083 free (c);
8086 BITMAP_FREE (pool->insns);
8087 free (pool);
8091 /* Collect main literal pool. Return NULL on overflow. */
8093 static struct constant_pool *
8094 s390_mainpool_start (void)
8096 struct constant_pool *pool;
8097 rtx_insn *insn;
8099 pool = s390_alloc_pool ();
8101 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8103 if (NONJUMP_INSN_P (insn)
8104 && GET_CODE (PATTERN (insn)) == SET
8105 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8106 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8108 /* There might be two main_pool instructions if base_reg
8109 is call-clobbered; one for shrink-wrapped code and one
8110 for the rest. We want to keep the first. */
8111 if (pool->pool_insn)
8113 insn = PREV_INSN (insn);
8114 delete_insn (NEXT_INSN (insn));
8115 continue;
8117 pool->pool_insn = insn;
8120 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8122 s390_add_execute (pool, insn);
8124 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8126 rtx pool_ref = NULL_RTX;
8127 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8128 if (pool_ref)
8130 rtx constant = get_pool_constant (pool_ref);
8131 machine_mode mode = get_pool_mode (pool_ref);
8132 s390_add_constant (pool, constant, mode);
8136 /* If hot/cold partitioning is enabled we have to make sure that
8137 the literal pool is emitted in the same section where the
8138 initialization of the literal pool base pointer takes place.
8139 emit_pool_after is only used in the non-overflow case on non-Z
8140 CPUs, where we can emit the literal pool at the end of the
8141 function body within the text section. */
8142 if (NOTE_P (insn)
8143 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8144 && !pool->emit_pool_after)
8145 pool->emit_pool_after = PREV_INSN (insn);
8148 gcc_assert (pool->pool_insn || pool->size == 0);
8150 if (pool->size >= 4096)
8152 /* We're going to chunkify the pool, so remove the main
8153 pool placeholder insn. */
8154 remove_insn (pool->pool_insn);
8156 s390_free_pool (pool);
8157 pool = NULL;
8160 /* If the function ends with the section where the literal pool
8161 should be emitted, set the marker to its end. */
8162 if (pool && !pool->emit_pool_after)
8163 pool->emit_pool_after = get_last_insn ();
8165 return pool;
8168 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8169 Modify the current function to output the pool constants as well as
8170 the pool register setup instruction. */
8172 static void
8173 s390_mainpool_finish (struct constant_pool *pool)
8175 rtx base_reg = cfun->machine->base_reg;
8177 /* If the pool is empty, we're done. */
8178 if (pool->size == 0)
8180 /* We don't actually need a base register after all. */
8181 cfun->machine->base_reg = NULL_RTX;
8183 if (pool->pool_insn)
8184 remove_insn (pool->pool_insn);
8185 s390_free_pool (pool);
8186 return;
8189 /* We need correct insn addresses. */
8190 shorten_branches (get_insns ());
8192 /* On zSeries, we use a LARL to load the pool register. The pool is
8193 located in the .rodata section, so we emit it after the function. */
8194 if (TARGET_CPU_ZARCH)
8196 rtx set = gen_main_base_64 (base_reg, pool->label);
8197 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8198 INSN_ADDRESSES_NEW (insn, -1);
8199 remove_insn (pool->pool_insn);
8201 insn = get_last_insn ();
8202 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8203 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8205 s390_dump_pool (pool, 0);
8208 /* On S/390, if the total size of the function's code plus literal pool
8209 does not exceed 4096 bytes, we use BASR to set up a function base
8210 pointer, and emit the literal pool at the end of the function. */
8211 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8212 + pool->size + 8 /* alignment slop */ < 4096)
8214 rtx set = gen_main_base_31_small (base_reg, pool->label);
8215 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8216 INSN_ADDRESSES_NEW (insn, -1);
8217 remove_insn (pool->pool_insn);
8219 insn = emit_label_after (pool->label, insn);
8220 INSN_ADDRESSES_NEW (insn, -1);
8222 /* emit_pool_after will be set by s390_mainpool_start to the
8223 last insn of the section where the literal pool should be
8224 emitted. */
8225 insn = pool->emit_pool_after;
8227 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8228 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8230 s390_dump_pool (pool, 1);
8233 /* Otherwise, we emit an inline literal pool and use BASR to branch
8234 over it, setting up the pool register at the same time. */
8235 else
8237 rtx_code_label *pool_end = gen_label_rtx ();
8239 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8240 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8241 JUMP_LABEL (insn) = pool_end;
8242 INSN_ADDRESSES_NEW (insn, -1);
8243 remove_insn (pool->pool_insn);
8245 insn = emit_label_after (pool->label, insn);
8246 INSN_ADDRESSES_NEW (insn, -1);
8248 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8249 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8251 insn = emit_label_after (pool_end, pool->pool_insn);
8252 INSN_ADDRESSES_NEW (insn, -1);
8254 s390_dump_pool (pool, 1);
8258 /* Replace all literal pool references. */
8260 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8262 if (INSN_P (insn))
8263 replace_ltrel_base (&PATTERN (insn));
8265 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8267 rtx addr, pool_ref = NULL_RTX;
8268 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8269 if (pool_ref)
8271 if (s390_execute_label (insn))
8272 addr = s390_find_execute (pool, insn);
8273 else
8274 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8275 get_pool_mode (pool_ref));
8277 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8278 INSN_CODE (insn) = -1;
8284 /* Free the pool. */
8285 s390_free_pool (pool);
8288 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8289 We have decided we cannot use this pool, so revert all changes
8290 to the current function that were done by s390_mainpool_start. */
8291 static void
8292 s390_mainpool_cancel (struct constant_pool *pool)
8294 /* We didn't actually change the instruction stream, so simply
8295 free the pool memory. */
8296 s390_free_pool (pool);
8300 /* Chunkify the literal pool. */
8302 #define S390_POOL_CHUNK_MIN 0xc00
8303 #define S390_POOL_CHUNK_MAX 0xe00
8305 static struct constant_pool *
8306 s390_chunkify_start (void)
8308 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8309 int extra_size = 0;
8310 bitmap far_labels;
8311 rtx pending_ltrel = NULL_RTX;
8312 rtx_insn *insn;
8314 rtx (*gen_reload_base) (rtx, rtx) =
8315 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8318 /* We need correct insn addresses. */
8320 shorten_branches (get_insns ());
8322 /* Scan all insns and move literals to pool chunks. */
8324 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8326 bool section_switch_p = false;
8328 /* Check for pending LTREL_BASE. */
8329 if (INSN_P (insn))
8331 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8332 if (ltrel_base)
8334 gcc_assert (ltrel_base == pending_ltrel);
8335 pending_ltrel = NULL_RTX;
8339 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8341 if (!curr_pool)
8342 curr_pool = s390_start_pool (&pool_list, insn);
8344 s390_add_execute (curr_pool, insn);
8345 s390_add_pool_insn (curr_pool, insn);
8347 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8349 rtx pool_ref = NULL_RTX;
8350 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8351 if (pool_ref)
8353 rtx constant = get_pool_constant (pool_ref);
8354 machine_mode mode = get_pool_mode (pool_ref);
8356 if (!curr_pool)
8357 curr_pool = s390_start_pool (&pool_list, insn);
8359 s390_add_constant (curr_pool, constant, mode);
8360 s390_add_pool_insn (curr_pool, insn);
8362 /* Don't split the pool chunk between a LTREL_OFFSET load
8363 and the corresponding LTREL_BASE. */
8364 if (GET_CODE (constant) == CONST
8365 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8366 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8368 gcc_assert (!pending_ltrel);
8369 pending_ltrel = pool_ref;
8374 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8376 if (curr_pool)
8377 s390_add_pool_insn (curr_pool, insn);
8378 /* An LTREL_BASE must follow within the same basic block. */
8379 gcc_assert (!pending_ltrel);
8382 if (NOTE_P (insn))
8383 switch (NOTE_KIND (insn))
8385 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8386 section_switch_p = true;
8387 break;
8388 case NOTE_INSN_VAR_LOCATION:
8389 case NOTE_INSN_CALL_ARG_LOCATION:
8390 continue;
8391 default:
8392 break;
8395 if (!curr_pool
8396 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8397 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8398 continue;
8400 if (TARGET_CPU_ZARCH)
8402 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8403 continue;
8405 s390_end_pool (curr_pool, NULL);
8406 curr_pool = NULL;
8408 else
8410 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8411 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8412 + extra_size;
8414 /* We will later have to insert base register reload insns.
8415 Those will have an effect on code size, which we need to
8416 consider here. This calculation makes rather pessimistic
8417 worst-case assumptions. */
8418 if (LABEL_P (insn))
8419 extra_size += 6;
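/* Illustrative note (not from the original sources): the 6 bytes added
   per label are a worst-case allowance for one base-register reload insn
   that may later be emitted at that label.  E.g. a chunk crossing three
   labels is assumed to grow by up to 18 bytes, and that growth counts
   against S390_POOL_CHUNK_MIN/MAX in the checks below. */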
8421 if (chunk_size < S390_POOL_CHUNK_MIN
8422 && curr_pool->size < S390_POOL_CHUNK_MIN
8423 && !section_switch_p)
8424 continue;
8426 /* Pool chunks can only be inserted after BARRIERs ... */
8427 if (BARRIER_P (insn))
8429 s390_end_pool (curr_pool, insn);
8430 curr_pool = NULL;
8431 extra_size = 0;
8434 /* ... so if we don't find one in time, create one. */
8435 else if (chunk_size > S390_POOL_CHUNK_MAX
8436 || curr_pool->size > S390_POOL_CHUNK_MAX
8437 || section_switch_p)
8439 rtx_insn *label, *jump, *barrier, *next, *prev;
8441 if (!section_switch_p)
8443 /* We can insert the barrier only after a 'real' insn. */
8444 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8445 continue;
8446 if (get_attr_length (insn) == 0)
8447 continue;
8448 /* Don't separate LTREL_BASE from the corresponding
8449 LTREL_OFFSET load. */
8450 if (pending_ltrel)
8451 continue;
8452 next = insn;
8455 insn = next;
8456 next = NEXT_INSN (insn);
8458 while (next
8459 && NOTE_P (next)
8460 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8461 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8463 else
8465 gcc_assert (!pending_ltrel);
8467 /* The old pool has to end before the section switch
8468 note in order to make it part of the current
8469 section. */
8470 insn = PREV_INSN (insn);
8473 label = gen_label_rtx ();
8474 prev = insn;
8475 if (prev && NOTE_P (prev))
8476 prev = prev_nonnote_insn (prev);
8477 if (prev)
8478 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8479 INSN_LOCATION (prev));
8480 else
8481 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8482 barrier = emit_barrier_after (jump);
8483 insn = emit_label_after (label, barrier);
8484 JUMP_LABEL (jump) = label;
8485 LABEL_NUSES (label) = 1;
8487 INSN_ADDRESSES_NEW (jump, -1);
8488 INSN_ADDRESSES_NEW (barrier, -1);
8489 INSN_ADDRESSES_NEW (insn, -1);
8491 s390_end_pool (curr_pool, barrier);
8492 curr_pool = NULL;
8493 extra_size = 0;
8498 if (curr_pool)
8499 s390_end_pool (curr_pool, NULL);
8500 gcc_assert (!pending_ltrel);
8502 /* Find all labels that are branched into
8503 from an insn belonging to a different chunk. */
8505 far_labels = BITMAP_ALLOC (NULL);
8507 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8509 rtx_jump_table_data *table;
8511 /* Labels marked with LABEL_PRESERVE_P can be the target
8512 of non-local jumps, so we have to mark them.
8513 The same holds for named labels.
8515 Don't do that, however, if it is the label before
8516 a jump table. */
8518 if (LABEL_P (insn)
8519 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8521 rtx_insn *vec_insn = NEXT_INSN (insn);
8522 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8523 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8525 /* Check potential targets in a table jump (casesi_jump). */
8526 else if (tablejump_p (insn, NULL, &table))
8528 rtx vec_pat = PATTERN (table);
8529 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8531 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8533 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8535 if (s390_find_pool (pool_list, label)
8536 != s390_find_pool (pool_list, insn))
8537 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8540 /* If we have a direct jump (conditional or unconditional),
8541 check all potential targets. */
8542 else if (JUMP_P (insn))
8544 rtx pat = PATTERN (insn);
8546 if (GET_CODE (pat) == PARALLEL)
8547 pat = XVECEXP (pat, 0, 0);
8549 if (GET_CODE (pat) == SET)
8551 rtx label = JUMP_LABEL (insn);
8552 if (label && !ANY_RETURN_P (label))
8554 if (s390_find_pool (pool_list, label)
8555 != s390_find_pool (pool_list, insn))
8556 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8562 /* Insert base register reload insns before every pool. */
8564 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8566 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8567 curr_pool->label);
8568 rtx_insn *insn = curr_pool->first_insn;
8569 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8572 /* Insert base register reload insns at every far label. */
8574 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8575 if (LABEL_P (insn)
8576 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8578 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8579 if (pool)
8581 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8582 pool->label);
8583 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8588 BITMAP_FREE (far_labels);
8591 /* Recompute insn addresses. */
8593 init_insn_lengths ();
8594 shorten_branches (get_insns ());
8596 return pool_list;
8599 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8600 After we have decided to use this list, finish implementing
8601 all changes to the current function as required. */
8603 static void
8604 s390_chunkify_finish (struct constant_pool *pool_list)
8606 struct constant_pool *curr_pool = NULL;
8607 rtx_insn *insn;
8610 /* Replace all literal pool references. */
8612 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8614 if (INSN_P (insn))
8615 replace_ltrel_base (&PATTERN (insn));
8617 curr_pool = s390_find_pool (pool_list, insn);
8618 if (!curr_pool)
8619 continue;
8621 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8623 rtx addr, pool_ref = NULL_RTX;
8624 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8625 if (pool_ref)
8627 if (s390_execute_label (insn))
8628 addr = s390_find_execute (curr_pool, insn);
8629 else
8630 addr = s390_find_constant (curr_pool,
8631 get_pool_constant (pool_ref),
8632 get_pool_mode (pool_ref));
8634 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8635 INSN_CODE (insn) = -1;
8640 /* Dump out all literal pools. */
8642 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8643 s390_dump_pool (curr_pool, 0);
8645 /* Free pool list. */
8647 while (pool_list)
8649 struct constant_pool *next = pool_list->next;
8650 s390_free_pool (pool_list);
8651 pool_list = next;
8655 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8656 We have decided we cannot use this list, so revert all changes
8657 to the current function that were done by s390_chunkify_start. */
8659 static void
8660 s390_chunkify_cancel (struct constant_pool *pool_list)
8662 struct constant_pool *curr_pool = NULL;
8663 rtx_insn *insn;
8665 /* Remove all pool placeholder insns. */
8667 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8669 /* Did we insert an extra barrier? Remove it. */
8670 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8671 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8672 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8674 if (jump && JUMP_P (jump)
8675 && barrier && BARRIER_P (barrier)
8676 && label && LABEL_P (label)
8677 && GET_CODE (PATTERN (jump)) == SET
8678 && SET_DEST (PATTERN (jump)) == pc_rtx
8679 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8680 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8682 remove_insn (jump);
8683 remove_insn (barrier);
8684 remove_insn (label);
8687 remove_insn (curr_pool->pool_insn);
8690 /* Remove all base register reload insns. */
8692 for (insn = get_insns (); insn; )
8694 rtx_insn *next_insn = NEXT_INSN (insn);
8696 if (NONJUMP_INSN_P (insn)
8697 && GET_CODE (PATTERN (insn)) == SET
8698 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8699 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8700 remove_insn (insn);
8702 insn = next_insn;
8705 /* Free pool list. */
8707 while (pool_list)
8709 struct constant_pool *next = pool_list->next;
8710 s390_free_pool (pool_list);
8711 pool_list = next;
8715 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8717 void
8718 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8720 switch (GET_MODE_CLASS (mode))
8722 case MODE_FLOAT:
8723 case MODE_DECIMAL_FLOAT:
8724 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8726 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
8727 break;
8729 case MODE_INT:
8730 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8731 mark_symbol_refs_as_used (exp);
8732 break;
8734 case MODE_VECTOR_INT:
8735 case MODE_VECTOR_FLOAT:
8737 int i;
8738 machine_mode inner_mode;
8739 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8741 inner_mode = GET_MODE_INNER (GET_MODE (exp));
8742 for (i = 0; i < XVECLEN (exp, 0); i++)
8743 s390_output_pool_entry (XVECEXP (exp, 0, i),
8744 inner_mode,
8745 i == 0
8746 ? align
8747 : GET_MODE_BITSIZE (inner_mode));
8749 break;
8751 default:
8752 gcc_unreachable ();
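/* Illustrative example (not from the original sources): a V4SImode
   CONST_VECTOR falls into the MODE_VECTOR_INT case above and is emitted
   as four SImode pool entries; the first element keeps the pool entry's
   alignment ALIGN, the remaining three use GET_MODE_BITSIZE (SImode),
   i.e. 32 bits. */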
8757 /* Return an RTL expression representing the value of the return address
8758 for the frame COUNT steps up from the current frame. FRAME is the
8759 frame pointer of that frame. */
8762 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8764 int offset;
8765 rtx addr;
8767 /* Without backchain, we fail for all but the current frame. */
8769 if (!TARGET_BACKCHAIN && count > 0)
8770 return NULL_RTX;
8772 /* For the current frame, we need to make sure the initial
8773 value of RETURN_REGNUM is actually saved. */
8775 if (count == 0)
8777 /* On non-z architectures branch splitting could overwrite r14. */
8778 if (TARGET_CPU_ZARCH)
8779 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8780 else
8782 cfun_frame_layout.save_return_addr_p = true;
8783 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8787 if (TARGET_PACKED_STACK)
8788 offset = -2 * UNITS_PER_LONG;
8789 else
8790 offset = RETURN_REGNUM * UNITS_PER_LONG;
8792 addr = plus_constant (Pmode, frame, offset);
8793 addr = memory_address (Pmode, addr);
8794 return gen_rtx_MEM (Pmode, addr);
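/* Worked example (illustrative only): for a frame reached through the
   backchain with the default layout (!TARGET_PACKED_STACK), the return
   address slot is at FRAME + RETURN_REGNUM * UNITS_PER_LONG, i.e.
   FRAME + 14 * 8 = FRAME + 112 on 64-bit and FRAME + 56 on 31-bit.
   With TARGET_PACKED_STACK it is FRAME - 2 * UNITS_PER_LONG instead. */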
8797 /* Return an RTL expression representing the back chain stored in
8798 the current stack frame. */
8801 s390_back_chain_rtx (void)
8803 rtx chain;
8805 gcc_assert (TARGET_BACKCHAIN);
8807 if (TARGET_PACKED_STACK)
8808 chain = plus_constant (Pmode, stack_pointer_rtx,
8809 STACK_POINTER_OFFSET - UNITS_PER_LONG);
8810 else
8811 chain = stack_pointer_rtx;
8813 chain = gen_rtx_MEM (Pmode, chain);
8814 return chain;
8817 /* Find the first call-clobbered register that is unused in the current
8818 function. It could be used as the base register in a leaf function
8819 or for holding the return address before the epilogue. */
8821 static int
8822 find_unused_clobbered_reg (void)
8824 int i;
8825 for (i = 0; i < 6; i++)
8826 if (!df_regs_ever_live_p (i))
8827 return i;
8828 return 0;
8832 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
8833 clobbered hard regs in SETREG. */
8835 static void
8836 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8838 char *regs_ever_clobbered = (char *)data;
8839 unsigned int i, regno;
8840 machine_mode mode = GET_MODE (setreg);
8842 if (GET_CODE (setreg) == SUBREG)
8844 rtx inner = SUBREG_REG (setreg);
8845 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
8846 return;
8847 regno = subreg_regno (setreg);
8849 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
8850 regno = REGNO (setreg);
8851 else
8852 return;
8854 for (i = regno;
8855 i < regno + HARD_REGNO_NREGS (regno, mode);
8856 i++)
8857 regs_ever_clobbered[i] = 1;
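/* Illustrative example (not from the original sources): a clobber of a
   TImode value in r4 covers HARD_REGNO_NREGS (4, TImode) == 2 hard regs
   on 64-bit, so the loop above flags both r4 and r5 in DATA. */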
8860 /* Walks through all basic blocks of the current function looking
8861 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
8862 of the passed char array REGS_EVER_CLOBBERED are set to one for
8863 each of those regs. */
8865 static void
8866 s390_regs_ever_clobbered (char regs_ever_clobbered[])
8868 basic_block cur_bb;
8869 rtx_insn *cur_insn;
8870 unsigned int i;
8872 memset (regs_ever_clobbered, 0, 32);
8874 /* For non-leaf functions we have to consider all call clobbered regs to be
8875 clobbered. */
8876 if (!crtl->is_leaf)
8878 for (i = 0; i < 32; i++)
8879 regs_ever_clobbered[i] = call_really_used_regs[i];
8882 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
8883 this work is done by liveness analysis (mark_regs_live_at_end).
8884 Special care is needed for functions containing landing pads. Landing pads
8885 may use the eh registers, but the code which sets these registers is not
8886 contained in that function. Hence s390_regs_ever_clobbered is not able to
8887 deal with this automatically. */
8888 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
8889 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
8890 if (crtl->calls_eh_return
8891 || (cfun->machine->has_landing_pad_p
8892 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
8893 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
8895 /* For nonlocal gotos all call-saved registers have to be saved.
8896 This flag is also set for the unwinding code in libgcc.
8897 See expand_builtin_unwind_init. For regs_ever_live this is done by
8898 reload. */
8899 if (crtl->saves_all_registers)
8900 for (i = 0; i < 32; i++)
8901 if (!call_really_used_regs[i])
8902 regs_ever_clobbered[i] = 1;
8904 FOR_EACH_BB_FN (cur_bb, cfun)
8906 FOR_BB_INSNS (cur_bb, cur_insn)
8908 rtx pat;
8910 if (!INSN_P (cur_insn))
8911 continue;
8913 pat = PATTERN (cur_insn);
8915 /* Ignore GPR restore insns. */
8916 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
8918 if (GET_CODE (pat) == SET
8919 && GENERAL_REG_P (SET_DEST (pat)))
8921 /* lgdr */
8922 if (GET_MODE (SET_SRC (pat)) == DImode
8923 && FP_REG_P (SET_SRC (pat)))
8924 continue;
8926 /* l / lg */
8927 if (GET_CODE (SET_SRC (pat)) == MEM)
8928 continue;
8931 /* lm / lmg */
8932 if (GET_CODE (pat) == PARALLEL
8933 && load_multiple_operation (pat, VOIDmode))
8934 continue;
8937 note_stores (pat,
8938 s390_reg_clobbered_rtx,
8939 regs_ever_clobbered);
8944 /* Determine the frame area which actually has to be accessed
8945 in the function epilogue. The values are stored at the
8946 given pointers AREA_BOTTOM (address of the lowest used stack
8947 address) and AREA_TOP (address of the first item which does
8948 not belong to the stack frame). */
8950 static void
8951 s390_frame_area (int *area_bottom, int *area_top)
8953 int b, t;
8955 b = INT_MAX;
8956 t = INT_MIN;
8958 if (cfun_frame_layout.first_restore_gpr != -1)
8960 b = (cfun_frame_layout.gprs_offset
8961 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
8962 t = b + (cfun_frame_layout.last_restore_gpr
8963 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
8966 if (TARGET_64BIT && cfun_save_high_fprs_p)
8968 b = MIN (b, cfun_frame_layout.f8_offset);
8969 t = MAX (t, (cfun_frame_layout.f8_offset
8970 + cfun_frame_layout.high_fprs * 8));
8973 if (!TARGET_64BIT)
8975 if (cfun_fpr_save_p (FPR4_REGNUM))
8977 b = MIN (b, cfun_frame_layout.f4_offset);
8978 t = MAX (t, cfun_frame_layout.f4_offset + 8);
8980 if (cfun_fpr_save_p (FPR6_REGNUM))
8982 b = MIN (b, cfun_frame_layout.f4_offset + 8);
8983 t = MAX (t, cfun_frame_layout.f4_offset + 16);
8986 *area_bottom = b;
8987 *area_top = t;
8989 /* Update gpr_save_slots in the frame layout trying to make use of
8990 FPRs as GPR save slots.
8991 This is a helper routine of s390_register_info. */
8993 static void
8994 s390_register_info_gprtofpr ()
8996 int save_reg_slot = FPR0_REGNUM;
8997 int i, j;
8999 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9000 return;
9002 for (i = 15; i >= 6; i--)
9004 if (cfun_gpr_save_slot (i) == 0)
9005 continue;
9007 /* Advance to the next FP register which can be used as a
9008 GPR save slot. */
9009 while ((!call_really_used_regs[save_reg_slot]
9010 || df_regs_ever_live_p (save_reg_slot)
9011 || cfun_fpr_save_p (save_reg_slot))
9012 && FP_REGNO_P (save_reg_slot))
9013 save_reg_slot++;
9014 if (!FP_REGNO_P (save_reg_slot))
9016 /* We only want to use ldgr/lgdr if we can get rid of
9017 stm/lm entirely. So undo the gpr slot allocation in
9018 case we ran out of FPR save slots. */
9019 for (j = 6; j <= 15; j++)
9020 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9021 cfun_gpr_save_slot (j) = -1;
9022 break;
9024 cfun_gpr_save_slot (i) = save_reg_slot++;
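/* Illustrative note (not from the original sources): in a leaf function
   on z10 or later that clobbers r12..r15 while f0..f3 are call-clobbered,
   unused and not FPR-saved, the loop above redirects those four GPR save
   slots to f0..f3, allowing LDGR/LGDR instead of a stack STM/LM. */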
9028 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9029 stdarg.
9030 This is a helper routine for s390_register_info. */
9032 static void
9033 s390_register_info_stdarg_fpr ()
9035 int i;
9036 int min_fpr;
9037 int max_fpr;
9039 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9040 f0-f4 for 64 bit. */
9041 if (!cfun->stdarg
9042 || !TARGET_HARD_FLOAT
9043 || !cfun->va_list_fpr_size
9044 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9045 return;
9047 min_fpr = crtl->args.info.fprs;
9048 max_fpr = min_fpr + cfun->va_list_fpr_size;
9049 if (max_fpr > FP_ARG_NUM_REG)
9050 max_fpr = FP_ARG_NUM_REG;
9052 for (i = min_fpr; i < max_fpr; i++)
9053 cfun_set_fpr_save (i + FPR0_REGNUM);
9056 /* Reserve the GPR save slots for GPRs which need to be saved due to
9057 stdarg.
9058 This is a helper routine for s390_register_info. */
9060 static void
9061 s390_register_info_stdarg_gpr ()
9063 int i;
9064 int min_gpr;
9065 int max_gpr;
9067 if (!cfun->stdarg
9068 || !cfun->va_list_gpr_size
9069 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9070 return;
9072 min_gpr = crtl->args.info.gprs;
9073 max_gpr = min_gpr + cfun->va_list_gpr_size;
9074 if (max_gpr > GP_ARG_NUM_REG)
9075 max_gpr = GP_ARG_NUM_REG;
9077 for (i = min_gpr; i < max_gpr; i++)
9078 cfun_gpr_save_slot (2 + i) = -1;
9081 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9082 for registers which need to be saved in function prologue.
9083 This function can be used until the insns emitted for save/restore
9084 of the regs are visible in the RTL stream. */
9086 static void
9087 s390_register_info ()
9089 int i, j;
9090 char clobbered_regs[32];
9092 gcc_assert (!epilogue_completed);
9094 if (reload_completed)
9095 /* After reload we rely on our own routine to determine which
9096 registers need saving. */
9097 s390_regs_ever_clobbered (clobbered_regs);
9098 else
9099 /* During reload we use regs_ever_live as a base since reload
9100 does changes in there which we otherwise would not be aware
9101 of. */
9102 for (i = 0; i < 32; i++)
9103 clobbered_regs[i] = df_regs_ever_live_p (i);
9105 for (i = 0; i < 32; i++)
9106 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9108 /* Mark the call-saved FPRs which need to be saved.
9109 This needs to be done before checking the special GPRs since the
9110 stack pointer usage depends on whether high FPRs have to be saved
9111 or not. */
9112 cfun_frame_layout.fpr_bitmap = 0;
9113 cfun_frame_layout.high_fprs = 0;
9114 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9115 if (clobbered_regs[i] && !call_really_used_regs[i])
9117 cfun_set_fpr_save (i);
9118 if (i >= FPR8_REGNUM)
9119 cfun_frame_layout.high_fprs++;
9122 if (flag_pic)
9123 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
9124 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
9126 clobbered_regs[BASE_REGNUM]
9127 |= (cfun->machine->base_reg
9128 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9130 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9131 |= !!frame_pointer_needed;
9133 /* On pre-z900 machines this might take until the machine-dependent
9134 reorg pass to decide.
9135 save_return_addr_p will only be set on non-zarch machines so
9136 there is no risk that r14 goes into an FPR instead of a stack
9137 slot. */
9138 clobbered_regs[RETURN_REGNUM]
9139 |= (!crtl->is_leaf
9140 || TARGET_TPF_PROFILING
9141 || cfun->machine->split_branches_pending_p
9142 || cfun_frame_layout.save_return_addr_p
9143 || crtl->calls_eh_return);
9145 clobbered_regs[STACK_POINTER_REGNUM]
9146 |= (!crtl->is_leaf
9147 || TARGET_TPF_PROFILING
9148 || cfun_save_high_fprs_p
9149 || get_frame_size () > 0
9150 || (reload_completed && cfun_frame_layout.frame_size > 0)
9151 || cfun->calls_alloca);
9153 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
9155 for (i = 6; i < 16; i++)
9156 if (clobbered_regs[i])
9157 cfun_gpr_save_slot (i) = -1;
9159 s390_register_info_stdarg_fpr ();
9160 s390_register_info_gprtofpr ();
9162 /* First find the range of GPRs to be restored. Vararg regs don't
9163 need to be restored so we do it before assigning slots to the
9164 vararg GPRs. */
9165 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9166 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9167 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9168 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9170 /* stdarg functions might need to save GPRs 2 to 6. This might
9171 override the GPR->FPR save decision made above for r6 since
9172 vararg regs must go to the stack. */
9173 s390_register_info_stdarg_gpr ();
9175 /* Now the range of GPRs which need saving. */
9176 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9177 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9178 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9179 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
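/* Illustrative note (not from the original sources): the scans above pick
   the lowest and highest GPR whose save slot is -1, i.e. a stack slot.
   E.g. if only r11..r15 ended up with stack slots, first_save_gpr becomes
   11 and last_save_gpr 15, which the prologue later turns into a single
   store-multiple of r11..r15. */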
9182 /* This function is called by s390_optimize_prologue in order to get
9183 rid of unnecessary GPR save/restore instructions. The register info
9184 for the GPRs is re-computed and the ranges are re-calculated. */
9186 static void
9187 s390_optimize_register_info ()
9189 char clobbered_regs[32];
9190 int i, j;
9192 gcc_assert (epilogue_completed);
9193 gcc_assert (!cfun->machine->split_branches_pending_p);
9195 s390_regs_ever_clobbered (clobbered_regs);
9197 for (i = 0; i < 32; i++)
9198 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9200 /* There is still special treatment needed for cases invisible to
9201 s390_regs_ever_clobbered. */
9202 clobbered_regs[RETURN_REGNUM]
9203 |= (TARGET_TPF_PROFILING
9204 /* When expanding builtin_return_addr in ESA mode we do not
9205 know whether r14 will later be needed as scratch reg when
9206 doing branch splitting. So the builtin always accesses the
9207 r14 save slot and we need to stick to the save/restore
9208 decision for r14 even if it turns out that it didn't get
9209 clobbered. */
9210 || cfun_frame_layout.save_return_addr_p
9211 || crtl->calls_eh_return);
9213 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
9215 for (i = 6; i < 16; i++)
9216 if (!clobbered_regs[i])
9217 cfun_gpr_save_slot (i) = 0;
9219 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9220 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9221 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9222 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9224 s390_register_info_stdarg_gpr ();
9226 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9227 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9228 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9229 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9232 /* Fill cfun->machine with info about frame of current function. */
9234 static void
9235 s390_frame_info (void)
9237 HOST_WIDE_INT lowest_offset;
9239 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9240 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9242 /* The va_arg builtin uses a constant distance of 16 *
9243 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9244 pointer. So even if we are going to save the stack pointer in an
9245 FPR we need the stack space in order to keep the offsets
9246 correct. */
9247 if (cfun->stdarg && cfun_save_arg_fprs_p)
9249 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9251 if (cfun_frame_layout.first_save_gpr_slot == -1)
9252 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9255 cfun_frame_layout.frame_size = get_frame_size ();
9256 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9257 fatal_error (input_location,
9258 "total size of local variables exceeds architecture limit");
9260 if (!TARGET_PACKED_STACK)
9262 /* Fixed stack layout. */
9263 cfun_frame_layout.backchain_offset = 0;
9264 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9265 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9266 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9267 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9268 * UNITS_PER_LONG);
9270 else if (TARGET_BACKCHAIN)
9272 /* Kernel stack layout - packed stack, backchain, no float */
9273 gcc_assert (TARGET_SOFT_FLOAT);
9274 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9275 - UNITS_PER_LONG);
9277 /* The distance between the backchain and the return address
9278 save slot must not change. So we always need a slot for the
9279 stack pointer which resides in between. */
9280 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9282 cfun_frame_layout.gprs_offset
9283 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9285 /* FPRs will not be saved. Nevertheless pick sane values to
9286 keep area calculations valid. */
9287 cfun_frame_layout.f0_offset =
9288 cfun_frame_layout.f4_offset =
9289 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9291 else
9293 int num_fprs;
9295 /* Packed stack layout without backchain. */
9297 /* With stdarg FPRs need their dedicated slots. */
9298 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9299 : (cfun_fpr_save_p (FPR4_REGNUM) +
9300 cfun_fpr_save_p (FPR6_REGNUM)));
9301 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9303 num_fprs = (cfun->stdarg ? 2
9304 : (cfun_fpr_save_p (FPR0_REGNUM)
9305 + cfun_fpr_save_p (FPR2_REGNUM)));
9306 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9308 cfun_frame_layout.gprs_offset
9309 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9311 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9312 - cfun_frame_layout.high_fprs * 8);
9315 if (cfun_save_high_fprs_p)
9316 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9318 if (!crtl->is_leaf)
9319 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9321 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9322 sized area at the bottom of the stack. This is required also for
9323 leaf functions. When GCC generates a local stack reference it
9324 will always add STACK_POINTER_OFFSET to all these references. */
9325 if (crtl->is_leaf
9326 && !TARGET_TPF_PROFILING
9327 && cfun_frame_layout.frame_size == 0
9328 && !cfun->calls_alloca)
9329 return;
9331 /* Calculate the number of bytes we have used in our own register
9332 save area. With the packed stack layout we can re-use the
9333 remaining bytes for normal stack elements. */
9335 if (TARGET_PACKED_STACK)
9336 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9337 cfun_frame_layout.f4_offset),
9338 cfun_frame_layout.gprs_offset);
9339 else
9340 lowest_offset = 0;
9342 if (TARGET_BACKCHAIN)
9343 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9345 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9347 /* If, under 31-bit, an odd number of GPRs has to be saved, we have
9348 to adjust the frame size to maintain 8-byte alignment of stack
9349 frames. */
9350 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9351 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9352 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
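/* Worked example (illustrative only, assuming STACK_BOUNDARY == 64 as on
   this target): the rounding above is (size + 7) & ~7, so a raw frame
   size of 92 bytes becomes 96, preserving the 8-byte stack frame
   alignment mentioned in the comment above. */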
9355 /* Generate frame layout. Fills in register and frame data for the current
9356 function in cfun->machine. This routine can be called multiple times;
9357 it will re-do the complete frame layout every time. */
9359 static void
9360 s390_init_frame_layout (void)
9362 HOST_WIDE_INT frame_size;
9363 int base_used;
9365 /* After LRA the frame layout is supposed to be read-only and should
9366 not be re-computed. */
9367 if (reload_completed)
9368 return;
9370 /* On S/390 machines, we may need to perform branch splitting, which
9371 will require both base and return address register. We have no
9372 choice but to assume we're going to need them until right at the
9373 end of the machine dependent reorg phase. */
9374 if (!TARGET_CPU_ZARCH)
9375 cfun->machine->split_branches_pending_p = true;
9379 frame_size = cfun_frame_layout.frame_size;
9381 /* Try to predict whether we'll need the base register. */
9382 base_used = cfun->machine->split_branches_pending_p
9383 || crtl->uses_const_pool
9384 || (!DISP_IN_RANGE (frame_size)
9385 && !CONST_OK_FOR_K (frame_size));
9387 /* Decide which register to use as literal pool base. In small
9388 leaf functions, try to use an unused call-clobbered register
9389 as base register to avoid save/restore overhead. */
9390 if (!base_used)
9391 cfun->machine->base_reg = NULL_RTX;
9392 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
9393 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
9394 else
9395 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
9397 s390_register_info ();
9398 s390_frame_info ();
9400 while (frame_size != cfun_frame_layout.frame_size);
9403 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9404 the TX is nonescaping. A transaction is considered escaping if
9405 there is at least one path from tbegin returning CC0 to the
9406 function exit block without a tend.
9408 The check so far has some limitations:
9409 - only single tbegin/tend BBs are supported
9410 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9411 - when CC is copied to a GPR and the CC0 check is done with the GPR
9412 this is not supported
9415 static void
9416 s390_optimize_nonescaping_tx (void)
9418 const unsigned int CC0 = 1 << 3;
9419 basic_block tbegin_bb = NULL;
9420 basic_block tend_bb = NULL;
9421 basic_block bb;
9422 rtx_insn *insn;
9423 bool result = true;
9424 int bb_index;
9425 rtx_insn *tbegin_insn = NULL;
9427 if (!cfun->machine->tbegin_p)
9428 return;
9430 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9432 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9434 if (!bb)
9435 continue;
9437 FOR_BB_INSNS (bb, insn)
9439 rtx ite, cc, pat, target;
9440 unsigned HOST_WIDE_INT mask;
9442 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9443 continue;
9445 pat = PATTERN (insn);
9447 if (GET_CODE (pat) == PARALLEL)
9448 pat = XVECEXP (pat, 0, 0);
9450 if (GET_CODE (pat) != SET
9451 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9452 continue;
9454 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9456 rtx_insn *tmp;
9458 tbegin_insn = insn;
9460 /* Just return if the tbegin doesn't have clobbers. */
9461 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9462 return;
9464 if (tbegin_bb != NULL)
9465 return;
9467 /* Find the next conditional jump. */
9468 for (tmp = NEXT_INSN (insn);
9469 tmp != NULL_RTX;
9470 tmp = NEXT_INSN (tmp))
9472 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9473 return;
9474 if (!JUMP_P (tmp))
9475 continue;
9477 ite = SET_SRC (PATTERN (tmp));
9478 if (GET_CODE (ite) != IF_THEN_ELSE)
9479 continue;
9481 cc = XEXP (XEXP (ite, 0), 0);
9482 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9483 || GET_MODE (cc) != CCRAWmode
9484 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9485 return;
9487 if (bb->succs->length () != 2)
9488 return;
9490 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9491 if (GET_CODE (XEXP (ite, 0)) == NE)
9492 mask ^= 0xf;
9494 if (mask == CC0)
9495 target = XEXP (ite, 1);
9496 else if (mask == (CC0 ^ 0xf))
9497 target = XEXP (ite, 2);
9498 else
9499 return;
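/* Illustrative note (not from the original sources): CC0 is 1 << 3, so
   after normalizing an NE comparison by inverting the mask (mask ^= 0xf
   above), a mask equal to 0b1000 identifies the branch arm that is taken
   exactly when the tbegin produced condition code 0, i.e. the transaction
   body. */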
9502 edge_iterator ei;
9503 edge e1, e2;
9505 ei = ei_start (bb->succs);
9506 e1 = ei_safe_edge (ei);
9507 ei_next (&ei);
9508 e2 = ei_safe_edge (ei);
9510 if (e2->flags & EDGE_FALLTHRU)
9512 e2 = e1;
9513 e1 = ei_safe_edge (ei);
9516 if (!(e1->flags & EDGE_FALLTHRU))
9517 return;
9519 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9521 if (tmp == BB_END (bb))
9522 break;
9526 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9528 if (tend_bb != NULL)
9529 return;
9530 tend_bb = bb;
9535 /* Either we successfully remove the FPR clobbers here or we are not
9536 able to do anything for this TX. Neither case qualifies for
9537 another look. */
9538 cfun->machine->tbegin_p = false;
9540 if (tbegin_bb == NULL || tend_bb == NULL)
9541 return;
9543 calculate_dominance_info (CDI_POST_DOMINATORS);
9544 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9545 free_dominance_info (CDI_POST_DOMINATORS);
9547 if (!result)
9548 return;
9550 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9551 gen_rtvec (2,
9552 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9553 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9554 INSN_CODE (tbegin_insn) = -1;
9555 df_insn_rescan (tbegin_insn);
9557 return;
9560 /* Return true if it is legal to put a value with MODE into REGNO. */
9562 bool
9563 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9565 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9566 return false;
9568 switch (REGNO_REG_CLASS (regno))
9570 case VEC_REGS:
9571 return ((GET_MODE_CLASS (mode) == MODE_INT
9572 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9573 || mode == DFmode
9574 || s390_vector_mode_supported_p (mode));
9575 break;
9576 case FP_REGS:
9577 if (TARGET_VX
9578 && ((GET_MODE_CLASS (mode) == MODE_INT
9579 && s390_class_max_nregs (FP_REGS, mode) == 1)
9580 || mode == DFmode
9581 || s390_vector_mode_supported_p (mode)))
9582 return true;
9584 if (REGNO_PAIR_OK (regno, mode))
9586 if (mode == SImode || mode == DImode)
9587 return true;
9589 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9590 return true;
9592 break;
9593 case ADDR_REGS:
9594 if (FRAME_REGNO_P (regno) && mode == Pmode)
9595 return true;
9597 /* fallthrough */
9598 case GENERAL_REGS:
9599 if (REGNO_PAIR_OK (regno, mode))
9601 if (TARGET_ZARCH
9602 || (mode != TFmode && mode != TCmode && mode != TDmode))
9603 return true;
9605 break;
9606 case CC_REGS:
9607 if (GET_MODE_CLASS (mode) == MODE_CC)
9608 return true;
9609 break;
9610 case ACCESS_REGS:
9611 if (REGNO_PAIR_OK (regno, mode))
9613 if (mode == SImode || mode == Pmode)
9614 return true;
9616 break;
9617 default:
9618 return false;
9621 return false;
9624 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
9626 bool
9627 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9629 /* Once we've decided upon a register to use as base register, it must
9630 no longer be used for any other purpose. */
9631 if (cfun->machine->base_reg)
9632 if (REGNO (cfun->machine->base_reg) == old_reg
9633 || REGNO (cfun->machine->base_reg) == new_reg)
9634 return false;
9636 /* Prevent regrename from using call-saved regs which haven't
9637 actually been saved. This is necessary since regrename assumes
9638 the backend save/restore decisions are based on
9639 df_regs_ever_live. Since we have our own routine we have to tell
9640 regrename manually about it. */
9641 if (GENERAL_REGNO_P (new_reg)
9642 && !call_really_used_regs[new_reg]
9643 && cfun_gpr_save_slot (new_reg) == 0)
9644 return false;
9646 return true;
9649 /* Return nonzero if register REGNO can be used as a scratch register
9650 in peephole2. */
9652 static bool
9653 s390_hard_regno_scratch_ok (unsigned int regno)
9655 /* See s390_hard_regno_rename_ok. */
9656 if (GENERAL_REGNO_P (regno)
9657 && !call_really_used_regs[regno]
9658 && cfun_gpr_save_slot (regno) == 0)
9659 return false;
9661 return true;
9664 /* Maximum number of registers to represent a value of mode MODE
9665 in a register of class RCLASS. */
9668 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9670 int reg_size;
9671 bool reg_pair_required_p = false;
9673 switch (rclass)
9675 case FP_REGS:
9676 case VEC_REGS:
9677 reg_size = TARGET_VX ? 16 : 8;
9679 /* TF and TD modes would fit into a VR but we put them into a
9680 register pair since we do not have 128-bit FP instructions on
9681 full VRs. */
9682 if (TARGET_VX
9683 && SCALAR_FLOAT_MODE_P (mode)
9684 && GET_MODE_SIZE (mode) >= 16)
9685 reg_pair_required_p = true;
9687 /* Even if complex types would fit into a single FPR/VR we force
9688 them into a register pair to deal with the parts more easily.
9689 (FIXME: What about complex ints?) */
9690 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9691 reg_pair_required_p = true;
9692 break;
9693 case ACCESS_REGS:
9694 reg_size = 4;
9695 break;
9696 default:
9697 reg_size = UNITS_PER_WORD;
9698 break;
9701 if (reg_pair_required_p)
9702 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9704 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
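/* Worked examples (illustrative only): TFmode (16 bytes) in FP_REGS with
   TARGET_VX sets reg_pair_required_p, giving 2 * ((16 / 2 + 16 - 1) / 16)
   = 2 registers; without TARGET_VX it is (16 + 8 - 1) / 8 = 2 as well.
   A DFmode value needs a single register in either case. */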
9707 /* Return TRUE if changing mode from FROM to TO should not be allowed
9708 for register class CLASS. */
9711 s390_cannot_change_mode_class (machine_mode from_mode,
9712 machine_mode to_mode,
9713 enum reg_class rclass)
9715 machine_mode small_mode;
9716 machine_mode big_mode;
9718 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9719 return 0;
9721 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9723 small_mode = from_mode;
9724 big_mode = to_mode;
9726 else
9728 small_mode = to_mode;
9729 big_mode = from_mode;
9732 /* Values residing in VRs are little-endian style. All modes are
9733 placed left-aligned in a VR. This means that we cannot allow
9734 switching between modes with differing sizes. Also if the vector
9735 facility is available we still place TFmode values in VR register
9736 pairs, since the only instructions we have operating on TFmodes
9737 only deal with register pairs. Therefore we have to allow DFmode
9738 subregs of TFmodes to enable the TFmode splitters. */
9739 if (reg_classes_intersect_p (VEC_REGS, rclass)
9740 && (GET_MODE_SIZE (small_mode) < 8
9741 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9742 return 1;
9744 /* Likewise for access registers, since they have only half the
9745 word size on 64-bit. */
9746 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9747 return 1;
9749 return 0;
9752 /* Return true if we use LRA instead of reload pass. */
9753 static bool
9754 s390_lra_p (void)
9756 return s390_lra_flag;
9759 /* Return true if register FROM can be eliminated via register TO. */
9761 static bool
9762 s390_can_eliminate (const int from, const int to)
9764 /* On zSeries machines, we have not marked the base register as fixed.
9765 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9766 If a function requires the base register, we say here that this
9767 elimination cannot be performed. This will cause reload to free
9768 up the base register (as if it were fixed). On the other hand,
9769 if the current function does *not* require the base register, we
9770 say here the elimination succeeds, which in turn allows reload
9771 to allocate the base register for any other purpose. */
9772 if (from == BASE_REGNUM && to == BASE_REGNUM)
9774 if (TARGET_CPU_ZARCH)
9776 s390_init_frame_layout ();
9777 return cfun->machine->base_reg == NULL_RTX;
9780 return false;
9783 /* Everything else must point into the stack frame. */
9784 gcc_assert (to == STACK_POINTER_REGNUM
9785 || to == HARD_FRAME_POINTER_REGNUM);
9787 gcc_assert (from == FRAME_POINTER_REGNUM
9788 || from == ARG_POINTER_REGNUM
9789 || from == RETURN_ADDRESS_POINTER_REGNUM);
9791 /* Make sure we actually saved the return address. */
9792 if (from == RETURN_ADDRESS_POINTER_REGNUM)
9793 if (!crtl->calls_eh_return
9794 && !cfun->stdarg
9795 && !cfun_frame_layout.save_return_addr_p)
9796 return false;
9798 return true;
9801 /* Return offset between register FROM and TO initially after prolog. */
9803 HOST_WIDE_INT
9804 s390_initial_elimination_offset (int from, int to)
9806 HOST_WIDE_INT offset;
9808 /* ??? Why are we called for non-eliminable pairs? */
9809 if (!s390_can_eliminate (from, to))
9810 return 0;
9812 switch (from)
9814 case FRAME_POINTER_REGNUM:
9815 offset = (get_frame_size()
9816 + STACK_POINTER_OFFSET
9817 + crtl->outgoing_args_size);
9818 break;
9820 case ARG_POINTER_REGNUM:
9821 s390_init_frame_layout ();
9822 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
9823 break;
9825 case RETURN_ADDRESS_POINTER_REGNUM:
9826 s390_init_frame_layout ();
9828 if (cfun_frame_layout.first_save_gpr_slot == -1)
9830 /* If it turns out that for stdarg nothing went into the reg
9831 save area we also do not need the return address
9832 pointer. */
9833 if (cfun->stdarg && !cfun_save_arg_fprs_p)
9834 return 0;
9836 gcc_unreachable ();
9839 /* In order to make the following work it is not necessary for
9840 r14 to have a save slot. It is sufficient if one other GPR
9841 got one. Since the GPRs are always stored without gaps we
9842 are able to calculate where the r14 save slot would
9843 reside. */
9844 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
9845 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
9846 UNITS_PER_LONG);
9847 break;
9849 case BASE_REGNUM:
9850 offset = 0;
9851 break;
9853 default:
9854 gcc_unreachable ();
9857 return offset;
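/* Worked example (illustrative only): if the first GPR save slot belongs
   to r6 and UNITS_PER_LONG is 8, the RETURN_ADDRESS_POINTER_REGNUM case
   above yields frame_size + gprs_offset + (14 - 6) * 8, i.e. r14's slot
   lies 64 bytes above the start of the GPR save area. */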
9860 /* Emit insn to save fpr REGNUM at offset OFFSET relative
9861 to register BASE. Return generated insn. */
9863 static rtx
9864 save_fpr (rtx base, int offset, int regnum)
9866 rtx addr;
9867 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9869 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
9870 set_mem_alias_set (addr, get_varargs_alias_set ());
9871 else
9872 set_mem_alias_set (addr, get_frame_alias_set ());
9874 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
9877 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
9878 to register BASE. Return generated insn. */
9880 static rtx
9881 restore_fpr (rtx base, int offset, int regnum)
9883 rtx addr;
9884 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9885 set_mem_alias_set (addr, get_frame_alias_set ());
9887 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
9890 /* Return true if REGNO is a global register, but not one
9891 of the special ones that need to be saved/restored in any case. */
9893 static inline bool
9894 global_not_special_regno_p (int regno)
9896 return (global_regs[regno]
9897 /* These registers are special and need to be
9898 restored in any case. */
9899 && !(regno == STACK_POINTER_REGNUM
9900 || regno == RETURN_REGNUM
9901 || regno == BASE_REGNUM
9902 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9905 /* Generate insn to save registers FIRST to LAST into
9906 the register save area located at offset OFFSET
9907 relative to register BASE. */
9909 static rtx
9910 save_gprs (rtx base, int offset, int first, int last)
9912 rtx addr, insn, note;
9913 int i;
9915 addr = plus_constant (Pmode, base, offset);
9916 addr = gen_rtx_MEM (Pmode, addr);
9918 set_mem_alias_set (addr, get_frame_alias_set ());
9920 /* Special-case single register. */
9921 if (first == last)
9923 if (TARGET_64BIT)
9924 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
9925 else
9926 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
9928 if (!global_not_special_regno_p (first))
9929 RTX_FRAME_RELATED_P (insn) = 1;
9930 return insn;
9934 insn = gen_store_multiple (addr,
9935 gen_rtx_REG (Pmode, first),
9936 GEN_INT (last - first + 1));
9938 if (first <= 6 && cfun->stdarg)
9939 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9941 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
9943 if (first + i <= 6)
9944 set_mem_alias_set (mem, get_varargs_alias_set ());
9947 /* We need to set the FRAME_RELATED flag on all SETs
9948 inside the store-multiple pattern.
9950 However, we must not emit DWARF records for registers 2..5
9951 if they are stored for use by variable arguments ...
9953 ??? Unfortunately, it is not enough to simply not set the
9954 FRAME_RELATED flags for those SETs, because the first SET
9955 of the PARALLEL is always treated as if it had the flag
9956 set, even if it does not. Therefore we emit a new pattern
9957 without those registers as REG_FRAME_RELATED_EXPR note. */
9959 if (first >= 6 && !global_not_special_regno_p (first))
9961 rtx pat = PATTERN (insn);
9963 for (i = 0; i < XVECLEN (pat, 0); i++)
9964 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
9965 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
9966 0, i)))))
9967 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
9969 RTX_FRAME_RELATED_P (insn) = 1;
9971 else if (last >= 6)
9973 int start;
9975 for (start = first >= 6 ? first : 6; start <= last; start++)
9976 if (!global_not_special_regno_p (start))
9977 break;
9979 if (start > last)
9980 return insn;
9982 addr = plus_constant (Pmode, base,
9983 offset + (start - first) * UNITS_PER_LONG);
9985 if (start == last)
9987 if (TARGET_64BIT)
9988 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
9989 gen_rtx_REG (Pmode, start));
9990 else
9991 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
9992 gen_rtx_REG (Pmode, start));
9993 note = PATTERN (note);
9995 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9996 RTX_FRAME_RELATED_P (insn) = 1;
9998 return insn;
10001 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10002 gen_rtx_REG (Pmode, start),
10003 GEN_INT (last - start + 1));
10004 note = PATTERN (note);
10006 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10008 for (i = 0; i < XVECLEN (note, 0); i++)
10009 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10010 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10011 0, i)))))
10012 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10014 RTX_FRAME_RELATED_P (insn) = 1;
10017 return insn;
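/* Illustrative example (not from the original sources): for a stdarg
   function saving r2..r15, the store-multiple insn covers all fourteen
   registers, but the REG_FRAME_RELATED_EXPR note attached above only
   describes r6..r15, so no CFI is emitted for the vararg registers
   r2..r5. */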
10020 /* Generate insn to restore registers FIRST to LAST from
10021 the register save area located at offset OFFSET
10022 relative to register BASE. */
10024 static rtx
10025 restore_gprs (rtx base, int offset, int first, int last)
10027 rtx addr, insn;
10029 addr = plus_constant (Pmode, base, offset);
10030 addr = gen_rtx_MEM (Pmode, addr);
10031 set_mem_alias_set (addr, get_frame_alias_set ());
10033 /* Special-case single register. */
10034 if (first == last)
10036 if (TARGET_64BIT)
10037 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10038 else
10039 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10041 RTX_FRAME_RELATED_P (insn) = 1;
10042 return insn;
10045 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10046 addr,
10047 GEN_INT (last - first + 1));
10048 RTX_FRAME_RELATED_P (insn) = 1;
10049 return insn;
10052 /* Return insn sequence to load the GOT register. */
10054 static GTY(()) rtx got_symbol;
10055 rtx_insn *
10056 s390_load_got (void)
10058 rtx_insn *insns;
10060 /* We cannot use pic_offset_table_rtx here, since this function is
10061 also used for non-PIC code when __tls_get_offset is called; in
10062 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
10063 is usable. */
10064 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10066 if (!got_symbol)
10068 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10069 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10072 start_sequence ();
10074 if (TARGET_CPU_ZARCH)
10076 emit_move_insn (got_rtx, got_symbol);
10078 else
10080 rtx offset;
10082 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10083 UNSPEC_LTREL_OFFSET);
10084 offset = gen_rtx_CONST (Pmode, offset);
10085 offset = force_const_mem (Pmode, offset);
10087 emit_move_insn (got_rtx, offset);
10089 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10090 UNSPEC_LTREL_BASE);
10091 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10093 emit_move_insn (got_rtx, offset);
10096 insns = get_insns ();
10097 end_sequence ();
10098 return insns;
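/* Illustrative note (not from the original sources): on non-zarch CPUs
   the sequence built above loads the GOT address in two steps: the
   pool-resident UNSPEC_LTREL_OFFSET constant is moved into r12 and the
   literal pool base (UNSPEC_LTREL_BASE) is added to it; on zarch a
   single move of _GLOBAL_OFFSET_TABLE_ (typically a LARL) suffices. */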
10101 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10102 and the change to the stack pointer. */
10104 static void
10105 s390_emit_stack_tie (void)
10107 rtx mem = gen_frame_mem (BLKmode,
10108 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10110 emit_insn (gen_stack_tie (mem));
10113 /* Copy GPRS into FPR save slots. */
10115 static void
10116 s390_save_gprs_to_fprs (void)
10118 int i;
10120 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10121 return;
10123 for (i = 6; i < 16; i++)
10125 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10127 rtx_insn *insn =
10128 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10129 gen_rtx_REG (DImode, i));
10130 RTX_FRAME_RELATED_P (insn) = 1;
10131 /* This prevents dwarf2cfi from interpreting the set. If it did,
10132 it might emit def_cfa_register notes that would make an FPR the
10133 new CFA. */
10134 add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10139 /* Restore GPRs from FPR save slots. */
10141 static void
10142 s390_restore_gprs_from_fprs (void)
10144 int i;
10146 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10147 return;
10149 for (i = 6; i < 16; i++)
10151 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10153 rtx_insn *insn =
10154 emit_move_insn (gen_rtx_REG (DImode, i),
10155 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10156 df_set_regs_ever_live (i, true);
10157 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10158 if (i == STACK_POINTER_REGNUM)
10159 add_reg_note (insn, REG_CFA_DEF_CFA,
10160 plus_constant (Pmode, stack_pointer_rtx,
10161 STACK_POINTER_OFFSET));
10162 RTX_FRAME_RELATED_P (insn) = 1;
10168 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10169 generation. */
10171 namespace {
10173 const pass_data pass_data_s390_early_mach =
10175 RTL_PASS, /* type */
10176 "early_mach", /* name */
10177 OPTGROUP_NONE, /* optinfo_flags */
10178 TV_MACH_DEP, /* tv_id */
10179 0, /* properties_required */
10180 0, /* properties_provided */
10181 0, /* properties_destroyed */
10182 0, /* todo_flags_start */
10183 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10186 class pass_s390_early_mach : public rtl_opt_pass
10188 public:
10189 pass_s390_early_mach (gcc::context *ctxt)
10190 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10193 /* opt_pass methods: */
10194 virtual unsigned int execute (function *);
10196 }; // class pass_s390_early_mach
10198 unsigned int
10199 pass_s390_early_mach::execute (function *fun)
10201 rtx_insn *insn;
10203 /* Try to get rid of the FPR clobbers. */
10204 s390_optimize_nonescaping_tx ();
10206 /* Re-compute register info. */
10207 s390_register_info ();
10209 /* If we're using a base register, ensure that it is always valid for
10210 the first non-prologue instruction. */
10211 if (fun->machine->base_reg)
10212 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10214 /* Annotate all constant pool references to let the scheduler know
10215 they implicitly use the base register. */
10216 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10217 if (INSN_P (insn))
10219 annotate_constant_pool_refs (&PATTERN (insn));
10220 df_insn_rescan (insn);
10222 return 0;
10225 } // anon namespace
10227 /* Expand the prologue into a bunch of separate insns. */
10229 void
10230 s390_emit_prologue (void)
10232 rtx insn, addr;
10233 rtx temp_reg;
10234 int i;
10235 int offset;
10236 int next_fpr = 0;
10238 /* Choose the best register to use as a temporary within the prologue.
10239 See below for why TPF must use register 1. */
10241 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10242 && !crtl->is_leaf
10243 && !TARGET_TPF_PROFILING)
10244 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10245 else
10246 temp_reg = gen_rtx_REG (Pmode, 1);
10248 s390_save_gprs_to_fprs ();
10250 /* Save call saved gprs. */
10251 if (cfun_frame_layout.first_save_gpr != -1)
10253 insn = save_gprs (stack_pointer_rtx,
10254 cfun_frame_layout.gprs_offset +
10255 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10256 - cfun_frame_layout.first_save_gpr_slot),
10257 cfun_frame_layout.first_save_gpr,
10258 cfun_frame_layout.last_save_gpr);
10259 emit_insn (insn);
10262 /* Dummy insn to mark literal pool slot. */
10264 if (cfun->machine->base_reg)
10265 emit_insn (gen_main_pool (cfun->machine->base_reg));
10267 offset = cfun_frame_layout.f0_offset;
10269 /* Save f0 and f2. */
10270 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10272 if (cfun_fpr_save_p (i))
10274 save_fpr (stack_pointer_rtx, offset, i);
10275 offset += 8;
10277 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10278 offset += 8;
10281 /* Save f4 and f6. */
10282 offset = cfun_frame_layout.f4_offset;
10283 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10285 if (cfun_fpr_save_p (i))
10287 insn = save_fpr (stack_pointer_rtx, offset, i);
10288 offset += 8;
10290 /* If f4 and f6 are call clobbered, they are saved only because of
10291 stdarg and therefore are not frame related. */
10292 if (!call_really_used_regs[i])
10293 RTX_FRAME_RELATED_P (insn) = 1;
10295 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10296 offset += 8;
10299 if (TARGET_PACKED_STACK
10300 && cfun_save_high_fprs_p
10301 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10303 offset = (cfun_frame_layout.f8_offset
10304 + (cfun_frame_layout.high_fprs - 1) * 8);
10306 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10307 if (cfun_fpr_save_p (i))
10309 insn = save_fpr (stack_pointer_rtx, offset, i);
10311 RTX_FRAME_RELATED_P (insn) = 1;
10312 offset -= 8;
10314 if (offset >= cfun_frame_layout.f8_offset)
10315 next_fpr = i;
10318 if (!TARGET_PACKED_STACK)
10319 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10321 if (flag_stack_usage_info)
10322 current_function_static_stack_size = cfun_frame_layout.frame_size;
10324 /* Decrement stack pointer. */
10326 if (cfun_frame_layout.frame_size > 0)
10328 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10329 rtx real_frame_off;
10331 if (s390_stack_size)
10333 HOST_WIDE_INT stack_guard;
10335 if (s390_stack_guard)
10336 stack_guard = s390_stack_guard;
10337 else
10339 /* If no value for the stack guard is provided, the smallest power of 2
10340 at least as large as the current frame size is chosen. */
10341 stack_guard = 1;
10342 while (stack_guard < cfun_frame_layout.frame_size)
10343 stack_guard <<= 1;
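/* Worked example (illustration only, values are hypothetical): for a
   frame size of 5000 bytes the loop above ends with stack_guard = 8192,
   the smallest power of 2 >= 5000.  */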
10346 if (cfun_frame_layout.frame_size >= s390_stack_size)
10348 warning (0, "frame size of function %qs is %wd"
10349 " bytes exceeding user provided stack limit of "
10350 "%d bytes. "
10351 "An unconditional trap is added.",
10352 current_function_name(), cfun_frame_layout.frame_size,
10353 s390_stack_size);
10354 emit_insn (gen_trap ());
10355 emit_barrier ();
10357 else
10359 /* stack_guard has to be smaller than s390_stack_size.
10360 Otherwise we would emit an AND with zero which would
10361 not match the test under mask pattern. */
10362 if (stack_guard >= s390_stack_size)
10364 warning (0, "frame size of function %qs is %wd"
10365 " bytes which is more than half the stack size. "
10366 "The dynamic check would not be reliable. "
10367 "No check emitted for this function.",
10368 current_function_name(),
10369 cfun_frame_layout.frame_size);
10371 else
10373 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10374 & ~(stack_guard - 1));
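/* Illustration only (hypothetical option values): with
   s390_stack_size = 65536 and stack_guard = 4096 this gives
   stack_check_mask = 0xffff & ~0xfff = 0xf000; the conditional trap
   emitted below fires once the masked stack-pointer bits become zero.  */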
10376 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10377 GEN_INT (stack_check_mask));
10378 if (TARGET_64BIT)
10379 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10380 t, const0_rtx),
10381 t, const0_rtx, const0_rtx));
10382 else
10383 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10384 t, const0_rtx),
10385 t, const0_rtx, const0_rtx));
10390 if (s390_warn_framesize > 0
10391 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10392 warning (0, "frame size of %qs is %wd bytes",
10393 current_function_name (), cfun_frame_layout.frame_size);
10395 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10396 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10398 /* Save incoming stack pointer into temp reg. */
10399 if (TARGET_BACKCHAIN || next_fpr)
10400 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10402 /* Subtract frame size from stack pointer. */
10404 if (DISP_IN_RANGE (INTVAL (frame_off)))
10406 insn = gen_rtx_SET (stack_pointer_rtx,
10407 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10408 frame_off));
10409 insn = emit_insn (insn);
10411 else
10413 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10414 frame_off = force_const_mem (Pmode, frame_off);
10416 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10417 annotate_constant_pool_refs (&PATTERN (insn));
10420 RTX_FRAME_RELATED_P (insn) = 1;
10421 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10422 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10423 gen_rtx_SET (stack_pointer_rtx,
10424 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10425 real_frame_off)));
10427 /* Set backchain. */
10429 if (TARGET_BACKCHAIN)
10431 if (cfun_frame_layout.backchain_offset)
10432 addr = gen_rtx_MEM (Pmode,
10433 plus_constant (Pmode, stack_pointer_rtx,
10434 cfun_frame_layout.backchain_offset));
10435 else
10436 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10437 set_mem_alias_set (addr, get_frame_alias_set ());
10438 insn = emit_insn (gen_move_insn (addr, temp_reg));
10441 /* If we support non-call exceptions (e.g. for Java),
10442 we need to make sure the backchain pointer is set up
10443 before any possibly trapping memory access. */
10444 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10446 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10447 emit_clobber (addr);
10451 /* Save fprs 8 - 15 (64 bit ABI). */
10453 if (cfun_save_high_fprs_p && next_fpr)
10455 /* If the stack might be accessed through a different register
10456 we have to make sure that the stack pointer decrement is not
10457 moved below the use of the stack slots. */
10458 s390_emit_stack_tie ();
10460 insn = emit_insn (gen_add2_insn (temp_reg,
10461 GEN_INT (cfun_frame_layout.f8_offset)));
10463 offset = 0;
10465 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10466 if (cfun_fpr_save_p (i))
10468 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10469 cfun_frame_layout.frame_size
10470 + cfun_frame_layout.f8_offset
10471 + offset);
10473 insn = save_fpr (temp_reg, offset, i);
10474 offset += 8;
10475 RTX_FRAME_RELATED_P (insn) = 1;
10476 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10477 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10478 gen_rtx_REG (DFmode, i)));
10482 /* Set frame pointer, if needed. */
10484 if (frame_pointer_needed)
10486 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10487 RTX_FRAME_RELATED_P (insn) = 1;
10490 /* Set up got pointer, if needed. */
10492 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10494 rtx_insn *insns = s390_load_got ();
10496 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10497 annotate_constant_pool_refs (&PATTERN (insn));
10499 emit_insn (insns);
10502 if (TARGET_TPF_PROFILING)
10504 /* Generate a BAS instruction to serve as a function
10505 entry intercept to facilitate the use of tracing
10506 algorithms located at the branch target. */
10507 emit_insn (gen_prologue_tpf ());
10509 /* Emit a blockage here so that all code
10510 lies between the profiling mechanisms. */
10511 emit_insn (gen_blockage ());
10515 /* Expand the epilogue into a bunch of separate insns. */
10517 void
10518 s390_emit_epilogue (bool sibcall)
10520 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10521 int area_bottom, area_top, offset = 0;
10522 int next_offset;
10523 rtvec p;
10524 int i;
10526 if (TARGET_TPF_PROFILING)
10529 /* Generate a BAS instruction to serve as a function
10530 exit intercept to facilitate the use of tracing
10531 algorithms located at the branch target. */
10533 /* Emit a blockage here so that all code
10534 lies between the profiling mechanisms. */
10535 emit_insn (gen_blockage ());
10537 emit_insn (gen_epilogue_tpf ());
10540 /* Check whether to use frame or stack pointer for restore. */
10542 frame_pointer = (frame_pointer_needed
10543 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10545 s390_frame_area (&area_bottom, &area_top);
10547 /* Check whether we can access the register save area.
10548 If not, increment the frame pointer as required. */
10550 if (area_top <= area_bottom)
10552 /* Nothing to restore. */
10554 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10555 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10557 /* Area is in range. */
10558 offset = cfun_frame_layout.frame_size;
10560 else
10562 rtx insn, frame_off, cfa;
10564 offset = area_bottom < 0 ? -area_bottom : 0;
10565 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10567 cfa = gen_rtx_SET (frame_pointer,
10568 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10569 if (DISP_IN_RANGE (INTVAL (frame_off)))
10571 insn = gen_rtx_SET (frame_pointer,
10572 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10573 insn = emit_insn (insn);
10575 else
10577 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10578 frame_off = force_const_mem (Pmode, frame_off);
10580 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10581 annotate_constant_pool_refs (&PATTERN (insn));
10583 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10584 RTX_FRAME_RELATED_P (insn) = 1;
10587 /* Restore call saved fprs. */
10589 if (TARGET_64BIT)
10591 if (cfun_save_high_fprs_p)
10593 next_offset = cfun_frame_layout.f8_offset;
10594 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10596 if (cfun_fpr_save_p (i))
10598 restore_fpr (frame_pointer,
10599 offset + next_offset, i);
10600 cfa_restores
10601 = alloc_reg_note (REG_CFA_RESTORE,
10602 gen_rtx_REG (DFmode, i), cfa_restores);
10603 next_offset += 8;
10609 else
10611 next_offset = cfun_frame_layout.f4_offset;
10612 /* f4, f6 */
10613 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10615 if (cfun_fpr_save_p (i))
10617 restore_fpr (frame_pointer,
10618 offset + next_offset, i);
10619 cfa_restores
10620 = alloc_reg_note (REG_CFA_RESTORE,
10621 gen_rtx_REG (DFmode, i), cfa_restores);
10622 next_offset += 8;
10624 else if (!TARGET_PACKED_STACK)
10625 next_offset += 8;
10630 /* Return register. */
10632 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10634 /* Restore call saved gprs. */
10636 if (cfun_frame_layout.first_restore_gpr != -1)
10638 rtx insn, addr;
10639 int i;
10641 /* Check for global registers and store their current values into
10642 the stack slots from which they are about to be restored. */
10644 for (i = cfun_frame_layout.first_restore_gpr;
10645 i <= cfun_frame_layout.last_restore_gpr;
10646 i++)
10648 if (global_not_special_regno_p (i))
10650 addr = plus_constant (Pmode, frame_pointer,
10651 offset + cfun_frame_layout.gprs_offset
10652 + (i - cfun_frame_layout.first_save_gpr_slot)
10653 * UNITS_PER_LONG);
10654 addr = gen_rtx_MEM (Pmode, addr);
10655 set_mem_alias_set (addr, get_frame_alias_set ());
10656 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10658 else
10659 cfa_restores
10660 = alloc_reg_note (REG_CFA_RESTORE,
10661 gen_rtx_REG (Pmode, i), cfa_restores);
10664 if (! sibcall)
10666 /* Fetch the return address from the stack before the load multiple;
10667 this helps scheduling.
10669 Only do this if we already decided that r14 needs to be
10670 saved to a stack slot. (And not just because r14 happens to
10671 be in between two GPRs which need saving.) Otherwise it
10672 would be difficult to take that decision back in
10673 s390_optimize_prologue. */
10674 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
10676 int return_regnum = find_unused_clobbered_reg();
10677 if (!return_regnum)
10678 return_regnum = 4;
10679 return_reg = gen_rtx_REG (Pmode, return_regnum);
10681 addr = plus_constant (Pmode, frame_pointer,
10682 offset + cfun_frame_layout.gprs_offset
10683 + (RETURN_REGNUM
10684 - cfun_frame_layout.first_save_gpr_slot)
10685 * UNITS_PER_LONG);
10686 addr = gen_rtx_MEM (Pmode, addr);
10687 set_mem_alias_set (addr, get_frame_alias_set ());
10688 emit_move_insn (return_reg, addr);
10690 /* Once we have done this optimization we have to make sure
10691 s390_optimize_prologue does not try to remove the
10692 store of r14, since we would no longer be able to find
10693 the load issued here. */
10694 cfun_frame_layout.save_return_addr_p = true;
10698 insn = restore_gprs (frame_pointer,
10699 offset + cfun_frame_layout.gprs_offset
10700 + (cfun_frame_layout.first_restore_gpr
10701 - cfun_frame_layout.first_save_gpr_slot)
10702 * UNITS_PER_LONG,
10703 cfun_frame_layout.first_restore_gpr,
10704 cfun_frame_layout.last_restore_gpr);
10705 insn = emit_insn (insn);
10706 REG_NOTES (insn) = cfa_restores;
10707 add_reg_note (insn, REG_CFA_DEF_CFA,
10708 plus_constant (Pmode, stack_pointer_rtx,
10709 STACK_POINTER_OFFSET));
10710 RTX_FRAME_RELATED_P (insn) = 1;
10713 s390_restore_gprs_from_fprs ();
10715 if (! sibcall)
10718 /* Return to caller. */
10720 p = rtvec_alloc (2);
10722 RTVEC_ELT (p, 0) = ret_rtx;
10723 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10724 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10728 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10730 static void
10731 s300_set_up_by_prologue (hard_reg_set_container *regs)
10733 if (cfun->machine->base_reg
10734 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10735 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10738 /* Return true if the function can use simple_return to return outside
10739 of a shrink-wrapped region. At present shrink-wrapping is supported
10740 in all cases. */
10742 bool
10743 s390_can_use_simple_return_insn (void)
10745 return true;
10748 /* Return true if the epilogue is guaranteed to contain only a return
10749 instruction and if a direct return can therefore be used instead.
10750 One of the main advantages of using direct return instructions
10751 is that we can then use conditional returns. */
10753 bool
10754 s390_can_use_return_insn (void)
10756 int i;
10758 if (!reload_completed)
10759 return false;
10761 if (crtl->profile)
10762 return false;
10764 if (TARGET_TPF_PROFILING)
10765 return false;
10767 for (i = 0; i < 16; i++)
10768 if (cfun_gpr_save_slot (i))
10769 return false;
10771 /* For 31 bit this is not covered by the frame_size check below
10772 since f4, f6 are saved in the register save area without needing
10773 additional stack space. */
10774 if (!TARGET_64BIT
10775 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10776 return false;
10778 if (cfun->machine->base_reg
10779 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10780 return false;
10782 return cfun_frame_layout.frame_size == 0;
10785 /* The VX ABI differs for vararg functions. Therefore we need the
10786 prototype of the callee to be available when passing vector type
10787 values. */
10788 static const char *
10789 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10791 return ((TARGET_VX_ABI
10792 && typelist == 0
10793 && VECTOR_TYPE_P (TREE_TYPE (val))
10794 && (funcdecl == NULL_TREE
10795 || (TREE_CODE (funcdecl) == FUNCTION_DECL
10796 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10797 ? N_("Vector argument passed to unprototyped function")
10798 : NULL);
10802 /* Return the size in bytes of a function argument of
10803 type TYPE and/or mode MODE. At least one of TYPE or
10804 MODE must be specified. */
10806 static int
10807 s390_function_arg_size (machine_mode mode, const_tree type)
10809 if (type)
10810 return int_size_in_bytes (type);
10812 /* No type info available for some library calls ... */
10813 if (mode != BLKmode)
10814 return GET_MODE_SIZE (mode);
10816 /* If we have neither type nor mode, abort. */
10817 gcc_unreachable ();
10820 /* Return true if a function argument of type TYPE and mode MODE
10821 is to be passed in a vector register, if available. */
10823 bool
10824 s390_function_arg_vector (machine_mode mode, const_tree type)
10826 if (!TARGET_VX_ABI)
10827 return false;
10829 if (s390_function_arg_size (mode, type) > 16)
10830 return false;
10832 /* No type info available for some library calls ... */
10833 if (!type)
10834 return VECTOR_MODE_P (mode);
10836 /* The ABI says that record types with a single member are treated
10837 just like that member would be. */
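/* Illustrative example (hypothetical types, not taken from the ABI
   text): with
     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrap { v4si x; };
   a 'struct wrap' argument is treated like a plain v4si, whereas a
   struct with an additional member or tail padding is not, because
   its size no longer matches the size of the single member.  */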
10838 while (TREE_CODE (type) == RECORD_TYPE)
10840 tree field, single = NULL_TREE;
10842 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10844 if (TREE_CODE (field) != FIELD_DECL)
10845 continue;
10847 if (single == NULL_TREE)
10848 single = TREE_TYPE (field);
10849 else
10850 return false;
10853 if (single == NULL_TREE)
10854 return false;
10855 else
10857 /* If the field declaration adds extra bytes, e.g. due to padding,
10858 the record is not accepted as a vector type. */
10859 if (int_size_in_bytes (single) <= 0
10860 || int_size_in_bytes (single) != int_size_in_bytes (type))
10861 return false;
10862 type = single;
10866 return VECTOR_TYPE_P (type);
10869 /* Return true if a function argument of type TYPE and mode MODE
10870 is to be passed in a floating-point register, if available. */
10872 static bool
10873 s390_function_arg_float (machine_mode mode, const_tree type)
10875 if (s390_function_arg_size (mode, type) > 8)
10876 return false;
10878 /* Soft-float changes the ABI: no floating-point registers are used. */
10879 if (TARGET_SOFT_FLOAT)
10880 return false;
10882 /* No type info available for some library calls ... */
10883 if (!type)
10884 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
10886 /* The ABI says that record types with a single member are treated
10887 just like that member would be. */
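/* For illustration (hypothetical type): a single-member record such
   as 'struct dwrap { double d; };' is passed like a plain double,
   i.e. in a floating-point register if one is available.  */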
10888 while (TREE_CODE (type) == RECORD_TYPE)
10890 tree field, single = NULL_TREE;
10892 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10894 if (TREE_CODE (field) != FIELD_DECL)
10895 continue;
10897 if (single == NULL_TREE)
10898 single = TREE_TYPE (field);
10899 else
10900 return false;
10903 if (single == NULL_TREE)
10904 return false;
10905 else
10906 type = single;
10909 return TREE_CODE (type) == REAL_TYPE;
10912 /* Return true if a function argument of type TYPE and mode MODE
10913 is to be passed in an integer register, or a pair of integer
10914 registers, if available. */
10916 static bool
10917 s390_function_arg_integer (machine_mode mode, const_tree type)
10919 int size = s390_function_arg_size (mode, type);
10920 if (size > 8)
10921 return false;
10923 /* No type info available for some library calls ... */
10924 if (!type)
10925 return GET_MODE_CLASS (mode) == MODE_INT
10926 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
10928 /* We accept small integral (and similar) types. */
10929 if (INTEGRAL_TYPE_P (type)
10930 || POINTER_TYPE_P (type)
10931 || TREE_CODE (type) == NULLPTR_TYPE
10932 || TREE_CODE (type) == OFFSET_TYPE
10933 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
10934 return true;
10936 /* We also accept structs of size 1, 2, 4, 8 that are not
10937 passed in floating-point registers. */
10938 if (AGGREGATE_TYPE_P (type)
10939 && exact_log2 (size) >= 0
10940 && !s390_function_arg_float (mode, type))
10941 return true;
10943 return false;
10946 /* Return 1 if a function argument of type TYPE and mode MODE
10947 is to be passed by reference. The ABI specifies that only
10948 structures of size 1, 2, 4, or 8 bytes are passed by value,
10949 all other structures (and complex numbers) are passed by
10950 reference. */
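/* For illustration (hypothetical types): 'struct { char c[3]; }'
   (size 3, not a power of two) is passed by reference,
   'struct { int i; }' (size 4) is passed by value in a GPR, and a
   '_Complex double' is always passed by reference.  */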
10952 static bool
10953 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
10954 machine_mode mode, const_tree type,
10955 bool named ATTRIBUTE_UNUSED)
10957 int size = s390_function_arg_size (mode, type);
10959 if (s390_function_arg_vector (mode, type))
10960 return false;
10962 if (size > 8)
10963 return true;
10965 if (type)
10967 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
10968 return true;
10970 if (TREE_CODE (type) == COMPLEX_TYPE
10971 || TREE_CODE (type) == VECTOR_TYPE)
10972 return true;
10975 return false;
10978 /* Update the data in CUM to advance over an argument of mode MODE and
10979 data type TYPE. (TYPE is null for libcalls where that information
10980 may not be available.). The boolean NAMED specifies whether the
10981 argument is a named argument (as opposed to an unnamed argument
10982 matching an ellipsis). */
10984 static void
10985 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
10986 const_tree type, bool named)
10988 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10990 if (s390_function_arg_vector (mode, type))
10992 /* We are called for unnamed vector stdarg arguments which are
10993 passed on the stack. In this case this hook does not have to
10994 do anything since stack arguments are tracked by common
10995 code. */
10996 if (!named)
10997 return;
10998 cum->vrs += 1;
11000 else if (s390_function_arg_float (mode, type))
11002 cum->fprs += 1;
11004 else if (s390_function_arg_integer (mode, type))
11006 int size = s390_function_arg_size (mode, type);
11007 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11009 else
11010 gcc_unreachable ();
11013 /* Define where to put the arguments to a function.
11014 Value is zero to push the argument on the stack,
11015 or a hard register in which to store the argument.
11017 MODE is the argument's machine mode.
11018 TYPE is the data type of the argument (as a tree).
11019 This is null for libcalls where that information may
11020 not be available.
11021 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11022 the preceding args and about the function being called.
11023 NAMED is nonzero if this argument is a named parameter
11024 (otherwise it is an extra parameter matching an ellipsis).
11026 On S/390, we use general purpose registers 2 through 6 to
11027 pass integer, pointer, and certain structure arguments, and
11028 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11029 to pass floating point arguments. All remaining arguments
11030 are pushed to the stack. */
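/* For illustration (hypothetical 64-bit prototype): a call to
     void f (int a, double b, long c, float d);
   passes A in %r2, C in %r3, B in %f0 and D in %f2; further integer
   arguments would use %r4-%r6 and further FP arguments %f4/%f6, with
   anything beyond that going to the stack.  */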
11032 static rtx
11033 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11034 const_tree type, bool named)
11036 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11038 if (!named)
11039 s390_check_type_for_vector_abi (type, true, false);
11041 if (s390_function_arg_vector (mode, type))
11043 /* Vector arguments being part of the ellipsis are passed on the
11044 stack. */
11045 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11046 return NULL_RTX;
11048 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11050 else if (s390_function_arg_float (mode, type))
11052 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11053 return NULL_RTX;
11054 else
11055 return gen_rtx_REG (mode, cum->fprs + 16);
11057 else if (s390_function_arg_integer (mode, type))
11059 int size = s390_function_arg_size (mode, type);
11060 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11062 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11063 return NULL_RTX;
11064 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11065 return gen_rtx_REG (mode, cum->gprs + 2);
11066 else if (n_gprs == 2)
11068 rtvec p = rtvec_alloc (2);
11070 RTVEC_ELT (p, 0)
11071 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11072 const0_rtx);
11073 RTVEC_ELT (p, 1)
11074 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11075 GEN_INT (4));
11077 return gen_rtx_PARALLEL (mode, p);
11081 /* After the real arguments, expand_call calls us once again
11082 with a void_type_node type. Whatever we return here is
11083 passed as operand 2 to the call expanders.
11085 We don't need this feature ... */
11086 else if (type == void_type_node)
11087 return const0_rtx;
11089 gcc_unreachable ();
11092 /* Return true if return values of type TYPE should be returned
11093 in a memory buffer whose address is passed by the caller as
11094 hidden first argument. */
11096 static bool
11097 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11099 /* We accept small integral (and similar) types. */
11100 if (INTEGRAL_TYPE_P (type)
11101 || POINTER_TYPE_P (type)
11102 || TREE_CODE (type) == OFFSET_TYPE
11103 || TREE_CODE (type) == REAL_TYPE)
11104 return int_size_in_bytes (type) > 8;
11106 /* vector types which fit into a VR. */
11107 if (TARGET_VX_ABI
11108 && VECTOR_TYPE_P (type)
11109 && int_size_in_bytes (type) <= 16)
11110 return false;
11112 /* Aggregates and similar constructs are always returned
11113 in memory. */
11114 if (AGGREGATE_TYPE_P (type)
11115 || TREE_CODE (type) == COMPLEX_TYPE
11116 || VECTOR_TYPE_P (type))
11117 return true;
11119 /* ??? We get called on all sorts of random stuff from
11120 aggregate_value_p. We can't abort, but it's not clear
11121 what's safe to return. Pretend it's a struct I guess. */
11122 return true;
11125 /* Function arguments and return values are promoted to word size. */
11127 static machine_mode
11128 s390_promote_function_mode (const_tree type, machine_mode mode,
11129 int *punsignedp,
11130 const_tree fntype ATTRIBUTE_UNUSED,
11131 int for_return ATTRIBUTE_UNUSED)
11133 if (INTEGRAL_MODE_P (mode)
11134 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11136 if (type != NULL_TREE && POINTER_TYPE_P (type))
11137 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11138 return Pmode;
11141 return mode;
11144 /* Define where to return a (scalar) value of type RET_TYPE.
11145 If RET_TYPE is null, define where to return a (scalar)
11146 value of mode MODE from a libcall. */
11148 static rtx
11149 s390_function_and_libcall_value (machine_mode mode,
11150 const_tree ret_type,
11151 const_tree fntype_or_decl,
11152 bool outgoing ATTRIBUTE_UNUSED)
11154 /* For vector return types it is important to use the RET_TYPE
11155 argument whenever available since the middle-end might have
11156 changed the mode to a scalar mode. */
11157 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11158 || (!ret_type && VECTOR_MODE_P (mode)));
11160 /* For normal functions perform the promotion as
11161 promote_function_mode would do. */
11162 if (ret_type)
11164 int unsignedp = TYPE_UNSIGNED (ret_type);
11165 mode = promote_function_mode (ret_type, mode, &unsignedp,
11166 fntype_or_decl, 1);
11169 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11170 || SCALAR_FLOAT_MODE_P (mode)
11171 || (TARGET_VX_ABI && vector_ret_type_p));
11172 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11174 if (TARGET_VX_ABI && vector_ret_type_p)
11175 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11176 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11177 return gen_rtx_REG (mode, 16);
11178 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11179 || UNITS_PER_LONG == UNITS_PER_WORD)
11180 return gen_rtx_REG (mode, 2);
11181 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11183 /* This case is triggered when returning a 64 bit value with
11184 -m31 -mzarch. Although the value would fit into a single
11185 register it has to be forced into a 32 bit register pair in
11186 order to match the ABI. */
11187 rtvec p = rtvec_alloc (2);
11189 RTVEC_ELT (p, 0)
11190 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11191 RTVEC_ELT (p, 1)
11192 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11194 return gen_rtx_PARALLEL (mode, p);
11197 gcc_unreachable ();
11200 /* Define where to return a scalar return value of type RET_TYPE. */
11202 static rtx
11203 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11204 bool outgoing)
11206 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11207 fn_decl_or_type, outgoing);
11210 /* Define where to return a scalar libcall return value of mode
11211 MODE. */
11213 static rtx
11214 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11216 return s390_function_and_libcall_value (mode, NULL_TREE,
11217 NULL_TREE, true);
11221 /* Create and return the va_list datatype.
11223 On S/390, va_list is an array type equivalent to
11225 typedef struct __va_list_tag
11227 long __gpr;
11228 long __fpr;
11229 void *__overflow_arg_area;
11230 void *__reg_save_area;
11231 } va_list[1];
11233 where __gpr and __fpr hold the number of general purpose
11234 or floating point arguments used up to now, respectively,
11235 __overflow_arg_area points to the stack location of the
11236 next argument passed on the stack, and __reg_save_area
11237 always points to the start of the register area in the
11238 call frame of the current function. The function prologue
11239 saves all registers used for argument passing into this
11240 area if the function uses variable arguments. */
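/* For illustration only, consider a hypothetical variadic callee:

     int sum (int n, ...)
     {
       va_list ap;
       int i, s = 0;

       va_start (ap, n);
       for (i = 0; i < n; i++)
         s += va_arg (ap, int);
       va_end (ap);
       return s;
     }

   Here va_start initializes __gpr to 1 (only N was passed in a GPR)
   and __fpr to 0; each va_arg for an int then either indexes into
   __reg_save_area or, once the argument GPRs are used up, advances
   __overflow_arg_area.  */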
11242 static tree
11243 s390_build_builtin_va_list (void)
11245 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11247 record = lang_hooks.types.make_type (RECORD_TYPE);
11249 type_decl =
11250 build_decl (BUILTINS_LOCATION,
11251 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11253 f_gpr = build_decl (BUILTINS_LOCATION,
11254 FIELD_DECL, get_identifier ("__gpr"),
11255 long_integer_type_node);
11256 f_fpr = build_decl (BUILTINS_LOCATION,
11257 FIELD_DECL, get_identifier ("__fpr"),
11258 long_integer_type_node);
11259 f_ovf = build_decl (BUILTINS_LOCATION,
11260 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11261 ptr_type_node);
11262 f_sav = build_decl (BUILTINS_LOCATION,
11263 FIELD_DECL, get_identifier ("__reg_save_area"),
11264 ptr_type_node);
11266 va_list_gpr_counter_field = f_gpr;
11267 va_list_fpr_counter_field = f_fpr;
11269 DECL_FIELD_CONTEXT (f_gpr) = record;
11270 DECL_FIELD_CONTEXT (f_fpr) = record;
11271 DECL_FIELD_CONTEXT (f_ovf) = record;
11272 DECL_FIELD_CONTEXT (f_sav) = record;
11274 TYPE_STUB_DECL (record) = type_decl;
11275 TYPE_NAME (record) = type_decl;
11276 TYPE_FIELDS (record) = f_gpr;
11277 DECL_CHAIN (f_gpr) = f_fpr;
11278 DECL_CHAIN (f_fpr) = f_ovf;
11279 DECL_CHAIN (f_ovf) = f_sav;
11281 layout_type (record);
11283 /* The correct type is an array type of one element. */
11284 return build_array_type (record, build_index_type (size_zero_node));
11287 /* Implement va_start by filling the va_list structure VALIST.
11288 STDARG_P is always true, and ignored.
11289 NEXTARG points to the first anonymous stack argument.
11291 The following global variables are used to initialize
11292 the va_list structure:
11294 crtl->args.info:
11295 holds number of gprs and fprs used for named arguments.
11296 crtl->args.arg_offset_rtx:
11297 holds the offset of the first anonymous stack argument
11298 (relative to the virtual arg pointer). */
11300 static void
11301 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11303 HOST_WIDE_INT n_gpr, n_fpr;
11304 int off;
11305 tree f_gpr, f_fpr, f_ovf, f_sav;
11306 tree gpr, fpr, ovf, sav, t;
11308 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11309 f_fpr = DECL_CHAIN (f_gpr);
11310 f_ovf = DECL_CHAIN (f_fpr);
11311 f_sav = DECL_CHAIN (f_ovf);
11313 valist = build_simple_mem_ref (valist);
11314 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11315 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11316 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11317 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11319 /* Count number of gp and fp argument registers used. */
11321 n_gpr = crtl->args.info.gprs;
11322 n_fpr = crtl->args.info.fprs;
11324 if (cfun->va_list_gpr_size)
11326 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11327 build_int_cst (NULL_TREE, n_gpr));
11328 TREE_SIDE_EFFECTS (t) = 1;
11329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11332 if (cfun->va_list_fpr_size)
11334 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11335 build_int_cst (NULL_TREE, n_fpr));
11336 TREE_SIDE_EFFECTS (t) = 1;
11337 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11340 /* Find the overflow area.
11341 FIXME: This currently is too pessimistic when the vector ABI is
11342 enabled. In that case we *always* set up the overflow area
11343 pointer. */
11344 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11345 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11346 || TARGET_VX_ABI)
11348 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11350 off = INTVAL (crtl->args.arg_offset_rtx);
11351 off = off < 0 ? 0 : off;
11352 if (TARGET_DEBUG_ARG)
11353 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11354 (int)n_gpr, (int)n_fpr, off);
11356 t = fold_build_pointer_plus_hwi (t, off);
11358 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11359 TREE_SIDE_EFFECTS (t) = 1;
11360 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11363 /* Find the register save area. */
11364 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11365 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11367 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11368 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11370 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11371 TREE_SIDE_EFFECTS (t) = 1;
11372 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11376 /* Implement va_arg by updating the va_list structure
11377 VALIST as required to retrieve an argument of type
11378 TYPE, and returning that argument.
11380 Generates code equivalent to:
11382 if (integral value) {
11383 if (size <= 4 && args.gpr < 5 ||
11384 size > 4 && args.gpr < 4 )
11385 ret = args.reg_save_area[args.gpr+8]
11386 else
11387 ret = *args.overflow_arg_area++;
11388 } else if (vector value) {
11389 ret = *args.overflow_arg_area;
11390 args.overflow_arg_area += size / 8;
11391 } else if (float value) {
11392 if (args.fgpr < 2)
11393 ret = args.reg_save_area[args.fpr+64]
11394 else
11395 ret = *args.overflow_arg_area++;
11396 } else if (aggregate value) {
11397 if (args.gpr < 5)
11398 ret = *args.reg_save_area[args.gpr]
11399 else
11400 ret = **args.overflow_arg_area++;
11401 } */
11403 static tree
11404 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11405 gimple_seq *post_p ATTRIBUTE_UNUSED)
11407 tree f_gpr, f_fpr, f_ovf, f_sav;
11408 tree gpr, fpr, ovf, sav, reg, t, u;
11409 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11410 tree lab_false, lab_over;
11411 tree addr = create_tmp_var (ptr_type_node, "addr");
11412 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
11413 a stack slot. */
11415 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11416 f_fpr = DECL_CHAIN (f_gpr);
11417 f_ovf = DECL_CHAIN (f_fpr);
11418 f_sav = DECL_CHAIN (f_ovf);
11420 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11421 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11422 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11424 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11425 both appear on a lhs. */
11426 valist = unshare_expr (valist);
11427 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11429 size = int_size_in_bytes (type);
11431 s390_check_type_for_vector_abi (type, true, false);
11433 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11435 if (TARGET_DEBUG_ARG)
11437 fprintf (stderr, "va_arg: aggregate type");
11438 debug_tree (type);
11441 /* Aggregates are passed by reference. */
11442 indirect_p = 1;
11443 reg = gpr;
11444 n_reg = 1;
11446 /* Kernel stack layout on 31 bit: it is assumed here that no padding
11447 will be added by s390_frame_info because for va_args an even
11448 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11449 sav_ofs = 2 * UNITS_PER_LONG;
11450 sav_scale = UNITS_PER_LONG;
11451 size = UNITS_PER_LONG;
11452 max_reg = GP_ARG_NUM_REG - n_reg;
11453 left_align_p = false;
11455 else if (s390_function_arg_vector (TYPE_MODE (type), type))
11457 if (TARGET_DEBUG_ARG)
11459 fprintf (stderr, "va_arg: vector type");
11460 debug_tree (type);
11463 indirect_p = 0;
11464 reg = NULL_TREE;
11465 n_reg = 0;
11466 sav_ofs = 0;
11467 sav_scale = 8;
11468 max_reg = 0;
11469 left_align_p = true;
11471 else if (s390_function_arg_float (TYPE_MODE (type), type))
11473 if (TARGET_DEBUG_ARG)
11475 fprintf (stderr, "va_arg: float type");
11476 debug_tree (type);
11479 /* FP args go in FP registers, if present. */
11480 indirect_p = 0;
11481 reg = fpr;
11482 n_reg = 1;
11483 sav_ofs = 16 * UNITS_PER_LONG;
11484 sav_scale = 8;
11485 max_reg = FP_ARG_NUM_REG - n_reg;
11486 left_align_p = false;
11488 else
11490 if (TARGET_DEBUG_ARG)
11492 fprintf (stderr, "va_arg: other type");
11493 debug_tree (type);
11496 /* Otherwise into GP registers. */
11497 indirect_p = 0;
11498 reg = gpr;
11499 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11501 /* Kernel stack layout on 31 bit: it is assumed here that no padding
11502 will be added by s390_frame_info because for va_args an even
11503 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11504 sav_ofs = 2 * UNITS_PER_LONG;
11506 if (size < UNITS_PER_LONG)
11507 sav_ofs += UNITS_PER_LONG - size;
11509 sav_scale = UNITS_PER_LONG;
11510 max_reg = GP_ARG_NUM_REG - n_reg;
11511 left_align_p = false;
11514 /* Pull the value out of the saved registers ... */
11516 if (reg != NULL_TREE)
11519 /* if (reg > ((typeof (reg)) max_reg))
11520 goto lab_false;
11522 addr = sav + sav_ofs + reg * sav_scale;
11524 goto lab_over;
11526 lab_false:  */
11529 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11530 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11532 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11533 t = build2 (GT_EXPR, boolean_type_node, reg, t);
11534 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11535 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11536 gimplify_and_add (t, pre_p);
11538 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11539 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11540 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11541 t = fold_build_pointer_plus (t, u);
11543 gimplify_assign (addr, t, pre_p);
11545 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11547 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11550 /* ... Otherwise out of the overflow area. */
11552 t = ovf;
11553 if (size < UNITS_PER_LONG && !left_align_p)
11554 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11556 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11558 gimplify_assign (addr, t, pre_p);
11560 if (size < UNITS_PER_LONG && left_align_p)
11561 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11562 else
11563 t = fold_build_pointer_plus_hwi (t, size);
11565 gimplify_assign (ovf, t, pre_p);
11567 if (reg != NULL_TREE)
11568 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11571 /* Increment register save count. */
11573 if (n_reg > 0)
11575 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
11576 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
11577 gimplify_and_add (u, pre_p);
11580 if (indirect_p)
11582 t = build_pointer_type_for_mode (build_pointer_type (type),
11583 ptr_mode, true);
11584 addr = fold_convert (t, addr);
11585 addr = build_va_arg_indirect_ref (addr);
11587 else
11589 t = build_pointer_type_for_mode (type, ptr_mode, true);
11590 addr = fold_convert (t, addr);
11593 return build_va_arg_indirect_ref (addr);
11596 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
11597 expanders.
11598 DEST - Register location where CC will be stored.
11599 TDB - Pointer to a 256 byte area in which to store the transaction
11600 diagnostic block. NULL if TDB is not needed.
11601 RETRY - Retry count value. If non-NULL, a retry loop for CC2
11602 is emitted.
11603 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
11604 of the tbegin instruction pattern. */
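/* Illustrative sketch (not literal RTL) of what the retry variant
   expands to, based on the sequence emitted below:

     retry_reg      = RETRY + 1;
     retry_plus_two = RETRY + 2;
   retry:
     cc   = tbegin (TDB);
     DEST = cc;
     if (cc is CC0, CC1 or CC3)          success or permanent failure
       goto leave;
     ppa (retry_plus_two - retry_reg);   transaction-abort assist
     if (--retry_reg != 0)               transient failure (CC2)
       goto retry;
   leave:  */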
11606 void
11607 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
11609 rtx retry_plus_two = gen_reg_rtx (SImode);
11610 rtx retry_reg = gen_reg_rtx (SImode);
11611 rtx_code_label *retry_label = NULL;
11613 if (retry != NULL_RTX)
11615 emit_move_insn (retry_reg, retry);
11616 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
11617 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
11618 retry_label = gen_label_rtx ();
11619 emit_label (retry_label);
11622 if (clobber_fprs_p)
11624 if (TARGET_VX)
11625 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11626 tdb));
11627 else
11628 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11629 tdb));
11631 else
11632 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11633 tdb));
11635 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
11636 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
11637 CC_REGNUM)),
11638 UNSPEC_CC_TO_INT));
11639 if (retry != NULL_RTX)
11641 const int CC0 = 1 << 3;
11642 const int CC1 = 1 << 2;
11643 const int CC3 = 1 << 0;
11644 rtx jump;
11645 rtx count = gen_reg_rtx (SImode);
11646 rtx_code_label *leave_label = gen_label_rtx ();
11648 /* Exit for success and permanent failures. */
11649 jump = s390_emit_jump (leave_label,
11650 gen_rtx_EQ (VOIDmode,
11651 gen_rtx_REG (CCRAWmode, CC_REGNUM),
11652 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
11653 LABEL_NUSES (leave_label) = 1;
11655 /* CC2 - transient failure. Perform retry with ppa. */
11656 emit_move_insn (count, retry_plus_two);
11657 emit_insn (gen_subsi3 (count, count, retry_reg));
11658 emit_insn (gen_tx_assist (count));
11659 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
11660 retry_reg,
11661 retry_reg));
11662 JUMP_LABEL (jump) = retry_label;
11663 LABEL_NUSES (retry_label) = 1;
11664 emit_label (leave_label);
11669 /* Return the decl for the target specific builtin with the function
11670 code FCODE. */
11672 static tree
11673 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11675 if (fcode >= S390_BUILTIN_MAX)
11676 return error_mark_node;
11678 return s390_builtin_decls[fcode];
11681 /* We call mcount before the function prologue. So a profiled leaf
11682 function should stay a leaf function. */
11684 static bool
11685 s390_keep_leaf_when_profiled ()
11687 return true;
11690 /* Output assembly code for the trampoline template to
11691 stdio stream FILE.
11693 On S/390, we use gpr 1 internally in the trampoline code;
11694 gpr 0 is used to hold the static chain. */
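/* Illustrative layout of the 64-bit trampoline after
   s390_trampoline_init below has filled in the variable parts
   (byte offsets; padding size depends on TRAMPOLINE_SIZE):

      0:  basr  %r1,0              %r1 = address of the lmg insn
      2:  lmg   %r0,%r1,14(%r1)    %r0 = static chain, %r1 = target
      8:  br    %r1
     16:  <static chain value>
     24:  <target function address>  */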
11696 static void
11697 s390_asm_trampoline_template (FILE *file)
11699 rtx op[2];
11700 op[0] = gen_rtx_REG (Pmode, 0);
11701 op[1] = gen_rtx_REG (Pmode, 1);
11703 if (TARGET_64BIT)
11705 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11706 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
11707 output_asm_insn ("br\t%1", op); /* 2 byte */
11708 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11710 else
11712 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11713 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
11714 output_asm_insn ("br\t%1", op); /* 2 byte */
11715 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11719 /* Emit RTL insns to initialize the variable parts of a trampoline.
11720 FNADDR is an RTX for the address of the function's pure code.
11721 CXT is an RTX for the static chain value for the function. */
11723 static void
11724 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11726 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11727 rtx mem;
11729 emit_block_move (m_tramp, assemble_trampoline_template (),
11730 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11732 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11733 emit_move_insn (mem, cxt);
11734 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11735 emit_move_insn (mem, fnaddr);
11738 /* Output assembler code to FILE to increment profiler label # LABELNO
11739 for profiling a function entry. */
11741 void
11742 s390_function_profiler (FILE *file, int labelno)
11744 rtx op[7];
11746 char label[128];
11747 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11749 fprintf (file, "# function profiler \n");
11751 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11752 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11753 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
11755 op[2] = gen_rtx_REG (Pmode, 1);
11756 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11757 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11759 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11760 if (flag_pic)
11762 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11763 op[4] = gen_rtx_CONST (Pmode, op[4]);
11766 if (TARGET_64BIT)
11768 output_asm_insn ("stg\t%0,%1", op);
11769 output_asm_insn ("larl\t%2,%3", op);
11770 output_asm_insn ("brasl\t%0,%4", op);
11771 output_asm_insn ("lg\t%0,%1", op);
11773 else if (!flag_pic)
11775 op[6] = gen_label_rtx ();
11777 output_asm_insn ("st\t%0,%1", op);
11778 output_asm_insn ("bras\t%2,%l6", op);
11779 output_asm_insn (".long\t%4", op);
11780 output_asm_insn (".long\t%3", op);
11781 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11782 output_asm_insn ("l\t%0,0(%2)", op);
11783 output_asm_insn ("l\t%2,4(%2)", op);
11784 output_asm_insn ("basr\t%0,%0", op);
11785 output_asm_insn ("l\t%0,%1", op);
11787 else
11789 op[5] = gen_label_rtx ();
11790 op[6] = gen_label_rtx ();
11792 output_asm_insn ("st\t%0,%1", op);
11793 output_asm_insn ("bras\t%2,%l6", op);
11794 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11795 output_asm_insn (".long\t%4-%l5", op);
11796 output_asm_insn (".long\t%3-%l5", op);
11797 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11798 output_asm_insn ("lr\t%0,%2", op);
11799 output_asm_insn ("a\t%0,0(%2)", op);
11800 output_asm_insn ("a\t%2,4(%2)", op);
11801 output_asm_insn ("basr\t%0,%0", op);
11802 output_asm_insn ("l\t%0,%1", op);
11806 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11807 into its SYMBOL_REF_FLAGS. */
11809 static void
11810 s390_encode_section_info (tree decl, rtx rtl, int first)
11812 default_encode_section_info (decl, rtl, first);
11814 if (TREE_CODE (decl) == VAR_DECL)
11816 /* If a variable has a forced alignment to < 2 bytes, mark it
11817 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
11818 operand. */
11819 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
11820 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
11821 if (!DECL_SIZE (decl)
11822 || !DECL_ALIGN (decl)
11823 || !tree_fits_shwi_p (DECL_SIZE (decl))
11824 || (DECL_ALIGN (decl) <= 64
11825 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
11826 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11829 /* Literal pool references don't have a decl so they are handled
11830 differently here. We rely on the information in the MEM_ALIGN
11831 entry to decide upon natural alignment. */
11832 if (MEM_P (rtl)
11833 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
11834 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
11835 && (MEM_ALIGN (rtl) == 0
11836 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
11837 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
11838 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11841 /* Output thunk to FILE that implements a C++ virtual function call (with
11842 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
11843 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
11844 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
11845 relative to the resulting this pointer. */
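/* Illustrative C-level sketch of the adjustment performed by the
   emitted thunk before it tail-calls FUNCTION (names are
   descriptive only):

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(long *) (*(void **) this + VCALL_OFFSET);
     goto FUNCTION;  */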
11847 static void
11848 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
11849 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11850 tree function)
11852 rtx op[10];
11853 int nonlocal = 0;
11855 /* Make sure unwind info is emitted for the thunk if needed. */
11856 final_start_function (emit_barrier (), file, 1);
11858 /* Operand 0 is the target function. */
11859 op[0] = XEXP (DECL_RTL (function), 0);
11860 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
11862 nonlocal = 1;
11863 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
11864 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
11865 op[0] = gen_rtx_CONST (Pmode, op[0]);
11868 /* Operand 1 is the 'this' pointer. */
11869 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11870 op[1] = gen_rtx_REG (Pmode, 3);
11871 else
11872 op[1] = gen_rtx_REG (Pmode, 2);
11874 /* Operand 2 is the delta. */
11875 op[2] = GEN_INT (delta);
11877 /* Operand 3 is the vcall_offset. */
11878 op[3] = GEN_INT (vcall_offset);
11880 /* Operand 4 is the temporary register. */
11881 op[4] = gen_rtx_REG (Pmode, 1);
11883 /* Operands 5 to 8 can be used as labels. */
11884 op[5] = NULL_RTX;
11885 op[6] = NULL_RTX;
11886 op[7] = NULL_RTX;
11887 op[8] = NULL_RTX;
11889 /* Operand 9 can be used for temporary register. */
11890 op[9] = NULL_RTX;
11892 /* Generate code. */
11893 if (TARGET_64BIT)
11895 /* Setup literal pool pointer if required. */
11896 if ((!DISP_IN_RANGE (delta)
11897 && !CONST_OK_FOR_K (delta)
11898 && !CONST_OK_FOR_Os (delta))
11899 || (!DISP_IN_RANGE (vcall_offset)
11900 && !CONST_OK_FOR_K (vcall_offset)
11901 && !CONST_OK_FOR_Os (vcall_offset)))
11903 op[5] = gen_label_rtx ();
11904 output_asm_insn ("larl\t%4,%5", op);
11907 /* Add DELTA to this pointer. */
11908 if (delta)
11910 if (CONST_OK_FOR_J (delta))
11911 output_asm_insn ("la\t%1,%2(%1)", op);
11912 else if (DISP_IN_RANGE (delta))
11913 output_asm_insn ("lay\t%1,%2(%1)", op);
11914 else if (CONST_OK_FOR_K (delta))
11915 output_asm_insn ("aghi\t%1,%2", op);
11916 else if (CONST_OK_FOR_Os (delta))
11917 output_asm_insn ("agfi\t%1,%2", op);
11918 else
11920 op[6] = gen_label_rtx ();
11921 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
11925 /* Perform vcall adjustment. */
11926 if (vcall_offset)
11928 if (DISP_IN_RANGE (vcall_offset))
11930 output_asm_insn ("lg\t%4,0(%1)", op);
11931 output_asm_insn ("ag\t%1,%3(%4)", op);
11933 else if (CONST_OK_FOR_K (vcall_offset))
11935 output_asm_insn ("lghi\t%4,%3", op);
11936 output_asm_insn ("ag\t%4,0(%1)", op);
11937 output_asm_insn ("ag\t%1,0(%4)", op);
11939 else if (CONST_OK_FOR_Os (vcall_offset))
11941 output_asm_insn ("lgfi\t%4,%3", op);
11942 output_asm_insn ("ag\t%4,0(%1)", op);
11943 output_asm_insn ("ag\t%1,0(%4)", op);
11945 else
11947 op[7] = gen_label_rtx ();
11948 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
11949 output_asm_insn ("ag\t%4,0(%1)", op);
11950 output_asm_insn ("ag\t%1,0(%4)", op);
11954 /* Jump to target. */
11955 output_asm_insn ("jg\t%0", op);
11957 /* Output literal pool if required. */
11958 if (op[5])
11960 output_asm_insn (".align\t4", op);
11961 targetm.asm_out.internal_label (file, "L",
11962 CODE_LABEL_NUMBER (op[5]));
11964 if (op[6])
11966 targetm.asm_out.internal_label (file, "L",
11967 CODE_LABEL_NUMBER (op[6]));
11968 output_asm_insn (".long\t%2", op);
11970 if (op[7])
11972 targetm.asm_out.internal_label (file, "L",
11973 CODE_LABEL_NUMBER (op[7]));
11974 output_asm_insn (".long\t%3", op);
11977 else
11979 /* Setup base pointer if required. */
11980 if (!vcall_offset
11981 || (!DISP_IN_RANGE (delta)
11982 && !CONST_OK_FOR_K (delta)
11983 && !CONST_OK_FOR_Os (delta))
11984 || (!DISP_IN_RANGE (delta)
11985 && !CONST_OK_FOR_K (vcall_offset)
11986 && !CONST_OK_FOR_Os (vcall_offset)))
11988 op[5] = gen_label_rtx ();
11989 output_asm_insn ("basr\t%4,0", op);
11990 targetm.asm_out.internal_label (file, "L",
11991 CODE_LABEL_NUMBER (op[5]));
11994 /* Add DELTA to this pointer. */
11995 if (delta)
11997 if (CONST_OK_FOR_J (delta))
11998 output_asm_insn ("la\t%1,%2(%1)", op);
11999 else if (DISP_IN_RANGE (delta))
12000 output_asm_insn ("lay\t%1,%2(%1)", op);
12001 else if (CONST_OK_FOR_K (delta))
12002 output_asm_insn ("ahi\t%1,%2", op);
12003 else if (CONST_OK_FOR_Os (delta))
12004 output_asm_insn ("afi\t%1,%2", op);
12005 else
12007 op[6] = gen_label_rtx ();
12008 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12012 /* Perform vcall adjustment. */
12013 if (vcall_offset)
12015 if (CONST_OK_FOR_J (vcall_offset))
12017 output_asm_insn ("l\t%4,0(%1)", op);
12018 output_asm_insn ("a\t%1,%3(%4)", op);
12020 else if (DISP_IN_RANGE (vcall_offset))
12022 output_asm_insn ("l\t%4,0(%1)", op);
12023 output_asm_insn ("ay\t%1,%3(%4)", op);
12025 else if (CONST_OK_FOR_K (vcall_offset))
12027 output_asm_insn ("lhi\t%4,%3", op);
12028 output_asm_insn ("a\t%4,0(%1)", op);
12029 output_asm_insn ("a\t%1,0(%4)", op);
12031 else if (CONST_OK_FOR_Os (vcall_offset))
12033 output_asm_insn ("iilf\t%4,%3", op);
12034 output_asm_insn ("a\t%4,0(%1)", op);
12035 output_asm_insn ("a\t%1,0(%4)", op);
12037 else
12039 op[7] = gen_label_rtx ();
12040 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12041 output_asm_insn ("a\t%4,0(%1)", op);
12042 output_asm_insn ("a\t%1,0(%4)", op);
12045 /* We had to clobber the base pointer register.
12046 Re-setup the base pointer (with a different base). */
12047 op[5] = gen_label_rtx ();
12048 output_asm_insn ("basr\t%4,0", op);
12049 targetm.asm_out.internal_label (file, "L",
12050 CODE_LABEL_NUMBER (op[5]));
12053 /* Jump to target. */
12054 op[8] = gen_label_rtx ();
12056 if (!flag_pic)
12057 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12058 else if (!nonlocal)
12059 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12060 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12061 else if (flag_pic == 1)
12063 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12064 output_asm_insn ("l\t%4,%0(%4)", op);
12066 else if (flag_pic == 2)
12068 op[9] = gen_rtx_REG (Pmode, 0);
12069 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12070 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12071 output_asm_insn ("ar\t%4,%9", op);
12072 output_asm_insn ("l\t%4,0(%4)", op);
12075 output_asm_insn ("br\t%4", op);
12077 /* Output literal pool. */
12078 output_asm_insn (".align\t4", op);
12080 if (nonlocal && flag_pic == 2)
12081 output_asm_insn (".long\t%0", op);
12082 if (nonlocal)
12084 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12085 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12088 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12089 if (!flag_pic)
12090 output_asm_insn (".long\t%0", op);
12091 else
12092 output_asm_insn (".long\t%0-%5", op);
12094 if (op[6])
12096 targetm.asm_out.internal_label (file, "L",
12097 CODE_LABEL_NUMBER (op[6]));
12098 output_asm_insn (".long\t%2", op);
12100 if (op[7])
12102 targetm.asm_out.internal_label (file, "L",
12103 CODE_LABEL_NUMBER (op[7]));
12104 output_asm_insn (".long\t%3", op);
12107 final_end_function ();
12110 static bool
12111 s390_valid_pointer_mode (machine_mode mode)
12113 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12116 /* Checks whether the given CALL_EXPR would use a caller
12117 saved register. This is used to decide whether sibling call
12118 optimization could be performed on the respective function
12119 call. */
12121 static bool
12122 s390_call_saved_register_used (tree call_expr)
12124 CUMULATIVE_ARGS cum_v;
12125 cumulative_args_t cum;
12126 tree parameter;
12127 machine_mode mode;
12128 tree type;
12129 rtx parm_rtx;
12130 int reg, i;
12132 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12133 cum = pack_cumulative_args (&cum_v);
12135 for (i = 0; i < call_expr_nargs (call_expr); i++)
12137 parameter = CALL_EXPR_ARG (call_expr, i);
12138 gcc_assert (parameter);
12140 /* For an undeclared variable passed as parameter we will get
12141 an ERROR_MARK node here. */
12142 if (TREE_CODE (parameter) == ERROR_MARK)
12143 return true;
12145 type = TREE_TYPE (parameter);
12146 gcc_assert (type);
12148 mode = TYPE_MODE (type);
12149 gcc_assert (mode);
12151 /* We assume that in the target function all parameters are
12152 named. This only affects the vector argument registers, none
12153 of which are call-saved. */
12154 if (pass_by_reference (&cum_v, mode, type, true))
12156 mode = Pmode;
12157 type = build_pointer_type (type);
12160 parm_rtx = s390_function_arg (cum, mode, type, true);
12162 s390_function_arg_advance (cum, mode, type, true);
12164 if (!parm_rtx)
12165 continue;
12167 if (REG_P (parm_rtx))
12169 for (reg = 0;
12170 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12171 reg++)
12172 if (!call_used_regs[reg + REGNO (parm_rtx)])
12173 return true;
12176 if (GET_CODE (parm_rtx) == PARALLEL)
12178 int i;
12180 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12182 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12184 gcc_assert (REG_P (r));
12186 for (reg = 0;
12187 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12188 reg++)
12189 if (!call_used_regs[reg + REGNO (r)])
12190 return true;
12195 return false;
12198 /* Return true if the given call expression can be
12199 turned into a sibling call.
12200 DECL holds the declaration of the function to be called whereas
12201 EXP is the call expression itself. */
12203 static bool
12204 s390_function_ok_for_sibcall (tree decl, tree exp)
12206 /* The TPF epilogue uses register 1. */
12207 if (TARGET_TPF_PROFILING)
12208 return false;
12210 /* The 31-bit PLT code uses register 12 (GOT pointer - call-saved)
12211 which would have to be restored before the sibcall. */
12212 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12213 return false;
12215 /* Register 6 on s390 is available as an argument register but unfortunately
12216 "caller saved". This makes functions needing this register for arguments
12217 not suitable for sibcalls. */
12218 return !s390_call_saved_register_used (exp);
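#if 0 /* Hedged illustration only, not part of the original source; the
         function and argument names below are hypothetical.  */
extern int callee5 (int, int, int, int, int);

/* On s390 the first five integer arguments are passed in %r2-%r6.
   %r6 is call-saved, so s390_call_saved_register_used returns true for
   this call and the tail call below is not turned into a sibcall.  */
static int
sibcall_blocked_example (int a, int b, int c, int d, int e)
{
  return callee5 (a, b, c, d, e);
}
#endif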
12221 /* Return the fixed registers used for condition codes. */
12223 static bool
12224 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12226 *p1 = CC_REGNUM;
12227 *p2 = INVALID_REGNUM;
12229 return true;
12232 /* This function is used by the call expanders of the machine description.
12233 It emits the call insn itself together with the necessary operations
12234 to adjust the target address and returns the emitted insn.
12235 ADDR_LOCATION is the target address rtx
12236 TLS_CALL the location of the thread-local symbol
12237 RESULT_REG the register where the result of the call should be stored
12238 RETADDR_REG the register where the return address should be stored
12239 If this parameter is NULL_RTX the call is considered
12240 to be a sibling call. */
12242 rtx_insn *
12243 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12244 rtx retaddr_reg)
12246 bool plt_call = false;
12247 rtx_insn *insn;
12248 rtx call;
12249 rtx clobber;
12250 rtvec vec;
12252 /* Direct function calls need special treatment. */
12253 if (GET_CODE (addr_location) == SYMBOL_REF)
12255 /* When calling a global routine in PIC mode, we must
12256 replace the symbol itself with the PLT stub. */
12257 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12259 if (retaddr_reg != NULL_RTX)
12261 addr_location = gen_rtx_UNSPEC (Pmode,
12262 gen_rtvec (1, addr_location),
12263 UNSPEC_PLT);
12264 addr_location = gen_rtx_CONST (Pmode, addr_location);
12265 plt_call = true;
12267 else
12268 /* For -fpic code the PLT entries might use r12 which is
12269 call-saved. Therefore we cannot do a sibcall when
12270 calling directly using a symbol ref. When reaching
12271 this point we decided (in s390_function_ok_for_sibcall)
12272 to do a sibcall for a function pointer but one of the
12273 optimizers was able to get rid of the function pointer
12274 by propagating the symbol ref into the call. This
12275 optimization is illegal for S/390, so we turn the direct
12276 call into an indirect call again. */
12277 addr_location = force_reg (Pmode, addr_location);
12280 /* Unless we can use the bras(l) insn, force the
12281 routine address into a register. */
12282 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12284 if (flag_pic)
12285 addr_location = legitimize_pic_address (addr_location, 0);
12286 else
12287 addr_location = force_reg (Pmode, addr_location);
12291 /* If it is already an indirect call or the code above moved the
12292 SYMBOL_REF to somewhere else, make sure the address can be found in
12293 register 1. */
12294 if (retaddr_reg == NULL_RTX
12295 && GET_CODE (addr_location) != SYMBOL_REF
12296 && !plt_call)
12298 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12299 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12302 addr_location = gen_rtx_MEM (QImode, addr_location);
12303 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12305 if (result_reg != NULL_RTX)
12306 call = gen_rtx_SET (result_reg, call);
12308 if (retaddr_reg != NULL_RTX)
12310 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12312 if (tls_call != NULL_RTX)
12313 vec = gen_rtvec (3, call, clobber,
12314 gen_rtx_USE (VOIDmode, tls_call));
12315 else
12316 vec = gen_rtvec (2, call, clobber);
12318 call = gen_rtx_PARALLEL (VOIDmode, vec);
12321 insn = emit_call_insn (call);
12323 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12324 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12326 /* s390_function_ok_for_sibcall should
12327 have denied sibcalls in this case. */
12328 gcc_assert (retaddr_reg != NULL_RTX);
12329 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12331 return insn;
12334 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12336 static void
12337 s390_conditional_register_usage (void)
12339 int i;
12341 if (flag_pic)
12343 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12344 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12346 if (TARGET_CPU_ZARCH)
12348 fixed_regs[BASE_REGNUM] = 0;
12349 call_used_regs[BASE_REGNUM] = 0;
12350 fixed_regs[RETURN_REGNUM] = 0;
12351 call_used_regs[RETURN_REGNUM] = 0;
12353 if (TARGET_64BIT)
12355 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12356 call_used_regs[i] = call_really_used_regs[i] = 0;
12358 else
12360 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12361 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12364 if (TARGET_SOFT_FLOAT)
12366 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12367 call_used_regs[i] = fixed_regs[i] = 1;
12370 /* Disable v16 - v31 for non-vector target. */
12371 if (!TARGET_VX)
12373 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12374 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12378 /* Corresponding function to eh_return expander. */
12380 static GTY(()) rtx s390_tpf_eh_return_symbol;
12381 void
12382 s390_emit_tpf_eh_return (rtx target)
12384 rtx_insn *insn;
12385 rtx reg, orig_ra;
12387 if (!s390_tpf_eh_return_symbol)
12388 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12390 reg = gen_rtx_REG (Pmode, 2);
12391 orig_ra = gen_rtx_REG (Pmode, 3);
12393 emit_move_insn (reg, target);
12394 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12395 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12396 gen_rtx_REG (Pmode, RETURN_REGNUM));
12397 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12398 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12400 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12403 /* Rework the prologue/epilogue to avoid saving/restoring
12404 registers unnecessarily. */
12406 static void
12407 s390_optimize_prologue (void)
12409 rtx_insn *insn, *new_insn, *next_insn;
12411 /* Do a final recompute of the frame-related data. */
12412 s390_optimize_register_info ();
12414 /* If all special registers are in fact used, there's nothing we
12415 can do, so no point in walking the insn list. */
12417 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12418 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12419 && (TARGET_CPU_ZARCH
12420 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12421 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12422 return;
12424 /* Search for prologue/epilogue insns and replace them. */
12426 for (insn = get_insns (); insn; insn = next_insn)
12428 int first, last, off;
12429 rtx set, base, offset;
12430 rtx pat;
12432 next_insn = NEXT_INSN (insn);
12434 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12435 continue;
12437 pat = PATTERN (insn);
12439 /* Remove ldgr/lgdr instructions used for saving and restoring
12440 GPRs if possible. */
12441 if (TARGET_Z10
12442 && GET_CODE (pat) == SET
12443 && GET_MODE (SET_SRC (pat)) == DImode
12444 && REG_P (SET_SRC (pat))
12445 && REG_P (SET_DEST (pat)))
12447 int src_regno = REGNO (SET_SRC (pat));
12448 int dest_regno = REGNO (SET_DEST (pat));
12449 int gpr_regno;
12450 int fpr_regno;
12452 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
12453 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
12454 continue;
12456 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12457 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12459 /* GPR must be call-saved, FPR must be call-clobbered. */
12460 if (!call_really_used_regs[fpr_regno]
12461 || call_really_used_regs[gpr_regno])
12462 continue;
12464 /* It must not happen that what we once saved in an FPR now
12465 needs a stack slot. */
12466 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
12468 if (cfun_gpr_save_slot (gpr_regno) == 0)
12470 remove_insn (insn);
12471 continue;
12475 if (GET_CODE (pat) == PARALLEL
12476 && store_multiple_operation (pat, VOIDmode))
12478 set = XVECEXP (pat, 0, 0);
12479 first = REGNO (SET_SRC (set));
12480 last = first + XVECLEN (pat, 0) - 1;
12481 offset = const0_rtx;
12482 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12483 off = INTVAL (offset);
12485 if (GET_CODE (base) != REG || off < 0)
12486 continue;
12487 if (cfun_frame_layout.first_save_gpr != -1
12488 && (cfun_frame_layout.first_save_gpr < first
12489 || cfun_frame_layout.last_save_gpr > last))
12490 continue;
12491 if (REGNO (base) != STACK_POINTER_REGNUM
12492 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12493 continue;
12494 if (first > BASE_REGNUM || last < BASE_REGNUM)
12495 continue;
12497 if (cfun_frame_layout.first_save_gpr != -1)
12499 rtx s_pat = save_gprs (base,
12500 off + (cfun_frame_layout.first_save_gpr
12501 - first) * UNITS_PER_LONG,
12502 cfun_frame_layout.first_save_gpr,
12503 cfun_frame_layout.last_save_gpr);
12504 new_insn = emit_insn_before (s_pat, insn);
12505 INSN_ADDRESSES_NEW (new_insn, -1);
12508 remove_insn (insn);
12509 continue;
12512 if (cfun_frame_layout.first_save_gpr == -1
12513 && GET_CODE (pat) == SET
12514 && GENERAL_REG_P (SET_SRC (pat))
12515 && GET_CODE (SET_DEST (pat)) == MEM)
12517 set = pat;
12518 first = REGNO (SET_SRC (set));
12519 offset = const0_rtx;
12520 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12521 off = INTVAL (offset);
12523 if (GET_CODE (base) != REG || off < 0)
12524 continue;
12525 if (REGNO (base) != STACK_POINTER_REGNUM
12526 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12527 continue;
12529 remove_insn (insn);
12530 continue;
12533 if (GET_CODE (pat) == PARALLEL
12534 && load_multiple_operation (pat, VOIDmode))
12536 set = XVECEXP (pat, 0, 0);
12537 first = REGNO (SET_DEST (set));
12538 last = first + XVECLEN (pat, 0) - 1;
12539 offset = const0_rtx;
12540 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12541 off = INTVAL (offset);
12543 if (GET_CODE (base) != REG || off < 0)
12544 continue;
12546 if (cfun_frame_layout.first_restore_gpr != -1
12547 && (cfun_frame_layout.first_restore_gpr < first
12548 || cfun_frame_layout.last_restore_gpr > last))
12549 continue;
12550 if (REGNO (base) != STACK_POINTER_REGNUM
12551 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12552 continue;
12553 if (first > BASE_REGNUM || last < BASE_REGNUM)
12554 continue;
12556 if (cfun_frame_layout.first_restore_gpr != -1)
12558 rtx rpat = restore_gprs (base,
12559 off + (cfun_frame_layout.first_restore_gpr
12560 - first) * UNITS_PER_LONG,
12561 cfun_frame_layout.first_restore_gpr,
12562 cfun_frame_layout.last_restore_gpr);
12564 /* Remove REG_CFA_RESTOREs for registers that we no
12565 longer need to save. */
12566 REG_NOTES (rpat) = REG_NOTES (insn);
12567 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12568 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12569 && ((int) REGNO (XEXP (*ptr, 0))
12570 < cfun_frame_layout.first_restore_gpr))
12571 *ptr = XEXP (*ptr, 1);
12572 else
12573 ptr = &XEXP (*ptr, 1);
12574 new_insn = emit_insn_before (rpat, insn);
12575 RTX_FRAME_RELATED_P (new_insn) = 1;
12576 INSN_ADDRESSES_NEW (new_insn, -1);
12579 remove_insn (insn);
12580 continue;
12583 if (cfun_frame_layout.first_restore_gpr == -1
12584 && GET_CODE (pat) == SET
12585 && GENERAL_REG_P (SET_DEST (pat))
12586 && GET_CODE (SET_SRC (pat)) == MEM)
12588 set = pat;
12589 first = REGNO (SET_DEST (set));
12590 offset = const0_rtx;
12591 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12592 off = INTVAL (offset);
12594 if (GET_CODE (base) != REG || off < 0)
12595 continue;
12597 if (REGNO (base) != STACK_POINTER_REGNUM
12598 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12599 continue;
12601 remove_insn (insn);
12602 continue;
12607 /* On z10 and later the dynamic branch prediction must see the
12608 backward jump within a certain window. If not, it falls back to
12609 the static prediction. This function rearranges the loop backward
12610 branch in a way that makes the static prediction always correct.
12611 The function returns true if it added an instruction. */
12612 static bool
12613 s390_fix_long_loop_prediction (rtx_insn *insn)
12615 rtx set = single_set (insn);
12616 rtx code_label, label_ref, new_label;
12617 rtx_insn *uncond_jump;
12618 rtx_insn *cur_insn;
12619 rtx tmp;
12620 int distance;
12622 /* This will exclude branch on count and branch on index patterns
12623 since these are correctly statically predicted. */
12624 if (!set
12625 || SET_DEST (set) != pc_rtx
12626 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12627 return false;
12629 /* Skip conditional returns. */
12630 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12631 && XEXP (SET_SRC (set), 2) == pc_rtx)
12632 return false;
12634 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12635 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12637 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12639 code_label = XEXP (label_ref, 0);
12641 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12642 || INSN_ADDRESSES (INSN_UID (insn)) == -1
12643 || (INSN_ADDRESSES (INSN_UID (insn))
12644 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12645 return false;
12647 for (distance = 0, cur_insn = PREV_INSN (insn);
12648 distance < PREDICT_DISTANCE - 6;
12649 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12650 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12651 return false;
12653 new_label = gen_label_rtx ();
12654 uncond_jump = emit_jump_insn_after (
12655 gen_rtx_SET (pc_rtx,
12656 gen_rtx_LABEL_REF (VOIDmode, code_label)),
12657 insn);
12658 emit_label_after (new_label, uncond_jump);
12660 tmp = XEXP (SET_SRC (set), 1);
12661 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12662 XEXP (SET_SRC (set), 2) = tmp;
12663 INSN_CODE (insn) = -1;
12665 XEXP (label_ref, 0) = new_label;
12666 JUMP_LABEL (insn) = new_label;
12667 JUMP_LABEL (uncond_jump) = code_label;
12669 return true;
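/* Hedged sketch of the rewrite above (mnemonics and labels are
   illustrative, not taken from the original source):

     before:                     after:
       .Lloop:                     .Lloop:
         ...                         ...
         jne .Lloop                  je  .Lskip
                                     j   .Lloop
                                   .Lskip:

   The far backward conditional branch becomes a short forward branch
   around an unconditional backward jump, which is always predicted
   correctly; swapping the IF_THEN_ELSE arms makes the remaining
   conditional branch test the inverted condition.  */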
12672 /* Returns 1 if INSN reads the value of REG for purposes not related
12673 to addressing of memory, and 0 otherwise. */
12674 static int
12675 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12677 return reg_referenced_p (reg, PATTERN (insn))
12678 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12681 /* Starting from INSN find_cond_jump looks downwards in the insn
12682 stream for a single jump insn which is the last user of the
12683 condition code set in INSN. */
12684 static rtx_insn *
12685 find_cond_jump (rtx_insn *insn)
12687 for (; insn; insn = NEXT_INSN (insn))
12689 rtx ite, cc;
12691 if (LABEL_P (insn))
12692 break;
12694 if (!JUMP_P (insn))
12696 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12697 break;
12698 continue;
12701 /* This will be triggered by a return. */
12702 if (GET_CODE (PATTERN (insn)) != SET)
12703 break;
12705 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12706 ite = SET_SRC (PATTERN (insn));
12708 if (GET_CODE (ite) != IF_THEN_ELSE)
12709 break;
12711 cc = XEXP (XEXP (ite, 0), 0);
12712 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12713 break;
12715 if (find_reg_note (insn, REG_DEAD, cc))
12716 return insn;
12717 break;
12720 return NULL;
12723 /* Swap the condition in COND and the operands in OP0 and OP1 so that
12724 the semantics does not change. If NULL_RTX is passed as COND the
12725 function tries to find the conditional jump starting with INSN. */
12726 static void
12727 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12729 rtx tmp = *op0;
12731 if (cond == NULL_RTX)
12733 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12734 rtx set = jump ? single_set (jump) : NULL_RTX;
12736 if (set == NULL_RTX)
12737 return;
12739 cond = XEXP (SET_SRC (set), 0);
12742 *op0 = *op1;
12743 *op1 = tmp;
12744 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12747 /* On z10, instructions of the compare-and-branch family have the
12748 property of accessing the register occurring as the second operand
12749 with its bits complemented. If such a compare is grouped with a second
12750 instruction that accesses the same register non-complemented, and
12751 if that register's value is delivered via a bypass, then the
12752 pipeline recycles, thereby causing significant performance decline.
12753 This function locates such situations and exchanges the two
12754 operands of the compare. The function returns true whenever it
12755 added an insn. */
12756 static bool
12757 s390_z10_optimize_cmp (rtx_insn *insn)
12759 rtx_insn *prev_insn, *next_insn;
12760 bool insn_added_p = false;
12761 rtx cond, *op0, *op1;
12763 if (GET_CODE (PATTERN (insn)) == PARALLEL)
12765 /* Handle compare and branch and branch on count
12766 instructions. */
12767 rtx pattern = single_set (insn);
12769 if (!pattern
12770 || SET_DEST (pattern) != pc_rtx
12771 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12772 return false;
12774 cond = XEXP (SET_SRC (pattern), 0);
12775 op0 = &XEXP (cond, 0);
12776 op1 = &XEXP (cond, 1);
12778 else if (GET_CODE (PATTERN (insn)) == SET)
12780 rtx src, dest;
12782 /* Handle normal compare instructions. */
12783 src = SET_SRC (PATTERN (insn));
12784 dest = SET_DEST (PATTERN (insn));
12786 if (!REG_P (dest)
12787 || !CC_REGNO_P (REGNO (dest))
12788 || GET_CODE (src) != COMPARE)
12789 return false;
12791 /* s390_swap_cmp will try to find the conditional
12792 jump when passing NULL_RTX as condition. */
12793 cond = NULL_RTX;
12794 op0 = &XEXP (src, 0);
12795 op1 = &XEXP (src, 1);
12797 else
12798 return false;
12800 if (!REG_P (*op0) || !REG_P (*op1))
12801 return false;
12803 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12804 return false;
12806 /* Swap the COMPARE arguments and its mask if there is a
12807 conflicting access in the previous insn. */
12808 prev_insn = prev_active_insn (insn);
12809 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12810 && reg_referenced_p (*op1, PATTERN (prev_insn)))
12811 s390_swap_cmp (cond, op0, op1, insn);
12813 /* Check if there is a conflict with the next insn. If there
12814 was no conflict with the previous insn, then swap the
12815 COMPARE arguments and its mask. If we already swapped
12816 the operands, or if swapping them would cause a conflict
12817 with the previous insn, issue a NOP after the COMPARE in
12818 order to separate the two instructions. */
12819 next_insn = next_active_insn (insn);
12820 if (next_insn != NULL_RTX && INSN_P (next_insn)
12821 && s390_non_addr_reg_read_p (*op1, next_insn))
12823 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12824 && s390_non_addr_reg_read_p (*op0, prev_insn))
12826 if (REGNO (*op1) == 0)
12827 emit_insn_after (gen_nop1 (), insn);
12828 else
12829 emit_insn_after (gen_nop (), insn);
12830 insn_added_p = true;
12832 else
12833 s390_swap_cmp (cond, op0, op1, insn);
12835 return insn_added_p;
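/* Hedged illustration of the z10 hazard handled above (instructions,
   registers and grouping are hypothetical):

     crj  %r1,%r2,8,.Ltarget   ; accesses %r2 with its bits complemented
     lr   %r3,%r2              ; grouped insn reads %r2 via a bypass
                               ;   -> the pipeline recycles

   If there is no conflict with the previous insn, the two compare
   operands (and the condition) are swapped so %r2 becomes the first
   operand; otherwise a NOP is emitted after the compare to separate
   the two instructions.  */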
12838 /* Perform machine-dependent processing. */
12840 static void
12841 s390_reorg (void)
12843 bool pool_overflow = false;
12844 int hw_before, hw_after;
12846 /* Make sure all splits have been performed; splits after
12847 machine_dependent_reorg might confuse insn length counts. */
12848 split_all_insns_noflow ();
12850 /* Install the main literal pool and the associated base
12851 register load insns.
12853 In addition, there are two problematic situations we need
12854 to correct:
12856 - the literal pool might be > 4096 bytes in size, so that
12857 some of its elements cannot be directly accessed
12859 - a branch target might be > 64K away from the branch, so that
12860 it is not possible to use a PC-relative instruction.
12862 To fix those, we split the single literal pool into multiple
12863 pool chunks, reloading the pool base register at various
12864 points throughout the function to ensure it always points to
12865 the pool chunk the following code expects, and / or replace
12866 PC-relative branches by absolute branches.
12868 However, the two problems are interdependent: splitting the
12869 literal pool can move a branch further away from its target,
12870 causing the 64K limit to overflow, and on the other hand,
12871 replacing a PC-relative branch by an absolute branch means
12872 we need to put the branch target address into the literal
12873 pool, possibly causing it to overflow.
12875 So, we loop trying to fix up both problems until we manage
12876 to satisfy both conditions at the same time. Note that the
12877 loop is guaranteed to terminate as every pass of the loop
12878 strictly decreases the total number of PC-relative branches
12879 in the function. (This is not completely true as there
12880 might be branch-over-pool insns introduced by chunkify_start.
12881 Those never need to be split however.) */
12883 for (;;)
12885 struct constant_pool *pool = NULL;
12887 /* Collect the literal pool. */
12888 if (!pool_overflow)
12890 pool = s390_mainpool_start ();
12891 if (!pool)
12892 pool_overflow = true;
12895 /* If literal pool overflowed, start to chunkify it. */
12896 if (pool_overflow)
12897 pool = s390_chunkify_start ();
12899 /* Split out-of-range branches. If this has created new
12900 literal pool entries, cancel current chunk list and
12901 recompute it. zSeries machines have large branch
12902 instructions, so we never need to split a branch. */
12903 if (!TARGET_CPU_ZARCH && s390_split_branches ())
12905 if (pool_overflow)
12906 s390_chunkify_cancel (pool);
12907 else
12908 s390_mainpool_cancel (pool);
12910 continue;
12913 /* If we made it up to here, both conditions are satisfied.
12914 Finish up literal pool related changes. */
12915 if (pool_overflow)
12916 s390_chunkify_finish (pool);
12917 else
12918 s390_mainpool_finish (pool);
12920 /* We're done splitting branches. */
12921 cfun->machine->split_branches_pending_p = false;
12922 break;
12925 /* Generate out-of-pool execute target insns. */
12926 if (TARGET_CPU_ZARCH)
12928 rtx_insn *insn, *target;
12929 rtx label;
12931 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12933 label = s390_execute_label (insn);
12934 if (!label)
12935 continue;
12937 gcc_assert (label != const0_rtx);
12939 target = emit_label (XEXP (label, 0));
12940 INSN_ADDRESSES_NEW (target, -1);
12942 target = emit_insn (s390_execute_target (insn));
12943 INSN_ADDRESSES_NEW (target, -1);
12947 /* Try to optimize prologue and epilogue further. */
12948 s390_optimize_prologue ();
12950 /* Walk over the insns and do some >=z10 specific changes. */
12951 if (s390_tune >= PROCESSOR_2097_Z10)
12953 rtx_insn *insn;
12954 bool insn_added_p = false;
12956 /* The insn lengths and addresses have to be up to date for the
12957 following manipulations. */
12958 shorten_branches (get_insns ());
12960 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12962 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
12963 continue;
12965 if (JUMP_P (insn))
12966 insn_added_p |= s390_fix_long_loop_prediction (insn);
12968 if ((GET_CODE (PATTERN (insn)) == PARALLEL
12969 || GET_CODE (PATTERN (insn)) == SET)
12970 && s390_tune == PROCESSOR_2097_Z10)
12971 insn_added_p |= s390_z10_optimize_cmp (insn);
12974 /* Adjust branches if we added new instructions. */
12975 if (insn_added_p)
12976 shorten_branches (get_insns ());
12979 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
12980 if (hw_after > 0)
12982 rtx_insn *insn;
12984 /* Insert NOPs for hotpatching. */
12985 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12986 /* Emit NOPs
12987 1. inside the area covered by debug information to allow setting
12988 breakpoints at the NOPs,
12989 2. before any insn which results in an asm instruction,
12990 3. before in-function labels to avoid jumping to the NOPs, for
12991 example as part of a loop,
12992 4. before any barrier in case the function is completely empty
12993 (__builtin_unreachable ()) and has neither internal labels nor
12994 active insns.
12996 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
12997 break;
12998 /* Output a series of NOPs before the first active insn. */
12999 while (insn && hw_after > 0)
13001 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13003 emit_insn_before (gen_nop_6_byte (), insn);
13004 hw_after -= 3;
13006 else if (hw_after >= 2)
13008 emit_insn_before (gen_nop_4_byte (), insn);
13009 hw_after -= 2;
13011 else
13013 emit_insn_before (gen_nop_2_byte (), insn);
13014 hw_after -= 1;
13020 /* Return true if INSN is a fp load insn writing register REGNO. */
13021 static inline bool
13022 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13024 rtx set;
13025 enum attr_type flag = s390_safe_attr_type (insn);
13027 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13028 return false;
13030 set = single_set (insn);
13032 if (set == NULL_RTX)
13033 return false;
13035 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13036 return false;
13038 if (REGNO (SET_DEST (set)) != regno)
13039 return false;
13041 return true;
13044 /* This value describes the distance to be avoided between an
13045 arithmetic fp instruction and an fp load writing the same register.
13046 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
13047 fine but the exact value has to be avoided. Otherwise the FP
13048 pipeline will throw an exception causing a major penalty. */
13049 #define Z10_EARLYLOAD_DISTANCE 7
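/* Hedged reading aid, not part of the original source: with the value 7,
   an fp arithmetic insn such as "adbr %f1,%f2" followed exactly 7 active
   insns later by an fp load into %f1 hits the penalty, while gaps of
   6 or 8 insns are fine; the reorder hook below therefore moves such a
   load to the end of the ready list.  */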
13051 /* Rearrange the ready list in order to avoid the situation described
13052 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13053 moved to the very end of the ready list. */
13054 static void
13055 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13057 unsigned int regno;
13058 int nready = *nready_p;
13059 rtx_insn *tmp;
13060 int i;
13061 rtx_insn *insn;
13062 rtx set;
13063 enum attr_type flag;
13064 int distance;
13066 /* Skip DISTANCE - 1 active insns. */
13067 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13068 distance > 0 && insn != NULL_RTX;
13069 distance--, insn = prev_active_insn (insn))
13070 if (CALL_P (insn) || JUMP_P (insn))
13071 return;
13073 if (insn == NULL_RTX)
13074 return;
13076 set = single_set (insn);
13078 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13079 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13080 return;
13082 flag = s390_safe_attr_type (insn);
13084 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13085 return;
13087 regno = REGNO (SET_DEST (set));
13088 i = nready - 1;
13090 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13091 i--;
13093 if (!i)
13094 return;
13096 tmp = ready[i];
13097 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13098 ready[0] = tmp;
13102 /* The s390_sched_state variable tracks the state of the current or
13103 the last instruction group.
13105 0,1,2 number of instructions scheduled in the current group
13106 3 the last group is complete - normal insns
13107 4 the last group was a cracked/expanded insn */
13109 static int s390_sched_state;
13111 #define S390_OOO_SCHED_STATE_NORMAL 3
13112 #define S390_OOO_SCHED_STATE_CRACKED 4
13114 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
13115 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
13116 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
13117 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
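/* Hedged summary of how s390_sched_variable_issue below advances
   s390_sched_state (a reading aid, not part of the original source):

     cracked/expanded insn            -> S390_OOO_SCHED_STATE_CRACKED
     endgroup/groupalone insn         -> S390_OOO_SCHED_STATE_NORMAL
     plain insn (mask == 0):
        0 -> 1 -> 2 -> S390_OOO_SCHED_STATE_NORMAL
        S390_OOO_SCHED_STATE_NORMAL   -> 1
        S390_OOO_SCHED_STATE_CRACKED  -> S390_OOO_SCHED_STATE_NORMAL  */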
13119 static unsigned int
13120 s390_get_sched_attrmask (rtx_insn *insn)
13122 unsigned int mask = 0;
13124 if (get_attr_ooo_cracked (insn))
13125 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
13126 if (get_attr_ooo_expanded (insn))
13127 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
13128 if (get_attr_ooo_endgroup (insn))
13129 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
13130 if (get_attr_ooo_groupalone (insn))
13131 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
13132 return mask;
13135 /* Return the scheduling score for INSN. The higher the score the
13136 better. The score is calculated from the OOO scheduling attributes
13137 of INSN and the scheduling state s390_sched_state. */
13138 static int
13139 s390_sched_score (rtx_insn *insn)
13141 unsigned int mask = s390_get_sched_attrmask (insn);
13142 int score = 0;
13144 switch (s390_sched_state)
13146 case 0:
13147 /* Try to put insns into the first slot which would otherwise
13148 break a group. */
13149 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13150 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13151 score += 5;
13152 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13153 score += 10;
13154 case 1:
13155 /* Prefer not cracked insns while trying to put together a
13156 group. */
13157 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13158 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13159 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13160 score += 10;
13161 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
13162 score += 5;
13163 break;
13164 case 2:
13165 /* Prefer not cracked insns while trying to put together a
13166 group. */
13167 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13168 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13169 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13170 score += 10;
13171 /* Prefer endgroup insns in the last slot. */
13172 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
13173 score += 10;
13174 break;
13175 case S390_OOO_SCHED_STATE_NORMAL:
13176 /* Prefer not cracked insns if the last was not cracked. */
13177 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13178 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
13179 score += 5;
13180 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13181 score += 10;
13182 break;
13183 case S390_OOO_SCHED_STATE_CRACKED:
13184 /* Try to keep cracked insns together to prevent them from
13185 interrupting groups. */
13186 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13187 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13188 score += 5;
13189 break;
13191 return score;
13194 /* This function is called via hook TARGET_SCHED_REORDER before
13195 issuing one insn from list READY which contains *NREADYP entries.
13196 For target z10 it reorders load instructions to avoid early load
13197 conflicts in the floating point pipeline. */
13198 static int
13199 s390_sched_reorder (FILE *file, int verbose,
13200 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13202 if (s390_tune == PROCESSOR_2097_Z10
13203 && reload_completed
13204 && *nreadyp > 1)
13205 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13207 if (s390_tune >= PROCESSOR_2827_ZEC12
13208 && reload_completed
13209 && *nreadyp > 1)
13211 int i;
13212 int last_index = *nreadyp - 1;
13213 int max_index = -1;
13214 int max_score = -1;
13215 rtx_insn *tmp;
13217 /* Just move the insn with the highest score to the top (the
13218 end) of the list. A full sort is not needed since a conflict
13219 in the hazard recognition cannot happen. So the top insn in
13220 the ready list will always be taken. */
13221 for (i = last_index; i >= 0; i--)
13223 int score;
13225 if (recog_memoized (ready[i]) < 0)
13226 continue;
13228 score = s390_sched_score (ready[i]);
13229 if (score > max_score)
13231 max_score = score;
13232 max_index = i;
13236 if (max_index != -1)
13238 if (max_index != last_index)
13240 tmp = ready[max_index];
13241 ready[max_index] = ready[last_index];
13242 ready[last_index] = tmp;
13244 if (verbose > 5)
13245 fprintf (file,
13246 "move insn %d to the top of list\n",
13247 INSN_UID (ready[last_index]));
13249 else if (verbose > 5)
13250 fprintf (file,
13251 "best insn %d already on top\n",
13252 INSN_UID (ready[last_index]));
13255 if (verbose > 5)
13257 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13258 s390_sched_state);
13260 for (i = last_index; i >= 0; i--)
13262 if (recog_memoized (ready[i]) < 0)
13263 continue;
13264 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
13265 s390_sched_score (ready[i]));
13266 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
13267 PRINT_OOO_ATTR (ooo_cracked);
13268 PRINT_OOO_ATTR (ooo_expanded);
13269 PRINT_OOO_ATTR (ooo_endgroup);
13270 PRINT_OOO_ATTR (ooo_groupalone);
13271 #undef PRINT_OOO_ATTR
13272 fprintf (file, "\n");
13277 return s390_issue_rate ();
13281 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13282 the scheduler has issued INSN. It stores the last issued insn into
13283 last_scheduled_insn in order to make it available for
13284 s390_sched_reorder. */
13285 static int
13286 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13288 last_scheduled_insn = insn;
13290 if (s390_tune >= PROCESSOR_2827_ZEC12
13291 && reload_completed
13292 && recog_memoized (insn) >= 0)
13294 unsigned int mask = s390_get_sched_attrmask (insn);
13296 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13297 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13298 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
13299 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
13300 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13301 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13302 else
13304 /* Only normal insns are left (mask == 0). */
13305 switch (s390_sched_state)
13307 case 0:
13308 case 1:
13309 case 2:
13310 case S390_OOO_SCHED_STATE_NORMAL:
13311 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
13312 s390_sched_state = 1;
13313 else
13314 s390_sched_state++;
13316 break;
13317 case S390_OOO_SCHED_STATE_CRACKED:
13318 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13319 break;
13322 if (verbose > 5)
13324 fprintf (file, "insn %d: ", INSN_UID (insn));
13325 #define PRINT_OOO_ATTR(ATTR) \
13326 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
13327 PRINT_OOO_ATTR (ooo_cracked);
13328 PRINT_OOO_ATTR (ooo_expanded);
13329 PRINT_OOO_ATTR (ooo_endgroup);
13330 PRINT_OOO_ATTR (ooo_groupalone);
13331 #undef PRINT_OOO_ATTR
13332 fprintf (file, "\n");
13333 fprintf (file, "sched state: %d\n", s390_sched_state);
13337 if (GET_CODE (PATTERN (insn)) != USE
13338 && GET_CODE (PATTERN (insn)) != CLOBBER)
13339 return more - 1;
13340 else
13341 return more;
13344 static void
13345 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13346 int verbose ATTRIBUTE_UNUSED,
13347 int max_ready ATTRIBUTE_UNUSED)
13349 last_scheduled_insn = NULL;
13350 s390_sched_state = 0;
13353 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13354 a new number of times struct loop *loop should be unrolled if tuned
13355 for CPUs with a built-in stride prefetcher.
13356 The loop is analyzed for memory accesses by calling check_dpu for
13357 each rtx of the loop. Depending on the loop_depth and the number of
13358 memory accesses, a new number <= nunroll is returned to improve the
13359 behaviour of the hardware prefetch unit. */
13360 static unsigned
13361 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13363 basic_block *bbs;
13364 rtx_insn *insn;
13365 unsigned i;
13366 unsigned mem_count = 0;
13368 if (s390_tune < PROCESSOR_2097_Z10)
13369 return nunroll;
13371 /* Count the number of memory references within the loop body. */
13372 bbs = get_loop_body (loop);
13373 subrtx_iterator::array_type array;
13374 for (i = 0; i < loop->num_nodes; i++)
13375 FOR_BB_INSNS (bbs[i], insn)
13376 if (INSN_P (insn) && INSN_CODE (insn) != -1)
13377 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13378 if (MEM_P (*iter))
13379 mem_count += 1;
13380 free (bbs);
13382 /* Prevent division by zero; nunroll need not be adjusted in this case. */
13383 if (mem_count == 0)
13384 return nunroll;
13386 switch (loop_depth(loop))
13388 case 1:
13389 return MIN (nunroll, 28 / mem_count);
13390 case 2:
13391 return MIN (nunroll, 22 / mem_count);
13392 default:
13393 return MIN (nunroll, 16 / mem_count);
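/* Hedged worked example for the heuristic above (numbers illustrative):
   a depth-1 loop containing 4 MEMs is limited to MIN (nunroll, 28 / 4),
   i.e. at most 7 unrolled copies; the same loop at depth 3 or deeper is
   limited to MIN (nunroll, 16 / 4) = 4.  */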
13397 static void
13398 s390_option_override (void)
13400 unsigned int i;
13401 cl_deferred_option *opt;
13402 vec<cl_deferred_option> *v =
13403 (vec<cl_deferred_option> *) s390_deferred_options;
13405 if (v)
13406 FOR_EACH_VEC_ELT (*v, i, opt)
13408 switch (opt->opt_index)
13410 case OPT_mhotpatch_:
13412 int val1;
13413 int val2;
13414 char s[256];
13415 char *t;
13417 strncpy (s, opt->arg, 256);
13418 s[255] = 0;
13419 t = strchr (s, ',');
13420 if (t != NULL)
13422 *t = 0;
13423 t++;
13424 val1 = integral_argument (s);
13425 val2 = integral_argument (t);
13427 else
13429 val1 = -1;
13430 val2 = -1;
13432 if (val1 == -1 || val2 == -1)
13434 /* argument is not a plain number */
13435 error ("arguments to %qs should be non-negative integers",
13436 "-mhotpatch=n,m");
13437 break;
13439 else if (val1 > s390_hotpatch_hw_max
13440 || val2 > s390_hotpatch_hw_max)
13442 error ("argument to %qs is too large (max. %d)",
13443 "-mhotpatch=n,m", s390_hotpatch_hw_max);
13444 break;
13446 s390_hotpatch_hw_before_label = val1;
13447 s390_hotpatch_hw_after_label = val2;
13448 break;
13450 default:
13451 gcc_unreachable ();
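/* Hedged usage sketch for the parsing above (values are illustrative):
   "-mhotpatch=1,2" is split at the comma into val1 = 1 and val2 = 2,
   i.e. one halfword of NOP padding before and two halfwords after the
   function label, each bounded by s390_hotpatch_hw_max.  */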
13455 /* Set up function hooks. */
13456 init_machine_status = s390_init_machine_status;
13458 /* Architecture mode defaults according to ABI. */
13459 if (!(target_flags_explicit & MASK_ZARCH))
13461 if (TARGET_64BIT)
13462 target_flags |= MASK_ZARCH;
13463 else
13464 target_flags &= ~MASK_ZARCH;
13467 /* Set the march default in case it hasn't been specified on
13468 cmdline. */
13469 if (s390_arch == PROCESSOR_max)
13471 s390_arch_string = TARGET_ZARCH? "z900" : "g5";
13472 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
13473 s390_arch_flags = processor_flags_table[(int)s390_arch];
13476 /* Determine processor to tune for. */
13477 if (s390_tune == PROCESSOR_max)
13479 s390_tune = s390_arch;
13480 s390_tune_flags = s390_arch_flags;
13483 /* Sanity checks. */
13484 if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE)
13485 gcc_unreachable ();
13486 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
13487 error ("z/Architecture mode not supported on %s", s390_arch_string);
13488 if (TARGET_64BIT && !TARGET_ZARCH)
13489 error ("64-bit ABI not supported in ESA/390 mode");
13491 /* Use hardware DFP if available and not explicitly disabled by
13492 user. E.g. with -m31 -march=z10 -mzarch */
13493 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
13494 target_flags |= MASK_HARD_DFP;
13496 /* Enable hardware transactions if available and not explicitly
13497 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
13498 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13499 target_flags |= MASK_OPT_HTM;
13501 if (target_flags_explicit & MASK_OPT_VX)
13503 if (TARGET_OPT_VX)
13505 if (!TARGET_CPU_VX)
13506 error ("hardware vector support not available on %s",
13507 s390_arch_string);
13508 if (TARGET_SOFT_FLOAT)
13509 error ("hardware vector support not available with -msoft-float");
13512 else if (TARGET_CPU_VX)
13513 /* Enable vector support if available and not explicitly disabled
13514 by user. E.g. with -m31 -march=z13 -mzarch */
13515 target_flags |= MASK_OPT_VX;
13517 if (TARGET_HARD_DFP && !TARGET_DFP)
13519 if (target_flags_explicit & MASK_HARD_DFP)
13521 if (!TARGET_CPU_DFP)
13522 error ("hardware decimal floating point instructions"
13523 " not available on %s", s390_arch_string);
13524 if (!TARGET_ZARCH)
13525 error ("hardware decimal floating point instructions"
13526 " not available in ESA/390 mode");
13528 else
13529 target_flags &= ~MASK_HARD_DFP;
13532 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13534 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13535 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13537 target_flags &= ~MASK_HARD_DFP;
13540 /* Set processor cost function. */
13541 switch (s390_tune)
13543 case PROCESSOR_2084_Z990:
13544 s390_cost = &z990_cost;
13545 break;
13546 case PROCESSOR_2094_Z9_109:
13547 case PROCESSOR_2094_Z9_EC:
13548 s390_cost = &z9_109_cost;
13549 break;
13550 case PROCESSOR_2097_Z10:
13551 s390_cost = &z10_cost;
13552 break;
13553 case PROCESSOR_2817_Z196:
13554 s390_cost = &z196_cost;
13555 break;
13556 case PROCESSOR_2827_ZEC12:
13557 case PROCESSOR_2964_Z13:
13558 s390_cost = &zEC12_cost;
13559 break;
13560 default:
13561 s390_cost = &z900_cost;
13564 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13565 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13566 "in combination");
13568 if (s390_stack_size)
13570 if (s390_stack_guard >= s390_stack_size)
13571 error ("stack size must be greater than the stack guard value");
13572 else if (s390_stack_size > 1 << 16)
13573 error ("stack size must not be greater than 64k");
13575 else if (s390_stack_guard)
13576 error ("-mstack-guard implies use of -mstack-size");
13578 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13579 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13580 target_flags |= MASK_LONG_DOUBLE_128;
13581 #endif
13583 if (s390_tune >= PROCESSOR_2097_Z10)
13585 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13586 global_options.x_param_values,
13587 global_options_set.x_param_values);
13588 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13589 global_options.x_param_values,
13590 global_options_set.x_param_values);
13591 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13592 global_options.x_param_values,
13593 global_options_set.x_param_values);
13594 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13595 global_options.x_param_values,
13596 global_options_set.x_param_values);
13599 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13600 global_options.x_param_values,
13601 global_options_set.x_param_values);
13602 /* values for loop prefetching */
13603 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13604 global_options.x_param_values,
13605 global_options_set.x_param_values);
13606 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13607 global_options.x_param_values,
13608 global_options_set.x_param_values);
13609 /* s390 has more than 2 cache levels and the sizes are much larger. Since
13610 we are always running virtualized, assume that we only get a small
13611 part of the caches above L1. */
13612 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13613 global_options.x_param_values,
13614 global_options_set.x_param_values);
13615 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13616 global_options.x_param_values,
13617 global_options_set.x_param_values);
13618 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13619 global_options.x_param_values,
13620 global_options_set.x_param_values);
13622 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13623 requires the arch flags to be evaluated already. Since prefetching
13624 is beneficial on s390, we enable it if available. */
13625 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13626 flag_prefetch_loop_arrays = 1;
13628 /* Use the alternative scheduling-pressure algorithm by default. */
13629 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13630 global_options.x_param_values,
13631 global_options_set.x_param_values);
13633 if (TARGET_TPF)
13635 /* Don't emit DWARF3/4 unless specifically selected. The TPF
13636 debuggers do not yet support DWARF 3/4. */
13637 if (!global_options_set.x_dwarf_strict)
13638 dwarf_strict = 1;
13639 if (!global_options_set.x_dwarf_version)
13640 dwarf_version = 2;
13643 /* Register a target-specific optimization-and-lowering pass
13644 to run immediately before prologue and epilogue generation.
13646 Registering the pass must be done at start up. It's
13647 convenient to do it here. */
13648 opt_pass *new_pass = new pass_s390_early_mach (g);
13649 struct register_pass_info insert_pass_s390_early_mach =
13651 new_pass, /* pass */
13652 "pro_and_epilogue", /* reference_pass_name */
13653 1, /* ref_pass_instance_number */
13654 PASS_POS_INSERT_BEFORE /* po_op */
13656 register_pass (&insert_pass_s390_early_mach);
13659 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13661 static bool
13662 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13663 unsigned int align ATTRIBUTE_UNUSED,
13664 enum by_pieces_operation op ATTRIBUTE_UNUSED,
13665 bool speed_p ATTRIBUTE_UNUSED)
13667 return (size == 1 || size == 2
13668 || size == 4 || (TARGET_ZARCH && size == 8));
13671 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13673 static void
13674 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13676 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
13677 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
13678 tree call_efpc = build_call_expr (efpc, 0);
13679 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13681 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
13682 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
13683 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
13684 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
13685 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
13686 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
13688 /* Generates the equivalent of feholdexcept (&fenv_var)
13690 fenv_var = __builtin_s390_efpc ();
13691 __builtin_s390_sfpc (fenv_var & mask) */
13692 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
13693 tree new_fpc =
13694 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13695 build_int_cst (unsigned_type_node,
13696 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
13697 FPC_EXCEPTION_MASK)));
13698 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
13699 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
13701 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
13703 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
13704 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
13705 build_int_cst (unsigned_type_node,
13706 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
13707 *clear = build_call_expr (sfpc, 1, new_fpc);
13709 /* Generates the equivalent of feupdateenv (fenv_var)
13711 old_fpc = __builtin_s390_efpc ();
13712 __builtin_s390_sfpc (fenv_var);
13713 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
13715 old_fpc = create_tmp_var_raw (unsigned_type_node);
13716 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
13717 old_fpc, call_efpc);
13719 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
13721 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
13722 build_int_cst (unsigned_type_node,
13723 FPC_FLAGS_MASK));
13724 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
13725 build_int_cst (unsigned_type_node,
13726 FPC_FLAGS_SHIFT));
13727 tree atomic_feraiseexcept
13728 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13729 raise_old_except = build_call_expr (atomic_feraiseexcept,
13730 1, raise_old_except);
13732 *update = build2 (COMPOUND_EXPR, void_type_node,
13733 build2 (COMPOUND_EXPR, void_type_node,
13734 store_old_fpc, set_new_fpc),
13735 raise_old_except);
13737 #undef FPC_EXCEPTION_MASK
13738 #undef FPC_FLAGS_MASK
13739 #undef FPC_DXC_MASK
13740 #undef FPC_EXCEPTION_MASK_SHIFT
13741 #undef FPC_FLAGS_SHIFT
13742 #undef FPC_DXC_SHIFT
13745 /* Return the vector mode to be used for inner mode MODE when doing
13746 vectorization. */
13747 static machine_mode
13748 s390_preferred_simd_mode (machine_mode mode)
13750 if (TARGET_VX)
13751 switch (mode)
13753 case DFmode:
13754 return V2DFmode;
13755 case DImode:
13756 return V2DImode;
13757 case SImode:
13758 return V4SImode;
13759 case HImode:
13760 return V8HImode;
13761 case QImode:
13762 return V16QImode;
13763 default:;
13765 return word_mode;
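/* Hedged example for the hook above: with vector support enabled, a loop
   over "int" (SImode) is vectorized in V4SImode, i.e. four 32-bit
   elements per 128-bit vector register; without TARGET_VX the hook falls
   back to word_mode and no vector mode is preferred.  */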
13768 /* Our hardware does not require vectors to be strictly aligned. */
13769 static bool
13770 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
13771 const_tree type ATTRIBUTE_UNUSED,
13772 int misalignment ATTRIBUTE_UNUSED,
13773 bool is_packed ATTRIBUTE_UNUSED)
13775 if (TARGET_VX)
13776 return true;
13778 return default_builtin_support_vector_misalignment (mode, type, misalignment,
13779 is_packed);
13782 /* The vector ABI requires vector types to be aligned on an 8 byte
13783 boundary (our stack alignment). However, we allow this to be
13784 overridden by the user, although this definitely breaks the ABI. */
13785 static HOST_WIDE_INT
13786 s390_vector_alignment (const_tree type)
13788 if (!TARGET_VX_ABI)
13789 return default_vector_alignment (type);
13791 if (TYPE_USER_ALIGN (type))
13792 return TYPE_ALIGN (type);
13794 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
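#if 0 /* Hedged illustration only, not part of the original source.  */
/* Under the vector ABI a 16-byte vector type is limited by the hook
   above to 8-byte (64-bit) alignment, matching the stack alignment,
   unless the user explicitly requests more.  */
typedef int v4si     __attribute__ ((vector_size (16)));               /* 8-byte aligned  */
typedef int v4si_a16 __attribute__ ((vector_size (16), aligned (16))); /* user override   */
#endif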
13797 /* Implement TARGET_ASM_FILE_END. */
13798 static void
13799 s390_asm_file_end (void)
13801 #ifdef HAVE_AS_GNU_ATTRIBUTE
13802 varpool_node *vnode;
13803 cgraph_node *cnode;
13805 FOR_EACH_VARIABLE (vnode)
13806 if (TREE_PUBLIC (vnode->decl))
13807 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
13809 FOR_EACH_FUNCTION (cnode)
13810 if (TREE_PUBLIC (cnode->decl))
13811 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
13814 if (s390_vector_abi != 0)
13815 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
13816 s390_vector_abi);
13817 #endif
13818 file_end_indicate_exec_stack ();
13821 /* Return true if TYPE is a vector bool type. */
13822 static inline bool
13823 s390_vector_bool_type_p (const_tree type)
13825 return TYPE_VECTOR_OPAQUE (type);
13828 /* Return the diagnostic message string if the binary operation OP is
13829 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13830 static const char*
13831 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
13833 bool bool1_p, bool2_p;
13834 bool plusminus_p;
13835 bool muldiv_p;
13836 bool compare_p;
13837 machine_mode mode1, mode2;
13839 if (!TARGET_ZVECTOR)
13840 return NULL;
13842 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
13843 return NULL;
13845 bool1_p = s390_vector_bool_type_p (type1);
13846 bool2_p = s390_vector_bool_type_p (type2);
13848 /* Mixing signed and unsigned types is forbidden for all
13849 operators. */
13850 if (!bool1_p && !bool2_p
13851 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
13852 return N_("types differ in signess");
13854 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
13855 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
13856 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
13857 || op == ROUND_DIV_EXPR);
13858 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
13859 || op == EQ_EXPR || op == NE_EXPR);
13861 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
13862 return N_("binary operator does not support two vector bool operands");
13864 if (bool1_p != bool2_p && (muldiv_p || compare_p))
13865 return N_("binary operator does not support vector bool operand");
13867 mode1 = TYPE_MODE (type1);
13868 mode2 = TYPE_MODE (type2);
13870 if (bool1_p != bool2_p && plusminus_p
13871 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
13872 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
13873 return N_("binary operator does not support mixing vector "
13874 "bool with floating point vector operands");
13876 return NULL;
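#if 0 /* Hedged illustration only (zvector syntax), not part of the
         original source; the variable names are hypothetical.  */
vector signed int   a;
vector unsigned int b;
vector bool int     m, n;

static int
invalid_binary_op_examples (void)
{
  a = a + b;   /* rejected: "types differ in signedness"              */
  m = m * n;   /* rejected: two vector bool operands to a mul/div op  */
  return 0;
}
#endif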
13879 /* Initialize GCC target structure. */
13881 #undef TARGET_ASM_ALIGNED_HI_OP
13882 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
13883 #undef TARGET_ASM_ALIGNED_DI_OP
13884 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
13885 #undef TARGET_ASM_INTEGER
13886 #define TARGET_ASM_INTEGER s390_assemble_integer
13888 #undef TARGET_ASM_OPEN_PAREN
13889 #define TARGET_ASM_OPEN_PAREN ""
13891 #undef TARGET_ASM_CLOSE_PAREN
13892 #define TARGET_ASM_CLOSE_PAREN ""
13894 #undef TARGET_OPTION_OVERRIDE
13895 #define TARGET_OPTION_OVERRIDE s390_option_override
13897 #undef TARGET_ENCODE_SECTION_INFO
13898 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
13900 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13901 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13903 #ifdef HAVE_AS_TLS
13904 #undef TARGET_HAVE_TLS
13905 #define TARGET_HAVE_TLS true
13906 #endif
13907 #undef TARGET_CANNOT_FORCE_CONST_MEM
13908 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
13910 #undef TARGET_DELEGITIMIZE_ADDRESS
13911 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
13913 #undef TARGET_LEGITIMIZE_ADDRESS
13914 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
13916 #undef TARGET_RETURN_IN_MEMORY
13917 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
13919 #undef TARGET_INIT_BUILTINS
13920 #define TARGET_INIT_BUILTINS s390_init_builtins
13921 #undef TARGET_EXPAND_BUILTIN
13922 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
13923 #undef TARGET_BUILTIN_DECL
13924 #define TARGET_BUILTIN_DECL s390_builtin_decl
13926 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
13927 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
13929 #undef TARGET_ASM_OUTPUT_MI_THUNK
13930 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
13931 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13932 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
13934 #undef TARGET_SCHED_ADJUST_PRIORITY
13935 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
13936 #undef TARGET_SCHED_ISSUE_RATE
13937 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
13938 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13939 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
13941 #undef TARGET_SCHED_VARIABLE_ISSUE
13942 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
13943 #undef TARGET_SCHED_REORDER
13944 #define TARGET_SCHED_REORDER s390_sched_reorder
13945 #undef TARGET_SCHED_INIT
13946 #define TARGET_SCHED_INIT s390_sched_init
13948 #undef TARGET_CANNOT_COPY_INSN_P
13949 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
13950 #undef TARGET_RTX_COSTS
13951 #define TARGET_RTX_COSTS s390_rtx_costs
13952 #undef TARGET_ADDRESS_COST
13953 #define TARGET_ADDRESS_COST s390_address_cost
13954 #undef TARGET_REGISTER_MOVE_COST
13955 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
13956 #undef TARGET_MEMORY_MOVE_COST
13957 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
13959 #undef TARGET_MACHINE_DEPENDENT_REORG
13960 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
13962 #undef TARGET_VALID_POINTER_MODE
13963 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
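13964 /* Variable argument handling hooks. */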
13965 #undef TARGET_BUILD_BUILTIN_VA_LIST
13966 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
13967 #undef TARGET_EXPAND_BUILTIN_VA_START
13968 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
13969 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13970 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
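13971 /* Argument passing and function value hooks. */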
13972 #undef TARGET_PROMOTE_FUNCTION_MODE
13973 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
13974 #undef TARGET_PASS_BY_REFERENCE
13975 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
13977 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13978 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
13979 #undef TARGET_FUNCTION_ARG
13980 #define TARGET_FUNCTION_ARG s390_function_arg
13981 #undef TARGET_FUNCTION_ARG_ADVANCE
13982 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
13983 #undef TARGET_FUNCTION_VALUE
13984 #define TARGET_FUNCTION_VALUE s390_function_value
13985 #undef TARGET_LIBCALL_VALUE
13986 #define TARGET_LIBCALL_VALUE s390_libcall_value
13987 #undef TARGET_STRICT_ARGUMENT_NAMING
13988 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
13990 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
13991 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
13993 #undef TARGET_FIXED_CONDITION_CODE_REGS
13994 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
13996 #undef TARGET_CC_MODES_COMPATIBLE
13997 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
13999 #undef TARGET_INVALID_WITHIN_DOLOOP
14000 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
14002 #ifdef HAVE_AS_TLS
14003 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
14004 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
14005 #endif
14007 #undef TARGET_DWARF_FRAME_REG_MODE
14008 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
14010 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
14011 #undef TARGET_MANGLE_TYPE
14012 #define TARGET_MANGLE_TYPE s390_mangle_type
14013 #endif
14015 #undef TARGET_SCALAR_MODE_SUPPORTED_P
14016 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
14018 #undef TARGET_VECTOR_MODE_SUPPORTED_P
14019 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
14021 #undef TARGET_PREFERRED_RELOAD_CLASS
14022 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
14024 #undef TARGET_SECONDARY_RELOAD
14025 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
14027 #undef TARGET_LIBGCC_CMP_RETURN_MODE
14028 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
14030 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
14031 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
14033 #undef TARGET_LEGITIMATE_ADDRESS_P
14034 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
14036 #undef TARGET_LEGITIMATE_CONSTANT_P
14037 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
14039 #undef TARGET_LRA_P
14040 #define TARGET_LRA_P s390_lra_p
14042 #undef TARGET_CAN_ELIMINATE
14043 #define TARGET_CAN_ELIMINATE s390_can_eliminate
14045 #undef TARGET_CONDITIONAL_REGISTER_USAGE
14046 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
14048 #undef TARGET_LOOP_UNROLL_ADJUST
14049 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
14051 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14052 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
14053 #undef TARGET_TRAMPOLINE_INIT
14054 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
14056 #undef TARGET_UNWIND_WORD_MODE
14057 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
14059 #undef TARGET_CANONICALIZE_COMPARISON
14060 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
14062 #undef TARGET_HARD_REGNO_SCRATCH_OK
14063 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
14065 #undef TARGET_ATTRIBUTE_TABLE
14066 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
14068 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
14069 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
14071 #undef TARGET_SET_UP_BY_PROLOGUE
14072 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
14074 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14075 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14076 s390_use_by_pieces_infrastructure_p
14078 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14079 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
14081 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
14082 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
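14083 /* Vectorizer hooks. */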
14084 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14085 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
14087 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14088 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
14090 #undef TARGET_VECTOR_ALIGNMENT
14091 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
14093 #undef TARGET_INVALID_BINARY_OP
14094 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
14096 #undef TARGET_ASM_FILE_END
14097 #define TARGET_ASM_FILE_END s390_asm_file_end
14099 struct gcc_target targetm = TARGET_INITIALIZER;
14101 #include "gt-s390.h"