S/390: Use macros from hwint.h where possible.
[official-gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "diagnostic.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "print-tree.h"
49 #include "stor-layout.h"
50 #include "varasm.h"
51 #include "calls.h"
52 #include "conditions.h"
53 #include "output.h"
54 #include "insn-attr.h"
55 #include "flags.h"
56 #include "except.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "reload.h"
62 #include "cfgrtl.h"
63 #include "cfganal.h"
64 #include "lcm.h"
65 #include "cfgbuild.h"
66 #include "cfgcleanup.h"
67 #include "debug.h"
68 #include "langhooks.h"
69 #include "internal-fn.h"
70 #include "gimple-fold.h"
71 #include "tree-eh.h"
72 #include "gimplify.h"
73 #include "params.h"
74 #include "opts.h"
75 #include "tree-pass.h"
76 #include "context.h"
77 #include "builtins.h"
78 #include "rtl-iter.h"
79 #include "intl.h"
80 #include "tm-constrs.h"
82 /* This file should be included last. */
83 #include "target-def.h"
85 /* Remember the last target of s390_set_current_function. */
86 static GTY(()) tree s390_previous_fndecl;
88 /* Define the specific costs for a given cpu. */
90 struct processor_costs
92 /* multiplication */
93 const int m; /* cost of an M instruction. */
94 const int mghi; /* cost of an MGHI instruction. */
95 const int mh; /* cost of an MH instruction. */
96 const int mhi; /* cost of an MHI instruction. */
97 const int ml; /* cost of an ML instruction. */
98 const int mr; /* cost of an MR instruction. */
99 const int ms; /* cost of an MS instruction. */
100 const int msg; /* cost of an MSG instruction. */
101 const int msgf; /* cost of an MSGF instruction. */
102 const int msgfr; /* cost of an MSGFR instruction. */
103 const int msgr; /* cost of an MSGR instruction. */
104 const int msr; /* cost of an MSR instruction. */
105 const int mult_df; /* cost of multiplication in DFmode. */
106 const int mxbr;
107 /* square root */
108 const int sqxbr; /* cost of square root in TFmode. */
109 const int sqdbr; /* cost of square root in DFmode. */
110 const int sqebr; /* cost of square root in SFmode. */
111 /* multiply and add */
112 const int madbr; /* cost of multiply and add in DFmode. */
113 const int maebr; /* cost of multiply and add in SFmode. */
114 /* division */
115 const int dxbr;
116 const int ddbr;
117 const int debr;
118 const int dlgr;
119 const int dlr;
120 const int dr;
121 const int dsgfr;
122 const int dsgr;
125 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
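/* Illustrative note, not part of the original file: via the macro above
   the rtx cost hooks read the per-CPU latencies, e.g. s390_cost->m for
   an M instruction or s390_cost->sqdbr for a DFmode square root, with
   s390_cost_pointer selected according to the tuning CPU.  */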
127 static const
128 struct processor_costs z900_cost =
130 COSTS_N_INSNS (5), /* M */
131 COSTS_N_INSNS (10), /* MGHI */
132 COSTS_N_INSNS (5), /* MH */
133 COSTS_N_INSNS (4), /* MHI */
134 COSTS_N_INSNS (5), /* ML */
135 COSTS_N_INSNS (5), /* MR */
136 COSTS_N_INSNS (4), /* MS */
137 COSTS_N_INSNS (15), /* MSG */
138 COSTS_N_INSNS (7), /* MSGF */
139 COSTS_N_INSNS (7), /* MSGFR */
140 COSTS_N_INSNS (10), /* MSGR */
141 COSTS_N_INSNS (4), /* MSR */
142 COSTS_N_INSNS (7), /* multiplication in DFmode */
143 COSTS_N_INSNS (13), /* MXBR */
144 COSTS_N_INSNS (136), /* SQXBR */
145 COSTS_N_INSNS (44), /* SQDBR */
146 COSTS_N_INSNS (35), /* SQEBR */
147 COSTS_N_INSNS (18), /* MADBR */
148 COSTS_N_INSNS (13), /* MAEBR */
149 COSTS_N_INSNS (134), /* DXBR */
150 COSTS_N_INSNS (30), /* DDBR */
151 COSTS_N_INSNS (27), /* DEBR */
152 COSTS_N_INSNS (220), /* DLGR */
153 COSTS_N_INSNS (34), /* DLR */
154 COSTS_N_INSNS (34), /* DR */
155 COSTS_N_INSNS (32), /* DSGFR */
156 COSTS_N_INSNS (32), /* DSGR */
159 static const
160 struct processor_costs z990_cost =
162 COSTS_N_INSNS (4), /* M */
163 COSTS_N_INSNS (2), /* MGHI */
164 COSTS_N_INSNS (2), /* MH */
165 COSTS_N_INSNS (2), /* MHI */
166 COSTS_N_INSNS (4), /* ML */
167 COSTS_N_INSNS (4), /* MR */
168 COSTS_N_INSNS (5), /* MS */
169 COSTS_N_INSNS (6), /* MSG */
170 COSTS_N_INSNS (4), /* MSGF */
171 COSTS_N_INSNS (4), /* MSGFR */
172 COSTS_N_INSNS (4), /* MSGR */
173 COSTS_N_INSNS (4), /* MSR */
174 COSTS_N_INSNS (1), /* multiplication in DFmode */
175 COSTS_N_INSNS (28), /* MXBR */
176 COSTS_N_INSNS (130), /* SQXBR */
177 COSTS_N_INSNS (66), /* SQDBR */
178 COSTS_N_INSNS (38), /* SQEBR */
179 COSTS_N_INSNS (1), /* MADBR */
180 COSTS_N_INSNS (1), /* MAEBR */
181 COSTS_N_INSNS (60), /* DXBR */
182 COSTS_N_INSNS (40), /* DDBR */
183 COSTS_N_INSNS (26), /* DEBR */
184 COSTS_N_INSNS (176), /* DLGR */
185 COSTS_N_INSNS (31), /* DLR */
186 COSTS_N_INSNS (31), /* DR */
187 COSTS_N_INSNS (31), /* DSGFR */
188 COSTS_N_INSNS (31), /* DSGR */
191 static const
192 struct processor_costs z9_109_cost =
194 COSTS_N_INSNS (4), /* M */
195 COSTS_N_INSNS (2), /* MGHI */
196 COSTS_N_INSNS (2), /* MH */
197 COSTS_N_INSNS (2), /* MHI */
198 COSTS_N_INSNS (4), /* ML */
199 COSTS_N_INSNS (4), /* MR */
200 COSTS_N_INSNS (5), /* MS */
201 COSTS_N_INSNS (6), /* MSG */
202 COSTS_N_INSNS (4), /* MSGF */
203 COSTS_N_INSNS (4), /* MSGFR */
204 COSTS_N_INSNS (4), /* MSGR */
205 COSTS_N_INSNS (4), /* MSR */
206 COSTS_N_INSNS (1), /* multiplication in DFmode */
207 COSTS_N_INSNS (28), /* MXBR */
208 COSTS_N_INSNS (130), /* SQXBR */
209 COSTS_N_INSNS (66), /* SQDBR */
210 COSTS_N_INSNS (38), /* SQEBR */
211 COSTS_N_INSNS (1), /* MADBR */
212 COSTS_N_INSNS (1), /* MAEBR */
213 COSTS_N_INSNS (60), /* DXBR */
214 COSTS_N_INSNS (40), /* DDBR */
215 COSTS_N_INSNS (26), /* DEBR */
216 COSTS_N_INSNS (30), /* DLGR */
217 COSTS_N_INSNS (23), /* DLR */
218 COSTS_N_INSNS (23), /* DR */
219 COSTS_N_INSNS (24), /* DSGFR */
220 COSTS_N_INSNS (24), /* DSGR */
223 static const
224 struct processor_costs z10_cost =
226 COSTS_N_INSNS (10), /* M */
227 COSTS_N_INSNS (10), /* MGHI */
228 COSTS_N_INSNS (10), /* MH */
229 COSTS_N_INSNS (10), /* MHI */
230 COSTS_N_INSNS (10), /* ML */
231 COSTS_N_INSNS (10), /* MR */
232 COSTS_N_INSNS (10), /* MS */
233 COSTS_N_INSNS (10), /* MSG */
234 COSTS_N_INSNS (10), /* MSGF */
235 COSTS_N_INSNS (10), /* MSGFR */
236 COSTS_N_INSNS (10), /* MSGR */
237 COSTS_N_INSNS (10), /* MSR */
238 COSTS_N_INSNS (1) , /* multiplication in DFmode */
239 COSTS_N_INSNS (50), /* MXBR */
240 COSTS_N_INSNS (120), /* SQXBR */
241 COSTS_N_INSNS (52), /* SQDBR */
242 COSTS_N_INSNS (38), /* SQEBR */
243 COSTS_N_INSNS (1), /* MADBR */
244 COSTS_N_INSNS (1), /* MAEBR */
245 COSTS_N_INSNS (111), /* DXBR */
246 COSTS_N_INSNS (39), /* DDBR */
247 COSTS_N_INSNS (32), /* DEBR */
248 COSTS_N_INSNS (160), /* DLGR */
249 COSTS_N_INSNS (71), /* DLR */
250 COSTS_N_INSNS (71), /* DR */
251 COSTS_N_INSNS (71), /* DSGFR */
252 COSTS_N_INSNS (71), /* DSGR */
255 static const
256 struct processor_costs z196_cost =
258 COSTS_N_INSNS (7), /* M */
259 COSTS_N_INSNS (5), /* MGHI */
260 COSTS_N_INSNS (5), /* MH */
261 COSTS_N_INSNS (5), /* MHI */
262 COSTS_N_INSNS (7), /* ML */
263 COSTS_N_INSNS (7), /* MR */
264 COSTS_N_INSNS (6), /* MS */
265 COSTS_N_INSNS (8), /* MSG */
266 COSTS_N_INSNS (6), /* MSGF */
267 COSTS_N_INSNS (6), /* MSGFR */
268 COSTS_N_INSNS (8), /* MSGR */
269 COSTS_N_INSNS (6), /* MSR */
270 COSTS_N_INSNS (1) , /* multiplication in DFmode */
271 COSTS_N_INSNS (40), /* MXBR B+40 */
272 COSTS_N_INSNS (100), /* SQXBR B+100 */
273 COSTS_N_INSNS (42), /* SQDBR B+42 */
274 COSTS_N_INSNS (28), /* SQEBR B+28 */
275 COSTS_N_INSNS (1), /* MADBR B */
276 COSTS_N_INSNS (1), /* MAEBR B */
277 COSTS_N_INSNS (101), /* DXBR B+101 */
278 COSTS_N_INSNS (29), /* DDBR */
279 COSTS_N_INSNS (22), /* DEBR */
280 COSTS_N_INSNS (160), /* DLGR cracked */
281 COSTS_N_INSNS (160), /* DLR cracked */
282 COSTS_N_INSNS (160), /* DR expanded */
283 COSTS_N_INSNS (160), /* DSGFR cracked */
284 COSTS_N_INSNS (160), /* DSGR cracked */
287 static const
288 struct processor_costs zEC12_cost =
290 COSTS_N_INSNS (7), /* M */
291 COSTS_N_INSNS (5), /* MGHI */
292 COSTS_N_INSNS (5), /* MH */
293 COSTS_N_INSNS (5), /* MHI */
294 COSTS_N_INSNS (7), /* ML */
295 COSTS_N_INSNS (7), /* MR */
296 COSTS_N_INSNS (6), /* MS */
297 COSTS_N_INSNS (8), /* MSG */
298 COSTS_N_INSNS (6), /* MSGF */
299 COSTS_N_INSNS (6), /* MSGFR */
300 COSTS_N_INSNS (8), /* MSGR */
301 COSTS_N_INSNS (6), /* MSR */
302 COSTS_N_INSNS (1) , /* multiplication in DFmode */
303 COSTS_N_INSNS (40), /* MXBR B+40 */
304 COSTS_N_INSNS (100), /* SQXBR B+100 */
305 COSTS_N_INSNS (42), /* SQDBR B+42 */
306 COSTS_N_INSNS (28), /* SQEBR B+28 */
307 COSTS_N_INSNS (1), /* MADBR B */
308 COSTS_N_INSNS (1), /* MAEBR B */
309 COSTS_N_INSNS (131), /* DXBR B+131 */
310 COSTS_N_INSNS (29), /* DDBR */
311 COSTS_N_INSNS (22), /* DEBR */
312 COSTS_N_INSNS (160), /* DLGR cracked */
313 COSTS_N_INSNS (160), /* DLR cracked */
314 COSTS_N_INSNS (160), /* DR expanded */
315 COSTS_N_INSNS (160), /* DSGFR cracked */
316 COSTS_N_INSNS (160), /* DSGR cracked */
319 static struct
321 const char *const name;
322 const enum processor_type processor;
323 const struct processor_costs *cost;
325 const processor_table[] =
327 { "g5", PROCESSOR_9672_G5, &z900_cost },
328 { "g6", PROCESSOR_9672_G6, &z900_cost },
329 { "z900", PROCESSOR_2064_Z900, &z900_cost },
330 { "z990", PROCESSOR_2084_Z990, &z990_cost },
331 { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
332 { "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
333 { "z10", PROCESSOR_2097_Z10, &z10_cost },
334 { "z196", PROCESSOR_2817_Z196, &z196_cost },
335 { "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
336 { "z13", PROCESSOR_2964_Z13, &zEC12_cost },
337 { "native", PROCESSOR_NATIVE, NULL }
340 extern int reload_completed;
342 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
343 static rtx_insn *last_scheduled_insn;
344 #define MAX_SCHED_UNITS 3
345 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
347 /* The maximum score added for an instruction whose unit hasn't been
348 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
349 give instruction mix scheduling more priority over instruction
350 grouping. */
351 #define MAX_SCHED_MIX_SCORE 8
353 /* The maximum distance up to which individual scores will be
354 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
 355        Increase this with the OOO window size of the machine. */
356 #define MAX_SCHED_MIX_DISTANCE 100
 358 /* Structure used to hold the components of an S/390 memory
359 address. A legitimate address on S/390 is of the general
360 form
361 base + index + displacement
362 where any of the components is optional.
364 base and index are registers of the class ADDR_REGS,
365 displacement is an unsigned 12-bit immediate constant. */
367 struct s390_address
369 rtx base;
370 rtx indx;
371 rtx disp;
372 bool pointer;
373 bool literal_pool;
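/* Illustrative sketch, not part of the original file: the 12-bit unsigned
   displacement mentioned above amounts to a range check like the
   following (the helper name below is made up for illustration only).  */

static inline bool
s390_example_short_displacement_ok (HOST_WIDE_INT d)
{
  /* A short displacement must fit into 12 bits unsigned: 0 .. 4095.  */
  return d >= 0 && d <= 4095;
}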
376 /* The following structure is embedded in the machine
377 specific part of struct function. */
379 struct GTY (()) s390_frame_layout
381 /* Offset within stack frame. */
382 HOST_WIDE_INT gprs_offset;
383 HOST_WIDE_INT f0_offset;
384 HOST_WIDE_INT f4_offset;
385 HOST_WIDE_INT f8_offset;
386 HOST_WIDE_INT backchain_offset;
 388   /* Number of the first and last gpr for which slots in the register
 389      save area are reserved. */
390 int first_save_gpr_slot;
391 int last_save_gpr_slot;
393 /* Location (FP register number) where GPRs (r0-r15) should
394 be saved to.
395 0 - does not need to be saved at all
396 -1 - stack slot */
397 #define SAVE_SLOT_NONE 0
398 #define SAVE_SLOT_STACK -1
399 signed char gpr_save_slots[16];
401 /* Number of first and last gpr to be saved, restored. */
402 int first_save_gpr;
403 int first_restore_gpr;
404 int last_save_gpr;
405 int last_restore_gpr;
407 /* Bits standing for floating point registers. Set, if the
408 respective register has to be saved. Starting with reg 16 (f0)
409 at the rightmost bit.
410 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
411 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
412 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
413 unsigned int fpr_bitmap;
415 /* Number of floating point registers f8-f15 which must be saved. */
416 int high_fprs;
418 /* Set if return address needs to be saved.
419 This flag is set by s390_return_addr_rtx if it could not use
420 the initial value of r14 and therefore depends on r14 saved
421 to the stack. */
422 bool save_return_addr_p;
424 /* Size of stack frame. */
425 HOST_WIDE_INT frame_size;
428 /* Define the structure for the machine field in struct function. */
430 struct GTY(()) machine_function
432 struct s390_frame_layout frame_layout;
434 /* Literal pool base register. */
435 rtx base_reg;
437 /* True if we may need to perform branch splitting. */
438 bool split_branches_pending_p;
440 bool has_landing_pad_p;
442 /* True if the current function may contain a tbegin clobbering
443 FPRs. */
444 bool tbegin_p;
446 /* For -fsplit-stack support: A stack local which holds a pointer to
447 the stack arguments for a function with a variable number of
448 arguments. This is set at the start of the function and is used
449 to initialize the overflow_arg_area field of the va_list
450 structure. */
451 rtx split_stack_varargs_pointer;
454 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
456 #define cfun_frame_layout (cfun->machine->frame_layout)
457 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
458 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
459 ? cfun_frame_layout.fpr_bitmap & 0x0f \
460 : cfun_frame_layout.fpr_bitmap & 0x03))
461 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
462 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
463 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
464 (1 << (REGNO - FPR0_REGNUM)))
465 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
466 (1 << (REGNO - FPR0_REGNUM))))
467 #define cfun_gpr_save_slot(REGNO) \
468 cfun->machine->frame_layout.gpr_save_slots[REGNO]
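/* Illustrative example, not part of the original file: per the fpr_bitmap
   table in struct s390_frame_layout, f8 lives in hard register 24 and thus
   in bit 8 of the bitmap, so

     cfun_set_fpr_save (FPR0_REGNUM + 8);

   marks f8 as needing a save slot and cfun_fpr_save_p (FPR0_REGNUM + 8)
   later tests exactly that bit.  */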
470 /* Number of GPRs and FPRs used for argument passing. */
471 #define GP_ARG_NUM_REG 5
472 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
473 #define VEC_ARG_NUM_REG 8
475 /* A couple of shortcuts. */
476 #define CONST_OK_FOR_J(x) \
477 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
478 #define CONST_OK_FOR_K(x) \
479 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
480 #define CONST_OK_FOR_Os(x) \
481 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
482 #define CONST_OK_FOR_Op(x) \
483 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
484 #define CONST_OK_FOR_On(x) \
485 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
487 #define REGNO_PAIR_OK(REGNO, MODE) \
488 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
490 /* That's the read ahead of the dynamic branch prediction unit in
491 bytes on a z10 (or higher) CPU. */
492 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
495 /* Indicate which ABI has been used for passing vector args.
496 0 - no vector type arguments have been passed where the ABI is relevant
497 1 - the old ABI has been used
498 2 - a vector type argument has been passed either in a vector register
499 or on the stack by value */
500 static int s390_vector_abi = 0;
502 /* Set the vector ABI marker if TYPE is subject to the vector ABI
503 switch. The vector ABI affects only vector data types. There are
504 two aspects of the vector ABI relevant here:
506 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
507 ABI and natural alignment with the old.
 509    2. vectors <= 16 bytes are passed in VRs or by value on the stack
510 with the new ABI but by reference on the stack with the old.
512 If ARG_P is true TYPE is used for a function argument or return
513 value. The ABI marker then is set for all vector data types. If
514 ARG_P is false only type 1 vectors are being checked. */
516 static void
517 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
519 static hash_set<const_tree> visited_types_hash;
521 if (s390_vector_abi)
522 return;
524 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
525 return;
527 if (visited_types_hash.contains (type))
528 return;
530 visited_types_hash.add (type);
532 if (VECTOR_TYPE_P (type))
534 int type_size = int_size_in_bytes (type);
536 /* Outside arguments only the alignment is changing and this
537 only happens for vector types >= 16 bytes. */
538 if (!arg_p && type_size < 16)
539 return;
541 /* In arguments vector types > 16 are passed as before (GCC
542 never enforced the bigger alignment for arguments which was
543 required by the old vector ABI). However, it might still be
544 ABI relevant due to the changed alignment if it is a struct
545 member. */
546 if (arg_p && type_size > 16 && !in_struct_p)
547 return;
549 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
551 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
553 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
554 natural alignment there will never be ABI dependent padding
555 in an array type. That's why we do not set in_struct_p to
556 true here. */
557 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
559 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
561 tree arg_chain;
563 /* Check the return type. */
564 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
566 for (arg_chain = TYPE_ARG_TYPES (type);
567 arg_chain;
568 arg_chain = TREE_CHAIN (arg_chain))
569 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
571 else if (RECORD_OR_UNION_TYPE_P (type))
573 tree field;
575 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
577 if (TREE_CODE (field) != FIELD_DECL)
578 continue;
580 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
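/* Illustrative example, not part of the original file: given

     typedef int v4si __attribute__ ((vector_size (16)));
     typedef int v8si __attribute__ ((vector_size (32)));

   a v4si function argument sets the ABI marker (rule 2 above), a plain
   v8si argument is ignored (larger than 16 bytes and not a struct
   member), while a struct argument containing a v8si member sets the
   marker because of the alignment difference (rule 1 above).  */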
586 /* System z builtins. */
588 #include "s390-builtins.h"
590 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
592 #undef B_DEF
593 #undef OB_DEF
594 #undef OB_DEF_VAR
595 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
596 #define OB_DEF(...)
597 #define OB_DEF_VAR(...)
598 #include "s390-builtins.def"
602 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(...)
610 #include "s390-builtins.def"
614 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
616 #undef B_DEF
617 #undef OB_DEF
618 #undef OB_DEF_VAR
619 #define B_DEF(...)
620 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
621 #define OB_DEF_VAR(...)
622 #include "s390-builtins.def"
626 const unsigned int
627 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
629 #undef B_DEF
630 #undef OB_DEF
631 #undef OB_DEF_VAR
632 #define B_DEF(...)
633 #define OB_DEF(...)
634 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
635 #include "s390-builtins.def"
639 tree s390_builtin_types[BT_MAX];
640 tree s390_builtin_fn_types[BT_FN_MAX];
641 tree s390_builtin_decls[S390_BUILTIN_MAX +
642 S390_OVERLOADED_BUILTIN_MAX +
643 S390_OVERLOADED_BUILTIN_VAR_MAX];
645 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
646 #undef B_DEF
647 #undef OB_DEF
648 #undef OB_DEF_VAR
649 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
650 #define OB_DEF(...)
651 #define OB_DEF_VAR(...)
653 #include "s390-builtins.def"
654 CODE_FOR_nothing
657 static void
658 s390_init_builtins (void)
660 /* These definitions are being used in s390-builtins.def. */
661 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
662 NULL, NULL);
663 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
664 tree c_uint64_type_node;
 666   /* The uint64_type_node from tree.c is not compatible with the C99
667 uint64_t data type. What we want is c_uint64_type_node from
668 c-common.c. But since backend code is not supposed to interface
669 with the frontend we recreate it here. */
670 if (TARGET_64BIT)
671 c_uint64_type_node = long_unsigned_type_node;
672 else
673 c_uint64_type_node = long_long_unsigned_type_node;
675 #undef DEF_TYPE
676 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
677 if (s390_builtin_types[INDEX] == NULL) \
678 s390_builtin_types[INDEX] = (!CONST_P) ? \
679 (NODE) : build_type_variant ((NODE), 1, 0);
681 #undef DEF_POINTER_TYPE
682 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
683 if (s390_builtin_types[INDEX] == NULL) \
684 s390_builtin_types[INDEX] = \
685 build_pointer_type (s390_builtin_types[INDEX_BASE]);
687 #undef DEF_DISTINCT_TYPE
688 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
689 if (s390_builtin_types[INDEX] == NULL) \
690 s390_builtin_types[INDEX] = \
691 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
693 #undef DEF_VECTOR_TYPE
694 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
695 if (s390_builtin_types[INDEX] == NULL) \
696 s390_builtin_types[INDEX] = \
697 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
699 #undef DEF_OPAQUE_VECTOR_TYPE
700 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = \
703 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
705 #undef DEF_FN_TYPE
706 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
707 if (s390_builtin_fn_types[INDEX] == NULL) \
708 s390_builtin_fn_types[INDEX] = \
709 build_function_type_list (args, NULL_TREE);
710 #undef DEF_OV_TYPE
711 #define DEF_OV_TYPE(...)
712 #include "s390-builtin-types.def"
714 #undef B_DEF
715 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
716 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
717 s390_builtin_decls[S390_BUILTIN_##NAME] = \
718 add_builtin_function ("__builtin_" #NAME, \
719 s390_builtin_fn_types[FNTYPE], \
720 S390_BUILTIN_##NAME, \
721 BUILT_IN_MD, \
722 NULL, \
723 ATTRS);
724 #undef OB_DEF
725 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
726 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
727 == NULL) \
728 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
729 add_builtin_function ("__builtin_" #NAME, \
730 s390_builtin_fn_types[FNTYPE], \
731 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
732 BUILT_IN_MD, \
733 NULL, \
735 #undef OB_DEF_VAR
736 #define OB_DEF_VAR(...)
737 #include "s390-builtins.def"
741 /* Return true if ARG is appropriate as argument number ARGNUM of
 742    builtin DECL.  The operand flags from s390-builtins.def have to be
743 passed as OP_FLAGS. */
744 bool
745 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
747 if (O_UIMM_P (op_flags))
749 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
750 int bitwidth = bitwidths[op_flags - O_U1];
752 if (!tree_fits_uhwi_p (arg)
753 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
755 error("constant argument %d for builtin %qF is out of range (0.."
756 HOST_WIDE_INT_PRINT_UNSIGNED ")",
757 argnum, decl,
758 (HOST_WIDE_INT_1U << bitwidth) - 1);
759 return false;
763 if (O_SIMM_P (op_flags))
765 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
766 int bitwidth = bitwidths[op_flags - O_S2];
768 if (!tree_fits_shwi_p (arg)
769 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
770 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
772 error("constant argument %d for builtin %qF is out of range ("
773 HOST_WIDE_INT_PRINT_DEC ".."
774 HOST_WIDE_INT_PRINT_DEC ")",
775 argnum, decl,
776 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
777 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
778 return false;
781 return true;
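/* Illustrative sketch, not part of the original file: the unsigned
   immediate check above boils down to a simple range test per bit width,
   e.g. a 12-bit operand accepts 0 .. 4095.  The helper name is made up
   for illustration only.  */

static inline bool
s390_example_uimm_fits_p (unsigned HOST_WIDE_INT val, int bitwidth)
{
  /* Accept 0 .. 2^bitwidth - 1, mirroring s390_const_operand_ok.  */
  return val <= (HOST_WIDE_INT_1U << bitwidth) - 1;
}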
784 /* Expand an expression EXP that calls a built-in function,
785 with result going to TARGET if that's convenient
786 (and in mode MODE if that's convenient).
787 SUBTARGET may be used as the target for computing one of EXP's operands.
788 IGNORE is nonzero if the value is to be ignored. */
790 static rtx
791 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
792 machine_mode mode ATTRIBUTE_UNUSED,
793 int ignore ATTRIBUTE_UNUSED)
795 #define MAX_ARGS 6
797 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
799 enum insn_code icode;
800 rtx op[MAX_ARGS], pat;
801 int arity;
802 bool nonvoid;
803 tree arg;
804 call_expr_arg_iterator iter;
805 unsigned int all_op_flags = opflags_for_builtin (fcode);
806 machine_mode last_vec_mode = VOIDmode;
808 if (TARGET_DEBUG_ARG)
810 fprintf (stderr,
811 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
812 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
813 bflags_for_builtin (fcode));
816 if (S390_USE_TARGET_ATTRIBUTE)
818 unsigned int bflags;
820 bflags = bflags_for_builtin (fcode);
821 if ((bflags & B_HTM) && !TARGET_HTM)
823 error ("builtin %qF is not supported without -mhtm "
824 "(default with -march=zEC12 and higher).", fndecl);
825 return const0_rtx;
827 if ((bflags & B_VX) && !TARGET_VX)
829 error ("builtin %qF is not supported without -mvx "
830 "(default with -march=z13 and higher).", fndecl);
831 return const0_rtx;
834 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
835 && fcode < S390_ALL_BUILTIN_MAX)
837 gcc_unreachable ();
839 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
841 icode = code_for_builtin[fcode];
842 /* Set a flag in the machine specific cfun part in order to support
843 saving/restoring of FPRs. */
844 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
845 cfun->machine->tbegin_p = true;
847 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
849 error ("unresolved overloaded builtin");
850 return const0_rtx;
852 else
853 internal_error ("bad builtin fcode");
855 if (icode == 0)
856 internal_error ("bad builtin icode");
858 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
860 if (nonvoid)
862 machine_mode tmode = insn_data[icode].operand[0].mode;
863 if (!target
864 || GET_MODE (target) != tmode
865 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
866 target = gen_reg_rtx (tmode);
868 /* There are builtins (e.g. vec_promote) with no vector
869 arguments but an element selector. So we have to also look
870 at the vector return type when emitting the modulo
871 operation. */
872 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
873 last_vec_mode = insn_data[icode].operand[0].mode;
876 arity = 0;
877 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
879 rtx tmp_rtx;
880 const struct insn_operand_data *insn_op;
881 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
883 all_op_flags = all_op_flags >> O_SHIFT;
885 if (arg == error_mark_node)
886 return NULL_RTX;
887 if (arity >= MAX_ARGS)
888 return NULL_RTX;
890 if (O_IMM_P (op_flags)
891 && TREE_CODE (arg) != INTEGER_CST)
893 error ("constant value required for builtin %qF argument %d",
894 fndecl, arity + 1);
895 return const0_rtx;
898 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
899 return const0_rtx;
901 insn_op = &insn_data[icode].operand[arity + nonvoid];
902 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
904 /* expand_expr truncates constants to the target mode only if it
905 is "convenient". However, our checks below rely on this
906 being done. */
907 if (CONST_INT_P (op[arity])
908 && SCALAR_INT_MODE_P (insn_op->mode)
909 && GET_MODE (op[arity]) != insn_op->mode)
910 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
911 insn_op->mode));
913 /* Wrap the expanded RTX for pointer types into a MEM expr with
914 the proper mode. This allows us to use e.g. (match_operand
915 "memory_operand"..) in the insn patterns instead of (mem
916 (match_operand "address_operand)). This is helpful for
917 patterns not just accepting MEMs. */
918 if (POINTER_TYPE_P (TREE_TYPE (arg))
919 && insn_op->predicate != address_operand)
920 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
 922       /* Expand the modulo operation required on element selectors. */
923 if (op_flags == O_ELEM)
925 gcc_assert (last_vec_mode != VOIDmode);
926 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
927 op[arity],
928 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
929 NULL_RTX, 1, OPTAB_DIRECT);
932 /* Record the vector mode used for an element selector. This assumes:
933 1. There is no builtin with two different vector modes and an element selector
934 2. The element selector comes after the vector type it is referring to.
 935         This is currently true for all the builtins but FIXME we
 936         should better check for that. */
937 if (VECTOR_MODE_P (insn_op->mode))
938 last_vec_mode = insn_op->mode;
940 if (insn_op->predicate (op[arity], insn_op->mode))
942 arity++;
943 continue;
946 if (MEM_P (op[arity])
947 && insn_op->predicate == memory_operand
948 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
949 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
951 op[arity] = replace_equiv_address (op[arity],
952 copy_to_mode_reg (Pmode,
953 XEXP (op[arity], 0)));
955 /* Some of the builtins require different modes/types than the
956 pattern in order to implement a specific API. Instead of
957 adding many expanders which do the mode change we do it here.
 958         E.g. s390_vec_add_u128, which is required to have vector unsigned
 959         char arguments, is mapped to addti3. */
960 else if (insn_op->mode != VOIDmode
961 && GET_MODE (op[arity]) != VOIDmode
962 && GET_MODE (op[arity]) != insn_op->mode
963 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
964 GET_MODE (op[arity]), 0))
965 != NULL_RTX))
967 op[arity] = tmp_rtx;
969 else if (GET_MODE (op[arity]) == insn_op->mode
970 || GET_MODE (op[arity]) == VOIDmode
971 || (insn_op->predicate == address_operand
972 && GET_MODE (op[arity]) == Pmode))
974 /* An address_operand usually has VOIDmode in the expander
975 so we cannot use this. */
976 machine_mode target_mode =
977 (insn_op->predicate == address_operand
978 ? Pmode : insn_op->mode);
979 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
982 if (!insn_op->predicate (op[arity], insn_op->mode))
984 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
985 return const0_rtx;
987 arity++;
990 switch (arity)
992 case 0:
993 pat = GEN_FCN (icode) (target);
994 break;
995 case 1:
996 if (nonvoid)
997 pat = GEN_FCN (icode) (target, op[0]);
998 else
999 pat = GEN_FCN (icode) (op[0]);
1000 break;
1001 case 2:
1002 if (nonvoid)
1003 pat = GEN_FCN (icode) (target, op[0], op[1]);
1004 else
1005 pat = GEN_FCN (icode) (op[0], op[1]);
1006 break;
1007 case 3:
1008 if (nonvoid)
1009 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1010 else
1011 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1012 break;
1013 case 4:
1014 if (nonvoid)
1015 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1016 else
1017 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1018 break;
1019 case 5:
1020 if (nonvoid)
1021 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1022 else
1023 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1024 break;
1025 case 6:
1026 if (nonvoid)
1027 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1028 else
1029 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1030 break;
1031 default:
1032 gcc_unreachable ();
1034 if (!pat)
1035 return NULL_RTX;
1036 emit_insn (pat);
1038 if (nonvoid)
1039 return target;
1040 else
1041 return const0_rtx;
1045 static const int s390_hotpatch_hw_max = 1000000;
1046 static int s390_hotpatch_hw_before_label = 0;
1047 static int s390_hotpatch_hw_after_label = 0;
1049 /* Check whether the hotpatch attribute is applied to a function and, if it has
1050 an argument, the argument is valid. */
1052 static tree
1053 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1054 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1056 tree expr;
1057 tree expr2;
1058 int err;
1060 if (TREE_CODE (*node) != FUNCTION_DECL)
1062 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1063 name);
1064 *no_add_attrs = true;
1066 if (args != NULL && TREE_CHAIN (args) != NULL)
1068 expr = TREE_VALUE (args);
1069 expr2 = TREE_VALUE (TREE_CHAIN (args));
1071 if (args == NULL || TREE_CHAIN (args) == NULL)
1072 err = 1;
1073 else if (TREE_CODE (expr) != INTEGER_CST
1074 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1075 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1076 err = 1;
1077 else if (TREE_CODE (expr2) != INTEGER_CST
1078 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1079 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1080 err = 1;
1081 else
1082 err = 0;
1083 if (err)
1085 error ("requested %qE attribute is not a comma separated pair of"
1086 " non-negative integer constants or too large (max. %d)", name,
1087 s390_hotpatch_hw_max);
1088 *no_add_attrs = true;
1091 return NULL_TREE;
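/* Illustrative example, not part of the original file: the attribute
   checked above is written as e.g.

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requesting one halfword before and two halfwords after the function
   label; both values must be non-negative and no larger than
   s390_hotpatch_hw_max.  */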
1094 /* Expand the s390_vector_bool type attribute. */
1096 static tree
1097 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1098 tree args ATTRIBUTE_UNUSED,
1099 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1101 tree type = *node, result = NULL_TREE;
1102 machine_mode mode;
1104 while (POINTER_TYPE_P (type)
1105 || TREE_CODE (type) == FUNCTION_TYPE
1106 || TREE_CODE (type) == METHOD_TYPE
1107 || TREE_CODE (type) == ARRAY_TYPE)
1108 type = TREE_TYPE (type);
1110 mode = TYPE_MODE (type);
1111 switch (mode)
1113 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1114 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1115 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1116 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
1117 default: break;
1120 *no_add_attrs = true; /* No need to hang on to the attribute. */
1122 if (result)
1123 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1125 return NULL_TREE;
1128 static const struct attribute_spec s390_attribute_table[] = {
1129 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1130 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1131 /* End element. */
1132 { NULL, 0, 0, false, false, false, NULL, false }
1135 /* Return the alignment for LABEL. We default to the -falign-labels
1136 value except for the literal pool base label. */
 1137 int
 1138 s390_label_align (rtx_insn *label)
1140 rtx_insn *prev_insn = prev_active_insn (label);
1141 rtx set, src;
1143 if (prev_insn == NULL_RTX)
1144 goto old;
1146 set = single_set (prev_insn);
1148 if (set == NULL_RTX)
1149 goto old;
1151 src = SET_SRC (set);
1153 /* Don't align literal pool base labels. */
1154 if (GET_CODE (src) == UNSPEC
1155 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1156 return 0;
1158 old:
1159 return align_labels_log;
1162 static machine_mode
1163 s390_libgcc_cmp_return_mode (void)
1165 return TARGET_64BIT ? DImode : SImode;
1168 static machine_mode
1169 s390_libgcc_shift_count_mode (void)
1171 return TARGET_64BIT ? DImode : SImode;
1174 static machine_mode
1175 s390_unwind_word_mode (void)
1177 return TARGET_64BIT ? DImode : SImode;
1180 /* Return true if the back end supports mode MODE. */
1181 static bool
1182 s390_scalar_mode_supported_p (machine_mode mode)
1184 /* In contrast to the default implementation reject TImode constants on 31bit
1185 TARGET_ZARCH for ABI compliance. */
1186 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1187 return false;
1189 if (DECIMAL_FLOAT_MODE_P (mode))
1190 return default_decimal_float_supported_p ();
1192 return default_scalar_mode_supported_p (mode);
1195 /* Return true if the back end supports vector mode MODE. */
1196 static bool
1197 s390_vector_mode_supported_p (machine_mode mode)
1199 machine_mode inner;
1201 if (!VECTOR_MODE_P (mode)
1202 || !TARGET_VX
1203 || GET_MODE_SIZE (mode) > 16)
1204 return false;
1206 inner = GET_MODE_INNER (mode);
1208 switch (inner)
1210 case QImode:
1211 case HImode:
1212 case SImode:
1213 case DImode:
1214 case TImode:
1215 case SFmode:
1216 case DFmode:
1217 case TFmode:
1218 return true;
1219 default:
1220 return false;
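/* Illustrative example, not part of the original file: with the checks
   above, 16-byte modes with supported element types such as V4SImode or
   V2DFmode are accepted when TARGET_VX is enabled, while wider vector
   modes are rejected by the GET_MODE_SIZE (mode) > 16 test.  */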
1224 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1226 void
1227 s390_set_has_landing_pad_p (bool value)
1229 cfun->machine->has_landing_pad_p = value;
1232 /* If two condition code modes are compatible, return a condition code
1233 mode which is compatible with both. Otherwise, return
1234 VOIDmode. */
1236 static machine_mode
1237 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1239 if (m1 == m2)
1240 return m1;
1242 switch (m1)
1244 case CCZmode:
1245 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1246 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1247 return m2;
1248 return VOIDmode;
1250 case CCSmode:
1251 case CCUmode:
1252 case CCTmode:
1253 case CCSRmode:
1254 case CCURmode:
1255 case CCZ1mode:
1256 if (m2 == CCZmode)
1257 return m1;
1259 return VOIDmode;
1261 default:
1262 return VOIDmode;
1264 return VOIDmode;
1267 /* Return true if SET either doesn't set the CC register, or else
1268 the source and destination have matching CC modes and that
1269 CC mode is at least as constrained as REQ_MODE. */
1271 static bool
1272 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1274 machine_mode set_mode;
1276 gcc_assert (GET_CODE (set) == SET);
1278 /* These modes are supposed to be used only in CC consumer
1279 patterns. */
1280 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1281 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1283 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1284 return 1;
1286 set_mode = GET_MODE (SET_DEST (set));
1287 switch (set_mode)
1289 case CCSmode:
1290 case CCSRmode:
1291 case CCUmode:
1292 case CCURmode:
1293 case CCLmode:
1294 case CCL1mode:
1295 case CCL2mode:
1296 case CCL3mode:
1297 case CCT1mode:
1298 case CCT2mode:
1299 case CCT3mode:
1300 case CCVEQmode:
1301 case CCVIHmode:
1302 case CCVIHUmode:
1303 case CCVFHmode:
1304 case CCVFHEmode:
1305 if (req_mode != set_mode)
1306 return 0;
1307 break;
1309 case CCZmode:
1310 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1311 && req_mode != CCSRmode && req_mode != CCURmode)
1312 return 0;
1313 break;
1315 case CCAPmode:
1316 case CCANmode:
1317 if (req_mode != CCAmode)
1318 return 0;
1319 break;
1321 default:
1322 gcc_unreachable ();
1325 return (GET_MODE (SET_SRC (set)) == set_mode);
1328 /* Return true if every SET in INSN that sets the CC register
1329 has source and destination with matching CC modes and that
1330 CC mode is at least as constrained as REQ_MODE.
1331 If REQ_MODE is VOIDmode, always return false. */
1333 bool
1334 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1336 int i;
1338 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1339 if (req_mode == VOIDmode)
1340 return false;
1342 if (GET_CODE (PATTERN (insn)) == SET)
1343 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1345 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1346 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1348 rtx set = XVECEXP (PATTERN (insn), 0, i);
1349 if (GET_CODE (set) == SET)
1350 if (!s390_match_ccmode_set (set, req_mode))
1351 return false;
1354 return true;
1357 /* If a test-under-mask instruction can be used to implement
1358 (compare (and ... OP1) OP2), return the CC mode required
1359 to do that. Otherwise, return VOIDmode.
1360 MIXED is true if the instruction can distinguish between
1361 CC1 and CC2 for mixed selected bits (TMxx), it is false
1362 if the instruction cannot (TM). */
1364 machine_mode
1365 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1367 int bit0, bit1;
1369 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1370 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1371 return VOIDmode;
1373 /* Selected bits all zero: CC0.
1374 e.g.: int a; if ((a & (16 + 128)) == 0) */
1375 if (INTVAL (op2) == 0)
1376 return CCTmode;
1378 /* Selected bits all one: CC3.
1379 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1380 if (INTVAL (op2) == INTVAL (op1))
1381 return CCT3mode;
1383 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1384 int a;
1385 if ((a & (16 + 128)) == 16) -> CCT1
1386 if ((a & (16 + 128)) == 128) -> CCT2 */
1387 if (mixed)
1389 bit1 = exact_log2 (INTVAL (op2));
1390 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1391 if (bit0 != -1 && bit1 != -1)
1392 return bit0 > bit1 ? CCT1mode : CCT2mode;
1395 return VOIDmode;
1398 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1399 OP0 and OP1 of a COMPARE, return the mode to be used for the
1400 comparison. */
1402 machine_mode
1403 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1405 if (TARGET_VX
1406 && register_operand (op0, DFmode)
1407 && register_operand (op1, DFmode))
1409 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1410 s390_emit_compare or s390_canonicalize_comparison will take
1411 care of it. */
1412 switch (code)
1414 case EQ:
1415 case NE:
1416 return CCVEQmode;
1417 case GT:
1418 case UNLE:
1419 return CCVFHmode;
1420 case GE:
1421 case UNLT:
1422 return CCVFHEmode;
1423 default:
1428 switch (code)
1430 case EQ:
1431 case NE:
1432 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1433 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1434 return CCAPmode;
1435 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1436 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1437 return CCAPmode;
1438 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1439 || GET_CODE (op1) == NEG)
1440 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1441 return CCLmode;
1443 if (GET_CODE (op0) == AND)
1445 /* Check whether we can potentially do it via TM. */
1446 machine_mode ccmode;
1447 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1448 if (ccmode != VOIDmode)
1450 /* Relax CCTmode to CCZmode to allow fall-back to AND
1451 if that turns out to be beneficial. */
1452 return ccmode == CCTmode ? CCZmode : ccmode;
1456 if (register_operand (op0, HImode)
1457 && GET_CODE (op1) == CONST_INT
1458 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1459 return CCT3mode;
1460 if (register_operand (op0, QImode)
1461 && GET_CODE (op1) == CONST_INT
1462 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1463 return CCT3mode;
1465 return CCZmode;
1467 case LE:
1468 case LT:
1469 case GE:
1470 case GT:
 1471       /* The only overflow condition of NEG and ABS happens when
 1472          INT_MIN is used as parameter; the result then stays negative.  So
 1473          we have an overflow from a positive value to a negative.
 1474          Using CCAP mode the resulting cc can be used for comparisons. */
1475 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1476 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1477 return CCAPmode;
1479 /* If constants are involved in an add instruction it is possible to use
1480 the resulting cc for comparisons with zero. Knowing the sign of the
1481 constant the overflow behavior gets predictable. e.g.:
1482 int a, b; if ((b = a + c) > 0)
1483 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1484 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1485 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1486 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1487 /* Avoid INT32_MIN on 32 bit. */
1488 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1490 if (INTVAL (XEXP((op0), 1)) < 0)
1491 return CCANmode;
1492 else
1493 return CCAPmode;
1495 /* Fall through. */
1496 case UNORDERED:
1497 case ORDERED:
1498 case UNEQ:
1499 case UNLE:
1500 case UNLT:
1501 case UNGE:
1502 case UNGT:
1503 case LTGT:
1504 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1505 && GET_CODE (op1) != CONST_INT)
1506 return CCSRmode;
1507 return CCSmode;
1509 case LTU:
1510 case GEU:
1511 if (GET_CODE (op0) == PLUS
1512 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1513 return CCL1mode;
1515 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1516 && GET_CODE (op1) != CONST_INT)
1517 return CCURmode;
1518 return CCUmode;
1520 case LEU:
1521 case GTU:
1522 if (GET_CODE (op0) == MINUS
1523 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1524 return CCL2mode;
1526 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1527 && GET_CODE (op1) != CONST_INT)
1528 return CCURmode;
1529 return CCUmode;
1531 default:
1532 gcc_unreachable ();
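/* Illustrative example, not part of the original file: for two SImode
   registers a signed a > b comparison selects CCSmode, an unsigned one
   CCUmode, and (a & 0x90) == 0 yields CCZmode (CCTmode relaxed so that
   either a TM sequence or an AND fall-back can be used).  */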
1536 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1537 that we can implement more efficiently. */
1539 static void
1540 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1541 bool op0_preserve_value)
1543 if (op0_preserve_value)
1544 return;
1546 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1547 if ((*code == EQ || *code == NE)
1548 && *op1 == const0_rtx
1549 && GET_CODE (*op0) == ZERO_EXTRACT
1550 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1551 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1552 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1554 rtx inner = XEXP (*op0, 0);
1555 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1556 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1557 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1559 if (len > 0 && len < modesize
1560 && pos >= 0 && pos + len <= modesize
1561 && modesize <= HOST_BITS_PER_WIDE_INT)
1563 unsigned HOST_WIDE_INT block;
1564 block = (HOST_WIDE_INT_1U << len) - 1;
1565 block <<= modesize - pos - len;
1567 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1568 gen_int_mode (block, GET_MODE (inner)));
1572 /* Narrow AND of memory against immediate to enable TM. */
1573 if ((*code == EQ || *code == NE)
1574 && *op1 == const0_rtx
1575 && GET_CODE (*op0) == AND
1576 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1577 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1579 rtx inner = XEXP (*op0, 0);
1580 rtx mask = XEXP (*op0, 1);
1582 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1583 if (GET_CODE (inner) == SUBREG
1584 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1585 && (GET_MODE_SIZE (GET_MODE (inner))
1586 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1587 && ((INTVAL (mask)
1588 & GET_MODE_MASK (GET_MODE (inner))
1589 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1590 == 0))
1591 inner = SUBREG_REG (inner);
1593 /* Do not change volatile MEMs. */
1594 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1596 int part = s390_single_part (XEXP (*op0, 1),
1597 GET_MODE (inner), QImode, 0);
1598 if (part >= 0)
1600 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1601 inner = adjust_address_nv (inner, QImode, part);
1602 *op0 = gen_rtx_AND (QImode, inner, mask);
1607 /* Narrow comparisons against 0xffff to HImode if possible. */
1608 if ((*code == EQ || *code == NE)
1609 && GET_CODE (*op1) == CONST_INT
1610 && INTVAL (*op1) == 0xffff
1611 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1612 && (nonzero_bits (*op0, GET_MODE (*op0))
1613 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1615 *op0 = gen_lowpart (HImode, *op0);
1616 *op1 = constm1_rtx;
1619 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1620 if (GET_CODE (*op0) == UNSPEC
1621 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1622 && XVECLEN (*op0, 0) == 1
1623 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1624 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1625 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1626 && *op1 == const0_rtx)
1628 enum rtx_code new_code = UNKNOWN;
1629 switch (*code)
1631 case EQ: new_code = EQ; break;
1632 case NE: new_code = NE; break;
1633 case LT: new_code = GTU; break;
1634 case GT: new_code = LTU; break;
1635 case LE: new_code = GEU; break;
1636 case GE: new_code = LEU; break;
1637 default: break;
1640 if (new_code != UNKNOWN)
1642 *op0 = XVECEXP (*op0, 0, 0);
1643 *code = new_code;
1647 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1648 if (GET_CODE (*op0) == UNSPEC
1649 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1650 && XVECLEN (*op0, 0) == 1
1651 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1652 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1653 && CONST_INT_P (*op1))
1655 enum rtx_code new_code = UNKNOWN;
1656 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1658 case CCZmode:
1659 case CCRAWmode:
1660 switch (*code)
1662 case EQ: new_code = EQ; break;
1663 case NE: new_code = NE; break;
1664 default: break;
1666 break;
1667 default: break;
1670 if (new_code != UNKNOWN)
1672 /* For CCRAWmode put the required cc mask into the second
1673 operand. */
1674 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1675 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1676 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1677 *op0 = XVECEXP (*op0, 0, 0);
1678 *code = new_code;
1682 /* Simplify cascaded EQ, NE with const0_rtx. */
1683 if ((*code == NE || *code == EQ)
1684 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1685 && GET_MODE (*op0) == SImode
1686 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1687 && REG_P (XEXP (*op0, 0))
1688 && XEXP (*op0, 1) == const0_rtx
1689 && *op1 == const0_rtx)
1691 if ((*code == EQ && GET_CODE (*op0) == NE)
1692 || (*code == NE && GET_CODE (*op0) == EQ))
1693 *code = EQ;
1694 else
1695 *code = NE;
1696 *op0 = XEXP (*op0, 0);
1699 /* Prefer register over memory as first operand. */
1700 if (MEM_P (*op0) && REG_P (*op1))
1702 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1703 *code = (int)swap_condition ((enum rtx_code)*code);
1706 /* Using the scalar variants of vector instructions for 64 bit FP
1707 comparisons might require swapping the operands. */
1708 if (TARGET_VX
1709 && register_operand (*op0, DFmode)
1710 && register_operand (*op1, DFmode)
1711 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1713 rtx tmp;
1715 switch (*code)
1717 case LT: *code = GT; break;
1718 case LE: *code = GE; break;
1719 case UNGT: *code = UNLE; break;
1720 case UNGE: *code = UNLT; break;
1721 default: ;
1723 tmp = *op0; *op0 = *op1; *op1 = tmp;
1726 /* A comparison result is compared against zero. Replace it with
1727 the (perhaps inverted) original comparison.
1728 This probably should be done by simplify_relational_operation. */
1729 if ((*code == EQ || *code == NE)
1730 && *op1 == const0_rtx
1731 && COMPARISON_P (*op0)
1732 && CC_REG_P (XEXP (*op0, 0)))
1734 enum rtx_code new_code;
1736 if (*code == EQ)
1737 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1738 XEXP (*op0, 0),
1739 XEXP (*op1, 0), NULL);
1740 else
1741 new_code = GET_CODE (*op0);
1743 if (new_code != UNKNOWN)
1745 *code = new_code;
1746 *op1 = XEXP (*op0, 1);
1747 *op0 = XEXP (*op0, 0);
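/* Illustrative example, not part of the original file: the register-first
   preference above turns e.g. (lt (mem) (reg)) into (gt (reg) (mem)),
   with swap_condition keeping the meaning of the comparison intact.  */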
1752 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1753 FP compare using the single element variant of vector instructions.
1754 Replace CODE with the comparison code to be used in the CC reg
1755 compare and return the condition code register RTX in CC. */
1757 static bool
1758 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1759 rtx *cc)
1761 machine_mode cmp_mode;
1762 bool swap_p = false;
1764 switch (*code)
1766 case EQ: cmp_mode = CCVEQmode; break;
1767 case NE: cmp_mode = CCVEQmode; break;
1768 case GT: cmp_mode = CCVFHmode; break;
1769 case GE: cmp_mode = CCVFHEmode; break;
1770 case UNLE: cmp_mode = CCVFHmode; break;
1771 case UNLT: cmp_mode = CCVFHEmode; break;
1772 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1773 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1774 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1775 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1776 default: return false;
1779 if (swap_p)
1781 rtx tmp = cmp2;
1782 cmp2 = cmp1;
1783 cmp1 = tmp;
1786 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1787 gen_rtvec (2,
1788 gen_rtx_SET (gen_rtx_REG (cmp_mode, CC_REGNUM),
1789 gen_rtx_COMPARE (cmp_mode, cmp1,
1790 cmp2)),
1791 gen_rtx_CLOBBER (VOIDmode,
1792 gen_rtx_SCRATCH (V2DImode)))));
 1794   /* This is the cc reg as it will be used in the cc mode consumer.
1795 It either needs to be CCVFALL or CCVFANY. However, CC1 will
1796 never be set by the scalar variants. So it actually doesn't
1797 matter which one we choose here. */
1798 *cc = gen_rtx_REG (CCVFALLmode, CC_REGNUM);
1799 return true;
1803 /* Emit a compare instruction suitable to implement the comparison
1804 OP0 CODE OP1. Return the correct condition RTL to be placed in
1805 the IF_THEN_ELSE of the conditional branch testing the result. */
 1807 rtx
 1808 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1810 machine_mode mode = s390_select_ccmode (code, op0, op1);
1811 rtx cc;
1813 if (TARGET_VX
1814 && register_operand (op0, DFmode)
1815 && register_operand (op1, DFmode)
1816 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1818 /* Work has been done by s390_expand_vec_compare_scalar already. */
1820 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1822 /* Do not output a redundant compare instruction if a
1823 compare_and_swap pattern already computed the result and the
1824 machine modes are compatible. */
1825 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1826 == GET_MODE (op0));
1827 cc = op0;
1829 else
1831 cc = gen_rtx_REG (mode, CC_REGNUM);
1832 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1835 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1838 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1839 matches CMP.
1840 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1841 conditional branch testing the result. */
1843 static rtx
1844 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1845 rtx cmp, rtx new_rtx)
1847 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1848 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1849 const0_rtx);
1852 /* Emit a jump instruction to TARGET and return it. If COND is
1853 NULL_RTX, emit an unconditional jump, else a conditional jump under
1854 condition COND. */
1856 rtx_insn *
1857 s390_emit_jump (rtx target, rtx cond)
1859 rtx insn;
1861 target = gen_rtx_LABEL_REF (VOIDmode, target);
1862 if (cond)
1863 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1865 insn = gen_rtx_SET (pc_rtx, target);
1866 return emit_jump_insn (insn);
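/* Illustrative example, not part of the original file: a typical expander
   combines the two helpers above, e.g.

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (label, cond);

   which emits the comparison and a conditional branch to LABEL.  */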
1869 /* Return branch condition mask to implement a branch
1870 specified by CODE. Return -1 for invalid comparisons. */
 1872 int
 1873 s390_branch_condition_mask (rtx code)
1875 const int CC0 = 1 << 3;
1876 const int CC1 = 1 << 2;
1877 const int CC2 = 1 << 1;
1878 const int CC3 = 1 << 0;
1880 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1881 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1882 gcc_assert (XEXP (code, 1) == const0_rtx
1883 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1884 && CONST_INT_P (XEXP (code, 1))));
1887 switch (GET_MODE (XEXP (code, 0)))
1889 case CCZmode:
1890 case CCZ1mode:
1891 switch (GET_CODE (code))
1893 case EQ: return CC0;
1894 case NE: return CC1 | CC2 | CC3;
1895 default: return -1;
1897 break;
1899 case CCT1mode:
1900 switch (GET_CODE (code))
1902 case EQ: return CC1;
1903 case NE: return CC0 | CC2 | CC3;
1904 default: return -1;
1906 break;
1908 case CCT2mode:
1909 switch (GET_CODE (code))
1911 case EQ: return CC2;
1912 case NE: return CC0 | CC1 | CC3;
1913 default: return -1;
1915 break;
1917 case CCT3mode:
1918 switch (GET_CODE (code))
1920 case EQ: return CC3;
1921 case NE: return CC0 | CC1 | CC2;
1922 default: return -1;
1924 break;
1926 case CCLmode:
1927 switch (GET_CODE (code))
1929 case EQ: return CC0 | CC2;
1930 case NE: return CC1 | CC3;
1931 default: return -1;
1933 break;
1935 case CCL1mode:
1936 switch (GET_CODE (code))
1938 case LTU: return CC2 | CC3; /* carry */
1939 case GEU: return CC0 | CC1; /* no carry */
1940 default: return -1;
1942 break;
1944 case CCL2mode:
1945 switch (GET_CODE (code))
1947 case GTU: return CC0 | CC1; /* borrow */
1948 case LEU: return CC2 | CC3; /* no borrow */
1949 default: return -1;
1951 break;
1953 case CCL3mode:
1954 switch (GET_CODE (code))
1956 case EQ: return CC0 | CC2;
1957 case NE: return CC1 | CC3;
1958 case LTU: return CC1;
1959 case GTU: return CC3;
1960 case LEU: return CC1 | CC2;
1961 case GEU: return CC2 | CC3;
1962 default: return -1;
1965 case CCUmode:
1966 switch (GET_CODE (code))
1968 case EQ: return CC0;
1969 case NE: return CC1 | CC2 | CC3;
1970 case LTU: return CC1;
1971 case GTU: return CC2;
1972 case LEU: return CC0 | CC1;
1973 case GEU: return CC0 | CC2;
1974 default: return -1;
1976 break;
1978 case CCURmode:
1979 switch (GET_CODE (code))
1981 case EQ: return CC0;
1982 case NE: return CC2 | CC1 | CC3;
1983 case LTU: return CC2;
1984 case GTU: return CC1;
1985 case LEU: return CC0 | CC2;
1986 case GEU: return CC0 | CC1;
1987 default: return -1;
1989 break;
1991 case CCAPmode:
1992 switch (GET_CODE (code))
1994 case EQ: return CC0;
1995 case NE: return CC1 | CC2 | CC3;
1996 case LT: return CC1 | CC3;
1997 case GT: return CC2;
1998 case LE: return CC0 | CC1 | CC3;
1999 case GE: return CC0 | CC2;
2000 default: return -1;
2002 break;
2004 case CCANmode:
2005 switch (GET_CODE (code))
2007 case EQ: return CC0;
2008 case NE: return CC1 | CC2 | CC3;
2009 case LT: return CC1;
2010 case GT: return CC2 | CC3;
2011 case LE: return CC0 | CC1;
2012 case GE: return CC0 | CC2 | CC3;
2013 default: return -1;
2015 break;
2017 case CCSmode:
2018 switch (GET_CODE (code))
2020 case EQ: return CC0;
2021 case NE: return CC1 | CC2 | CC3;
2022 case LT: return CC1;
2023 case GT: return CC2;
2024 case LE: return CC0 | CC1;
2025 case GE: return CC0 | CC2;
2026 case UNORDERED: return CC3;
2027 case ORDERED: return CC0 | CC1 | CC2;
2028 case UNEQ: return CC0 | CC3;
2029 case UNLT: return CC1 | CC3;
2030 case UNGT: return CC2 | CC3;
2031 case UNLE: return CC0 | CC1 | CC3;
2032 case UNGE: return CC0 | CC2 | CC3;
2033 case LTGT: return CC1 | CC2;
2034 default: return -1;
2036 break;
2038 case CCSRmode:
2039 switch (GET_CODE (code))
2041 case EQ: return CC0;
2042 case NE: return CC2 | CC1 | CC3;
2043 case LT: return CC2;
2044 case GT: return CC1;
2045 case LE: return CC0 | CC2;
2046 case GE: return CC0 | CC1;
2047 case UNORDERED: return CC3;
2048 case ORDERED: return CC0 | CC2 | CC1;
2049 case UNEQ: return CC0 | CC3;
2050 case UNLT: return CC2 | CC3;
2051 case UNGT: return CC1 | CC3;
2052 case UNLE: return CC0 | CC2 | CC3;
2053 case UNGE: return CC0 | CC1 | CC3;
2054 case LTGT: return CC2 | CC1;
2055 default: return -1;
2057 break;
2059 /* Vector comparison modes. */
2060 /* CC2 will never be set. It is, however, part of the negated
2061 masks. */
2062 case CCVIALLmode:
2063 switch (GET_CODE (code))
2065 case EQ:
2066 case GTU:
2067 case GT:
2068 case GE: return CC0;
2069 /* The inverted modes are in fact *any* modes. */
2070 case NE:
2071 case LEU:
2072 case LE:
2073 case LT: return CC3 | CC1 | CC2;
2074 default: return -1;
2077 case CCVIANYmode:
2078 switch (GET_CODE (code))
2080 case EQ:
2081 case GTU:
2082 case GT:
2083 case GE: return CC0 | CC1;
2084 /* The inverted modes are in fact *all* modes. */
2085 case NE:
2086 case LEU:
2087 case LE:
2088 case LT: return CC3 | CC2;
2089 default: return -1;
2091 case CCVFALLmode:
2092 switch (GET_CODE (code))
2094 case EQ:
2095 case GT:
2096 case GE: return CC0;
2097 /* The inverted modes are in fact *any* modes. */
2098 case NE:
2099 case UNLE:
2100 case UNLT: return CC3 | CC1 | CC2;
2101 default: return -1;
2104 case CCVFANYmode:
2105 switch (GET_CODE (code))
2107 case EQ:
2108 case GT:
2109 case GE: return CC0 | CC1;
2110 /* The inverted modes are in fact *all* modes. */
2111 case NE:
2112 case UNLE:
2113 case UNLT: return CC3 | CC2;
2114 default: return -1;
2117 case CCRAWmode:
2118 switch (GET_CODE (code))
2120 case EQ:
2121 return INTVAL (XEXP (code, 1));
2122 case NE:
2123 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2124 default:
2125 gcc_unreachable ();
2128 default:
2129 return -1;
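/* Illustrative note (not part of the original sources): for CCZmode the
   table above maps EQ to CC0, i.e. mask 8 (binary 1000), and NE to
   CC1 | CC2 | CC3, i.e. mask 7.  Inverting a branch condition therefore
   amounts to XORing its mask with 0xf.  */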
2134 /* Return branch condition mask to implement a compare and branch
2135 specified by CODE. Return -1 for invalid comparisons. */
2138 s390_compare_and_branch_condition_mask (rtx code)
2140 const int CC0 = 1 << 3;
2141 const int CC1 = 1 << 2;
2142 const int CC2 = 1 << 1;
2144 switch (GET_CODE (code))
2146 case EQ:
2147 return CC0;
2148 case NE:
2149 return CC1 | CC2;
2150 case LT:
2151 case LTU:
2152 return CC1;
2153 case GT:
2154 case GTU:
2155 return CC2;
2156 case LE:
2157 case LEU:
2158 return CC0 | CC1;
2159 case GE:
2160 case GEU:
2161 return CC0 | CC2;
2162 default:
2163 gcc_unreachable ();
2165 return -1;
2168 /* If INV is false, return assembler mnemonic string to implement
2169 a branch specified by CODE. If INV is true, return mnemonic
2170 for the corresponding inverted branch. */
2172 static const char *
2173 s390_branch_condition_mnemonic (rtx code, int inv)
2175 int mask;
2177 static const char *const mnemonic[16] =
2179 NULL, "o", "h", "nle",
2180 "l", "nhe", "lh", "ne",
2181 "e", "nlh", "he", "nl",
2182 "le", "nh", "no", NULL
2185 if (GET_CODE (XEXP (code, 0)) == REG
2186 && REGNO (XEXP (code, 0)) == CC_REGNUM
2187 && (XEXP (code, 1) == const0_rtx
2188 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2189 && CONST_INT_P (XEXP (code, 1)))))
2190 mask = s390_branch_condition_mask (code);
2191 else
2192 mask = s390_compare_and_branch_condition_mask (code);
2194 gcc_assert (mask >= 0);
2196 if (inv)
2197 mask ^= 15;
2199 gcc_assert (mask >= 1 && mask <= 14);
2201 return mnemonic[mask];
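/* Illustrative note (not part of the original sources): with the table
   above, mask 8 (CCZmode EQ) yields the mnemonic "e", its inversion
   (mask 7) yields "ne", and mask 2 (e.g. CCSmode GT, CC2) yields "h".  */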
2204 /* Return the part of OP which has a value different from DEF.
2205 The size of the part is determined by MODE.
2206 Use this function only if you already know that OP really
2207 contains such a part. */
2209 unsigned HOST_WIDE_INT
2210 s390_extract_part (rtx op, machine_mode mode, int def)
2212 unsigned HOST_WIDE_INT value = 0;
2213 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2214 int part_bits = GET_MODE_BITSIZE (mode);
2215 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2216 int i;
2218 for (i = 0; i < max_parts; i++)
2220 if (i == 0)
2221 value = UINTVAL (op);
2222 else
2223 value >>= part_bits;
2225 if ((value & part_mask) != (def & part_mask))
2226 return value & part_mask;
2229 gcc_unreachable ();
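/* Illustrative sketch (not part of the original sources), assuming a
   64-bit HOST_WIDE_INT:

     s390_extract_part (GEN_INT (0x1234), QImode, 0)

   scans the QImode parts starting with the least significant byte and
   returns 0x34, the first part that differs from DEF (here 0).  */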
2232 /* If OP is an integer constant of mode MODE with exactly one
2233 part of mode PART_MODE unequal to DEF, return the number of that
2234 part. Otherwise, return -1. */
2237 s390_single_part (rtx op,
2238 machine_mode mode,
2239 machine_mode part_mode,
2240 int def)
2242 unsigned HOST_WIDE_INT value = 0;
2243 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2244 unsigned HOST_WIDE_INT part_mask
2245 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2246 int i, part = -1;
2248 if (GET_CODE (op) != CONST_INT)
2249 return -1;
2251 for (i = 0; i < n_parts; i++)
2253 if (i == 0)
2254 value = UINTVAL (op);
2255 else
2256 value >>= GET_MODE_BITSIZE (part_mode);
2258 if ((value & part_mask) != (def & part_mask))
2260 if (part != -1)
2261 return -1;
2262 else
2263 part = i;
2266 return part == -1 ? -1 : n_parts - 1 - part;
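/* Illustrative sketch (not part of the original sources):

     s390_single_part (GEN_INT (HOST_WIDE_INT_C (0x0000ffff00000000)),
                       DImode, HImode, 0)

   finds exactly one HImode part different from DEF and returns 1, the
   part number counted from the most significant halfword.  If two or
   more parts differ from DEF, -1 is returned instead.  */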
2269 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2270 bits and no other bits are set in (the lower SIZE bits of) IN.
2272 PSTART and PEND can be used to obtain the start and end
2273 position (inclusive) of the bitfield relative to 64
2274 bits. *PSTART / *PEND gives the position of the first/last bit
2275 of the bitfield counting from the highest order bit starting
2276 with zero. */
2278 bool
2279 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2280 int *pstart, int *pend)
2282 int start;
2283 int end = -1;
2284 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2285 int highbit = HOST_BITS_PER_WIDE_INT - size;
2286 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2288 gcc_assert (!!pstart == !!pend);
2289 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2290 if (end == -1)
2292 /* Look for the rightmost bit of a contiguous range of ones. */
2293 if (bitmask & in)
2294 /* Found it. */
2295 end = start;
2297 else
2299 /* Look for the first zero bit after the range of ones. */
2300 if (! (bitmask & in))
2301 /* Found it. */
2302 break;
2304 /* We're one past the last one-bit. */
2305 start++;
2307 if (end == -1)
2308 /* No one bits found. */
2309 return false;
2311 if (start > highbit)
2313 unsigned HOST_WIDE_INT mask;
2315 /* Calculate a mask for all bits beyond the contiguous bits. */
2316 mask = ((~HOST_WIDE_INT_0U >> highbit)
2317 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2318 if (mask & in)
2319 /* There are more bits set beyond the first range of one bits. */
2320 return false;
2323 if (pstart)
2325 *pstart = start;
2326 *pend = end;
2329 return true;
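/* Illustrative sketch (not part of the original sources):

     int start, end;
     s390_contiguous_bitmask_nowrap_p (0x0ff0, 16, &start, &end);

   succeeds with start == 52 and end == 59: the run of ones occupies
   bits 11..4 counted from the least significant bit, which corresponds
   to positions 52..59 counted from the most significant of the 64
   bits.  */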
2332 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2333 if ~IN contains a contiguous bitfield. In that case, *END is <
2334 *START.
2336 If WRAP_P is true, a bitmask that wraps around is also tested.
2337 When a wraparound occurs *START is greater than *END (if the
2338 pointers are non-null), and the uppermost (64 - SIZE) bits are thus
2339 part of the range. If WRAP_P is false, no wraparound is
2340 tested. */
2342 bool
2343 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2344 int size, int *start, int *end)
2346 int bs = HOST_BITS_PER_WIDE_INT;
2347 bool b;
2349 gcc_assert (!!start == !!end);
2350 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2351 /* This cannot be expressed as a contiguous bitmask. Exit early because
2352 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2353 a valid bitmask. */
2354 return false;
2355 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2356 if (b)
2357 return true;
2358 if (! wrap_p)
2359 return false;
2360 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2361 if (b && start)
2363 int s = *start;
2364 int e = *end;
2366 gcc_assert (s >= 1);
2367 *start = ((e + 1) & (bs - 1));
2368 *end = ((s - 1 + bs) & (bs - 1));
2371 return b;
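/* Illustrative sketch (not part of the original sources): for the
   wrapping mask 0x8000000000000001 with SIZE 64,

     s390_contiguous_bitmask_p (HOST_WIDE_INT_UC (0x8000000000000001),
                                true, 64, &start, &end)

   returns true with *START == 63 and *END == 0; *START > *END signals
   the wraparound described above.  */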
2374 /* Return true if OP contains the same contiguous bitfield in *all*
2375 its elements. START and END can be used to obtain the start and
2376 end position of the bitfield.
2378 START/END give the position of the first/last bit of the bitfield
2379 counting from the lowest order bit starting with zero. In order to
2380 use these values for S/390 instructions this has to be converted to
2381 "bits big endian" style. */
2383 bool
2384 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2386 unsigned HOST_WIDE_INT mask;
2387 int size;
2388 rtx elt;
2389 bool b;
2391 gcc_assert (!!start == !!end);
2392 if (!const_vec_duplicate_p (op, &elt)
2393 || !CONST_INT_P (elt))
2394 return false;
2396 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2398 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2399 if (size > 64)
2400 return false;
2402 mask = UINTVAL (elt);
2404 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2405 if (b)
2407 if (start)
2409 *start -= (HOST_BITS_PER_WIDE_INT - size);
2410 *end -= (HOST_BITS_PER_WIDE_INT - size);
2412 return true;
2414 else
2415 return false;
2418 /* Return true if OP consists only of byte chunks that are either 0 or
2419 0xff. If MASK is non-NULL a byte mask is generated which is
2420 appropriate for the vector generate byte mask instruction. */
2422 bool
2423 s390_bytemask_vector_p (rtx op, unsigned *mask)
2425 int i;
2426 unsigned tmp_mask = 0;
2427 int nunit, unit_size;
2429 if (!VECTOR_MODE_P (GET_MODE (op))
2430 || GET_CODE (op) != CONST_VECTOR
2431 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2432 return false;
2434 nunit = GET_MODE_NUNITS (GET_MODE (op));
2435 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2437 for (i = 0; i < nunit; i++)
2439 unsigned HOST_WIDE_INT c;
2440 int j;
2442 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2443 return false;
2445 c = UINTVAL (XVECEXP (op, 0, i));
2446 for (j = 0; j < unit_size; j++)
2448 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2449 return false;
2450 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2451 c = c >> BITS_PER_UNIT;
2455 if (mask != NULL)
2456 *mask = tmp_mask;
2458 return true;
2461 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2462 equivalent to a shift followed by the AND. In particular, CONTIG
2463 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2464 for ROTL indicate a rotate to the right. */
2466 bool
2467 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2469 int start, end;
2470 bool ok;
2472 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2473 gcc_assert (ok);
2475 if (rotl >= 0)
2476 return (64 - end >= rotl);
2477 else
2479 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2480 DImode. */
2481 rotl = -rotl + (64 - bitsize);
2482 return (start >= rotl);
2486 /* Check whether we can (and want to) split a double-word
2487 move in mode MODE from SRC to DST into two single-word
2488 moves, moving the subword FIRST_SUBWORD first. */
2490 bool
2491 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2493 /* Floating point and vector registers cannot be split. */
2494 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2495 return false;
2497 /* We don't need to split if operands are directly accessible. */
2498 if (s_operand (src, mode) || s_operand (dst, mode))
2499 return false;
2501 /* Non-offsettable memory references cannot be split. */
2502 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2503 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2504 return false;
2506 /* Moving the first subword must not clobber a register
2507 needed to move the second subword. */
2508 if (register_operand (dst, mode))
2510 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2511 if (reg_overlap_mentioned_p (subreg, src))
2512 return false;
2515 return true;
2518 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2519 and [MEM2, MEM2 + SIZE] do overlap and false
2520 otherwise. */
2522 bool
2523 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2525 rtx addr1, addr2, addr_delta;
2526 HOST_WIDE_INT delta;
2528 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2529 return true;
2531 if (size == 0)
2532 return false;
2534 addr1 = XEXP (mem1, 0);
2535 addr2 = XEXP (mem2, 0);
2537 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2539 /* This overlapping check is used by peepholes merging memory block operations.
2540 Overlapping operations would otherwise be recognized by the S/390 hardware
2541 and would fall back to a slower implementation. Allowing overlapping
2542 operations would lead to slow code but not to wrong code. Therefore we are
2543 somewhat optimistic if we cannot prove that the memory blocks are
2544 overlapping.
2545 That's why we return false here although this may accept operations on
2546 overlapping memory areas. */
2547 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2548 return false;
2550 delta = INTVAL (addr_delta);
2552 if (delta == 0
2553 || (delta > 0 && delta < size)
2554 || (delta < 0 && -delta < size))
2555 return true;
2557 return false;
2560 /* Check whether the address of memory reference MEM2 equals exactly
2561 the address of memory reference MEM1 plus DELTA. Return true if
2562 we can prove this to be the case, false otherwise. */
2564 bool
2565 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2567 rtx addr1, addr2, addr_delta;
2569 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2570 return false;
2572 addr1 = XEXP (mem1, 0);
2573 addr2 = XEXP (mem2, 0);
2575 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2576 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2577 return false;
2579 return true;
2582 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2584 void
2585 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2586 rtx *operands)
2588 machine_mode wmode = mode;
2589 rtx dst = operands[0];
2590 rtx src1 = operands[1];
2591 rtx src2 = operands[2];
2592 rtx op, clob, tem;
2594 /* If we cannot handle the operation directly, use a temp register. */
2595 if (!s390_logical_operator_ok_p (operands))
2596 dst = gen_reg_rtx (mode);
2598 /* QImode and HImode patterns make sense only if we have a destination
2599 in memory. Otherwise perform the operation in SImode. */
2600 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2601 wmode = SImode;
2603 /* Widen operands if required. */
2604 if (mode != wmode)
2606 if (GET_CODE (dst) == SUBREG
2607 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2608 dst = tem;
2609 else if (REG_P (dst))
2610 dst = gen_rtx_SUBREG (wmode, dst, 0);
2611 else
2612 dst = gen_reg_rtx (wmode);
2614 if (GET_CODE (src1) == SUBREG
2615 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2616 src1 = tem;
2617 else if (GET_MODE (src1) != VOIDmode)
2618 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2620 if (GET_CODE (src2) == SUBREG
2621 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2622 src2 = tem;
2623 else if (GET_MODE (src2) != VOIDmode)
2624 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2627 /* Emit the instruction. */
2628 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2629 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2630 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2632 /* Fix up the destination if needed. */
2633 if (dst != operands[0])
2634 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2637 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2639 bool
2640 s390_logical_operator_ok_p (rtx *operands)
2642 /* If the destination operand is in memory, it needs to coincide
2643 with one of the source operands. After reload, it has to be
2644 the first source operand. */
2645 if (GET_CODE (operands[0]) == MEM)
2646 return rtx_equal_p (operands[0], operands[1])
2647 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2649 return true;
2652 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2653 operand IMMOP to switch from SS to SI type instructions. */
2655 void
2656 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2658 int def = code == AND ? -1 : 0;
2659 HOST_WIDE_INT mask;
2660 int part;
2662 gcc_assert (GET_CODE (*memop) == MEM);
2663 gcc_assert (!MEM_VOLATILE_P (*memop));
2665 mask = s390_extract_part (*immop, QImode, def);
2666 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2667 gcc_assert (part >= 0);
2669 *memop = adjust_address (*memop, QImode, part);
2670 *immop = gen_int_mode (mask, QImode);
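/* Illustrative note (not part of the original sources): consider an AND
   of a DImode memory operand with the immediate 0xfffffffffffffffe.
   Only the least significant byte differs from the AND default 0xff, so
   the code above rewrites the operation as a QImode access to byte 7 of
   the operand with immediate mask 0xfe, which allows an SI-type
   instruction such as NI instead of an SS-type NC.  */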
2674 /* How to allocate a 'struct machine_function'. */
2676 static struct machine_function *
2677 s390_init_machine_status (void)
2679 return ggc_cleared_alloc<machine_function> ();
2682 /* Map for smallest class containing reg regno. */
2684 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2685 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2686 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2687 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2688 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2689 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2690 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2691 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2692 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2693 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2694 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2695 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2696 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2697 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2698 VEC_REGS, VEC_REGS /* 52 */
2701 /* Return attribute type of insn. */
2703 static enum attr_type
2704 s390_safe_attr_type (rtx_insn *insn)
2706 if (recog_memoized (insn) >= 0)
2707 return get_attr_type (insn);
2708 else
2709 return TYPE_NONE;
2712 /* Return true if DISP is a valid short displacement. */
2714 static bool
2715 s390_short_displacement (rtx disp)
2717 /* No displacement is OK. */
2718 if (!disp)
2719 return true;
2721 /* Without the long displacement facility we don't need to
2722 distinguish between long and short displacements. */
2723 if (!TARGET_LONG_DISPLACEMENT)
2724 return true;
2726 /* Integer displacement in range. */
2727 if (GET_CODE (disp) == CONST_INT)
2728 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2730 /* GOT offset is not OK, the GOT can be large. */
2731 if (GET_CODE (disp) == CONST
2732 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2733 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2734 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2735 return false;
2737 /* All other symbolic constants are literal pool references,
2738 which are OK as the literal pool must be small. */
2739 if (GET_CODE (disp) == CONST)
2740 return true;
2742 return false;
2745 /* Decompose a RTL expression ADDR for a memory address into
2746 its components, returned in OUT.
2748 Returns false if ADDR is not a valid memory address, true
2749 otherwise. If OUT is NULL, don't return the components,
2750 but check for validity only.
2752 Note: Only addresses in canonical form are recognized.
2753 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2754 canonical form so that they will be recognized. */
2756 static int
2757 s390_decompose_address (rtx addr, struct s390_address *out)
2759 HOST_WIDE_INT offset = 0;
2760 rtx base = NULL_RTX;
2761 rtx indx = NULL_RTX;
2762 rtx disp = NULL_RTX;
2763 rtx orig_disp;
2764 bool pointer = false;
2765 bool base_ptr = false;
2766 bool indx_ptr = false;
2767 bool literal_pool = false;
2769 /* We may need to substitute the literal pool base register into the address
2770 below. However, at this point we do not know which register is going to
2771 be used as base, so we substitute the arg pointer register. This is going
2772 to be treated as holding a pointer below -- it shouldn't be used for any
2773 other purpose. */
2774 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2776 /* Decompose address into base + index + displacement. */
2778 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2779 base = addr;
2781 else if (GET_CODE (addr) == PLUS)
2783 rtx op0 = XEXP (addr, 0);
2784 rtx op1 = XEXP (addr, 1);
2785 enum rtx_code code0 = GET_CODE (op0);
2786 enum rtx_code code1 = GET_CODE (op1);
2788 if (code0 == REG || code0 == UNSPEC)
2790 if (code1 == REG || code1 == UNSPEC)
2792 indx = op0; /* index + base */
2793 base = op1;
2796 else
2798 base = op0; /* base + displacement */
2799 disp = op1;
2803 else if (code0 == PLUS)
2805 indx = XEXP (op0, 0); /* index + base + disp */
2806 base = XEXP (op0, 1);
2807 disp = op1;
2810 else
2812 return false;
2816 else
2817 disp = addr; /* displacement */
2819 /* Extract integer part of displacement. */
2820 orig_disp = disp;
2821 if (disp)
2823 if (GET_CODE (disp) == CONST_INT)
2825 offset = INTVAL (disp);
2826 disp = NULL_RTX;
2828 else if (GET_CODE (disp) == CONST
2829 && GET_CODE (XEXP (disp, 0)) == PLUS
2830 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2832 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2833 disp = XEXP (XEXP (disp, 0), 0);
2837 /* Strip off CONST here to avoid special case tests later. */
2838 if (disp && GET_CODE (disp) == CONST)
2839 disp = XEXP (disp, 0);
2841 /* We can convert literal pool addresses to
2842 displacements by basing them off the base register. */
2843 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2845 /* Either base or index must be free to hold the base register. */
2846 if (!base)
2847 base = fake_pool_base, literal_pool = true;
2848 else if (!indx)
2849 indx = fake_pool_base, literal_pool = true;
2850 else
2851 return false;
2853 /* Mark up the displacement. */
2854 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2855 UNSPEC_LTREL_OFFSET);
2858 /* Validate base register. */
2859 if (base)
2861 if (GET_CODE (base) == UNSPEC)
2862 switch (XINT (base, 1))
2864 case UNSPEC_LTREF:
2865 if (!disp)
2866 disp = gen_rtx_UNSPEC (Pmode,
2867 gen_rtvec (1, XVECEXP (base, 0, 0)),
2868 UNSPEC_LTREL_OFFSET);
2869 else
2870 return false;
2872 base = XVECEXP (base, 0, 1);
2873 break;
2875 case UNSPEC_LTREL_BASE:
2876 if (XVECLEN (base, 0) == 1)
2877 base = fake_pool_base, literal_pool = true;
2878 else
2879 base = XVECEXP (base, 0, 1);
2880 break;
2882 default:
2883 return false;
2886 if (!REG_P (base) || GET_MODE (base) != Pmode)
2887 return false;
2889 if (REGNO (base) == STACK_POINTER_REGNUM
2890 || REGNO (base) == FRAME_POINTER_REGNUM
2891 || ((reload_completed || reload_in_progress)
2892 && frame_pointer_needed
2893 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2894 || REGNO (base) == ARG_POINTER_REGNUM
2895 || (flag_pic
2896 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2897 pointer = base_ptr = true;
2899 if ((reload_completed || reload_in_progress)
2900 && base == cfun->machine->base_reg)
2901 pointer = base_ptr = literal_pool = true;
2904 /* Validate index register. */
2905 if (indx)
2907 if (GET_CODE (indx) == UNSPEC)
2908 switch (XINT (indx, 1))
2910 case UNSPEC_LTREF:
2911 if (!disp)
2912 disp = gen_rtx_UNSPEC (Pmode,
2913 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2914 UNSPEC_LTREL_OFFSET);
2915 else
2916 return false;
2918 indx = XVECEXP (indx, 0, 1);
2919 break;
2921 case UNSPEC_LTREL_BASE:
2922 if (XVECLEN (indx, 0) == 1)
2923 indx = fake_pool_base, literal_pool = true;
2924 else
2925 indx = XVECEXP (indx, 0, 1);
2926 break;
2928 default:
2929 return false;
2932 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2933 return false;
2935 if (REGNO (indx) == STACK_POINTER_REGNUM
2936 || REGNO (indx) == FRAME_POINTER_REGNUM
2937 || ((reload_completed || reload_in_progress)
2938 && frame_pointer_needed
2939 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2940 || REGNO (indx) == ARG_POINTER_REGNUM
2941 || (flag_pic
2942 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2943 pointer = indx_ptr = true;
2945 if ((reload_completed || reload_in_progress)
2946 && indx == cfun->machine->base_reg)
2947 pointer = indx_ptr = literal_pool = true;
2950 /* Prefer to use pointer as base, not index. */
2951 if (base && indx && !base_ptr
2952 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2954 rtx tmp = base;
2955 base = indx;
2956 indx = tmp;
2959 /* Validate displacement. */
2960 if (!disp)
2962 /* If virtual registers are involved, the displacement will change later
2963 anyway as the virtual registers get eliminated. This could make a
2964 valid displacement invalid, but it is more likely to make an invalid
2965 displacement valid, because we sometimes access the register save area
2966 via negative offsets to one of those registers.
2967 Thus we don't check the displacement for validity here. If after
2968 elimination the displacement turns out to be invalid after all,
2969 this is fixed up by reload in any case. */
2970 /* LRA always keeps displacements up to date, and we need to know
2971 that the displacement is correct throughout LRA, not only at the
2972 final elimination. */
2973 if (lra_in_progress
2974 || (base != arg_pointer_rtx
2975 && indx != arg_pointer_rtx
2976 && base != return_address_pointer_rtx
2977 && indx != return_address_pointer_rtx
2978 && base != frame_pointer_rtx
2979 && indx != frame_pointer_rtx
2980 && base != virtual_stack_vars_rtx
2981 && indx != virtual_stack_vars_rtx))
2982 if (!DISP_IN_RANGE (offset))
2983 return false;
2985 else
2987 /* All the special cases are pointers. */
2988 pointer = true;
2990 /* In the small-PIC case, the linker converts @GOT
2991 and @GOTNTPOFF offsets to possible displacements. */
2992 if (GET_CODE (disp) == UNSPEC
2993 && (XINT (disp, 1) == UNSPEC_GOT
2994 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2995 && flag_pic == 1)
3000 /* Accept pool label offsets. */
3001 else if (GET_CODE (disp) == UNSPEC
3002 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3005 /* Accept literal pool references. */
3006 else if (GET_CODE (disp) == UNSPEC
3007 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3009 /* In case CSE pulled a non-literal-pool reference out of
3010 the pool, we have to reject the address. This is
3011 especially important when loading the GOT pointer on non
3012 zarch CPUs. In this case the literal pool contains an lt
3013 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3014 will most likely exceed the displacement. */
3015 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3016 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3017 return false;
3019 orig_disp = gen_rtx_CONST (Pmode, disp);
3020 if (offset)
3022 /* If we have an offset, make sure it does not
3023 exceed the size of the constant pool entry. */
3024 rtx sym = XVECEXP (disp, 0, 0);
3025 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3026 return false;
3028 orig_disp = plus_constant (Pmode, orig_disp, offset);
3032 else
3033 return false;
3036 if (!base && !indx)
3037 pointer = true;
3039 if (out)
3041 out->base = base;
3042 out->indx = indx;
3043 out->disp = orig_disp;
3044 out->pointer = pointer;
3045 out->literal_pool = literal_pool;
3048 return true;
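/* Illustrative sketch (not part of the original sources): for the
   canonical address

     (plus (plus (reg %r2) (reg %r3)) (const_int 100))

   the code above yields indx = %r2, base = %r3 and disp = (const_int
   100), assuming neither register is marked as a pointer so that the
   base/index preference swap does not trigger.  */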
3051 /* Decompose a RTL expression OP for an address style operand into its
3052 components, and return the base register in BASE and the offset in
3053 OFFSET. While OP looks like an address it is never supposed to be
3054 used as such.
3056 Return true if OP is a valid address operand, false if not. */
3058 bool
3059 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3060 HOST_WIDE_INT *offset)
3062 rtx off = NULL_RTX;
3064 /* We can have an integer constant, an address register,
3065 or a sum of the two. */
3066 if (CONST_SCALAR_INT_P (op))
3068 off = op;
3069 op = NULL_RTX;
3071 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3073 off = XEXP (op, 1);
3074 op = XEXP (op, 0);
3076 while (op && GET_CODE (op) == SUBREG)
3077 op = SUBREG_REG (op);
3079 if (op && GET_CODE (op) != REG)
3080 return false;
3082 if (offset)
3084 if (off == NULL_RTX)
3085 *offset = 0;
3086 else if (CONST_INT_P (off))
3087 *offset = INTVAL (off);
3088 else if (CONST_WIDE_INT_P (off))
3089 /* The offset will be cut down to 12 bits anyway, so just take
3090 the lowest-order chunk of the wide int. */
3091 *offset = CONST_WIDE_INT_ELT (off, 0);
3092 else
3093 gcc_unreachable ();
3095 if (base)
3096 *base = op;
3098 return true;
3102 /* Return true if OP is a valid address without index. */
3104 bool
3105 s390_legitimate_address_without_index_p (rtx op)
3107 struct s390_address addr;
3109 if (!s390_decompose_address (XEXP (op, 0), &addr))
3110 return false;
3111 if (addr.indx)
3112 return false;
3114 return true;
3118 /* Return TRUE if ADDR is an operand valid for a load/store relative
3119 instruction. Be aware that the alignment of the operand needs to
3120 be checked separately.
3121 Valid addresses are single references or a sum of a reference and a
3122 constant integer. Return these parts in SYMREF and ADDEND. You can
3123 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3124 values. Literal pool references are *not* considered symbol
3125 references. */
3127 static bool
3128 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3130 HOST_WIDE_INT tmpaddend = 0;
3132 if (GET_CODE (addr) == CONST)
3133 addr = XEXP (addr, 0);
3135 if (GET_CODE (addr) == PLUS)
3137 if (!CONST_INT_P (XEXP (addr, 1)))
3138 return false;
3140 tmpaddend = INTVAL (XEXP (addr, 1));
3141 addr = XEXP (addr, 0);
3144 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3145 || (GET_CODE (addr) == UNSPEC
3146 && (XINT (addr, 1) == UNSPEC_GOTENT
3147 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3149 if (symref)
3150 *symref = addr;
3151 if (addend)
3152 *addend = tmpaddend;
3154 return true;
3156 return false;
3159 /* Return true if the address in OP is valid for constraint letter C
3160 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3161 pool MEMs should be accepted. Only the Q, R, S, T constraint
3162 letters are allowed for C. */
3164 static int
3165 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3167 struct s390_address addr;
3168 bool decomposed = false;
3170 /* This check makes sure that no symbolic addresses (except literal
3171 pool references) are accepted by the R or T constraints. */
3172 if (s390_loadrelative_operand_p (op, NULL, NULL))
3173 return 0;
3175 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3176 if (!lit_pool_ok)
3178 if (!s390_decompose_address (op, &addr))
3179 return 0;
3180 if (addr.literal_pool)
3181 return 0;
3182 decomposed = true;
3185 /* With reload, we sometimes get intermediate address forms that are
3186 actually invalid as-is, but we need to accept them in the most
3187 generic cases below ('R' or 'T'), since reload will in fact fix
3188 them up. LRA behaves differently here; we never see such forms,
3189 but on the other hand, we need to strictly reject every invalid
3190 address form. Perform this check right up front. */
3191 if (lra_in_progress)
3193 if (!decomposed && !s390_decompose_address (op, &addr))
3194 return 0;
3195 decomposed = true;
3198 switch (c)
3200 case 'Q': /* no index short displacement */
3201 if (!decomposed && !s390_decompose_address (op, &addr))
3202 return 0;
3203 if (addr.indx)
3204 return 0;
3205 if (!s390_short_displacement (addr.disp))
3206 return 0;
3207 break;
3209 case 'R': /* with index short displacement */
3210 if (TARGET_LONG_DISPLACEMENT)
3212 if (!decomposed && !s390_decompose_address (op, &addr))
3213 return 0;
3214 if (!s390_short_displacement (addr.disp))
3215 return 0;
3217 /* Any invalid address here will be fixed up by reload,
3218 so accept it for the most generic constraint. */
3219 break;
3221 case 'S': /* no index long displacement */
3222 if (!decomposed && !s390_decompose_address (op, &addr))
3223 return 0;
3224 if (addr.indx)
3225 return 0;
3226 break;
3228 case 'T': /* with index long displacement */
3229 /* Any invalid address here will be fixed up by reload,
3230 so accept it for the most generic constraint. */
3231 break;
3233 default:
3234 return 0;
3236 return 1;
3240 /* Evaluates constraint strings described by the regular expression
3241 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3242 the constraint given in STR, or 0 otherwise. */
3245 s390_mem_constraint (const char *str, rtx op)
3247 char c = str[0];
3249 switch (c)
3251 case 'A':
3252 /* Check for offsettable variants of memory constraints. */
3253 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3254 return 0;
3255 if ((reload_completed || reload_in_progress)
3256 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3257 return 0;
3258 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3259 case 'B':
3260 /* Check for non-literal-pool variants of memory constraints. */
3261 if (!MEM_P (op))
3262 return 0;
3263 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3264 case 'Q':
3265 case 'R':
3266 case 'S':
3267 case 'T':
3268 if (GET_CODE (op) != MEM)
3269 return 0;
3270 return s390_check_qrst_address (c, XEXP (op, 0), true);
3271 case 'Y':
3272 /* Simply check for the basic form of a shift count. Reload will
3273 take care of making sure we have a proper base register. */
3274 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3275 return 0;
3276 break;
3277 case 'Z':
3278 return s390_check_qrst_address (str[1], op, true);
3279 default:
3280 return 0;
3282 return 1;
3286 /* Evaluates constraint strings starting with letter O. Input
3287 parameter C is the letter following the "O" in the constraint
3288 string. Returns 1 if VALUE meets the respective constraint and 0
3289 otherwise. */
3292 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3294 if (!TARGET_EXTIMM)
3295 return 0;
3297 switch (c)
3299 case 's':
3300 return trunc_int_for_mode (value, SImode) == value;
3302 case 'p':
3303 return value == 0
3304 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3306 case 'n':
3307 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3309 default:
3310 gcc_unreachable ();
3315 /* Evaluates constraint strings starting with letter N. Parameter STR
3316 contains the letters following letter "N" in the constraint string.
3317 Returns true if VALUE matches the constraint. */
3320 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3322 machine_mode mode, part_mode;
3323 int def;
3324 int part, part_goal;
3327 if (str[0] == 'x')
3328 part_goal = -1;
3329 else
3330 part_goal = str[0] - '0';
3332 switch (str[1])
3334 case 'Q':
3335 part_mode = QImode;
3336 break;
3337 case 'H':
3338 part_mode = HImode;
3339 break;
3340 case 'S':
3341 part_mode = SImode;
3342 break;
3343 default:
3344 return 0;
3347 switch (str[2])
3349 case 'H':
3350 mode = HImode;
3351 break;
3352 case 'S':
3353 mode = SImode;
3354 break;
3355 case 'D':
3356 mode = DImode;
3357 break;
3358 default:
3359 return 0;
3362 switch (str[3])
3364 case '0':
3365 def = 0;
3366 break;
3367 case 'F':
3368 def = -1;
3369 break;
3370 default:
3371 return 0;
3374 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3375 return 0;
3377 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3378 if (part < 0)
3379 return 0;
3380 if (part_goal != -1 && part_goal != part)
3381 return 0;
3383 return 1;
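/* Illustrative note (not part of the original sources): a hypothetical
   constraint suffix "1HD0" passed as STR above requests a DImode value
   whose HImode parts all equal 0 except part 1, counted from the most
   significant halfword.  0x0000ffff00000000 satisfies it, while
   0xffff000000000000 does not (its nonzero part is part 0).  */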
3387 /* Returns true if the input parameter VALUE is a float zero. */
3390 s390_float_const_zero_p (rtx value)
3392 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3393 && value == CONST0_RTX (GET_MODE (value)));
3396 /* Implement TARGET_REGISTER_MOVE_COST. */
3398 static int
3399 s390_register_move_cost (machine_mode mode,
3400 reg_class_t from, reg_class_t to)
3402 /* On s390, copying between FPRs and GPRs is expensive. */
3404 /* With ldgr/lgdr available it becomes somewhat faster. */
3405 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3407 /* ldgr is single cycle. */
3408 if (reg_classes_intersect_p (from, GENERAL_REGS)
3409 && reg_classes_intersect_p (to, FP_REGS))
3410 return 1;
3411 /* lgdr needs 3 cycles. */
3412 if (reg_classes_intersect_p (to, GENERAL_REGS)
3413 && reg_classes_intersect_p (from, FP_REGS))
3414 return 3;
3417 /* Otherwise copying is done via memory. */
3418 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3419 && reg_classes_intersect_p (to, FP_REGS))
3420 || (reg_classes_intersect_p (from, FP_REGS)
3421 && reg_classes_intersect_p (to, GENERAL_REGS)))
3422 return 10;
3424 return 1;
3427 /* Implement TARGET_MEMORY_MOVE_COST. */
3429 static int
3430 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3431 reg_class_t rclass ATTRIBUTE_UNUSED,
3432 bool in ATTRIBUTE_UNUSED)
3434 return 2;
3437 /* Compute a (partial) cost for rtx X. Return true if the complete
3438 cost has been computed, and false if subexpressions should be
3439 scanned. In either case, *TOTAL contains the cost result. The
3440 initial value of *TOTAL is the default value computed by
3441 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3442 code of the superexpression of x. */
3444 static bool
3445 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3446 int opno ATTRIBUTE_UNUSED,
3447 int *total, bool speed ATTRIBUTE_UNUSED)
3449 int code = GET_CODE (x);
3450 switch (code)
3452 case CONST:
3453 case CONST_INT:
3454 case LABEL_REF:
3455 case SYMBOL_REF:
3456 case CONST_DOUBLE:
3457 case CONST_WIDE_INT:
3458 case MEM:
3459 *total = 0;
3460 return true;
3462 case IOR:
3463 /* risbg */
3464 if (GET_CODE (XEXP (x, 0)) == AND
3465 && GET_CODE (XEXP (x, 1)) == ASHIFT
3466 && REG_P (XEXP (XEXP (x, 0), 0))
3467 && REG_P (XEXP (XEXP (x, 1), 0))
3468 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3469 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3470 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3471 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3473 *total = COSTS_N_INSNS (2);
3474 return true;
3476 /* fallthrough */
3477 case ASHIFT:
3478 case ASHIFTRT:
3479 case LSHIFTRT:
3480 case ROTATE:
3481 case ROTATERT:
3482 case AND:
3483 case XOR:
3484 case NEG:
3485 case NOT:
3486 *total = COSTS_N_INSNS (1);
3487 return false;
3489 case PLUS:
3490 case MINUS:
3491 *total = COSTS_N_INSNS (1);
3492 return false;
3494 case MULT:
3495 switch (mode)
3497 case SImode:
3499 rtx left = XEXP (x, 0);
3500 rtx right = XEXP (x, 1);
3501 if (GET_CODE (right) == CONST_INT
3502 && CONST_OK_FOR_K (INTVAL (right)))
3503 *total = s390_cost->mhi;
3504 else if (GET_CODE (left) == SIGN_EXTEND)
3505 *total = s390_cost->mh;
3506 else
3507 *total = s390_cost->ms; /* msr, ms, msy */
3508 break;
3510 case DImode:
3512 rtx left = XEXP (x, 0);
3513 rtx right = XEXP (x, 1);
3514 if (TARGET_ZARCH)
3516 if (GET_CODE (right) == CONST_INT
3517 && CONST_OK_FOR_K (INTVAL (right)))
3518 *total = s390_cost->mghi;
3519 else if (GET_CODE (left) == SIGN_EXTEND)
3520 *total = s390_cost->msgf;
3521 else
3522 *total = s390_cost->msg; /* msgr, msg */
3524 else /* TARGET_31BIT */
3526 if (GET_CODE (left) == SIGN_EXTEND
3527 && GET_CODE (right) == SIGN_EXTEND)
3528 /* mulsidi case: mr, m */
3529 *total = s390_cost->m;
3530 else if (GET_CODE (left) == ZERO_EXTEND
3531 && GET_CODE (right) == ZERO_EXTEND
3532 && TARGET_CPU_ZARCH)
3533 /* umulsidi case: ml, mlr */
3534 *total = s390_cost->ml;
3535 else
3536 /* Complex calculation is required. */
3537 *total = COSTS_N_INSNS (40);
3539 break;
3541 case SFmode:
3542 case DFmode:
3543 *total = s390_cost->mult_df;
3544 break;
3545 case TFmode:
3546 *total = s390_cost->mxbr;
3547 break;
3548 default:
3549 return false;
3551 return false;
3553 case FMA:
3554 switch (mode)
3556 case DFmode:
3557 *total = s390_cost->madbr;
3558 break;
3559 case SFmode:
3560 *total = s390_cost->maebr;
3561 break;
3562 default:
3563 return false;
3565 /* A negation in the third argument is free: FMSUB. */
3566 if (GET_CODE (XEXP (x, 2)) == NEG)
3568 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3569 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3570 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3571 return true;
3573 return false;
3575 case UDIV:
3576 case UMOD:
3577 if (mode == TImode) /* 128 bit division */
3578 *total = s390_cost->dlgr;
3579 else if (mode == DImode)
3581 rtx right = XEXP (x, 1);
3582 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3583 *total = s390_cost->dlr;
3584 else /* 64 by 64 bit division */
3585 *total = s390_cost->dlgr;
3587 else if (mode == SImode) /* 32 bit division */
3588 *total = s390_cost->dlr;
3589 return false;
3591 case DIV:
3592 case MOD:
3593 if (mode == DImode)
3595 rtx right = XEXP (x, 1);
3596 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3597 if (TARGET_ZARCH)
3598 *total = s390_cost->dsgfr;
3599 else
3600 *total = s390_cost->dr;
3601 else /* 64 by 64 bit division */
3602 *total = s390_cost->dsgr;
3604 else if (mode == SImode) /* 32 bit division */
3605 *total = s390_cost->dlr;
3606 else if (mode == SFmode)
3608 *total = s390_cost->debr;
3610 else if (mode == DFmode)
3612 *total = s390_cost->ddbr;
3614 else if (mode == TFmode)
3616 *total = s390_cost->dxbr;
3618 return false;
3620 case SQRT:
3621 if (mode == SFmode)
3622 *total = s390_cost->sqebr;
3623 else if (mode == DFmode)
3624 *total = s390_cost->sqdbr;
3625 else /* TFmode */
3626 *total = s390_cost->sqxbr;
3627 return false;
3629 case SIGN_EXTEND:
3630 case ZERO_EXTEND:
3631 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3632 || outer_code == PLUS || outer_code == MINUS
3633 || outer_code == COMPARE)
3634 *total = 0;
3635 return false;
3637 case COMPARE:
3638 *total = COSTS_N_INSNS (1);
3639 if (GET_CODE (XEXP (x, 0)) == AND
3640 && GET_CODE (XEXP (x, 1)) == CONST_INT
3641 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3643 rtx op0 = XEXP (XEXP (x, 0), 0);
3644 rtx op1 = XEXP (XEXP (x, 0), 1);
3645 rtx op2 = XEXP (x, 1);
3647 if (memory_operand (op0, GET_MODE (op0))
3648 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3649 return true;
3650 if (register_operand (op0, GET_MODE (op0))
3651 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3652 return true;
3654 return false;
3656 default:
3657 return false;
3661 /* Return the cost of an address rtx ADDR. */
3663 static int
3664 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3665 addr_space_t as ATTRIBUTE_UNUSED,
3666 bool speed ATTRIBUTE_UNUSED)
3668 struct s390_address ad;
3669 if (!s390_decompose_address (addr, &ad))
3670 return 1000;
3672 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3675 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3676 static int
3677 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3678 tree vectype,
3679 int misalign ATTRIBUTE_UNUSED)
3681 switch (type_of_cost)
3683 case scalar_stmt:
3684 case scalar_load:
3685 case scalar_store:
3686 case vector_stmt:
3687 case vector_load:
3688 case vector_store:
3689 case vec_to_scalar:
3690 case scalar_to_vec:
3691 case cond_branch_not_taken:
3692 case vec_perm:
3693 case vec_promote_demote:
3694 case unaligned_load:
3695 case unaligned_store:
3696 return 1;
3698 case cond_branch_taken:
3699 return 3;
3701 case vec_construct:
3702 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3704 default:
3705 gcc_unreachable ();
3709 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3710 otherwise return 0. */
3713 tls_symbolic_operand (rtx op)
3715 if (GET_CODE (op) != SYMBOL_REF)
3716 return 0;
3717 return SYMBOL_REF_TLS_MODEL (op);
3720 /* Split DImode access register reference REG (on 64-bit) into its constituent
3721 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3722 gen_highpart cannot be used as they assume all registers are word-sized,
3723 while our access registers have only half that size. */
3725 void
3726 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3728 gcc_assert (TARGET_64BIT);
3729 gcc_assert (ACCESS_REG_P (reg));
3730 gcc_assert (GET_MODE (reg) == DImode);
3731 gcc_assert (!(REGNO (reg) & 1));
3733 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3734 *hi = gen_rtx_REG (SImode, REGNO (reg));
3737 /* Return true if OP contains a symbol reference. */
3739 bool
3740 symbolic_reference_mentioned_p (rtx op)
3742 const char *fmt;
3743 int i;
3745 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3746 return 1;
3748 fmt = GET_RTX_FORMAT (GET_CODE (op));
3749 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3751 if (fmt[i] == 'E')
3753 int j;
3755 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3756 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3757 return 1;
3760 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3761 return 1;
3764 return 0;
3767 /* Return true if OP contains a reference to a thread-local symbol. */
3769 bool
3770 tls_symbolic_reference_mentioned_p (rtx op)
3772 const char *fmt;
3773 int i;
3775 if (GET_CODE (op) == SYMBOL_REF)
3776 return tls_symbolic_operand (op);
3778 fmt = GET_RTX_FORMAT (GET_CODE (op));
3779 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3781 if (fmt[i] == 'E')
3783 int j;
3785 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3786 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3787 return true;
3790 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3791 return true;
3794 return false;
3798 /* Return true if OP is a legitimate general operand when
3799 generating PIC code. It is given that flag_pic is on
3800 and that OP satisfies CONSTANT_P. */
3803 legitimate_pic_operand_p (rtx op)
3805 /* Accept all non-symbolic constants. */
3806 if (!SYMBOLIC_CONST (op))
3807 return 1;
3809 /* Reject everything else; must be handled
3810 via emit_symbolic_move. */
3811 return 0;
3814 /* Returns true if the constant value OP is a legitimate general operand.
3815 It is given that OP satisfies CONSTANT_P. */
3817 static bool
3818 s390_legitimate_constant_p (machine_mode mode, rtx op)
3820 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3822 if (GET_MODE_SIZE (mode) != 16)
3823 return 0;
3825 if (!satisfies_constraint_j00 (op)
3826 && !satisfies_constraint_jm1 (op)
3827 && !satisfies_constraint_jKK (op)
3828 && !satisfies_constraint_jxx (op)
3829 && !satisfies_constraint_jyy (op))
3830 return 0;
3833 /* Accept all non-symbolic constants. */
3834 if (!SYMBOLIC_CONST (op))
3835 return 1;
3837 /* Accept immediate LARL operands. */
3838 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3839 return 1;
3841 /* Thread-local symbols are never legal constants. This is
3842 so that emit_call knows that computing such addresses
3843 might require a function call. */
3844 if (TLS_SYMBOLIC_CONST (op))
3845 return 0;
3847 /* In the PIC case, symbolic constants must *not* be
3848 forced into the literal pool. We accept them here,
3849 so that they will be handled by emit_symbolic_move. */
3850 if (flag_pic)
3851 return 1;
3853 /* All remaining non-PIC symbolic constants are
3854 forced into the literal pool. */
3855 return 0;
3858 /* Determine if it's legal to put X into the constant pool. This
3859 is not possible if X contains the address of a symbol that is
3860 not constant (TLS) or not known at final link time (PIC). */
3862 static bool
3863 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3865 switch (GET_CODE (x))
3867 case CONST_INT:
3868 case CONST_DOUBLE:
3869 case CONST_WIDE_INT:
3870 case CONST_VECTOR:
3871 /* Accept all non-symbolic constants. */
3872 return false;
3874 case LABEL_REF:
3875 /* Labels are OK iff we are non-PIC. */
3876 return flag_pic != 0;
3878 case SYMBOL_REF:
3879 /* 'Naked' TLS symbol references are never OK,
3880 non-TLS symbols are OK iff we are non-PIC. */
3881 if (tls_symbolic_operand (x))
3882 return true;
3883 else
3884 return flag_pic != 0;
3886 case CONST:
3887 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3888 case PLUS:
3889 case MINUS:
3890 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3891 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3893 case UNSPEC:
3894 switch (XINT (x, 1))
3896 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3897 case UNSPEC_LTREL_OFFSET:
3898 case UNSPEC_GOT:
3899 case UNSPEC_GOTOFF:
3900 case UNSPEC_PLTOFF:
3901 case UNSPEC_TLSGD:
3902 case UNSPEC_TLSLDM:
3903 case UNSPEC_NTPOFF:
3904 case UNSPEC_DTPOFF:
3905 case UNSPEC_GOTNTPOFF:
3906 case UNSPEC_INDNTPOFF:
3907 return false;
3909 /* If the literal pool shares the code section, execute template
3910 placeholders may be put into the pool as well. */
3911 case UNSPEC_INSN:
3912 return TARGET_CPU_ZARCH;
3914 default:
3915 return true;
3917 break;
3919 default:
3920 gcc_unreachable ();
3924 /* Returns true if the constant value OP is a legitimate general
3925 operand during and after reload. The difference to
3926 legitimate_constant_p is that this function will not accept
3927 a constant that would need to be forced to the literal pool
3928 before it can be used as operand.
3929 This function accepts all constants which can be loaded directly
3930 into a GPR. */
3932 bool
3933 legitimate_reload_constant_p (rtx op)
3935 /* Accept la(y) operands. */
3936 if (GET_CODE (op) == CONST_INT
3937 && DISP_IN_RANGE (INTVAL (op)))
3938 return true;
3940 /* Accept l(g)hi/l(g)fi operands. */
3941 if (GET_CODE (op) == CONST_INT
3942 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3943 return true;
3945 /* Accept lliXX operands. */
3946 if (TARGET_ZARCH
3947 && GET_CODE (op) == CONST_INT
3948 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3949 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3950 return true;
3952 if (TARGET_EXTIMM
3953 && GET_CODE (op) == CONST_INT
3954 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3955 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3956 return true;
3958 /* Accept larl operands. */
3959 if (TARGET_CPU_ZARCH
3960 && larl_operand (op, VOIDmode))
3961 return true;
3963 /* Accept floating-point zero operands that fit into a single GPR. */
3964 if (GET_CODE (op) == CONST_DOUBLE
3965 && s390_float_const_zero_p (op)
3966 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3967 return true;
3969 /* Accept double-word operands that can be split. */
3970 if (GET_CODE (op) == CONST_WIDE_INT
3971 || (GET_CODE (op) == CONST_INT
3972 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3974 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3975 rtx hi = operand_subword (op, 0, 0, dword_mode);
3976 rtx lo = operand_subword (op, 1, 0, dword_mode);
3977 return legitimate_reload_constant_p (hi)
3978 && legitimate_reload_constant_p (lo);
3981 /* Everything else cannot be handled without reload. */
3982 return false;
3985 /* Returns true if the constant value OP is a legitimate fp operand
3986 during and after reload.
3987 This function accepts all constants which can be loaded directly
3988 into an FPR. */
3990 static bool
3991 legitimate_reload_fp_constant_p (rtx op)
3993 /* Accept floating-point zero operands if the load zero instruction
3994 can be used. Prior to z196 the load fp zero instruction caused a
3995 performance penalty if the result is used as a BFP number. */
3996 if (TARGET_Z196
3997 && GET_CODE (op) == CONST_DOUBLE
3998 && s390_float_const_zero_p (op))
3999 return true;
4001 return false;
4004 /* Returns true if the constant value OP is a legitimate vector operand
4005 during and after reload.
4006 This function accepts all constants which can be loaded directly
4007 into a VR. */
4009 static bool
4010 legitimate_reload_vector_constant_p (rtx op)
4012 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4013 && (satisfies_constraint_j00 (op)
4014 || satisfies_constraint_jm1 (op)
4015 || satisfies_constraint_jKK (op)
4016 || satisfies_constraint_jxx (op)
4017 || satisfies_constraint_jyy (op)))
4018 return true;
4020 return false;
4023 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4024 return the class of reg to actually use. */
4026 static reg_class_t
4027 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4029 switch (GET_CODE (op))
4031 /* Constants we cannot reload into general registers
4032 must be forced into the literal pool. */
4033 case CONST_VECTOR:
4034 case CONST_DOUBLE:
4035 case CONST_INT:
4036 case CONST_WIDE_INT:
4037 if (reg_class_subset_p (GENERAL_REGS, rclass)
4038 && legitimate_reload_constant_p (op))
4039 return GENERAL_REGS;
4040 else if (reg_class_subset_p (ADDR_REGS, rclass)
4041 && legitimate_reload_constant_p (op))
4042 return ADDR_REGS;
4043 else if (reg_class_subset_p (FP_REGS, rclass)
4044 && legitimate_reload_fp_constant_p (op))
4045 return FP_REGS;
4046 else if (reg_class_subset_p (VEC_REGS, rclass)
4047 && legitimate_reload_vector_constant_p (op))
4048 return VEC_REGS;
4050 return NO_REGS;
4052 /* If a symbolic constant or a PLUS is reloaded,
4053 it is most likely being used as an address, so
4054 prefer ADDR_REGS. If 'class' is not a superset
4055 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4056 case CONST:
4057 /* Symrefs cannot be pushed into the literal pool with -fPIC
4058 so we *MUST NOT* return NO_REGS for these cases
4059 (s390_cannot_force_const_mem will return true).
4061 On the other hand we MUST return NO_REGS for symrefs with
4062 invalid addend which might have been pushed to the literal
4063 pool (no -fPIC). Usually we would expect them to be
4064 handled via secondary reload but this does not happen if
4065 they are used as literal pool slot replacement in reload
4066 inheritance (see emit_input_reload_insns). */
4067 if (TARGET_CPU_ZARCH
4068 && GET_CODE (XEXP (op, 0)) == PLUS
4069 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4070 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4072 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4073 return ADDR_REGS;
4074 else
4075 return NO_REGS;
4077 /* fallthrough */
4078 case LABEL_REF:
4079 case SYMBOL_REF:
4080 if (!legitimate_reload_constant_p (op))
4081 return NO_REGS;
4082 /* fallthrough */
4083 case PLUS:
4084 /* load address will be used. */
4085 if (reg_class_subset_p (ADDR_REGS, rclass))
4086 return ADDR_REGS;
4087 else
4088 return NO_REGS;
4090 default:
4091 break;
4094 return rclass;
4097 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4098 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4099 aligned. */
4101 bool
4102 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4104 HOST_WIDE_INT addend;
4105 rtx symref;
4107 /* The "required alignment" might be 0 (e.g. for certain structs
4108 accessed via BLKmode). Early abort in this case, as well as when
4109 an alignment > 8 is required. */
4110 if (alignment < 2 || alignment > 8)
4111 return false;
4113 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4114 return false;
4116 if (addend & (alignment - 1))
4117 return false;
4119 if (GET_CODE (symref) == SYMBOL_REF)
4121 /* We have load-relative instructions for 2-byte, 4-byte, and
4122 8-byte alignment so allow only these. */
4123 switch (alignment)
4125 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4126 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4127 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4128 default: return false;
4132 if (GET_CODE (symref) == UNSPEC
4133 && alignment <= UNITS_PER_LONG)
4134 return true;
4136 return false;
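/* For illustration (hypothetical symbol): with ALIGNMENT == 4 an address
   of the form (const (plus (symbol_ref "x") (const_int 8))) passes the
   addend check (8 & 3 == 0), while an addend of 6 is rejected; the
   SYMBOL_REF must additionally not carry the NOTALIGN4 flag. */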
4139 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4140 operand, SCRATCH is used to load the even part of the address;
4141 one is then added via la. */
4143 void
4144 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4146 HOST_WIDE_INT addend;
4147 rtx symref;
4149 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4150 gcc_unreachable ();
4152 if (!(addend & 1))
4153 /* Easy case. The addend is even so larl will do fine. */
4154 emit_move_insn (reg, addr);
4155 else
4157 /* We can leave the scratch register untouched if the target
4158 register is a valid base register. */
4159 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4160 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4161 scratch = reg;
4163 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4164 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4166 if (addend != 1)
4167 emit_move_insn (scratch,
4168 gen_rtx_CONST (Pmode,
4169 gen_rtx_PLUS (Pmode, symref,
4170 GEN_INT (addend - 1))));
4171 else
4172 emit_move_insn (scratch, symref);
4174 /* Increment the address using la in order to avoid clobbering cc. */
4175 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
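/* For illustration (hypothetical symbol): reloading the odd address
   `x + 5' loads `x + 4' into SCRATCH via larl and then emits roughly
   `la REG,1(SCRATCH)' to add the remaining byte without clobbering the
   condition code. */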
4179 /* Generate what is necessary to move between REG and MEM using
4180 SCRATCH. The direction is given by TOMEM. */
4182 void
4183 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4185 /* Reload might have pulled a constant out of the literal pool.
4186 Force it back in. */
4187 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4188 || GET_CODE (mem) == CONST_WIDE_INT
4189 || GET_CODE (mem) == CONST_VECTOR
4190 || GET_CODE (mem) == CONST)
4191 mem = force_const_mem (GET_MODE (reg), mem);
4193 gcc_assert (MEM_P (mem));
4195 /* For a load from memory we can leave the scratch register
4196 untouched if the target register is a valid base register. */
4197 if (!tomem
4198 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4199 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4200 && GET_MODE (reg) == GET_MODE (scratch))
4201 scratch = reg;
4203 /* Load address into scratch register. Since we can't have a
4204 secondary reload for a secondary reload we have to cover the case
4205 where larl would need a secondary reload here as well. */
4206 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4208 /* Now we can use a standard load/store to do the move. */
4209 if (tomem)
4210 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4211 else
4212 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4215 /* Inform reload about cases where moving X with a mode MODE to a register in
4216 RCLASS requires an extra scratch or immediate register. Return the class
4217 needed for the immediate register. */
4219 static reg_class_t
4220 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4221 machine_mode mode, secondary_reload_info *sri)
4223 enum reg_class rclass = (enum reg_class) rclass_i;
4225 /* Intermediate register needed. */
4226 if (reg_classes_intersect_p (CC_REGS, rclass))
4227 return GENERAL_REGS;
4229 if (TARGET_VX)
4231 /* The vst/vl vector move instructions allow only short
4232 displacements. */
4233 if (MEM_P (x)
4234 && GET_CODE (XEXP (x, 0)) == PLUS
4235 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4236 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4237 && reg_class_subset_p (rclass, VEC_REGS)
4238 && (!reg_class_subset_p (rclass, FP_REGS)
4239 || (GET_MODE_SIZE (mode) > 8
4240 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4242 if (in_p)
4243 sri->icode = (TARGET_64BIT ?
4244 CODE_FOR_reloaddi_la_in :
4245 CODE_FOR_reloadsi_la_in);
4246 else
4247 sri->icode = (TARGET_64BIT ?
4248 CODE_FOR_reloaddi_la_out :
4249 CODE_FOR_reloadsi_la_out);
4253 if (TARGET_Z10)
4255 HOST_WIDE_INT offset;
4256 rtx symref;
4258 /* On z10 several optimizer steps may generate larl operands with
4259 an odd addend. */
4260 if (in_p
4261 && s390_loadrelative_operand_p (x, &symref, &offset)
4262 && mode == Pmode
4263 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4264 && (offset & 1) == 1)
4265 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4266 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4268 /* Handle all the (mem (symref)) accesses we cannot use the z10
4269 instructions for. */
4270 if (MEM_P (x)
4271 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4272 && (mode == QImode
4273 || !reg_class_subset_p (rclass, GENERAL_REGS)
4274 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4275 || !s390_check_symref_alignment (XEXP (x, 0),
4276 GET_MODE_SIZE (mode))))
4278 #define __SECONDARY_RELOAD_CASE(M,m) \
4279 case M##mode: \
4280 if (TARGET_64BIT) \
4281 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4282 CODE_FOR_reload##m##di_tomem_z10; \
4283 else \
4284 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4285 CODE_FOR_reload##m##si_tomem_z10; \
4286 break;
4288 switch (GET_MODE (x))
4290 __SECONDARY_RELOAD_CASE (QI, qi);
4291 __SECONDARY_RELOAD_CASE (HI, hi);
4292 __SECONDARY_RELOAD_CASE (SI, si);
4293 __SECONDARY_RELOAD_CASE (DI, di);
4294 __SECONDARY_RELOAD_CASE (TI, ti);
4295 __SECONDARY_RELOAD_CASE (SF, sf);
4296 __SECONDARY_RELOAD_CASE (DF, df);
4297 __SECONDARY_RELOAD_CASE (TF, tf);
4298 __SECONDARY_RELOAD_CASE (SD, sd);
4299 __SECONDARY_RELOAD_CASE (DD, dd);
4300 __SECONDARY_RELOAD_CASE (TD, td);
4301 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4302 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4303 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4304 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4305 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4306 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4307 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4308 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4309 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4310 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4311 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4312 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4313 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4314 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4315 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4316 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4317 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4318 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4319 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4320 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4321 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4322 default:
4323 gcc_unreachable ();
4325 #undef __SECONDARY_RELOAD_CASE
4329 /* We need a scratch register when loading a PLUS expression which
4330 is not a legitimate operand of the LOAD ADDRESS instruction. */
4331 /* LRA can deal with the transformation of a plus operand very well -- so we
4332 don't need to prompt LRA in this case. */
4333 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4334 sri->icode = (TARGET_64BIT ?
4335 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4337 /* When performing a multiword move from or to memory we have to make sure the
4338 second chunk in memory is addressable without causing a displacement
4339 overflow. If that would be the case we calculate the address in
4340 a scratch register. */
4341 if (MEM_P (x)
4342 && GET_CODE (XEXP (x, 0)) == PLUS
4343 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4344 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4345 + GET_MODE_SIZE (mode) - 1))
4347 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4348 in an s_operand address since we may fall back to lm/stm. So we only
4349 have to care about overflows in the b+i+d case. */
4350 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4351 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4352 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4353 /* For FP_REGS no lm/stm is available so this check is triggered
4354 for displacement overflows in b+i+d and b+d like addresses. */
4355 || (reg_classes_intersect_p (FP_REGS, rclass)
4356 && s390_class_max_nregs (FP_REGS, mode) > 1))
4358 if (in_p)
4359 sri->icode = (TARGET_64BIT ?
4360 CODE_FOR_reloaddi_la_in :
4361 CODE_FOR_reloadsi_la_in);
4362 else
4363 sri->icode = (TARGET_64BIT ?
4364 CODE_FOR_reloaddi_la_out :
4365 CODE_FOR_reloadsi_la_out);
4369 /* A scratch address register is needed when a symbolic constant is
4370 copied to r0 when compiling with -fPIC. In other cases the target
4371 register might be used as temporary (see legitimize_pic_address). */
4372 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4373 sri->icode = (TARGET_64BIT ?
4374 CODE_FOR_reloaddi_PIC_addr :
4375 CODE_FOR_reloadsi_PIC_addr);
4377 /* Either scratch or no register needed. */
4378 return NO_REGS;
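/* For illustration (hypothetical address): a V16QI access to
   `base + 4096' does not fit the unsigned 12-bit displacement of vl/vst,
   so for a vector register class one of the reload<mode>_la_in/_la_out
   expanders above is requested to compute the address in an address
   register first. */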
4381 /* Generate code to load SRC, which is a PLUS that is not a
4382 legitimate operand for the LA instruction, into TARGET.
4383 SCRATCH may be used as scratch register. */
4385 void
4386 s390_expand_plus_operand (rtx target, rtx src,
4387 rtx scratch)
4389 rtx sum1, sum2;
4390 struct s390_address ad;
4392 /* src must be a PLUS; get its two operands. */
4393 gcc_assert (GET_CODE (src) == PLUS);
4394 gcc_assert (GET_MODE (src) == Pmode);
4396 /* Check whether either of the two operands is already scheduled
4397 for replacement by reload. This can happen e.g. when
4398 float registers occur in an address. */
4399 sum1 = find_replacement (&XEXP (src, 0));
4400 sum2 = find_replacement (&XEXP (src, 1));
4401 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4403 /* If the address is already strictly valid, there's nothing to do. */
4404 if (!s390_decompose_address (src, &ad)
4405 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4406 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4408 /* Otherwise, one of the operands cannot be an address register;
4409 we reload its value into the scratch register. */
4410 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4412 emit_move_insn (scratch, sum1);
4413 sum1 = scratch;
4415 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4417 emit_move_insn (scratch, sum2);
4418 sum2 = scratch;
4421 /* According to the way these invalid addresses are generated
4422 in reload.c, it should never happen (at least on s390) that
4423 *neither* of the PLUS components, after find_replacements
4424 was applied, is an address register. */
4425 if (sum1 == scratch && sum2 == scratch)
4427 debug_rtx (src);
4428 gcc_unreachable ();
4431 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4434 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4435 is only ever performed on addresses, so we can mark the
4436 sum as legitimate for LA in any case. */
4437 s390_load_address (target, src);
4441 /* Return true if ADDR is a valid memory address.
4442 STRICT specifies whether strict register checking applies. */
4444 static bool
4445 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4447 struct s390_address ad;
4449 if (TARGET_Z10
4450 && larl_operand (addr, VOIDmode)
4451 && (mode == VOIDmode
4452 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4453 return true;
4455 if (!s390_decompose_address (addr, &ad))
4456 return false;
4458 if (strict)
4460 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4461 return false;
4463 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4464 return false;
4466 else
4468 if (ad.base
4469 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4470 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4471 return false;
4473 if (ad.indx
4474 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4475 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4476 return false;
4478 return true;
4481 /* Return true if OP is a valid operand for the LA instruction.
4482 In 31-bit, we need to prove that the result is used as an
4483 address, as LA performs only a 31-bit addition. */
4485 bool
4486 legitimate_la_operand_p (rtx op)
4488 struct s390_address addr;
4489 if (!s390_decompose_address (op, &addr))
4490 return false;
4492 return (TARGET_64BIT || addr.pointer);
4495 /* Return true if it is valid *and* preferable to use LA to
4496 compute the sum of OP1 and OP2. */
4498 bool
4499 preferred_la_operand_p (rtx op1, rtx op2)
4501 struct s390_address addr;
4503 if (op2 != const0_rtx)
4504 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4506 if (!s390_decompose_address (op1, &addr))
4507 return false;
4508 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4509 return false;
4510 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4511 return false;
4513 /* Avoid LA instructions with index register on z196; it is
4514 preferable to use regular add instructions when possible.
4515 Starting with zEC12 the la with index register is "uncracked"
4516 again. */
4517 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4518 return false;
4520 if (!TARGET_64BIT && !addr.pointer)
4521 return false;
4523 if (addr.pointer)
4524 return true;
4526 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4527 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4528 return true;
4530 return false;
4533 /* Emit a forced load-address operation to load SRC into DST.
4534 This will use the LOAD ADDRESS instruction even in situations
4535 where legitimate_la_operand_p (SRC) returns false. */
4537 void
4538 s390_load_address (rtx dst, rtx src)
4540 if (TARGET_64BIT)
4541 emit_move_insn (dst, src);
4542 else
4543 emit_insn (gen_force_la_31 (dst, src));
4546 /* Return a legitimate reference for ORIG (an address) using the
4547 register REG. If REG is 0, a new pseudo is generated.
4549 There are two types of references that must be handled:
4551 1. Global data references must load the address from the GOT, via
4552 the PIC reg. An insn is emitted to do this load, and the reg is
4553 returned.
4555 2. Static data references, constant pool addresses, and code labels
4556 compute the address as an offset from the GOT, whose base is in
4557 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4558 differentiate them from global data objects. The returned
4559 address is the PIC reg + an unspec constant.
4561 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4562 reg also appears in the address. */
4564 rtx
4565 legitimize_pic_address (rtx orig, rtx reg)
4567 rtx addr = orig;
4568 rtx addend = const0_rtx;
4569 rtx new_rtx = orig;
4571 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4573 if (GET_CODE (addr) == CONST)
4574 addr = XEXP (addr, 0);
4576 if (GET_CODE (addr) == PLUS)
4578 addend = XEXP (addr, 1);
4579 addr = XEXP (addr, 0);
4582 if ((GET_CODE (addr) == LABEL_REF
4583 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4584 || (GET_CODE (addr) == UNSPEC &&
4585 (XINT (addr, 1) == UNSPEC_GOTENT
4586 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4587 && GET_CODE (addend) == CONST_INT)
4589 /* This can be locally addressed. */
4591 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4592 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4593 gen_rtx_CONST (Pmode, addr) : addr);
4595 if (TARGET_CPU_ZARCH
4596 && larl_operand (const_addr, VOIDmode)
4597 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4598 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4600 if (INTVAL (addend) & 1)
4602 /* LARL can't handle odd offsets, so emit a pair of LARL
4603 and LA. */
4604 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4606 if (!DISP_IN_RANGE (INTVAL (addend)))
4608 HOST_WIDE_INT even = INTVAL (addend) - 1;
4609 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4610 addr = gen_rtx_CONST (Pmode, addr);
4611 addend = const1_rtx;
4614 emit_move_insn (temp, addr);
4615 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4617 if (reg != 0)
4619 s390_load_address (reg, new_rtx);
4620 new_rtx = reg;
4623 else
4625 /* If the offset is even, we can just use LARL. This
4626 will happen automatically. */
4629 else
4631 /* No larl - Access local symbols relative to the GOT. */
4633 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4635 if (reload_in_progress || reload_completed)
4636 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4638 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4639 if (addend != const0_rtx)
4640 addr = gen_rtx_PLUS (Pmode, addr, addend);
4641 addr = gen_rtx_CONST (Pmode, addr);
4642 addr = force_const_mem (Pmode, addr);
4643 emit_move_insn (temp, addr);
4645 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4646 if (reg != 0)
4648 s390_load_address (reg, new_rtx);
4649 new_rtx = reg;
4653 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4655 /* A non-local symbol reference without an addend.
4657 The symbol ref is wrapped into an UNSPEC to make sure the
4658 proper operand modifier (@GOT or @GOTENT) will be emitted.
4659 This will tell the linker to put the symbol into the GOT.
4661 Additionally the code dereferencing the GOT slot is emitted here.
4663 An addend to the symref needs to be added afterwards.
4664 legitimize_pic_address calls itself recursively to handle
4665 that case. So no need to do it here. */
4667 if (reg == 0)
4668 reg = gen_reg_rtx (Pmode);
4670 if (TARGET_Z10)
4672 /* Use load relative if possible.
4673 lgrl <target>, sym@GOTENT */
4674 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4675 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4676 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4678 emit_move_insn (reg, new_rtx);
4679 new_rtx = reg;
4681 else if (flag_pic == 1)
4683 /* Assume GOT offset is a valid displacement operand (< 4k
4684 or < 512k with z990). This is handled the same way in
4685 both 31- and 64-bit code (@GOT).
4686 lg <target>, sym@GOT(r12) */
4688 if (reload_in_progress || reload_completed)
4689 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4691 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4692 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4693 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4694 new_rtx = gen_const_mem (Pmode, new_rtx);
4695 emit_move_insn (reg, new_rtx);
4696 new_rtx = reg;
4698 else if (TARGET_CPU_ZARCH)
4700 /* If the GOT offset might be >= 4k, we determine the position
4701 of the GOT entry via a PC-relative LARL (@GOTENT).
4702 larl temp, sym@GOTENT
4703 lg <target>, 0(temp) */
4705 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4707 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4708 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4710 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4711 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4712 emit_move_insn (temp, new_rtx);
4714 new_rtx = gen_const_mem (Pmode, temp);
4715 emit_move_insn (reg, new_rtx);
4717 new_rtx = reg;
4719 else
4721 /* If the GOT offset might be >= 4k, we have to load it
4722 from the literal pool (@GOT).
4724 lg temp, lit-litbase(r13)
4725 lg <target>, 0(temp)
4726 lit: .long sym@GOT */
4728 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4730 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4731 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4733 if (reload_in_progress || reload_completed)
4734 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4736 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4737 addr = gen_rtx_CONST (Pmode, addr);
4738 addr = force_const_mem (Pmode, addr);
4739 emit_move_insn (temp, addr);
4741 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4742 new_rtx = gen_const_mem (Pmode, new_rtx);
4743 emit_move_insn (reg, new_rtx);
4744 new_rtx = reg;
4747 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4749 gcc_assert (XVECLEN (addr, 0) == 1);
4750 switch (XINT (addr, 1))
4752 /* These refer to symbols (or PLT slots) addressed relative to the
4753 GOT (not to GOT slots!). In general this will exceed the
4754 displacement range so these values belong in the literal
4755 pool. */
4756 case UNSPEC_GOTOFF:
4757 case UNSPEC_PLTOFF:
4758 new_rtx = force_const_mem (Pmode, orig);
4759 break;
4761 /* For -fPIC the GOT size might exceed the displacement
4762 range so make sure the value is in the literal pool. */
4763 case UNSPEC_GOT:
4764 if (flag_pic == 2)
4765 new_rtx = force_const_mem (Pmode, orig);
4766 break;
4768 /* For @GOTENT larl is used. This is handled like local
4769 symbol refs. */
4770 case UNSPEC_GOTENT:
4771 gcc_unreachable ();
4772 break;
4774 /* @PLT is OK as is on 64-bit, must be converted to
4775 GOT-relative @PLTOFF on 31-bit. */
4776 case UNSPEC_PLT:
4777 if (!TARGET_CPU_ZARCH)
4779 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4781 if (reload_in_progress || reload_completed)
4782 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4784 addr = XVECEXP (addr, 0, 0);
4785 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4786 UNSPEC_PLTOFF);
4787 if (addend != const0_rtx)
4788 addr = gen_rtx_PLUS (Pmode, addr, addend);
4789 addr = gen_rtx_CONST (Pmode, addr);
4790 addr = force_const_mem (Pmode, addr);
4791 emit_move_insn (temp, addr);
4793 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4794 if (reg != 0)
4796 s390_load_address (reg, new_rtx);
4797 new_rtx = reg;
4800 else
4801 /* On 64 bit larl can be used. This case is handled like
4802 local symbol refs. */
4803 gcc_unreachable ();
4804 break;
4806 /* Everything else cannot happen. */
4807 default:
4808 gcc_unreachable ();
4811 else if (addend != const0_rtx)
4813 /* Otherwise, compute the sum. */
4815 rtx base = legitimize_pic_address (addr, reg);
4816 new_rtx = legitimize_pic_address (addend,
4817 base == reg ? NULL_RTX : reg);
4818 if (GET_CODE (new_rtx) == CONST_INT)
4819 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4820 else
4822 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4824 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4825 new_rtx = XEXP (new_rtx, 1);
4827 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4830 if (GET_CODE (new_rtx) == CONST)
4831 new_rtx = XEXP (new_rtx, 0);
4832 new_rtx = force_operand (new_rtx, 0);
4835 return new_rtx;
4838 /* Load the thread pointer into a register. */
4840 rtx
4841 s390_get_thread_pointer (void)
4843 rtx tp = gen_reg_rtx (Pmode);
4845 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4846 mark_reg_pointer (tp, BITS_PER_WORD);
4848 return tp;
4851 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4852 in s390_tls_symbol which always refers to __tls_get_offset.
4853 The returned offset is written to RESULT_REG and a USE rtx is
4854 generated for TLS_CALL. */
4856 static GTY(()) rtx s390_tls_symbol;
4858 static void
4859 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4861 rtx insn;
4863 if (!flag_pic)
4864 emit_insn (s390_load_got ());
4866 if (!s390_tls_symbol)
4867 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4869 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4870 gen_rtx_REG (Pmode, RETURN_REGNUM));
4872 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4873 RTL_CONST_CALL_P (insn) = 1;
4876 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4877 this (thread-local) address. REG may be used as temporary. */
4879 static rtx
4880 legitimize_tls_address (rtx addr, rtx reg)
4882 rtx new_rtx, tls_call, temp, base, r2;
4883 rtx_insn *insn;
4885 if (GET_CODE (addr) == SYMBOL_REF)
4886 switch (tls_symbolic_operand (addr))
4888 case TLS_MODEL_GLOBAL_DYNAMIC:
4889 start_sequence ();
4890 r2 = gen_rtx_REG (Pmode, 2);
4891 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4892 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4893 new_rtx = force_const_mem (Pmode, new_rtx);
4894 emit_move_insn (r2, new_rtx);
4895 s390_emit_tls_call_insn (r2, tls_call);
4896 insn = get_insns ();
4897 end_sequence ();
4899 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4900 temp = gen_reg_rtx (Pmode);
4901 emit_libcall_block (insn, temp, r2, new_rtx);
4903 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4904 if (reg != 0)
4906 s390_load_address (reg, new_rtx);
4907 new_rtx = reg;
4909 break;
4911 case TLS_MODEL_LOCAL_DYNAMIC:
4912 start_sequence ();
4913 r2 = gen_rtx_REG (Pmode, 2);
4914 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4915 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4916 new_rtx = force_const_mem (Pmode, new_rtx);
4917 emit_move_insn (r2, new_rtx);
4918 s390_emit_tls_call_insn (r2, tls_call);
4919 insn = get_insns ();
4920 end_sequence ();
4922 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4923 temp = gen_reg_rtx (Pmode);
4924 emit_libcall_block (insn, temp, r2, new_rtx);
4926 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4927 base = gen_reg_rtx (Pmode);
4928 s390_load_address (base, new_rtx);
4930 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4931 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4932 new_rtx = force_const_mem (Pmode, new_rtx);
4933 temp = gen_reg_rtx (Pmode);
4934 emit_move_insn (temp, new_rtx);
4936 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4937 if (reg != 0)
4939 s390_load_address (reg, new_rtx);
4940 new_rtx = reg;
4942 break;
4944 case TLS_MODEL_INITIAL_EXEC:
4945 if (flag_pic == 1)
4947 /* Assume GOT offset < 4k. This is handled the same way
4948 in both 31- and 64-bit code. */
4950 if (reload_in_progress || reload_completed)
4951 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4953 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4954 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4955 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4956 new_rtx = gen_const_mem (Pmode, new_rtx);
4957 temp = gen_reg_rtx (Pmode);
4958 emit_move_insn (temp, new_rtx);
4960 else if (TARGET_CPU_ZARCH)
4962 /* If the GOT offset might be >= 4k, we determine the position
4963 of the GOT entry via a PC-relative LARL. */
4965 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4966 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4967 temp = gen_reg_rtx (Pmode);
4968 emit_move_insn (temp, new_rtx);
4970 new_rtx = gen_const_mem (Pmode, temp);
4971 temp = gen_reg_rtx (Pmode);
4972 emit_move_insn (temp, new_rtx);
4974 else if (flag_pic)
4976 /* If the GOT offset might be >= 4k, we have to load it
4977 from the literal pool. */
4979 if (reload_in_progress || reload_completed)
4980 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4982 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4983 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4984 new_rtx = force_const_mem (Pmode, new_rtx);
4985 temp = gen_reg_rtx (Pmode);
4986 emit_move_insn (temp, new_rtx);
4988 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4989 new_rtx = gen_const_mem (Pmode, new_rtx);
4991 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4992 temp = gen_reg_rtx (Pmode);
4993 emit_insn (gen_rtx_SET (temp, new_rtx));
4995 else
4997 /* In position-dependent code, load the absolute address of
4998 the GOT entry from the literal pool. */
5000 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5001 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5002 new_rtx = force_const_mem (Pmode, new_rtx);
5003 temp = gen_reg_rtx (Pmode);
5004 emit_move_insn (temp, new_rtx);
5006 new_rtx = temp;
5007 new_rtx = gen_const_mem (Pmode, new_rtx);
5008 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5009 temp = gen_reg_rtx (Pmode);
5010 emit_insn (gen_rtx_SET (temp, new_rtx));
5013 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5014 if (reg != 0)
5016 s390_load_address (reg, new_rtx);
5017 new_rtx = reg;
5019 break;
5021 case TLS_MODEL_LOCAL_EXEC:
5022 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5023 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5024 new_rtx = force_const_mem (Pmode, new_rtx);
5025 temp = gen_reg_rtx (Pmode);
5026 emit_move_insn (temp, new_rtx);
5028 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5029 if (reg != 0)
5031 s390_load_address (reg, new_rtx);
5032 new_rtx = reg;
5034 break;
5036 default:
5037 gcc_unreachable ();
5040 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5042 switch (XINT (XEXP (addr, 0), 1))
5044 case UNSPEC_INDNTPOFF:
5045 gcc_assert (TARGET_CPU_ZARCH);
5046 new_rtx = addr;
5047 break;
5049 default:
5050 gcc_unreachable ();
5054 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5055 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5057 new_rtx = XEXP (XEXP (addr, 0), 0);
5058 if (GET_CODE (new_rtx) != SYMBOL_REF)
5059 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5061 new_rtx = legitimize_tls_address (new_rtx, reg);
5062 new_rtx = plus_constant (Pmode, new_rtx,
5063 INTVAL (XEXP (XEXP (addr, 0), 1)));
5064 new_rtx = force_operand (new_rtx, 0);
5067 else
5068 gcc_unreachable (); /* for now ... */
5070 return new_rtx;
5073 /* Emit insns making the address in operands[1] valid for a standard
5074 move to operands[0]. operands[1] is replaced by an address which
5075 should be used instead of the former RTX to emit the move
5076 pattern. */
5078 void
5079 emit_symbolic_move (rtx *operands)
5081 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5083 if (GET_CODE (operands[0]) == MEM)
5084 operands[1] = force_reg (Pmode, operands[1]);
5085 else if (TLS_SYMBOLIC_CONST (operands[1]))
5086 operands[1] = legitimize_tls_address (operands[1], temp);
5087 else if (flag_pic)
5088 operands[1] = legitimize_pic_address (operands[1], temp);
5091 /* Try machine-dependent ways of modifying an illegitimate address X
5092 to be legitimate. If we find one, return the new, valid address.
5094 OLDX is the address as it was before break_out_memory_refs was called.
5095 In some cases it is useful to look at this to decide what needs to be done.
5097 MODE is the mode of the operand pointed to by X.
5099 When -fpic is used, special handling is needed for symbolic references.
5100 See comments by legitimize_pic_address for details. */
5102 static rtx
5103 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5104 machine_mode mode ATTRIBUTE_UNUSED)
5106 rtx constant_term = const0_rtx;
5108 if (TLS_SYMBOLIC_CONST (x))
5110 x = legitimize_tls_address (x, 0);
5112 if (s390_legitimate_address_p (mode, x, FALSE))
5113 return x;
5115 else if (GET_CODE (x) == PLUS
5116 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5117 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5119 return x;
5121 else if (flag_pic)
5123 if (SYMBOLIC_CONST (x)
5124 || (GET_CODE (x) == PLUS
5125 && (SYMBOLIC_CONST (XEXP (x, 0))
5126 || SYMBOLIC_CONST (XEXP (x, 1)))))
5127 x = legitimize_pic_address (x, 0);
5129 if (s390_legitimate_address_p (mode, x, FALSE))
5130 return x;
5133 x = eliminate_constant_term (x, &constant_term);
5135 /* Optimize loading of large displacements by splitting them
5136 into the multiple of 4K and the rest; this allows the
5137 former to be CSE'd if possible.
5139 Don't do this if the displacement is added to a register
5140 pointing into the stack frame, as the offsets will
5141 change later anyway. */
5143 if (GET_CODE (constant_term) == CONST_INT
5144 && !TARGET_LONG_DISPLACEMENT
5145 && !DISP_IN_RANGE (INTVAL (constant_term))
5146 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5148 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5149 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5151 rtx temp = gen_reg_rtx (Pmode);
5152 rtx val = force_operand (GEN_INT (upper), temp);
5153 if (val != temp)
5154 emit_move_insn (temp, val);
5156 x = gen_rtx_PLUS (Pmode, x, temp);
5157 constant_term = GEN_INT (lower);
5160 if (GET_CODE (x) == PLUS)
5162 if (GET_CODE (XEXP (x, 0)) == REG)
5164 rtx temp = gen_reg_rtx (Pmode);
5165 rtx val = force_operand (XEXP (x, 1), temp);
5166 if (val != temp)
5167 emit_move_insn (temp, val);
5169 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5172 else if (GET_CODE (XEXP (x, 1)) == REG)
5174 rtx temp = gen_reg_rtx (Pmode);
5175 rtx val = force_operand (XEXP (x, 0), temp);
5176 if (val != temp)
5177 emit_move_insn (temp, val);
5179 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5183 if (constant_term != const0_rtx)
5184 x = gen_rtx_PLUS (Pmode, x, constant_term);
5186 return x;
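/* For illustration (hypothetical displacement): without long
   displacements an offset of 0x12345 is split into 0x12000, which is
   loaded into a fresh pseudo and added to the base, and a remaining
   displacement of 0x345 that fits the 12-bit field; the 0x12000 part can
   then be CSEd across neighbouring accesses. */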
5189 /* Try a machine-dependent way of reloading an illegitimate address AD
5190 operand. If we find one, push the reload and return the new address.
5192 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5193 and TYPE is the reload type of the current reload. */
5195 rtx
5196 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5197 int opnum, int type)
5199 if (!optimize || TARGET_LONG_DISPLACEMENT)
5200 return NULL_RTX;
5202 if (GET_CODE (ad) == PLUS)
5204 rtx tem = simplify_binary_operation (PLUS, Pmode,
5205 XEXP (ad, 0), XEXP (ad, 1));
5206 if (tem)
5207 ad = tem;
5210 if (GET_CODE (ad) == PLUS
5211 && GET_CODE (XEXP (ad, 0)) == REG
5212 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5213 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5215 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5216 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5217 rtx cst, tem, new_rtx;
5219 cst = GEN_INT (upper);
5220 if (!legitimate_reload_constant_p (cst))
5221 cst = force_const_mem (Pmode, cst);
5223 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5224 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5226 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5227 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5228 opnum, (enum reload_type) type);
5229 return new_rtx;
5232 return NULL_RTX;
5235 /* Emit code to move LEN bytes from SRC to DST. */
5237 bool
5238 s390_expand_movmem (rtx dst, rtx src, rtx len)
5240 /* When tuning for z10 or higher we rely on the Glibc functions to
5241 do the right thing. Only for constant lengths below 64k will we
5242 generate inline code. */
5243 if (s390_tune >= PROCESSOR_2097_Z10
5244 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5245 return false;
5247 /* Expand memcpy for constant length operands without a loop if it
5248 is shorter that way.
5250 With a constant length argument a
5251 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5252 if (GET_CODE (len) == CONST_INT
5253 && INTVAL (len) >= 0
5254 && INTVAL (len) <= 256 * 6
5255 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5257 HOST_WIDE_INT o, l;
5259 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5261 rtx newdst = adjust_address (dst, BLKmode, o);
5262 rtx newsrc = adjust_address (src, BLKmode, o);
5263 emit_insn (gen_movmem_short (newdst, newsrc,
5264 GEN_INT (l > 256 ? 255 : l - 1)));
5268 else if (TARGET_MVCLE)
5270 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5273 else
5275 rtx dst_addr, src_addr, count, blocks, temp;
5276 rtx_code_label *loop_start_label = gen_label_rtx ();
5277 rtx_code_label *loop_end_label = gen_label_rtx ();
5278 rtx_code_label *end_label = gen_label_rtx ();
5279 machine_mode mode;
5281 mode = GET_MODE (len);
5282 if (mode == VOIDmode)
5283 mode = Pmode;
5285 dst_addr = gen_reg_rtx (Pmode);
5286 src_addr = gen_reg_rtx (Pmode);
5287 count = gen_reg_rtx (mode);
5288 blocks = gen_reg_rtx (mode);
5290 convert_move (count, len, 1);
5291 emit_cmp_and_jump_insns (count, const0_rtx,
5292 EQ, NULL_RTX, mode, 1, end_label);
5294 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5295 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5296 dst = change_address (dst, VOIDmode, dst_addr);
5297 src = change_address (src, VOIDmode, src_addr);
5299 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5300 OPTAB_DIRECT);
5301 if (temp != count)
5302 emit_move_insn (count, temp);
5304 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5305 OPTAB_DIRECT);
5306 if (temp != blocks)
5307 emit_move_insn (blocks, temp);
5309 emit_cmp_and_jump_insns (blocks, const0_rtx,
5310 EQ, NULL_RTX, mode, 1, loop_end_label);
5312 emit_label (loop_start_label);
5314 if (TARGET_Z10
5315 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5317 rtx prefetch;
5319 /* Issue a read prefetch for the +3 cache line. */
5320 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5321 const0_rtx, const0_rtx);
5322 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5323 emit_insn (prefetch);
5325 /* Issue a write prefetch for the +3 cache line. */
5326 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5327 const1_rtx, const0_rtx);
5328 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5329 emit_insn (prefetch);
5332 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5333 s390_load_address (dst_addr,
5334 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5335 s390_load_address (src_addr,
5336 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5338 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5339 OPTAB_DIRECT);
5340 if (temp != blocks)
5341 emit_move_insn (blocks, temp);
5343 emit_cmp_and_jump_insns (blocks, const0_rtx,
5344 EQ, NULL_RTX, mode, 1, loop_end_label);
5346 emit_jump (loop_start_label);
5347 emit_label (loop_end_label);
5349 emit_insn (gen_movmem_short (dst, src,
5350 convert_to_mode (Pmode, count, 1)));
5351 emit_label (end_label);
5353 return true;
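/* For illustration (assuming the MVCLE expansion is not enabled): a
   constant-length copy such as `memcpy (dst, src, 600)' is expanded
   without a loop into three mvc blocks copying 256, 256 and 88 bytes. */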
5356 /* Emit code to set LEN bytes at DST to VAL.
5357 Make use of clrmem if VAL is zero. */
5359 void
5360 s390_expand_setmem (rtx dst, rtx len, rtx val)
5362 const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5364 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5365 return;
5367 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5369 /* Expand setmem/clrmem for a constant length operand without a
5370 loop if it will be shorter that way.
5371 With a constant length and without pfd argument a
5372 clrmem loop is 32 bytes -> 5.3 * xc
5373 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5374 if (GET_CODE (len) == CONST_INT
5375 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5376 || INTVAL (len) <= 257 * 3)
5377 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5379 HOST_WIDE_INT o, l;
5381 if (val == const0_rtx)
5382 /* clrmem: emit 256 byte blockwise XCs. */
5383 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5385 rtx newdst = adjust_address (dst, BLKmode, o);
5386 emit_insn (gen_clrmem_short (newdst,
5387 GEN_INT (l > 256 ? 255 : l - 1)));
5389 else
5390 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5391 setting first byte to val and using a 256 byte mvc with one
5392 byte overlap to propagate the byte. */
5393 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5395 rtx newdst = adjust_address (dst, BLKmode, o);
5396 emit_move_insn (adjust_address (dst, QImode, o), val);
5397 if (l > 1)
5399 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5400 emit_insn (gen_movmem_short (newdstp1, newdst,
5401 GEN_INT (l > 257 ? 255 : l - 2)));
5406 else if (TARGET_MVCLE)
5408 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5409 if (TARGET_64BIT)
5410 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5411 val));
5412 else
5413 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5414 val));
5417 else
5419 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5420 rtx_code_label *loop_start_label = gen_label_rtx ();
5421 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5422 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5423 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5424 machine_mode mode;
5426 mode = GET_MODE (len);
5427 if (mode == VOIDmode)
5428 mode = Pmode;
5430 dst_addr = gen_reg_rtx (Pmode);
5431 count = gen_reg_rtx (mode);
5432 blocks = gen_reg_rtx (mode);
5434 convert_move (count, len, 1);
5435 emit_cmp_and_jump_insns (count, const0_rtx,
5436 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5437 very_unlikely);
5439 /* We need to make a copy of the target address since memset is
5440 supposed to return it unmodified. We have to make it here
5441 already since the new reg is used at onebyte_end_label. */
5442 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5443 dst = change_address (dst, VOIDmode, dst_addr);
5445 if (val != const0_rtx)
5447 /* When using the overlapping mvc the original target
5448 address is only accessed as a single byte entity (even by
5449 the mvc reading this value). */
5450 set_mem_size (dst, 1);
5451 dstp1 = adjust_address (dst, VOIDmode, 1);
5452 emit_cmp_and_jump_insns (count,
5453 const1_rtx, EQ, NULL_RTX, mode, 1,
5454 onebyte_end_label, very_unlikely);
5457 /* There is one unconditional (mvi+mvc)/xc after the loop
5458 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5459 or one (xc) here leaves that number of bytes to be handled by
5460 it. */
5461 temp = expand_binop (mode, add_optab, count,
5462 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5463 count, 1, OPTAB_DIRECT);
5464 if (temp != count)
5465 emit_move_insn (count, temp);
5467 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5468 OPTAB_DIRECT);
5469 if (temp != blocks)
5470 emit_move_insn (blocks, temp);
5472 emit_cmp_and_jump_insns (blocks, const0_rtx,
5473 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5475 emit_jump (loop_start_label);
5477 if (val != const0_rtx)
5479 /* The 1 byte != 0 special case. Not handled efficiently
5480 since we require two jumps for that. However, this
5481 should be very rare. */
5482 emit_label (onebyte_end_label);
5483 emit_move_insn (adjust_address (dst, QImode, 0), val);
5484 emit_jump (zerobyte_end_label);
5487 emit_label (loop_start_label);
5489 if (TARGET_Z10
5490 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5492 /* Issue a write prefetch for the +4 cache line. */
5493 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5494 GEN_INT (1024)),
5495 const1_rtx, const0_rtx);
5496 emit_insn (prefetch);
5497 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5500 if (val == const0_rtx)
5501 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5502 else
5504 /* Set the first byte in the block to the value and use an
5505 overlapping mvc for the block. */
5506 emit_move_insn (adjust_address (dst, QImode, 0), val);
5507 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5509 s390_load_address (dst_addr,
5510 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5512 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5513 OPTAB_DIRECT);
5514 if (temp != blocks)
5515 emit_move_insn (blocks, temp);
5517 emit_cmp_and_jump_insns (blocks, const0_rtx,
5518 NE, NULL_RTX, mode, 1, loop_start_label);
5520 emit_label (restbyte_end_label);
5522 if (val == const0_rtx)
5523 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5524 else
5526 /* Set the first byte in the block to the value and use an
5527 overlapping mvc for the block. */
5528 emit_move_insn (adjust_address (dst, QImode, 0), val);
5529 /* execute only uses the lowest 8 bits of count, which is
5530 exactly what we need here. */
5531 emit_insn (gen_movmem_short (dstp1, dst,
5532 convert_to_mode (Pmode, count, 1)));
5535 emit_label (zerobyte_end_label);
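/* For illustration (assuming the MVCLE expansion is not enabled):
   `memset (dst, 0, 300)' emits two xc blocks of 256 and 44 bytes, while a
   nonzero fill value of the same constant length emits two mvi/mvc pairs
   covering 257 and 43 bytes. */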
5539 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5540 and return the result in TARGET. */
5542 bool
5543 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5545 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5546 rtx tmp;
5548 /* When tuning for z10 or higher we rely on the Glibc functions to
5549 do the right thing. Only for constant lengths below 64k will we
5550 generate inline code. */
5551 if (s390_tune >= PROCESSOR_2097_Z10
5552 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5553 return false;
5555 /* As the result of CMPINT is inverted compared to what we need,
5556 we have to swap the operands. */
5557 tmp = op0; op0 = op1; op1 = tmp;
5559 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5561 if (INTVAL (len) > 0)
5563 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5564 emit_insn (gen_cmpint (target, ccreg));
5566 else
5567 emit_move_insn (target, const0_rtx);
5569 else if (TARGET_MVCLE)
5571 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5572 emit_insn (gen_cmpint (target, ccreg));
5574 else
5576 rtx addr0, addr1, count, blocks, temp;
5577 rtx_code_label *loop_start_label = gen_label_rtx ();
5578 rtx_code_label *loop_end_label = gen_label_rtx ();
5579 rtx_code_label *end_label = gen_label_rtx ();
5580 machine_mode mode;
5582 mode = GET_MODE (len);
5583 if (mode == VOIDmode)
5584 mode = Pmode;
5586 addr0 = gen_reg_rtx (Pmode);
5587 addr1 = gen_reg_rtx (Pmode);
5588 count = gen_reg_rtx (mode);
5589 blocks = gen_reg_rtx (mode);
5591 convert_move (count, len, 1);
5592 emit_cmp_and_jump_insns (count, const0_rtx,
5593 EQ, NULL_RTX, mode, 1, end_label);
5595 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5596 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5597 op0 = change_address (op0, VOIDmode, addr0);
5598 op1 = change_address (op1, VOIDmode, addr1);
5600 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5601 OPTAB_DIRECT);
5602 if (temp != count)
5603 emit_move_insn (count, temp);
5605 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5606 OPTAB_DIRECT);
5607 if (temp != blocks)
5608 emit_move_insn (blocks, temp);
5610 emit_cmp_and_jump_insns (blocks, const0_rtx,
5611 EQ, NULL_RTX, mode, 1, loop_end_label);
5613 emit_label (loop_start_label);
5615 if (TARGET_Z10
5616 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5618 rtx prefetch;
5620 /* Issue a read prefetch for the +2 cache line of operand 1. */
5621 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5622 const0_rtx, const0_rtx);
5623 emit_insn (prefetch);
5624 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5626 /* Issue a read prefetch for the +2 cache line of operand 2. */
5627 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5628 const0_rtx, const0_rtx);
5629 emit_insn (prefetch);
5630 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5633 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5634 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5635 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5636 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5637 temp = gen_rtx_SET (pc_rtx, temp);
5638 emit_jump_insn (temp);
5640 s390_load_address (addr0,
5641 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5642 s390_load_address (addr1,
5643 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5645 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5646 OPTAB_DIRECT);
5647 if (temp != blocks)
5648 emit_move_insn (blocks, temp);
5650 emit_cmp_and_jump_insns (blocks, const0_rtx,
5651 EQ, NULL_RTX, mode, 1, loop_end_label);
5653 emit_jump (loop_start_label);
5654 emit_label (loop_end_label);
5656 emit_insn (gen_cmpmem_short (op0, op1,
5657 convert_to_mode (Pmode, count, 1)));
5658 emit_label (end_label);
5660 emit_insn (gen_cmpint (target, ccreg));
5662 return true;
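/* For illustration: a constant-length compare such as
   `memcmp (a, b, 100)' is emitted as a single 100-byte clc (with the
   operands swapped to undo the CMPINT inversion) followed by the cmpint
   pattern that converts the condition code into the integer result. */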
5665 /* Emit a conditional jump to LABEL for condition code mask MASK using
5666 comparison operator COMPARISON. Return the emitted jump insn. */
5668 static rtx_insn *
5669 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5671 rtx temp;
5673 gcc_assert (comparison == EQ || comparison == NE);
5674 gcc_assert (mask > 0 && mask < 15);
5676 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5677 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5678 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5679 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5680 temp = gen_rtx_SET (pc_rtx, temp);
5681 return emit_jump_insn (temp);
5684 /* Emit the instructions to implement strlen of STRING and store the
5685 result in TARGET. The string has the known ALIGNMENT. This
5686 version uses vector instructions and is therefore not appropriate
5687 for targets prior to z13. */
5689 void
5690 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5692 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5693 int very_likely = REG_BR_PROB_BASE - 1;
5694 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5695 rtx str_reg = gen_reg_rtx (V16QImode);
5696 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5697 rtx str_idx_reg = gen_reg_rtx (Pmode);
5698 rtx result_reg = gen_reg_rtx (V16QImode);
5699 rtx is_aligned_label = gen_label_rtx ();
5700 rtx into_loop_label = NULL_RTX;
5701 rtx loop_start_label = gen_label_rtx ();
5702 rtx temp;
5703 rtx len = gen_reg_rtx (QImode);
5704 rtx cond;
5706 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5707 emit_move_insn (str_idx_reg, const0_rtx);
5709 if (INTVAL (alignment) < 16)
5711 /* Check whether the address happens to be aligned properly; if so,
5712 jump directly to the aligned loop. */
5713 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5714 str_addr_base_reg, GEN_INT (15)),
5715 const0_rtx, EQ, NULL_RTX,
5716 Pmode, 1, is_aligned_label);
5718 temp = gen_reg_rtx (Pmode);
5719 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5720 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5721 gcc_assert (REG_P (temp));
5722 highest_index_to_load_reg =
5723 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5724 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5725 gcc_assert (REG_P (highest_index_to_load_reg));
5726 emit_insn (gen_vllv16qi (str_reg,
5727 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5728 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5730 into_loop_label = gen_label_rtx ();
5731 s390_emit_jump (into_loop_label, NULL_RTX);
5732 emit_barrier ();
5735 emit_label (is_aligned_label);
5736 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5738 /* Reaching this point we only perform 16-byte aligned
5739 loads. */
5740 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5742 emit_label (loop_start_label);
5743 LABEL_NUSES (loop_start_label) = 1;
5745 /* Load 16 bytes of the string into VR. */
5746 emit_move_insn (str_reg,
5747 gen_rtx_MEM (V16QImode,
5748 gen_rtx_PLUS (Pmode, str_idx_reg,
5749 str_addr_base_reg)));
5750 if (into_loop_label != NULL_RTX)
5752 emit_label (into_loop_label);
5753 LABEL_NUSES (into_loop_label) = 1;
5756 /* Increment string index by 16 bytes. */
5757 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5758 str_idx_reg, 1, OPTAB_DIRECT);
5760 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5761 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5763 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5764 REG_BR_PROB, very_likely);
5765 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5767 /* If the string pointer wasn't aligned we have loaded less than 16
5768 bytes and the remaining bytes got filled with zeros (by vll).
5769 Now we have to check whether the resulting index lies within the
5770 bytes actually part of the string. */
5772 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5773 highest_index_to_load_reg);
5774 s390_load_address (highest_index_to_load_reg,
5775 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5776 const1_rtx));
5777 if (TARGET_64BIT)
5778 emit_insn (gen_movdicc (str_idx_reg, cond,
5779 highest_index_to_load_reg, str_idx_reg));
5780 else
5781 emit_insn (gen_movsicc (str_idx_reg, cond,
5782 highest_index_to_load_reg, str_idx_reg));
5784 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5785 very_unlikely);
5787 expand_binop (Pmode, add_optab, str_idx_reg,
5788 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5789 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5790 here. */
5791 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5792 convert_to_mode (Pmode, len, 1),
5793 target, 1, OPTAB_DIRECT);
5794 if (temp != target)
5795 emit_move_insn (target, temp);
5798 void
5799 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5801 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5802 rtx temp = gen_reg_rtx (Pmode);
5803 rtx src_addr = XEXP (src, 0);
5804 rtx dst_addr = XEXP (dst, 0);
5805 rtx src_addr_reg = gen_reg_rtx (Pmode);
5806 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5807 rtx offset = gen_reg_rtx (Pmode);
5808 rtx vsrc = gen_reg_rtx (V16QImode);
5809 rtx vpos = gen_reg_rtx (V16QImode);
5810 rtx loadlen = gen_reg_rtx (SImode);
5811 rtx gpos_qi = gen_reg_rtx(QImode);
5812 rtx gpos = gen_reg_rtx (SImode);
5813 rtx done_label = gen_label_rtx ();
5814 rtx loop_label = gen_label_rtx ();
5815 rtx exit_label = gen_label_rtx ();
5816 rtx full_label = gen_label_rtx ();
5818 /* Perform a quick check for a string end within the first (up to) 16
5819 bytes and exit early if successful. */
5821 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5822 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5823 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5824 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5825 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5826 /* gpos is the byte index if a zero was found and 16 otherwise.
5827 So if it is lower than the number of bytes loaded we have a hit. */
5828 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5829 full_label);
5830 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5832 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5833 1, OPTAB_DIRECT);
5834 emit_jump (exit_label);
5835 emit_barrier ();
5837 emit_label (full_label);
5838 LABEL_NUSES (full_label) = 1;
5840 /* Calculate `offset' so that src + offset points to the last byte
5841 before 16 byte alignment. */
5843 /* temp = src_addr & 0xf */
5844 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5845 1, OPTAB_DIRECT);
5847 /* offset = 0xf - temp */
5848 emit_move_insn (offset, GEN_INT (15));
5849 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5850 1, OPTAB_DIRECT);
5852 /* Store `offset' bytes in the destination string. The quick check
5853 has loaded at least `offset' bytes into vsrc. */
5855 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5857 /* Advance to the next byte to be loaded. */
5858 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5859 1, OPTAB_DIRECT);
5861 /* Make sure the addresses are single regs which can be used as a
5862 base. */
5863 emit_move_insn (src_addr_reg, src_addr);
5864 emit_move_insn (dst_addr_reg, dst_addr);
5866 /* MAIN LOOP */
5868 emit_label (loop_label);
5869 LABEL_NUSES (loop_label) = 1;
5871 emit_move_insn (vsrc,
5872 gen_rtx_MEM (V16QImode,
5873 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5875 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5876 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5877 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5878 REG_BR_PROB, very_unlikely);
5880 emit_move_insn (gen_rtx_MEM (V16QImode,
5881 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5882 vsrc);
5883 /* offset += 16 */
5884 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5885 offset, 1, OPTAB_DIRECT);
5887 emit_jump (loop_label);
5888 emit_barrier ();
5890 /* REGULAR EXIT */
5892 /* We are done. Add the offset of the zero character to the dst_addr
5893 pointer to get the result. */
5895 emit_label (done_label);
5896 LABEL_NUSES (done_label) = 1;
5898 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5899 1, OPTAB_DIRECT);
5901 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5902 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5904 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5906 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5907 1, OPTAB_DIRECT);
5909 /* EARLY EXIT */
5911 emit_label (exit_label);
5912 LABEL_NUSES (exit_label) = 1;
5916 /* Expand conditional increment or decrement using alc/slb instructions.
5917 Should generate code setting DST to either SRC or SRC + INCREMENT,
5918 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5919 Returns true if successful, false otherwise.
5921 That makes it possible to implement some if-constructs without jumps e.g.:
5922 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5923 unsigned int a, b, c;
5924 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5925 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5926 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5927 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5929 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5930 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5931 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5932 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5933 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
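/* For illustration only, a minimal C sketch of the idea (not the exact
   RTL emitted by this function): the conditional increment

     unsigned int a, b, c;
     if (a < b)
       c++;

   is equivalent to the branchless form

     c += (a < b);

   where the comparison sets the carry in the CC and a single
   add-logical-with-carry folds that carry into c.  */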
5935 bool
5936 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5937 rtx dst, rtx src, rtx increment)
5939 machine_mode cmp_mode;
5940 machine_mode cc_mode;
5941 rtx op_res;
5942 rtx insn;
5943 rtvec p;
5944 int ret;
5946 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5947 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5948 cmp_mode = SImode;
5949 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5950 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5951 cmp_mode = DImode;
5952 else
5953 return false;
5955 /* Try ADD LOGICAL WITH CARRY. */
5956 if (increment == const1_rtx)
5958 /* Determine CC mode to use. */
5959 if (cmp_code == EQ || cmp_code == NE)
5961 if (cmp_op1 != const0_rtx)
5963 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5964 NULL_RTX, 0, OPTAB_WIDEN);
5965 cmp_op1 = const0_rtx;
5968 cmp_code = cmp_code == EQ ? LEU : GTU;
5971 if (cmp_code == LTU || cmp_code == LEU)
5973 rtx tem = cmp_op0;
5974 cmp_op0 = cmp_op1;
5975 cmp_op1 = tem;
5976 cmp_code = swap_condition (cmp_code);
5979 switch (cmp_code)
5981 case GTU:
5982 cc_mode = CCUmode;
5983 break;
5985 case GEU:
5986 cc_mode = CCL3mode;
5987 break;
5989 default:
5990 return false;
5993 /* Emit comparison instruction pattern. */
5994 if (!register_operand (cmp_op0, cmp_mode))
5995 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5997 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5998 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5999 /* We use insn_invalid_p here to add clobbers if required. */
6000 ret = insn_invalid_p (emit_insn (insn), false);
6001 gcc_assert (!ret);
6003 /* Emit ALC instruction pattern. */
6004 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6005 gen_rtx_REG (cc_mode, CC_REGNUM),
6006 const0_rtx);
6008 if (src != const0_rtx)
6010 if (!register_operand (src, GET_MODE (dst)))
6011 src = force_reg (GET_MODE (dst), src);
6013 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6014 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6017 p = rtvec_alloc (2);
6018 RTVEC_ELT (p, 0) =
6019 gen_rtx_SET (dst, op_res);
6020 RTVEC_ELT (p, 1) =
6021 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6022 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6024 return true;
6027 /* Try SUBTRACT LOGICAL WITH BORROW. */
6028 if (increment == constm1_rtx)
6030 /* Determine CC mode to use. */
6031 if (cmp_code == EQ || cmp_code == NE)
6033 if (cmp_op1 != const0_rtx)
6035 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6036 NULL_RTX, 0, OPTAB_WIDEN);
6037 cmp_op1 = const0_rtx;
6040 cmp_code = cmp_code == EQ ? LEU : GTU;
6043 if (cmp_code == GTU || cmp_code == GEU)
6045 rtx tem = cmp_op0;
6046 cmp_op0 = cmp_op1;
6047 cmp_op1 = tem;
6048 cmp_code = swap_condition (cmp_code);
6051 switch (cmp_code)
6053 case LEU:
6054 cc_mode = CCUmode;
6055 break;
6057 case LTU:
6058 cc_mode = CCL3mode;
6059 break;
6061 default:
6062 return false;
6065 /* Emit comparison instruction pattern. */
6066 if (!register_operand (cmp_op0, cmp_mode))
6067 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6069 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6070 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6071 /* We use insn_invalid_p here to add clobbers if required. */
6072 ret = insn_invalid_p (emit_insn (insn), false);
6073 gcc_assert (!ret);
6075 /* Emit SLB instruction pattern. */
6076 if (!register_operand (src, GET_MODE (dst)))
6077 src = force_reg (GET_MODE (dst), src);
6079 op_res = gen_rtx_MINUS (GET_MODE (dst),
6080 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6081 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6082 gen_rtx_REG (cc_mode, CC_REGNUM),
6083 const0_rtx));
6084 p = rtvec_alloc (2);
6085 RTVEC_ELT (p, 0) =
6086 gen_rtx_SET (dst, op_res);
6087 RTVEC_ELT (p, 1) =
6088 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6089 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6091 return true;
6094 return false;
6097 /* Expand code for the insv template. Return true if successful. */
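/* Rough C model of the bitfield insertion expanded here (a sketch;
   it assumes bit positions are counted from the most significant bit
   and BITSIZE is smaller than the mode width):

     shift = mode_bits - bitpos - bitsize;
     mask  = ((1UL << bitsize) - 1) << shift;
     dest  = (dest & ~mask) | (((unsigned long) src << shift) & mask);

   The code below tries to match such insertions against insert
   immediate, STCM/ICM or RISBG style patterns instead of open-coding
   the masking.  */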
6099 bool
6100 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6102 int bitsize = INTVAL (op1);
6103 int bitpos = INTVAL (op2);
6104 machine_mode mode = GET_MODE (dest);
6105 machine_mode smode;
6106 int smode_bsize, mode_bsize;
6107 rtx op, clobber;
6109 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6110 return false;
6112 /* Generate INSERT IMMEDIATE (IILL et al). */
6113 /* (set (ze (reg)) (const_int)). */
6114 if (TARGET_ZARCH
6115 && register_operand (dest, word_mode)
6116 && (bitpos % 16) == 0
6117 && (bitsize % 16) == 0
6118 && const_int_operand (src, VOIDmode))
6120 HOST_WIDE_INT val = INTVAL (src);
6121 int regpos = bitpos + bitsize;
6123 while (regpos > bitpos)
6125 machine_mode putmode;
6126 int putsize;
6128 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6129 putmode = SImode;
6130 else
6131 putmode = HImode;
6133 putsize = GET_MODE_BITSIZE (putmode);
6134 regpos -= putsize;
6135 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6136 GEN_INT (putsize),
6137 GEN_INT (regpos)),
6138 gen_int_mode (val, putmode));
6139 val >>= putsize;
6141 gcc_assert (regpos == bitpos);
6142 return true;
6145 smode = smallest_mode_for_size (bitsize, MODE_INT);
6146 smode_bsize = GET_MODE_BITSIZE (smode);
6147 mode_bsize = GET_MODE_BITSIZE (mode);
6149 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6150 if (bitpos == 0
6151 && (bitsize % BITS_PER_UNIT) == 0
6152 && MEM_P (dest)
6153 && (register_operand (src, word_mode)
6154 || const_int_operand (src, VOIDmode)))
6156 /* Emit standard pattern if possible. */
6157 if (smode_bsize == bitsize)
6159 emit_move_insn (adjust_address (dest, smode, 0),
6160 gen_lowpart (smode, src));
6161 return true;
6164 /* (set (ze (mem)) (const_int)). */
6165 else if (const_int_operand (src, VOIDmode))
6167 int size = bitsize / BITS_PER_UNIT;
6168 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6169 BLKmode,
6170 UNITS_PER_WORD - size);
6172 dest = adjust_address (dest, BLKmode, 0);
6173 set_mem_size (dest, size);
6174 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6175 return true;
6178 /* (set (ze (mem)) (reg)). */
6179 else if (register_operand (src, word_mode))
6181 if (bitsize <= 32)
6182 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6183 const0_rtx), src);
6184 else
6186 /* Emit st,stcmh sequence. */
6187 int stcmh_width = bitsize - 32;
6188 int size = stcmh_width / BITS_PER_UNIT;
6190 emit_move_insn (adjust_address (dest, SImode, size),
6191 gen_lowpart (SImode, src));
6192 set_mem_size (dest, size);
6193 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6194 GEN_INT (stcmh_width),
6195 const0_rtx),
6196 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6198 return true;
6202 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6203 if ((bitpos % BITS_PER_UNIT) == 0
6204 && (bitsize % BITS_PER_UNIT) == 0
6205 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6206 && MEM_P (src)
6207 && (mode == DImode || mode == SImode)
6208 && register_operand (dest, mode))
6210 /* Emit a strict_low_part pattern if possible. */
6211 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6213 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6214 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6215 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6216 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6217 return true;
6220 /* ??? There are more powerful versions of ICM that are not
6221 completely represented in the md file. */
6224 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6225 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6227 machine_mode mode_s = GET_MODE (src);
6229 if (CONSTANT_P (src))
6231 /* For constant zero values the representation with AND
6232 appears to be folded in more situations than the (set
6233 (zero_extract) ...).
6234 We only do this when the start and end of the bitfield
6235 remain in the same SImode chunk. That way nihf or nilf
6236 can be used.
6237 The AND patterns might still generate a risbg for this. */
6238 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6239 return false;
6240 else
6241 src = force_reg (mode, src);
6243 else if (mode_s != mode)
6245 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6246 src = force_reg (mode_s, src);
6247 src = gen_lowpart (mode, src);
6250 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6251 op = gen_rtx_SET (op, src);
6253 if (!TARGET_ZEC12)
6255 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6256 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6258 emit_insn (op);
6260 return true;
6263 return false;
6266 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6267 register that holds VAL of mode MODE shifted by COUNT bits. */
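/* E.g. for MODE == QImode, VAL == 0x1ab and COUNT == 16 the returned
   register holds (0x1ab & 0xff) << 16 == 0xab0000 (just an
   illustration of the AND/ASHIFT sequence below).  */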
6269 static inline rtx
6270 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6272 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6273 NULL_RTX, 1, OPTAB_DIRECT);
6274 return expand_simple_binop (SImode, ASHIFT, val, count,
6275 NULL_RTX, 1, OPTAB_DIRECT);
6278 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6279 the result in TARGET. */
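/* The underlying vector compare instructions only provide a few base
   conditions directly (equal, greater than, greater than unsigned,
   and for floating point also greater or equal), so the remaining
   conditions are reduced to those by swapping the operands and/or
   negating the result mask.  A sketch of the reduction for a signed
   "less or equal" on integer elements:

     a <= b   ==>   not (a > b)

   i.e. emit the GT compare and invert the resulting mask (this is
   what the neg_p and swap_p flags below keep track of).  */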
6281 void
6282 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6283 rtx cmp_op1, rtx cmp_op2)
6285 machine_mode mode = GET_MODE (target);
6286 bool neg_p = false, swap_p = false;
6287 rtx tmp;
6289 if (GET_MODE (cmp_op1) == V2DFmode)
6291 switch (cond)
6293 /* NE a != b -> !(a == b) */
6294 case NE: cond = EQ; neg_p = true; break;
6295 /* UNGT a u> b -> !(b >= a) */
6296 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6297 /* UNGE a u>= b -> !(b > a) */
6298 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6299 /* LE: a <= b -> b >= a */
6300 case LE: cond = GE; swap_p = true; break;
6301 /* UNLE: a u<= b -> !(a > b) */
6302 case UNLE: cond = GT; neg_p = true; break;
6303 /* LT: a < b -> b > a */
6304 case LT: cond = GT; swap_p = true; break;
6305 /* UNLT: a u< b -> !(a >= b) */
6306 case UNLT: cond = GE; neg_p = true; break;
6307 case UNEQ:
6308 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6309 return;
6310 case LTGT:
6311 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6312 return;
6313 case ORDERED:
6314 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6315 return;
6316 case UNORDERED:
6317 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6318 return;
6319 default: break;
6322 else
6324 switch (cond)
6326 /* NE: a != b -> !(a == b) */
6327 case NE: cond = EQ; neg_p = true; break;
6328 /* GE: a >= b -> !(b > a) */
6329 case GE: cond = GT; neg_p = true; swap_p = true; break;
6330 /* GEU: a >= b -> !(b > a) */
6331 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6332 /* LE: a <= b -> !(a > b) */
6333 case LE: cond = GT; neg_p = true; break;
6334 /* LEU: a <= b -> !(a > b) */
6335 case LEU: cond = GTU; neg_p = true; break;
6336 /* LT: a < b -> b > a */
6337 case LT: cond = GT; swap_p = true; break;
6338 /* LTU: a < b -> b > a */
6339 case LTU: cond = GTU; swap_p = true; break;
6340 default: break;
6344 if (swap_p)
6346 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6349 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6350 mode,
6351 cmp_op1, cmp_op2)));
6352 if (neg_p)
6353 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6356 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6357 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6358 elements in CMP1 and CMP2 fulfill the comparison.
6359 This function is only used to emit patterns for the vx builtins and
6360 therefore only handles comparison codes required by the
6361 builtins. */
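/* A sketch of the intended semantics (illustration only, written as
   plain scalar code): with ALL_P true the result is 1 only if the
   comparison holds for every element, with ALL_P false it is 1 if it
   holds for at least one element, roughly

     all:  t = 1;  for (i = 0; i < n; i++)  t &= (cmp1[i] OP cmp2[i]);
     any:  t = 0;  for (i = 0; i < n; i++)  t |= (cmp1[i] OP cmp2[i]);

   as used by builtins in the style of vec_all_eq / vec_any_eq.  */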
6362 void
6363 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6364 rtx cmp1, rtx cmp2, bool all_p)
6366 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6367 rtx tmp_reg = gen_reg_rtx (SImode);
6368 bool swap_p = false;
6370 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6372 switch (code)
6374 case EQ:
6375 case NE:
6376 cc_producer_mode = CCVEQmode;
6377 break;
6378 case GE:
6379 case LT:
6380 code = swap_condition (code);
6381 swap_p = true;
6382 /* fallthrough */
6383 case GT:
6384 case LE:
6385 cc_producer_mode = CCVIHmode;
6386 break;
6387 case GEU:
6388 case LTU:
6389 code = swap_condition (code);
6390 swap_p = true;
6391 /* fallthrough */
6392 case GTU:
6393 case LEU:
6394 cc_producer_mode = CCVIHUmode;
6395 break;
6396 default:
6397 gcc_unreachable ();
6400 scratch_mode = GET_MODE (cmp1);
6401 /* These codes represent inverted CC interpretations. Inverting
6402 an ALL CC mode results in an ANY CC mode and the other way
6403 around. Invert the all_p flag here to compensate for
6404 that. */
6405 if (code == NE || code == LE || code == LEU)
6406 all_p = !all_p;
6408 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6410 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6412 bool inv_p = false;
6414 switch (code)
6416 case EQ: cc_producer_mode = CCVEQmode; break;
6417 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6418 case GT: cc_producer_mode = CCVFHmode; break;
6419 case GE: cc_producer_mode = CCVFHEmode; break;
6420 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6421 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6422 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6423 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6424 default: gcc_unreachable ();
6426 scratch_mode = mode_for_vector (
6427 int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))),
6428 GET_MODE_NUNITS (GET_MODE (cmp1)));
6429 gcc_assert (scratch_mode != BLKmode);
6431 if (inv_p)
6432 all_p = !all_p;
6434 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6436 else
6437 gcc_unreachable ();
6439 if (swap_p)
6441 rtx tmp = cmp2;
6442 cmp2 = cmp1;
6443 cmp1 = tmp;
6446 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6447 gen_rtvec (2, gen_rtx_SET (
6448 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6449 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6450 gen_rtx_CLOBBER (VOIDmode,
6451 gen_rtx_SCRATCH (scratch_mode)))));
6452 emit_move_insn (target, const0_rtx);
6453 emit_move_insn (tmp_reg, const1_rtx);
6455 emit_move_insn (target,
6456 gen_rtx_IF_THEN_ELSE (SImode,
6457 gen_rtx_fmt_ee (code, VOIDmode,
6458 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6459 const0_rtx),
6460 tmp_reg, target));
6463 /* Invert the comparison CODE applied to a CC mode. This is only safe
6464    if we know whether the result was created by a floating point
6465 compare or not. For the CCV modes this is encoded as part of the
6466 mode. */
6467 enum rtx_code
6468 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6470   /* Reversal of FP compares needs extra care -- an ordered compare
6471 becomes an unordered compare and vice versa. */
6472 if (mode == CCVFALLmode || mode == CCVFANYmode)
6473 return reverse_condition_maybe_unordered (code);
6474 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6475 return reverse_condition (code);
6476 else
6477 gcc_unreachable ();
6480 /* Generate a vector comparison expression loading either elements of
6481 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6482 and CMP_OP2. */
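/* Sketch of the special case handled first below: a selection against
   zero whose arms are 0 and 1 or 0 and -1 can be done per element
   with a plain shift instead of a compare plus select, e.g. for
   SImode elements

     x < 0 ? -1 : 0   ==>   x >> 31            (arithmetic shift)
     x < 0 ?  1 : 0   ==>   (unsigned) x >> 31

   The general case emits a vector compare producing a -1/0 mask and,
   if necessary, a vector select (vsel) afterwards.  */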
6484 void
6485 s390_expand_vcond (rtx target, rtx then, rtx els,
6486 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6488 rtx tmp;
6489 machine_mode result_mode;
6490 rtx result_target;
6492 machine_mode target_mode = GET_MODE (target);
6493 machine_mode cmp_mode = GET_MODE (cmp_op1);
6494 rtx op = (cond == LT) ? els : then;
6496 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6497 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6498 for short and byte (x >> 15 and x >> 7 respectively). */
6499 if ((cond == LT || cond == GE)
6500 && target_mode == cmp_mode
6501 && cmp_op2 == CONST0_RTX (cmp_mode)
6502 && op == CONST0_RTX (target_mode)
6503 && s390_vector_mode_supported_p (target_mode)
6504 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6506 rtx negop = (cond == LT) ? then : els;
6508 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6510 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6511 if (negop == CONST1_RTX (target_mode))
6513 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6514 GEN_INT (shift), target,
6515 1, OPTAB_DIRECT);
6516 if (res != target)
6517 emit_move_insn (target, res);
6518 return;
6521 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6522 else if (all_ones_operand (negop, target_mode))
6524 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6525 GEN_INT (shift), target,
6526 0, OPTAB_DIRECT);
6527 if (res != target)
6528 emit_move_insn (target, res);
6529 return;
6533 /* We always use an integral type vector to hold the comparison
6534 result. */
6535 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6536 result_target = gen_reg_rtx (result_mode);
6538 /* We allow vector immediates as comparison operands that
6539 can be handled by the optimization above but not by the
6540 following code. Hence, force them into registers here. */
6541 if (!REG_P (cmp_op1))
6542 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6544 if (!REG_P (cmp_op2))
6545 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6547 s390_expand_vec_compare (result_target, cond,
6548 cmp_op1, cmp_op2);
6550 /* If the results are supposed to be either -1 or 0 we are done
6551 since this is what our compare instructions generate anyway. */
6552 if (all_ones_operand (then, GET_MODE (then))
6553 && const0_operand (els, GET_MODE (els)))
6555 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6556 result_target, 0));
6557 return;
6560 /* Otherwise we will do a vsel afterwards. */
6561 /* This gets triggered e.g.
6562 with gcc.c-torture/compile/pr53410-1.c */
6563 if (!REG_P (then))
6564 then = force_reg (target_mode, then);
6566 if (!REG_P (els))
6567 els = force_reg (target_mode, els);
6569 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6570 result_target,
6571 CONST0_RTX (result_mode));
6573 /* We compared the result against zero above so we have to swap then
6574 and els here. */
6575 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6577 gcc_assert (target_mode == GET_MODE (then));
6578 emit_insn (gen_rtx_SET (target, tmp));
6581 /* Emit the RTX necessary to initialize the vector TARGET with values
6582 in VALS. */
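/* For illustration, an initializer whose elements are all the same
   suitable constant, e.g.

     vector signed int x = { 1, 1, 1, 1 };

   can be emitted as a single gen-mask or replicate instruction, while
   an initializer with distinct non-constant elements falls through to
   the element-by-element fallback at the end of this function (a
   sketch of the intent, not a guarantee about the exact instruction
   chosen).  */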
6583 void
6584 s390_expand_vec_init (rtx target, rtx vals)
6586 machine_mode mode = GET_MODE (target);
6587 machine_mode inner_mode = GET_MODE_INNER (mode);
6588 int n_elts = GET_MODE_NUNITS (mode);
6589 bool all_same = true, all_regs = true, all_const_int = true;
6590 rtx x;
6591 int i;
6593 for (i = 0; i < n_elts; ++i)
6595 x = XVECEXP (vals, 0, i);
6597 if (!CONST_INT_P (x))
6598 all_const_int = false;
6600 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6601 all_same = false;
6603 if (!REG_P (x))
6604 all_regs = false;
6607 /* Use vector gen mask or vector gen byte mask if possible. */
6608 if (all_same && all_const_int
6609 && (XVECEXP (vals, 0, 0) == const0_rtx
6610 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6611 NULL, NULL)
6612 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6614 emit_insn (gen_rtx_SET (target,
6615 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6616 return;
6619 if (all_same)
6621 emit_insn (gen_rtx_SET (target,
6622 gen_rtx_VEC_DUPLICATE (mode,
6623 XVECEXP (vals, 0, 0))));
6624 return;
6627 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6629 /* Use vector load pair. */
6630 emit_insn (gen_rtx_SET (target,
6631 gen_rtx_VEC_CONCAT (mode,
6632 XVECEXP (vals, 0, 0),
6633 XVECEXP (vals, 0, 1))));
6634 return;
6637 /* We are about to set the vector elements one by one. Zero out the
6638 full register first in order to help the data flow framework to
6639      detect it as a full VR set.  */
6640 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6642 /* Unfortunately the vec_init expander is not allowed to fail. So
6643 we have to implement the fallback ourselves. */
6644 for (i = 0; i < n_elts; i++)
6646 rtx elem = XVECEXP (vals, 0, i);
6647 if (!general_operand (elem, GET_MODE (elem)))
6648 elem = force_reg (inner_mode, elem);
6650 emit_insn (gen_rtx_SET (target,
6651 gen_rtx_UNSPEC (mode,
6652 gen_rtvec (3, elem,
6653 GEN_INT (i), target),
6654 UNSPEC_VEC_SET)));
6658 /* Structure to hold the initial parameters for a compare_and_swap operation
6659 in HImode and QImode. */
6661 struct alignment_context
6663 rtx memsi; /* SI aligned memory location. */
6664 rtx shift; /* Bit offset with regard to lsb. */
6665 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6666 rtx modemaski; /* ~modemask */
6667   bool aligned;	  /* True if memory is aligned, false otherwise.  */
6670 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6671    structure AC for transparently simplifying the operation, if the memory
6672    alignment is known to be at least 32 bit.  MEM is the memory location for the actual operation
6673 and MODE its mode. */
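/* Worked example for the unaligned path below (illustration only):
   for a HImode access at an address ADDR with ADDR % 4 == 1 we get

     memsi      = the SImode word at (ADDR & -4)
     byteoffset = 1
     shift      = (4 - 2) - 1 = 1 byte, i.e. 8 bits after the ASHIFT
     modemask   = 0xffff << 8 = 0x00ffff00
     modemaski  = 0xff0000ff

   so the HImode value occupies the two middle bytes of the loaded
   word.  */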
6675 static void
6676 init_alignment_context (struct alignment_context *ac, rtx mem,
6677 machine_mode mode)
6679 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6680 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6682 if (ac->aligned)
6683 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6684 else
6686 /* Alignment is unknown. */
6687 rtx byteoffset, addr, align;
6689 /* Force the address into a register. */
6690 addr = force_reg (Pmode, XEXP (mem, 0));
6692 /* Align it to SImode. */
6693 align = expand_simple_binop (Pmode, AND, addr,
6694 GEN_INT (-GET_MODE_SIZE (SImode)),
6695 NULL_RTX, 1, OPTAB_DIRECT);
6696 /* Generate MEM. */
6697 ac->memsi = gen_rtx_MEM (SImode, align);
6698 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6699 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6700 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6702 /* Calculate shiftcount. */
6703 byteoffset = expand_simple_binop (Pmode, AND, addr,
6704 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6705 NULL_RTX, 1, OPTAB_DIRECT);
6706 /* As we already have some offset, evaluate the remaining distance. */
6707 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6708 NULL_RTX, 1, OPTAB_DIRECT);
6711 /* Shift is the byte count, but we need the bitcount. */
6712 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6713 NULL_RTX, 1, OPTAB_DIRECT);
6715 /* Calculate masks. */
6716 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6717 GEN_INT (GET_MODE_MASK (mode)),
6718 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6719 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6720 NULL_RTX, 1);
6723 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6724 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6725 perform the merge in SEQ2. */
6727 static rtx
6728 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6729 machine_mode mode, rtx val, rtx ins)
6731 rtx tmp;
6733 if (ac->aligned)
6735 start_sequence ();
6736 tmp = copy_to_mode_reg (SImode, val);
6737 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6738 const0_rtx, ins))
6740 *seq1 = NULL;
6741 *seq2 = get_insns ();
6742 end_sequence ();
6743 return tmp;
6745 end_sequence ();
6748 /* Failed to use insv. Generate a two part shift and mask. */
6749 start_sequence ();
6750 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6751 *seq1 = get_insns ();
6752 end_sequence ();
6754 start_sequence ();
6755 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6756 *seq2 = get_insns ();
6757 end_sequence ();
6759 return tmp;
6762 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6763 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6764 value to set if CMP == MEM. */
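/* A plain C sketch of the non-weak strategy (illustration only; the
   expansion below additionally covers the aligned insv case and the
   weak variant):

     word = load (aligned_word) & ~field_mask;
     for (;;)
       {
         oldval = word | ((cmp << shift) & field_mask);
         newval = word | ((new_val << shift) & field_mask);
         prev = compare_and_swap (aligned_word, oldval, newval);
         if (prev == oldval)
           return 1;
         if ((prev & ~field_mask) == word)
           return 0;
         word = prev & ~field_mask;
       }

   The first return is success, the second a genuine mismatch in the
   HImode/QImode field; only changes to the surrounding bytes cause a
   retry.  Here compare_and_swap () stands in for the CS instruction
   and returns the previous memory contents.  */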
6766 void
6767 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6768 rtx cmp, rtx new_rtx, bool is_weak)
6770 struct alignment_context ac;
6771 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6772 rtx res = gen_reg_rtx (SImode);
6773 rtx_code_label *csloop = NULL, *csend = NULL;
6775 gcc_assert (MEM_P (mem));
6777 init_alignment_context (&ac, mem, mode);
6779 /* Load full word. Subsequent loads are performed by CS. */
6780 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6781 NULL_RTX, 1, OPTAB_DIRECT);
6783 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6784 possible, we try to use insv to make this happen efficiently. If
6785 that fails we'll generate code both inside and outside the loop. */
6786 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6787 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6789 if (seq0)
6790 emit_insn (seq0);
6791 if (seq1)
6792 emit_insn (seq1);
6794 /* Start CS loop. */
6795 if (!is_weak)
6797 /* Begin assuming success. */
6798 emit_move_insn (btarget, const1_rtx);
6800 csloop = gen_label_rtx ();
6801 csend = gen_label_rtx ();
6802 emit_label (csloop);
6805 /* val = "<mem>00..0<mem>"
6806 * cmp = "00..0<cmp>00..0"
6807 * new = "00..0<new>00..0"
6810 emit_insn (seq2);
6811 emit_insn (seq3);
6813 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6814 if (is_weak)
6815 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6816 else
6818 rtx tmp;
6820 /* Jump to end if we're done (likely?). */
6821 s390_emit_jump (csend, cc);
6823 /* Check for changes outside mode, and loop internal if so.
6824 Arrange the moves so that the compare is adjacent to the
6825 branch so that we can generate CRJ. */
6826 tmp = copy_to_reg (val);
6827 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6828 1, OPTAB_DIRECT);
6829 cc = s390_emit_compare (NE, val, tmp);
6830 s390_emit_jump (csloop, cc);
6832 /* Failed. */
6833 emit_move_insn (btarget, const0_rtx);
6834 emit_label (csend);
6837 /* Return the correct part of the bitfield. */
6838 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6839 NULL_RTX, 1, OPTAB_DIRECT), 1);
6842 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6843 and VAL the value to play with. If AFTER is true then store the value
6844 MEM holds after the operation, if AFTER is false then store the value MEM
6845 holds before the operation. If TARGET is zero then discard that value, else
6846 store it to TARGET. */
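/* A plain C sketch of the compare-and-swap loop emitted below
   (illustration only), shown for an atomic add on a subword field:

     old = load (aligned_word);
     do
       {
         field  = (old + (val << shift)) & field_mask;
         newval = (old & ~field_mask) | field;
       }
     while (!compare_and_swap (aligned_word, &old, newval));

   compare_and_swap () stands in for the CS instruction and is assumed
   to update OLD with the current memory contents on failure.  The
   value handed back to the caller is either OLD or NEWVAL shifted
   back down, depending on AFTER.  */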
6848 void
6849 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6850 rtx target, rtx mem, rtx val, bool after)
6852 struct alignment_context ac;
6853 rtx cmp;
6854 rtx new_rtx = gen_reg_rtx (SImode);
6855 rtx orig = gen_reg_rtx (SImode);
6856 rtx_code_label *csloop = gen_label_rtx ();
6858 gcc_assert (!target || register_operand (target, VOIDmode));
6859 gcc_assert (MEM_P (mem));
6861 init_alignment_context (&ac, mem, mode);
6863 /* Shift val to the correct bit positions.
6864 Preserve "icm", but prevent "ex icm". */
6865 if (!(ac.aligned && code == SET && MEM_P (val)))
6866 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6868 /* Further preparation insns. */
6869 if (code == PLUS || code == MINUS)
6870 emit_move_insn (orig, val);
6871 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6872 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6873 NULL_RTX, 1, OPTAB_DIRECT);
6875 /* Load full word. Subsequent loads are performed by CS. */
6876 cmp = force_reg (SImode, ac.memsi);
6878 /* Start CS loop. */
6879 emit_label (csloop);
6880 emit_move_insn (new_rtx, cmp);
6882 /* Patch new with val at correct position. */
6883 switch (code)
6885 case PLUS:
6886 case MINUS:
6887 val = expand_simple_binop (SImode, code, new_rtx, orig,
6888 NULL_RTX, 1, OPTAB_DIRECT);
6889 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6890 NULL_RTX, 1, OPTAB_DIRECT);
6891 /* FALLTHRU */
6892 case SET:
6893 if (ac.aligned && MEM_P (val))
6894 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6895 0, 0, SImode, val, false);
6896 else
6898 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6899 NULL_RTX, 1, OPTAB_DIRECT);
6900 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6901 NULL_RTX, 1, OPTAB_DIRECT);
6903 break;
6904 case AND:
6905 case IOR:
6906 case XOR:
6907 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6908 NULL_RTX, 1, OPTAB_DIRECT);
6909 break;
6910 case MULT: /* NAND */
6911 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6912 NULL_RTX, 1, OPTAB_DIRECT);
6913 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6914 NULL_RTX, 1, OPTAB_DIRECT);
6915 break;
6916 default:
6917 gcc_unreachable ();
6920 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6921 ac.memsi, cmp, new_rtx));
6923 /* Return the correct part of the bitfield. */
6924 if (target)
6925 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6926 after ? new_rtx : cmp, ac.shift,
6927 NULL_RTX, 1, OPTAB_DIRECT), 1);
6930 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6931 We need to emit DTP-relative relocations. */
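/* E.g. for SIZE == 8 and X a symbol_ref for "foo" this emits

     .quad foo@DTPOFF

   while 4-byte entries use .long instead.  */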
6933 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6935 static void
6936 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6938 switch (size)
6940 case 4:
6941 fputs ("\t.long\t", file);
6942 break;
6943 case 8:
6944 fputs ("\t.quad\t", file);
6945 break;
6946 default:
6947 gcc_unreachable ();
6949 output_addr_const (file, x);
6950 fputs ("@DTPOFF", file);
6953 /* Return the proper mode for REGNO being represented in the dwarf
6954 unwind table. */
6955 machine_mode
6956 s390_dwarf_frame_reg_mode (int regno)
6958 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6960 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6961 if (GENERAL_REGNO_P (regno))
6962 save_mode = Pmode;
6964 /* The rightmost 64 bits of vector registers are call-clobbered. */
6965 if (GET_MODE_SIZE (save_mode) > 8)
6966 save_mode = DImode;
6968 return save_mode;
6971 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6972 /* Implement TARGET_MANGLE_TYPE. */
6974 static const char *
6975 s390_mangle_type (const_tree type)
6977 type = TYPE_MAIN_VARIANT (type);
6979 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6980 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6981 return NULL;
6983 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6984 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6985 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6986 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6988 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6989 && TARGET_LONG_DOUBLE_128)
6990 return "g";
6992 /* For all other types, use normal C++ mangling. */
6993 return NULL;
6995 #endif
6997 /* In the name of slightly smaller debug output, and to cater to
6998 general assembler lossage, recognize various UNSPEC sequences
6999 and turn them back into a direct symbol reference. */
7001 static rtx
7002 s390_delegitimize_address (rtx orig_x)
7004 rtx x, y;
7006 orig_x = delegitimize_mem_from_attrs (orig_x);
7007 x = orig_x;
7009 /* Extract the symbol ref from:
7010 (plus:SI (reg:SI 12 %r12)
7011 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7012 UNSPEC_GOTOFF/PLTOFF)))
7014 (plus:SI (reg:SI 12 %r12)
7015 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7016 UNSPEC_GOTOFF/PLTOFF)
7017 (const_int 4 [0x4])))) */
7018 if (GET_CODE (x) == PLUS
7019 && REG_P (XEXP (x, 0))
7020 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7021 && GET_CODE (XEXP (x, 1)) == CONST)
7023 HOST_WIDE_INT offset = 0;
7025 /* The const operand. */
7026 y = XEXP (XEXP (x, 1), 0);
7028 if (GET_CODE (y) == PLUS
7029 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7031 offset = INTVAL (XEXP (y, 1));
7032 y = XEXP (y, 0);
7035 if (GET_CODE (y) == UNSPEC
7036 && (XINT (y, 1) == UNSPEC_GOTOFF
7037 || XINT (y, 1) == UNSPEC_PLTOFF))
7038 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7041 if (GET_CODE (x) != MEM)
7042 return orig_x;
7044 x = XEXP (x, 0);
7045 if (GET_CODE (x) == PLUS
7046 && GET_CODE (XEXP (x, 1)) == CONST
7047 && GET_CODE (XEXP (x, 0)) == REG
7048 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7050 y = XEXP (XEXP (x, 1), 0);
7051 if (GET_CODE (y) == UNSPEC
7052 && XINT (y, 1) == UNSPEC_GOT)
7053 y = XVECEXP (y, 0, 0);
7054 else
7055 return orig_x;
7057 else if (GET_CODE (x) == CONST)
7059 /* Extract the symbol ref from:
7060 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7061 UNSPEC_PLT/GOTENT))) */
7063 y = XEXP (x, 0);
7064 if (GET_CODE (y) == UNSPEC
7065 && (XINT (y, 1) == UNSPEC_GOTENT
7066 || XINT (y, 1) == UNSPEC_PLT))
7067 y = XVECEXP (y, 0, 0);
7068 else
7069 return orig_x;
7071 else
7072 return orig_x;
7074 if (GET_MODE (orig_x) != Pmode)
7076 if (GET_MODE (orig_x) == BLKmode)
7077 return orig_x;
7078 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7079 if (y == NULL_RTX)
7080 return orig_x;
7082 return y;
7085 /* Output operand OP to stdio stream FILE.
7086 OP is an address (register + offset) which is not used to address data;
7087 instead the rightmost bits are interpreted as the value. */
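/* E.g. for OP == (plus (reg %r3) (const_int 5)) this prints "5(%r3)".
   Only the low twelve bits of the offset are used, so an offset of
   4100 (0x1004) would come out as "4" (see the masking below).  */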
7089 static void
7090 print_addrstyle_operand (FILE *file, rtx op)
7092 HOST_WIDE_INT offset;
7093 rtx base;
7095 /* Extract base register and offset. */
7096 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7097 gcc_unreachable ();
7099 /* Sanity check. */
7100 if (base)
7102 gcc_assert (GET_CODE (base) == REG);
7103 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7104 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7107   /* Offsets are restricted to twelve bits.  */
7108 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7109 if (base)
7110 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7113 /* Assigns the number of NOP halfwords to be emitted before and after the
7114 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7115 If hotpatching is disabled for the function, the values are set to zero.
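   For example, a declaration like (illustrative only, not taken from
   this file)

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests one halfword of NOPs before and two halfwords after the
   function label, overriding the corresponding command-line values
   for this function.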
7118 static void
7119 s390_function_num_hotpatch_hw (tree decl,
7120 int *hw_before,
7121 int *hw_after)
7123 tree attr;
7125 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7127 /* Handle the arguments of the hotpatch attribute. The values
7128 specified via attribute might override the cmdline argument
7129 values. */
7130 if (attr)
7132 tree args = TREE_VALUE (attr);
7134 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7135 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7137 else
7139 /* Use the values specified by the cmdline arguments. */
7140 *hw_before = s390_hotpatch_hw_before_label;
7141 *hw_after = s390_hotpatch_hw_after_label;
7145 /* Write the current .machine and .machinemode specification to the assembler
7146 file. */
7148 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7149 static void
7150 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7152 fprintf (asm_out_file, "\t.machinemode %s\n",
7153 (TARGET_ZARCH) ? "zarch" : "esa");
7154 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7155 if (S390_USE_ARCHITECTURE_MODIFIERS)
7157 int cpu_flags;
7159 cpu_flags = processor_flags_table[(int) s390_arch];
7160 if (TARGET_HTM && !(cpu_flags & PF_TX))
7161 fprintf (asm_out_file, "+htm");
7162 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7163 fprintf (asm_out_file, "+nohtm");
7164 if (TARGET_VX && !(cpu_flags & PF_VX))
7165 fprintf (asm_out_file, "+vx");
7166 else if (!TARGET_VX && (cpu_flags & PF_VX))
7167 fprintf (asm_out_file, "+novx");
7169 fprintf (asm_out_file, "\"\n");
7172 /* Write an extra function header before the very start of the function. */
7174 void
7175 s390_asm_output_function_prefix (FILE *asm_out_file,
7176 const char *fnname ATTRIBUTE_UNUSED)
7178 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7179 return;
7180 /* Since only the function specific options are saved but not the indications
7181 which options are set, it's too much work here to figure out which options
7182 have actually changed. Thus, generate .machine and .machinemode whenever a
7183 function has the target attribute or pragma. */
7184 fprintf (asm_out_file, "\t.machinemode push\n");
7185 fprintf (asm_out_file, "\t.machine push\n");
7186 s390_asm_output_machine_for_arch (asm_out_file);
7189 /* Write an extra function footer after the very end of the function. */
7191 void
7192 s390_asm_declare_function_size (FILE *asm_out_file,
7193 const char *fnname, tree decl)
7195 if (!flag_inhibit_size_directive)
7196 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7197 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7198 return;
7199 fprintf (asm_out_file, "\t.machine pop\n");
7200 fprintf (asm_out_file, "\t.machinemode pop\n");
7202 #endif
7204 /* Write the extra assembler code needed to declare a function properly. */
7206 void
7207 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7208 tree decl)
7210 int hw_before, hw_after;
7212 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7213 if (hw_before > 0)
7215 unsigned int function_alignment;
7216 int i;
7218 /* Add a trampoline code area before the function label and initialize it
7219 with two-byte nop instructions. This area can be overwritten with code
7220 that jumps to a patched version of the function. */
7221 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
7222 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7223 hw_before);
7224 for (i = 1; i < hw_before; i++)
7225 fputs ("\tnopr\t%r7\n", asm_out_file);
7227 /* Note: The function label must be aligned so that (a) the bytes of the
7228 following nop do not cross a cacheline boundary, and (b) a jump address
7229 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7230 stored directly before the label without crossing a cacheline
7231 boundary. All this is necessary to make sure the trampoline code can
7232 be changed atomically.
7233 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7234 if there are NOPs before the function label, the alignment is placed
7235 before them. So it is necessary to duplicate the alignment after the
7236 NOPs. */
7237 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7238 if (! DECL_USER_ALIGN (decl))
7239 function_alignment = MAX (function_alignment,
7240 (unsigned int) align_functions);
7241 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7242 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7245 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7247 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7248 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7249 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7250 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7251 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7252 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7253 s390_warn_framesize);
7254 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7255 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7256 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7257 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7258 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7259 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7260 TARGET_PACKED_STACK);
7261 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7262 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7263 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7264 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7265 s390_warn_dynamicstack_p);
7267 ASM_OUTPUT_LABEL (asm_out_file, fname);
7268 if (hw_after > 0)
7269 asm_fprintf (asm_out_file,
7270 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7271 hw_after);
7274 /* Output machine-dependent UNSPECs occurring in address constant X
7275 in assembler syntax to stdio stream FILE. Returns true if the
7276 constant X could be recognized, false otherwise. */
7278 static bool
7279 s390_output_addr_const_extra (FILE *file, rtx x)
7281 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7282 switch (XINT (x, 1))
7284 case UNSPEC_GOTENT:
7285 output_addr_const (file, XVECEXP (x, 0, 0));
7286 fprintf (file, "@GOTENT");
7287 return true;
7288 case UNSPEC_GOT:
7289 output_addr_const (file, XVECEXP (x, 0, 0));
7290 fprintf (file, "@GOT");
7291 return true;
7292 case UNSPEC_GOTOFF:
7293 output_addr_const (file, XVECEXP (x, 0, 0));
7294 fprintf (file, "@GOTOFF");
7295 return true;
7296 case UNSPEC_PLT:
7297 output_addr_const (file, XVECEXP (x, 0, 0));
7298 fprintf (file, "@PLT");
7299 return true;
7300 case UNSPEC_PLTOFF:
7301 output_addr_const (file, XVECEXP (x, 0, 0));
7302 fprintf (file, "@PLTOFF");
7303 return true;
7304 case UNSPEC_TLSGD:
7305 output_addr_const (file, XVECEXP (x, 0, 0));
7306 fprintf (file, "@TLSGD");
7307 return true;
7308 case UNSPEC_TLSLDM:
7309 assemble_name (file, get_some_local_dynamic_name ());
7310 fprintf (file, "@TLSLDM");
7311 return true;
7312 case UNSPEC_DTPOFF:
7313 output_addr_const (file, XVECEXP (x, 0, 0));
7314 fprintf (file, "@DTPOFF");
7315 return true;
7316 case UNSPEC_NTPOFF:
7317 output_addr_const (file, XVECEXP (x, 0, 0));
7318 fprintf (file, "@NTPOFF");
7319 return true;
7320 case UNSPEC_GOTNTPOFF:
7321 output_addr_const (file, XVECEXP (x, 0, 0));
7322 fprintf (file, "@GOTNTPOFF");
7323 return true;
7324 case UNSPEC_INDNTPOFF:
7325 output_addr_const (file, XVECEXP (x, 0, 0));
7326 fprintf (file, "@INDNTPOFF");
7327 return true;
7330 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7331 switch (XINT (x, 1))
7333 case UNSPEC_POOL_OFFSET:
7334 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7335 output_addr_const (file, x);
7336 return true;
7338 return false;
7341 /* Output address operand ADDR in assembler syntax to
7342 stdio stream FILE. */
7344 void
7345 print_operand_address (FILE *file, rtx addr)
7347 struct s390_address ad;
7348 memset (&ad, 0, sizeof (s390_address));
7350 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7352 if (!TARGET_Z10)
7354 output_operand_lossage ("symbolic memory references are "
7355 "only supported on z10 or later");
7356 return;
7358 output_addr_const (file, addr);
7359 return;
7362 if (!s390_decompose_address (addr, &ad)
7363 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7364 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7365 output_operand_lossage ("cannot decompose address");
7367 if (ad.disp)
7368 output_addr_const (file, ad.disp);
7369 else
7370 fprintf (file, "0");
7372 if (ad.base && ad.indx)
7373 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7374 reg_names[REGNO (ad.base)]);
7375 else if (ad.base)
7376 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7379 /* Output operand X in assembler syntax to stdio stream FILE.
7380 CODE specified the format flag. The following format flags
7381 are recognized:
7383 'C': print opcode suffix for branch condition.
7384 'D': print opcode suffix for inverse branch condition.
7385 'E': print opcode suffix for branch on index instruction.
7386 'G': print the size of the operand in bytes.
7387 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7388 'M': print the second word of a TImode operand.
7389 'N': print the second word of a DImode operand.
7390 'O': print only the displacement of a memory reference or address.
7391 'R': print only the base register of a memory reference or address.
7392 'S': print S-type memory reference (base+displacement).
7393 'Y': print address style operand without index (e.g. shift count or setmem
7394 operand).
7396 'b': print integer X as if it's an unsigned byte.
7397     'c': print integer X as if it's a signed byte.
7398 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7399 'f': "end" contiguous bitmask X in SImode.
7400 'h': print integer X as if it's a signed halfword.
7401 'i': print the first nonzero HImode part of X.
7402 'j': print the first HImode part unequal to -1 of X.
7403 'k': print the first nonzero SImode part of X.
7404 'm': print the first SImode part unequal to -1 of X.
7405 'o': print integer X as if it's an unsigned 32bit word.
7406 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7407 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7408 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7409 'x': print integer X as if it's an unsigned halfword.
7410 'v': print register number as vector register (v1 instead of f1).
7413 void
7414 print_operand (FILE *file, rtx x, int code)
7416 HOST_WIDE_INT ival;
7418 switch (code)
7420 case 'C':
7421 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7422 return;
7424 case 'D':
7425 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7426 return;
7428 case 'E':
7429 if (GET_CODE (x) == LE)
7430 fprintf (file, "l");
7431 else if (GET_CODE (x) == GT)
7432 fprintf (file, "h");
7433 else
7434 output_operand_lossage ("invalid comparison operator "
7435 "for 'E' output modifier");
7436 return;
7438 case 'J':
7439 if (GET_CODE (x) == SYMBOL_REF)
7441 fprintf (file, "%s", ":tls_load:");
7442 output_addr_const (file, x);
7444 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7446 fprintf (file, "%s", ":tls_gdcall:");
7447 output_addr_const (file, XVECEXP (x, 0, 0));
7449 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7451 fprintf (file, "%s", ":tls_ldcall:");
7452 const char *name = get_some_local_dynamic_name ();
7453 gcc_assert (name);
7454 assemble_name (file, name);
7456 else
7457 output_operand_lossage ("invalid reference for 'J' output modifier");
7458 return;
7460 case 'G':
7461 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7462 return;
7464 case 'O':
7466 struct s390_address ad;
7467 int ret;
7469 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7471 if (!ret
7472 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7473 || ad.indx)
7475 output_operand_lossage ("invalid address for 'O' output modifier");
7476 return;
7479 if (ad.disp)
7480 output_addr_const (file, ad.disp);
7481 else
7482 fprintf (file, "0");
7484 return;
7486 case 'R':
7488 struct s390_address ad;
7489 int ret;
7491 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7493 if (!ret
7494 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7495 || ad.indx)
7497 output_operand_lossage ("invalid address for 'R' output modifier");
7498 return;
7501 if (ad.base)
7502 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7503 else
7504 fprintf (file, "0");
7506 return;
7508 case 'S':
7510 struct s390_address ad;
7511 int ret;
7513 if (!MEM_P (x))
7515 output_operand_lossage ("memory reference expected for "
7516 "'S' output modifier");
7517 return;
7519 ret = s390_decompose_address (XEXP (x, 0), &ad);
7521 if (!ret
7522 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7523 || ad.indx)
7525 output_operand_lossage ("invalid address for 'S' output modifier");
7526 return;
7529 if (ad.disp)
7530 output_addr_const (file, ad.disp);
7531 else
7532 fprintf (file, "0");
7534 if (ad.base)
7535 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7537 return;
7539 case 'N':
7540 if (GET_CODE (x) == REG)
7541 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7542 else if (GET_CODE (x) == MEM)
7543 x = change_address (x, VOIDmode,
7544 plus_constant (Pmode, XEXP (x, 0), 4));
7545 else
7546 output_operand_lossage ("register or memory expression expected "
7547 "for 'N' output modifier");
7548 break;
7550 case 'M':
7551 if (GET_CODE (x) == REG)
7552 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7553 else if (GET_CODE (x) == MEM)
7554 x = change_address (x, VOIDmode,
7555 plus_constant (Pmode, XEXP (x, 0), 8));
7556 else
7557 output_operand_lossage ("register or memory expression expected "
7558 "for 'M' output modifier");
7559 break;
7561 case 'Y':
7562 print_addrstyle_operand (file, x);
7563 return;
7566 switch (GET_CODE (x))
7568 case REG:
7569 /* Print FP regs as fx instead of vx when they are accessed
7570 through non-vector mode. */
7571 if (code == 'v'
7572 || VECTOR_NOFP_REG_P (x)
7573 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7574 || (VECTOR_REG_P (x)
7575 && (GET_MODE_SIZE (GET_MODE (x)) /
7576 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7577 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7578 else
7579 fprintf (file, "%s", reg_names[REGNO (x)]);
7580 break;
7582 case MEM:
7583 output_address (GET_MODE (x), XEXP (x, 0));
7584 break;
7586 case CONST:
7587 case CODE_LABEL:
7588 case LABEL_REF:
7589 case SYMBOL_REF:
7590 output_addr_const (file, x);
7591 break;
7593 case CONST_INT:
7594 ival = INTVAL (x);
7595 switch (code)
7597 case 0:
7598 break;
7599 case 'b':
7600 ival &= 0xff;
7601 break;
7602 case 'c':
7603 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7604 break;
7605 case 'x':
7606 ival &= 0xffff;
7607 break;
7608 case 'h':
7609 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7610 break;
7611 case 'i':
7612 ival = s390_extract_part (x, HImode, 0);
7613 break;
7614 case 'j':
7615 ival = s390_extract_part (x, HImode, -1);
7616 break;
7617 case 'k':
7618 ival = s390_extract_part (x, SImode, 0);
7619 break;
7620 case 'm':
7621 ival = s390_extract_part (x, SImode, -1);
7622 break;
7623 case 'o':
7624 ival &= 0xffffffff;
7625 break;
7626 case 'e': case 'f':
7627 case 's': case 't':
7629 int start, end;
7630 int len;
7631 bool ok;
7633 len = (code == 's' || code == 'e' ? 64 : 32);
7634 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7635 gcc_assert (ok);
7636 if (code == 's' || code == 't')
7637 ival = start;
7638 else
7639 ival = end;
7641 break;
7642 default:
7643 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7645 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7646 break;
7648 case CONST_WIDE_INT:
7649 if (code == 'b')
7650 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7651 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7652 else if (code == 'x')
7653 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7654 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7655 else if (code == 'h')
7656 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7657 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7658 else
7660 if (code == 0)
7661 output_operand_lossage ("invalid constant - try using "
7662 "an output modifier");
7663 else
7664 output_operand_lossage ("invalid constant for output modifier '%c'",
7665 code);
7667 break;
7668 case CONST_VECTOR:
7669 switch (code)
7671 case 'h':
7672 gcc_assert (const_vec_duplicate_p (x));
7673 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7674 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7675 break;
7676 case 'e':
7677 case 's':
7679 int start, end;
7680 bool ok;
7682 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7683 gcc_assert (ok);
7684 ival = (code == 's') ? start : end;
7685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7687 break;
7688 case 't':
7690 unsigned mask;
7691 bool ok = s390_bytemask_vector_p (x, &mask);
7692 gcc_assert (ok);
7693 fprintf (file, "%u", mask);
7695 break;
7697 default:
7698 output_operand_lossage ("invalid constant vector for output "
7699 "modifier '%c'", code);
7701 break;
7703 default:
7704 if (code == 0)
7705 output_operand_lossage ("invalid expression - try using "
7706 "an output modifier");
7707 else
7708 output_operand_lossage ("invalid expression for output "
7709 "modifier '%c'", code);
7710 break;
7714 /* Target hook for assembling integer objects. We need to define it
7715    here to work around a bug in some versions of GAS, which couldn't
7716 handle values smaller than INT_MIN when printed in decimal. */
7718 static bool
7719 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7721 if (size == 8 && aligned_p
7722 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7724 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7725 INTVAL (x));
7726 return true;
7728 return default_assemble_integer (x, size, aligned_p);
7731 /* Returns true if register REGNO is used for forming
7732 a memory address in expression X. */
7734 static bool
7735 reg_used_in_mem_p (int regno, rtx x)
7737 enum rtx_code code = GET_CODE (x);
7738 int i, j;
7739 const char *fmt;
7741 if (code == MEM)
7743 if (refers_to_regno_p (regno, XEXP (x, 0)))
7744 return true;
7746 else if (code == SET
7747 && GET_CODE (SET_DEST (x)) == PC)
7749 if (refers_to_regno_p (regno, SET_SRC (x)))
7750 return true;
7753 fmt = GET_RTX_FORMAT (code);
7754 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7756 if (fmt[i] == 'e'
7757 && reg_used_in_mem_p (regno, XEXP (x, i)))
7758 return true;
7760 else if (fmt[i] == 'E')
7761 for (j = 0; j < XVECLEN (x, i); j++)
7762 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7763 return true;
7765 return false;
7768 /* Returns true if expression DEP_RTX sets an address register
7769 used by instruction INSN to address memory. */
7771 static bool
7772 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7774 rtx target, pat;
7776 if (NONJUMP_INSN_P (dep_rtx))
7777 dep_rtx = PATTERN (dep_rtx);
7779 if (GET_CODE (dep_rtx) == SET)
7781 target = SET_DEST (dep_rtx);
7782 if (GET_CODE (target) == STRICT_LOW_PART)
7783 target = XEXP (target, 0);
7784 while (GET_CODE (target) == SUBREG)
7785 target = SUBREG_REG (target);
7787 if (GET_CODE (target) == REG)
7789 int regno = REGNO (target);
7791 if (s390_safe_attr_type (insn) == TYPE_LA)
7793 pat = PATTERN (insn);
7794 if (GET_CODE (pat) == PARALLEL)
7796 gcc_assert (XVECLEN (pat, 0) == 2);
7797 pat = XVECEXP (pat, 0, 0);
7799 gcc_assert (GET_CODE (pat) == SET);
7800 return refers_to_regno_p (regno, SET_SRC (pat));
7802 else if (get_attr_atype (insn) == ATYPE_AGEN)
7803 return reg_used_in_mem_p (regno, PATTERN (insn));
7806 return false;
7809 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit.  */
7812 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7814 rtx dep_rtx = PATTERN (dep_insn);
7815 int i;
7817 if (GET_CODE (dep_rtx) == SET
7818 && addr_generation_dependency_p (dep_rtx, insn))
7819 return 1;
7820 else if (GET_CODE (dep_rtx) == PARALLEL)
7822 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7824 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7825 return 1;
7828 return 0;
7832 /* A C statement (sans semicolon) to update the integer scheduling priority
7833 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7834 reduce the priority to execute INSN later. Do not define this macro if
7835 you do not need to adjust the scheduling priorities of insns.
7837 A STD instruction should be scheduled earlier,
7838 in order to use the bypass. */
7839 static int
7840 s390_adjust_priority (rtx_insn *insn, int priority)
7842 if (! INSN_P (insn))
7843 return priority;
7845 if (s390_tune <= PROCESSOR_2064_Z900)
7846 return priority;
7848 switch (s390_safe_attr_type (insn))
7850 case TYPE_FSTOREDF:
7851 case TYPE_FSTORESF:
7852 priority = priority << 3;
7853 break;
7854 case TYPE_STORE:
7855 case TYPE_STM:
7856 priority = priority << 1;
7857 break;
7858 default:
7859 break;
7861 return priority;
7865 /* The number of instructions that can be issued per cycle. */
7867 static int
7868 s390_issue_rate (void)
7870 switch (s390_tune)
7872 case PROCESSOR_2084_Z990:
7873 case PROCESSOR_2094_Z9_109:
7874 case PROCESSOR_2094_Z9_EC:
7875 case PROCESSOR_2817_Z196:
7876 return 3;
7877 case PROCESSOR_2097_Z10:
7878 return 2;
7879 case PROCESSOR_9672_G5:
7880 case PROCESSOR_9672_G6:
7881 case PROCESSOR_2064_Z900:
7882 /* Starting with EC12 we use the sched_reorder hook to take care
7883 of instruction dispatch constraints. The algorithm only
7884 picks the best instruction and assumes only a single
7885 instruction gets issued per cycle. */
7886 case PROCESSOR_2827_ZEC12:
7887 case PROCESSOR_2964_Z13:
7888 default:
7889 return 1;
7893 static int
7894 s390_first_cycle_multipass_dfa_lookahead (void)
7896 return 4;
7899 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7900 Fix up MEMs as required. */
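/* For illustration, a literal pool reference such as

     (mem (symbol_ref ".LC5"))

   is rewritten into

     (mem (unspec [(symbol_ref ".LC5") (reg base)] UNSPEC_LTREF))

   so later passes can see which base register the access depends on
   (a sketch of the plain MEM case handled first below; ".LC5" is
   just an example label).  */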
7902 static void
7903 annotate_constant_pool_refs (rtx *x)
7905 int i, j;
7906 const char *fmt;
7908 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7909 || !CONSTANT_POOL_ADDRESS_P (*x));
7911 /* Literal pool references can only occur inside a MEM ... */
7912 if (GET_CODE (*x) == MEM)
7914 rtx memref = XEXP (*x, 0);
7916 if (GET_CODE (memref) == SYMBOL_REF
7917 && CONSTANT_POOL_ADDRESS_P (memref))
7919 rtx base = cfun->machine->base_reg;
7920 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7921 UNSPEC_LTREF);
7923 *x = replace_equiv_address (*x, addr);
7924 return;
7927 if (GET_CODE (memref) == CONST
7928 && GET_CODE (XEXP (memref, 0)) == PLUS
7929 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7930 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7931 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7933 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7934 rtx sym = XEXP (XEXP (memref, 0), 0);
7935 rtx base = cfun->machine->base_reg;
7936 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7937 UNSPEC_LTREF);
7939 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7940 return;
7944 /* ... or a load-address type pattern. */
7945 if (GET_CODE (*x) == SET)
7947 rtx addrref = SET_SRC (*x);
7949 if (GET_CODE (addrref) == SYMBOL_REF
7950 && CONSTANT_POOL_ADDRESS_P (addrref))
7952 rtx base = cfun->machine->base_reg;
7953 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7954 UNSPEC_LTREF);
7956 SET_SRC (*x) = addr;
7957 return;
7960 if (GET_CODE (addrref) == CONST
7961 && GET_CODE (XEXP (addrref, 0)) == PLUS
7962 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7963 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7964 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7966 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7967 rtx sym = XEXP (XEXP (addrref, 0), 0);
7968 rtx base = cfun->machine->base_reg;
7969 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7970 UNSPEC_LTREF);
7972 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7973 return;
7977 /* Annotate LTREL_BASE as well. */
7978 if (GET_CODE (*x) == UNSPEC
7979 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7981 rtx base = cfun->machine->base_reg;
7982 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7983 UNSPEC_LTREL_BASE);
7984 return;
7987 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7988 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7990 if (fmt[i] == 'e')
7992 annotate_constant_pool_refs (&XEXP (*x, i));
7994 else if (fmt[i] == 'E')
7996 for (j = 0; j < XVECLEN (*x, i); j++)
7997 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8002 /* Split all branches that exceed the maximum distance.
8003 Returns true if this created a new literal pool entry. */
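/* Sketch of the rewrite done below: a branch whose target lies outside
   the 64KB range of a relative branch is changed so that the target
   address is first loaded into the return register (directly from the
   literal pool, or computed from an LTREL_OFFSET pool entry plus the
   LTREL_BASE for PIC code), and the branch then goes indirectly
   through that register.  */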
8005 static int
8006 s390_split_branches (void)
8008 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8009 int new_literal = 0, ret;
8010 rtx_insn *insn;
8011 rtx pat, target;
8012 rtx *label;
8014 /* We need correct insn addresses. */
8016 shorten_branches (get_insns ());
8018 /* Find all branches that exceed 64KB, and split them. */
8020 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8022 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8023 continue;
8025 pat = PATTERN (insn);
8026 if (GET_CODE (pat) == PARALLEL)
8027 pat = XVECEXP (pat, 0, 0);
8028 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8029 continue;
8031 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8033 label = &SET_SRC (pat);
8035 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8037 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8038 label = &XEXP (SET_SRC (pat), 1);
8039 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8040 label = &XEXP (SET_SRC (pat), 2);
8041 else
8042 continue;
8044 else
8045 continue;
8047 if (get_attr_length (insn) <= 4)
8048 continue;
8050 /* We are going to use the return register as a scratch register, so
8051 make sure it will be saved/restored by the prologue/epilogue. */
8052 cfun_frame_layout.save_return_addr_p = 1;
8054 if (!flag_pic)
8056 new_literal = 1;
8057 rtx mem = force_const_mem (Pmode, *label);
8058 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8059 insn);
8060 INSN_ADDRESSES_NEW (set_insn, -1);
8061 annotate_constant_pool_refs (&PATTERN (set_insn));
8063 target = temp_reg;
8065 else
8067 new_literal = 1;
8068 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8069 UNSPEC_LTREL_OFFSET);
8070 target = gen_rtx_CONST (Pmode, target);
8071 target = force_const_mem (Pmode, target);
8072 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8073 insn);
8074 INSN_ADDRESSES_NEW (set_insn, -1);
8075 annotate_constant_pool_refs (&PATTERN (set_insn));
8077 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8078 cfun->machine->base_reg),
8079 UNSPEC_LTREL_BASE);
8080 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8083 ret = validate_change (insn, label, target, 0);
8084 gcc_assert (ret);
8087 return new_literal;
8091 /* Find an annotated literal pool symbol referenced in RTX X,
8092 and store it at REF. Will abort if X contains references to
8093 more than one such pool symbol; multiple references to the same
8094 symbol are allowed, however.
8096 The rtx pointed to by REF must be initialized to NULL_RTX
8097 by the caller before calling this routine. */
8099 static void
8100 find_constant_pool_ref (rtx x, rtx *ref)
8102 int i, j;
8103 const char *fmt;
8105 /* Ignore LTREL_BASE references. */
8106 if (GET_CODE (x) == UNSPEC
8107 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8108 return;
8109 /* Likewise POOL_ENTRY insns. */
8110 if (GET_CODE (x) == UNSPEC_VOLATILE
8111 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8112 return;
8114 gcc_assert (GET_CODE (x) != SYMBOL_REF
8115 || !CONSTANT_POOL_ADDRESS_P (x));
8117 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8119 rtx sym = XVECEXP (x, 0, 0);
8120 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8121 && CONSTANT_POOL_ADDRESS_P (sym));
8123 if (*ref == NULL_RTX)
8124 *ref = sym;
8125 else
8126 gcc_assert (*ref == sym);
8128 return;
8131 fmt = GET_RTX_FORMAT (GET_CODE (x));
8132 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8134 if (fmt[i] == 'e')
8136 find_constant_pool_ref (XEXP (x, i), ref);
8138 else if (fmt[i] == 'E')
8140 for (j = 0; j < XVECLEN (x, i); j++)
8141 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8146 /* Replace every reference to the annotated literal pool
8147 symbol REF in X by its base plus OFFSET. */
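/* Schematically,
     (unspec [(symbol_ref ".LC0") (reg base)] UNSPEC_LTREF)
   becomes
     (plus (reg base) OFFSET)
   where OFFSET is the pool-relative offset computed by
   s390_find_constant or s390_find_execute; ".LC0" is only a
   placeholder for illustration.  */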
8149 static void
8150 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8152 int i, j;
8153 const char *fmt;
8155 gcc_assert (*x != ref);
8157 if (GET_CODE (*x) == UNSPEC
8158 && XINT (*x, 1) == UNSPEC_LTREF
8159 && XVECEXP (*x, 0, 0) == ref)
8161 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8162 return;
8165 if (GET_CODE (*x) == PLUS
8166 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8167 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8168 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8169 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8171 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8172 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8173 return;
8176 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8177 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8179 if (fmt[i] == 'e')
8181 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8183 else if (fmt[i] == 'E')
8185 for (j = 0; j < XVECLEN (*x, i); j++)
8186 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8191 /* Check whether X contains an UNSPEC_LTREL_BASE.
8192 Return its constant pool symbol if found, NULL_RTX otherwise. */
8194 static rtx
8195 find_ltrel_base (rtx x)
8197 int i, j;
8198 const char *fmt;
8200 if (GET_CODE (x) == UNSPEC
8201 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8202 return XVECEXP (x, 0, 0);
8204 fmt = GET_RTX_FORMAT (GET_CODE (x));
8205 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8207 if (fmt[i] == 'e')
8209 rtx fnd = find_ltrel_base (XEXP (x, i));
8210 if (fnd)
8211 return fnd;
8213 else if (fmt[i] == 'E')
8215 for (j = 0; j < XVECLEN (x, i); j++)
8217 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8218 if (fnd)
8219 return fnd;
8224 return NULL_RTX;
8227 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8229 static void
8230 replace_ltrel_base (rtx *x)
8232 int i, j;
8233 const char *fmt;
8235 if (GET_CODE (*x) == UNSPEC
8236 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8238 *x = XVECEXP (*x, 0, 1);
8239 return;
8242 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8243 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8245 if (fmt[i] == 'e')
8247 replace_ltrel_base (&XEXP (*x, i));
8249 else if (fmt[i] == 'E')
8251 for (j = 0; j < XVECLEN (*x, i); j++)
8252 replace_ltrel_base (&XVECEXP (*x, i, j));
8258 /* We keep a list of constants which we have to add to internal
8259 constant tables in the middle of large functions. */
8261 #define NR_C_MODES 32
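/* The modes below are listed in order of non-increasing size, so that
   s390_dump_pool, which walks this array front to back, can emit every
   entry with sufficient alignment (see the comment there).  */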
8262 machine_mode constant_modes[NR_C_MODES] =
8264 TFmode, TImode, TDmode,
8265 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8266 V4SFmode, V2DFmode, V1TFmode,
8267 DFmode, DImode, DDmode,
8268 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8269 SFmode, SImode, SDmode,
8270 V4QImode, V2HImode, V1SImode, V1SFmode,
8271 HImode,
8272 V2QImode, V1HImode,
8273 QImode,
8274 V1QImode
8277 struct constant
8279 struct constant *next;
8280 rtx value;
8281 rtx_code_label *label;
8284 struct constant_pool
8286 struct constant_pool *next;
8287 rtx_insn *first_insn;
8288 rtx_insn *pool_insn;
8289 bitmap insns;
8290 rtx_insn *emit_pool_after;
8292 struct constant *constants[NR_C_MODES];
8293 struct constant *execute;
8294 rtx_code_label *label;
8295 int size;
8298 /* Allocate new constant_pool structure. */
8300 static struct constant_pool *
8301 s390_alloc_pool (void)
8303 struct constant_pool *pool;
8304 int i;
8306 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8307 pool->next = NULL;
8308 for (i = 0; i < NR_C_MODES; i++)
8309 pool->constants[i] = NULL;
8311 pool->execute = NULL;
8312 pool->label = gen_label_rtx ();
8313 pool->first_insn = NULL;
8314 pool->pool_insn = NULL;
8315 pool->insns = BITMAP_ALLOC (NULL);
8316 pool->size = 0;
8317 pool->emit_pool_after = NULL;
8319 return pool;
8322 /* Create new constant pool covering instructions starting at INSN
8323 and chain it to the end of POOL_LIST. */
8325 static struct constant_pool *
8326 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8328 struct constant_pool *pool, **prev;
8330 pool = s390_alloc_pool ();
8331 pool->first_insn = insn;
8333 for (prev = pool_list; *prev; prev = &(*prev)->next)
8335 *prev = pool;
8337 return pool;
8340 /* End range of instructions covered by POOL at INSN and emit
8341 placeholder insn representing the pool. */
8343 static void
8344 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8346 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8348 if (!insn)
8349 insn = get_last_insn ();
8351 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8352 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8355 /* Add INSN to the list of insns covered by POOL. */
8357 static void
8358 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8360 bitmap_set_bit (pool->insns, INSN_UID (insn));
8363 /* Return pool out of POOL_LIST that covers INSN. */
8365 static struct constant_pool *
8366 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8368 struct constant_pool *pool;
8370 for (pool = pool_list; pool; pool = pool->next)
8371 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8372 break;
8374 return pool;
8377 /* Add constant VAL of mode MODE to the constant pool POOL. */
8379 static void
8380 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8382 struct constant *c;
8383 int i;
8385 for (i = 0; i < NR_C_MODES; i++)
8386 if (constant_modes[i] == mode)
8387 break;
8388 gcc_assert (i != NR_C_MODES);
8390 for (c = pool->constants[i]; c != NULL; c = c->next)
8391 if (rtx_equal_p (val, c->value))
8392 break;
8394 if (c == NULL)
8396 c = (struct constant *) xmalloc (sizeof *c);
8397 c->value = val;
8398 c->label = gen_label_rtx ();
8399 c->next = pool->constants[i];
8400 pool->constants[i] = c;
8401 pool->size += GET_MODE_SIZE (mode);
8405 /* Return an rtx that represents the offset of X from the start of
8406 pool POOL. */
8408 static rtx
8409 s390_pool_offset (struct constant_pool *pool, rtx x)
8411 rtx label;
8413 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8414 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8415 UNSPEC_POOL_OFFSET);
8416 return gen_rtx_CONST (GET_MODE (x), x);
8419 /* Find constant VAL of mode MODE in the constant pool POOL.
8420 Return an RTX describing the distance from the start of
8421 the pool to the location of the constant. */
8423 static rtx
8424 s390_find_constant (struct constant_pool *pool, rtx val,
8425 machine_mode mode)
8427 struct constant *c;
8428 int i;
8430 for (i = 0; i < NR_C_MODES; i++)
8431 if (constant_modes[i] == mode)
8432 break;
8433 gcc_assert (i != NR_C_MODES);
8435 for (c = pool->constants[i]; c != NULL; c = c->next)
8436 if (rtx_equal_p (val, c->value))
8437 break;
8439 gcc_assert (c);
8441 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8444 /* Check whether INSN is an execute. Return the label_ref to its
8445 execute target template if so, NULL_RTX otherwise. */
8447 static rtx
8448 s390_execute_label (rtx insn)
8450 if (NONJUMP_INSN_P (insn)
8451 && GET_CODE (PATTERN (insn)) == PARALLEL
8452 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8453 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8454 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8456 return NULL_RTX;
8459 /* Add execute target for INSN to the constant pool POOL. */
8461 static void
8462 s390_add_execute (struct constant_pool *pool, rtx insn)
8464 struct constant *c;
8466 for (c = pool->execute; c != NULL; c = c->next)
8467 if (INSN_UID (insn) == INSN_UID (c->value))
8468 break;
8470 if (c == NULL)
8472 c = (struct constant *) xmalloc (sizeof *c);
8473 c->value = insn;
8474 c->label = gen_label_rtx ();
8475 c->next = pool->execute;
8476 pool->execute = c;
8477 pool->size += 6;
8481 /* Find execute target for INSN in the constant pool POOL.
8482 Return an RTX describing the distance from the start of
8483 the pool to the location of the execute target. */
8485 static rtx
8486 s390_find_execute (struct constant_pool *pool, rtx insn)
8488 struct constant *c;
8490 for (c = pool->execute; c != NULL; c = c->next)
8491 if (INSN_UID (insn) == INSN_UID (c->value))
8492 break;
8494 gcc_assert (c);
8496 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8499 /* For an execute INSN, extract the execute target template. */
8501 static rtx
8502 s390_execute_target (rtx insn)
8504 rtx pattern = PATTERN (insn);
8505 gcc_assert (s390_execute_label (insn));
8507 if (XVECLEN (pattern, 0) == 2)
8509 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8511 else
8513 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8514 int i;
8516 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8517 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8519 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8522 return pattern;
8525 /* Indicate that INSN cannot be duplicated. This is the case for
8526 execute insns that carry a unique label. */
8528 static bool
8529 s390_cannot_copy_insn_p (rtx_insn *insn)
8531 rtx label = s390_execute_label (insn);
8532 return label && label != const0_rtx;
8535 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8536 do not emit the pool base label. */
8538 static void
8539 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8541 struct constant *c;
8542 rtx_insn *insn = pool->pool_insn;
8543 int i;
8545 /* Switch to rodata section. */
8546 if (TARGET_CPU_ZARCH)
8548 insn = emit_insn_after (gen_pool_section_start (), insn);
8549 INSN_ADDRESSES_NEW (insn, -1);
8552 /* Ensure minimum pool alignment. */
8553 if (TARGET_CPU_ZARCH)
8554 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8555 else
8556 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8557 INSN_ADDRESSES_NEW (insn, -1);
8559 /* Emit pool base label. */
8560 if (!remote_label)
8562 insn = emit_label_after (pool->label, insn);
8563 INSN_ADDRESSES_NEW (insn, -1);
8566 /* Dump constants in descending alignment requirement order,
8567 ensuring proper alignment for every constant. */
8568 for (i = 0; i < NR_C_MODES; i++)
8569 for (c = pool->constants[i]; c; c = c->next)
8571 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8572 rtx value = copy_rtx (c->value);
8573 if (GET_CODE (value) == CONST
8574 && GET_CODE (XEXP (value, 0)) == UNSPEC
8575 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8576 && XVECLEN (XEXP (value, 0), 0) == 1)
8577 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8579 insn = emit_label_after (c->label, insn);
8580 INSN_ADDRESSES_NEW (insn, -1);
8582 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8583 gen_rtvec (1, value),
8584 UNSPECV_POOL_ENTRY);
8585 insn = emit_insn_after (value, insn);
8586 INSN_ADDRESSES_NEW (insn, -1);
8589 /* Ensure minimum alignment for instructions. */
8590 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8591 INSN_ADDRESSES_NEW (insn, -1);
8593 /* Output in-pool execute template insns. */
8594 for (c = pool->execute; c; c = c->next)
8596 insn = emit_label_after (c->label, insn);
8597 INSN_ADDRESSES_NEW (insn, -1);
8599 insn = emit_insn_after (s390_execute_target (c->value), insn);
8600 INSN_ADDRESSES_NEW (insn, -1);
8603 /* Switch back to previous section. */
8604 if (TARGET_CPU_ZARCH)
8606 insn = emit_insn_after (gen_pool_section_end (), insn);
8607 INSN_ADDRESSES_NEW (insn, -1);
8610 insn = emit_barrier_after (insn);
8611 INSN_ADDRESSES_NEW (insn, -1);
8613 /* Remove placeholder insn. */
8614 remove_insn (pool->pool_insn);
8617 /* Free all memory used by POOL. */
8619 static void
8620 s390_free_pool (struct constant_pool *pool)
8622 struct constant *c, *next;
8623 int i;
8625 for (i = 0; i < NR_C_MODES; i++)
8626 for (c = pool->constants[i]; c; c = next)
8628 next = c->next;
8629 free (c);
8632 for (c = pool->execute; c; c = next)
8634 next = c->next;
8635 free (c);
8638 BITMAP_FREE (pool->insns);
8639 free (pool);
8643 /* Collect main literal pool. Return NULL on overflow. */
8645 static struct constant_pool *
8646 s390_mainpool_start (void)
8648 struct constant_pool *pool;
8649 rtx_insn *insn;
8651 pool = s390_alloc_pool ();
8653 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8655 if (NONJUMP_INSN_P (insn)
8656 && GET_CODE (PATTERN (insn)) == SET
8657 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8658 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8660 /* There might be two main_pool instructions if base_reg
8661 is call-clobbered; one for shrink-wrapped code and one
8662 for the rest. We want to keep the first. */
8663 if (pool->pool_insn)
8665 insn = PREV_INSN (insn);
8666 delete_insn (NEXT_INSN (insn));
8667 continue;
8669 pool->pool_insn = insn;
8672 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8674 s390_add_execute (pool, insn);
8676 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8678 rtx pool_ref = NULL_RTX;
8679 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8680 if (pool_ref)
8682 rtx constant = get_pool_constant (pool_ref);
8683 machine_mode mode = get_pool_mode (pool_ref);
8684 s390_add_constant (pool, constant, mode);
8688 /* If hot/cold partitioning is enabled we have to make sure that
8689 the literal pool is emitted in the same section where the
8690 initialization of the literal pool base pointer takes place.
8691 emit_pool_after is only used in the non-overflow case on
8692 non-Z CPUs where we can emit the literal pool at the end of the
8693 function body within the text section. */
8694 if (NOTE_P (insn)
8695 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8696 && !pool->emit_pool_after)
8697 pool->emit_pool_after = PREV_INSN (insn);
8700 gcc_assert (pool->pool_insn || pool->size == 0);
8702 if (pool->size >= 4096)
8704 /* We're going to chunkify the pool, so remove the main
8705 pool placeholder insn. */
8706 remove_insn (pool->pool_insn);
8708 s390_free_pool (pool);
8709 pool = NULL;
8712 /* If the function ends with the section where the literal pool
8713 should be emitted, set the marker to its end. */
8714 if (pool && !pool->emit_pool_after)
8715 pool->emit_pool_after = get_last_insn ();
8717 return pool;
8720 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8721 Modify the current function to output the pool constants as well as
8722 the pool register setup instruction. */
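/* Roughly, one of three strategies is used below: on z/Architecture
   the pool base is loaded with LARL and the pool is emitted after the
   function; on ESA/390, if code plus pool fits into 4096 bytes, a BASR
   sets up the base and the pool goes to the end of the function;
   otherwise the pool is emitted inline and branched around.  */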
8724 static void
8725 s390_mainpool_finish (struct constant_pool *pool)
8727 rtx base_reg = cfun->machine->base_reg;
8729 /* If the pool is empty, we're done. */
8730 if (pool->size == 0)
8732 /* We don't actually need a base register after all. */
8733 cfun->machine->base_reg = NULL_RTX;
8735 if (pool->pool_insn)
8736 remove_insn (pool->pool_insn);
8737 s390_free_pool (pool);
8738 return;
8741 /* We need correct insn addresses. */
8742 shorten_branches (get_insns ());
8744 /* On zSeries, we use a LARL to load the pool register. The pool is
8745 located in the .rodata section, so we emit it after the function. */
8746 if (TARGET_CPU_ZARCH)
8748 rtx set = gen_main_base_64 (base_reg, pool->label);
8749 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8750 INSN_ADDRESSES_NEW (insn, -1);
8751 remove_insn (pool->pool_insn);
8753 insn = get_last_insn ();
8754 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8755 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8757 s390_dump_pool (pool, 0);
8760 /* On S/390, if the total size of the function's code plus literal pool
8761 does not exceed 4096 bytes, we use BASR to set up a function base
8762 pointer, and emit the literal pool at the end of the function. */
8763 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8764 + pool->size + 8 /* alignment slop */ < 4096)
8766 rtx set = gen_main_base_31_small (base_reg, pool->label);
8767 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8768 INSN_ADDRESSES_NEW (insn, -1);
8769 remove_insn (pool->pool_insn);
8771 insn = emit_label_after (pool->label, insn);
8772 INSN_ADDRESSES_NEW (insn, -1);
8774 /* emit_pool_after will be set by s390_mainpool_start to the
8775 last insn of the section where the literal pool should be
8776 emitted. */
8777 insn = pool->emit_pool_after;
8779 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8780 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8782 s390_dump_pool (pool, 1);
8785 /* Otherwise, we emit an inline literal pool and use BASR to branch
8786 over it, setting up the pool register at the same time. */
8787 else
8789 rtx_code_label *pool_end = gen_label_rtx ();
8791 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8792 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8793 JUMP_LABEL (insn) = pool_end;
8794 INSN_ADDRESSES_NEW (insn, -1);
8795 remove_insn (pool->pool_insn);
8797 insn = emit_label_after (pool->label, insn);
8798 INSN_ADDRESSES_NEW (insn, -1);
8800 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8801 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8803 insn = emit_label_after (pool_end, pool->pool_insn);
8804 INSN_ADDRESSES_NEW (insn, -1);
8806 s390_dump_pool (pool, 1);
8810 /* Replace all literal pool references. */
8812 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8814 if (INSN_P (insn))
8815 replace_ltrel_base (&PATTERN (insn));
8817 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8819 rtx addr, pool_ref = NULL_RTX;
8820 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8821 if (pool_ref)
8823 if (s390_execute_label (insn))
8824 addr = s390_find_execute (pool, insn);
8825 else
8826 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8827 get_pool_mode (pool_ref));
8829 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8830 INSN_CODE (insn) = -1;
8836 /* Free the pool. */
8837 s390_free_pool (pool);
8840 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8841 We have decided we cannot use this pool, so revert all changes
8842 to the current function that were done by s390_mainpool_start. */
8843 static void
8844 s390_mainpool_cancel (struct constant_pool *pool)
8846 /* We didn't actually change the instruction stream, so simply
8847 free the pool memory. */
8848 s390_free_pool (pool);
8852 /* Chunkify the literal pool. */
8854 #define S390_POOL_CHUNK_MIN 0xc00
8855 #define S390_POOL_CHUNK_MAX 0xe00
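/* These bounds keep each chunk well below 4096 bytes, which appears to
   match the reach of a base register plus 12-bit displacement (the
   same 4096-byte limit is checked for the main pool in
   s390_mainpool_start).  The slack above S390_POOL_CHUNK_MAX
   presumably leaves room for alignment padding and execute templates
   added to a chunk before it can be closed.  */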
8857 static struct constant_pool *
8858 s390_chunkify_start (void)
8860 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8861 int extra_size = 0;
8862 bitmap far_labels;
8863 rtx pending_ltrel = NULL_RTX;
8864 rtx_insn *insn;
8866 rtx (*gen_reload_base) (rtx, rtx) =
8867 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8870 /* We need correct insn addresses. */
8872 shorten_branches (get_insns ());
8874 /* Scan all insns and move literals to pool chunks. */
8876 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8878 bool section_switch_p = false;
8880 /* Check for pending LTREL_BASE. */
8881 if (INSN_P (insn))
8883 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8884 if (ltrel_base)
8886 gcc_assert (ltrel_base == pending_ltrel);
8887 pending_ltrel = NULL_RTX;
8891 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8893 if (!curr_pool)
8894 curr_pool = s390_start_pool (&pool_list, insn);
8896 s390_add_execute (curr_pool, insn);
8897 s390_add_pool_insn (curr_pool, insn);
8899 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8901 rtx pool_ref = NULL_RTX;
8902 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8903 if (pool_ref)
8905 rtx constant = get_pool_constant (pool_ref);
8906 machine_mode mode = get_pool_mode (pool_ref);
8908 if (!curr_pool)
8909 curr_pool = s390_start_pool (&pool_list, insn);
8911 s390_add_constant (curr_pool, constant, mode);
8912 s390_add_pool_insn (curr_pool, insn);
8914 /* Don't split the pool chunk between a LTREL_OFFSET load
8915 and the corresponding LTREL_BASE. */
8916 if (GET_CODE (constant) == CONST
8917 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8918 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8920 gcc_assert (!pending_ltrel);
8921 pending_ltrel = pool_ref;
8926 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8928 if (curr_pool)
8929 s390_add_pool_insn (curr_pool, insn);
8930 /* An LTREL_BASE must follow within the same basic block. */
8931 gcc_assert (!pending_ltrel);
8934 if (NOTE_P (insn))
8935 switch (NOTE_KIND (insn))
8937 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8938 section_switch_p = true;
8939 break;
8940 case NOTE_INSN_VAR_LOCATION:
8941 case NOTE_INSN_CALL_ARG_LOCATION:
8942 continue;
8943 default:
8944 break;
8947 if (!curr_pool
8948 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8949 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8950 continue;
8952 if (TARGET_CPU_ZARCH)
8954 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8955 continue;
8957 s390_end_pool (curr_pool, NULL);
8958 curr_pool = NULL;
8960 else
8962 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8963 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8964 + extra_size;
8966 /* We will later have to insert base register reload insns.
8967 Those will have an effect on code size, which we need to
8968 consider here. This calculation makes rather pessimistic
8969 worst-case assumptions. */
8970 if (LABEL_P (insn))
8971 extra_size += 6;
8973 if (chunk_size < S390_POOL_CHUNK_MIN
8974 && curr_pool->size < S390_POOL_CHUNK_MIN
8975 && !section_switch_p)
8976 continue;
8978 /* Pool chunks can only be inserted after BARRIERs ... */
8979 if (BARRIER_P (insn))
8981 s390_end_pool (curr_pool, insn);
8982 curr_pool = NULL;
8983 extra_size = 0;
8986 /* ... so if we don't find one in time, create one. */
8987 else if (chunk_size > S390_POOL_CHUNK_MAX
8988 || curr_pool->size > S390_POOL_CHUNK_MAX
8989 || section_switch_p)
8991 rtx_insn *label, *jump, *barrier, *next, *prev;
8993 if (!section_switch_p)
8995 /* We can insert the barrier only after a 'real' insn. */
8996 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8997 continue;
8998 if (get_attr_length (insn) == 0)
8999 continue;
9000 /* Don't separate LTREL_BASE from the corresponding
9001 LTREL_OFFSET load. */
9002 if (pending_ltrel)
9003 continue;
9004 next = insn;
9007 insn = next;
9008 next = NEXT_INSN (insn);
9010 while (next
9011 && NOTE_P (next)
9012 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
9013 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
9015 else
9017 gcc_assert (!pending_ltrel);
9019 /* The old pool has to end before the section switch
9020 note in order to make it part of the current
9021 section. */
9022 insn = PREV_INSN (insn);
9025 label = gen_label_rtx ();
9026 prev = insn;
9027 if (prev && NOTE_P (prev))
9028 prev = prev_nonnote_insn (prev);
9029 if (prev)
9030 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9031 INSN_LOCATION (prev));
9032 else
9033 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9034 barrier = emit_barrier_after (jump);
9035 insn = emit_label_after (label, barrier);
9036 JUMP_LABEL (jump) = label;
9037 LABEL_NUSES (label) = 1;
9039 INSN_ADDRESSES_NEW (jump, -1);
9040 INSN_ADDRESSES_NEW (barrier, -1);
9041 INSN_ADDRESSES_NEW (insn, -1);
9043 s390_end_pool (curr_pool, barrier);
9044 curr_pool = NULL;
9045 extra_size = 0;
9050 if (curr_pool)
9051 s390_end_pool (curr_pool, NULL);
9052 gcc_assert (!pending_ltrel);
9054 /* Find all labels that are branched into
9055 from an insn belonging to a different chunk. */
9057 far_labels = BITMAP_ALLOC (NULL);
9059 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9061 rtx_jump_table_data *table;
9063 /* Labels marked with LABEL_PRESERVE_P can be the target
9064 of non-local jumps, so we have to mark them.
9065 The same holds for named labels.
9067 Don't do that, however, if it is the label before
9068 a jump table. */
9070 if (LABEL_P (insn)
9071 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9073 rtx_insn *vec_insn = NEXT_INSN (insn);
9074 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9075 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9077 /* Check potential targets in a table jump (casesi_jump). */
9078 else if (tablejump_p (insn, NULL, &table))
9080 rtx vec_pat = PATTERN (table);
9081 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9083 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9085 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9087 if (s390_find_pool (pool_list, label)
9088 != s390_find_pool (pool_list, insn))
9089 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9092 /* If we have a direct jump (conditional or unconditional),
9093 check all potential targets. */
9094 else if (JUMP_P (insn))
9096 rtx pat = PATTERN (insn);
9098 if (GET_CODE (pat) == PARALLEL)
9099 pat = XVECEXP (pat, 0, 0);
9101 if (GET_CODE (pat) == SET)
9103 rtx label = JUMP_LABEL (insn);
9104 if (label && !ANY_RETURN_P (label))
9106 if (s390_find_pool (pool_list, label)
9107 != s390_find_pool (pool_list, insn))
9108 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9114 /* Insert base register reload insns before every pool. */
9116 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9118 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9119 curr_pool->label);
9120 rtx_insn *insn = curr_pool->first_insn;
9121 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9124 /* Insert base register reload insns at every far label. */
9126 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9127 if (LABEL_P (insn)
9128 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9130 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9131 if (pool)
9133 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9134 pool->label);
9135 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9140 BITMAP_FREE (far_labels);
9143 /* Recompute insn addresses. */
9145 init_insn_lengths ();
9146 shorten_branches (get_insns ());
9148 return pool_list;
9151 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9152 After we have decided to use this list, finish implementing
9153 all changes to the current function as required. */
9155 static void
9156 s390_chunkify_finish (struct constant_pool *pool_list)
9158 struct constant_pool *curr_pool = NULL;
9159 rtx_insn *insn;
9162 /* Replace all literal pool references. */
9164 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9166 if (INSN_P (insn))
9167 replace_ltrel_base (&PATTERN (insn));
9169 curr_pool = s390_find_pool (pool_list, insn);
9170 if (!curr_pool)
9171 continue;
9173 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9175 rtx addr, pool_ref = NULL_RTX;
9176 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9177 if (pool_ref)
9179 if (s390_execute_label (insn))
9180 addr = s390_find_execute (curr_pool, insn);
9181 else
9182 addr = s390_find_constant (curr_pool,
9183 get_pool_constant (pool_ref),
9184 get_pool_mode (pool_ref));
9186 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9187 INSN_CODE (insn) = -1;
9192 /* Dump out all literal pools. */
9194 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9195 s390_dump_pool (curr_pool, 0);
9197 /* Free pool list. */
9199 while (pool_list)
9201 struct constant_pool *next = pool_list->next;
9202 s390_free_pool (pool_list);
9203 pool_list = next;
9207 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9208 We have decided we cannot use this list, so revert all changes
9209 to the current function that were done by s390_chunkify_start. */
9211 static void
9212 s390_chunkify_cancel (struct constant_pool *pool_list)
9214 struct constant_pool *curr_pool = NULL;
9215 rtx_insn *insn;
9217 /* Remove all pool placeholder insns. */
9219 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9221 /* Did we insert an extra barrier? Remove it. */
9222 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9223 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9224 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9226 if (jump && JUMP_P (jump)
9227 && barrier && BARRIER_P (barrier)
9228 && label && LABEL_P (label)
9229 && GET_CODE (PATTERN (jump)) == SET
9230 && SET_DEST (PATTERN (jump)) == pc_rtx
9231 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9232 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9234 remove_insn (jump);
9235 remove_insn (barrier);
9236 remove_insn (label);
9239 remove_insn (curr_pool->pool_insn);
9242 /* Remove all base register reload insns. */
9244 for (insn = get_insns (); insn; )
9246 rtx_insn *next_insn = NEXT_INSN (insn);
9248 if (NONJUMP_INSN_P (insn)
9249 && GET_CODE (PATTERN (insn)) == SET
9250 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9251 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9252 remove_insn (insn);
9254 insn = next_insn;
9257 /* Free pool list. */
9259 while (pool_list)
9261 struct constant_pool *next = pool_list->next;
9262 s390_free_pool (pool_list);
9263 pool_list = next;
9267 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9269 void
9270 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9272 switch (GET_MODE_CLASS (mode))
9274 case MODE_FLOAT:
9275 case MODE_DECIMAL_FLOAT:
9276 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9278 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9279 break;
9281 case MODE_INT:
9282 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9283 mark_symbol_refs_as_used (exp);
9284 break;
9286 case MODE_VECTOR_INT:
9287 case MODE_VECTOR_FLOAT:
9289 int i;
9290 machine_mode inner_mode;
9291 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9293 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9294 for (i = 0; i < XVECLEN (exp, 0); i++)
9295 s390_output_pool_entry (XVECEXP (exp, 0, i),
9296 inner_mode,
9297 i == 0
9298 ? align
9299 : GET_MODE_BITSIZE (inner_mode));
9301 break;
9303 default:
9304 gcc_unreachable ();
9309 /* Return an RTL expression representing the value of the return address
9310 for the frame COUNT steps up from the current frame. FRAME is the
9311 frame pointer of that frame. */
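/* With the default stack layout, GPR N is saved at offset
   N * UNITS_PER_LONG within the register save area, so the code below
   reads the caller's return address (r14, RETURN_REGNUM) from
   FRAME + 14 * UNITS_PER_LONG; with a packed stack it is read from
   FRAME - 2 * UNITS_PER_LONG instead.  */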
9314 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9316 int offset;
9317 rtx addr;
9319 /* Without backchain, we fail for all but the current frame. */
9321 if (!TARGET_BACKCHAIN && count > 0)
9322 return NULL_RTX;
9324 /* For the current frame, we need to make sure the initial
9325 value of RETURN_REGNUM is actually saved. */
9327 if (count == 0)
9329 /* On non-z architectures branch splitting could overwrite r14. */
9330 if (TARGET_CPU_ZARCH)
9331 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9332 else
9334 cfun_frame_layout.save_return_addr_p = true;
9335 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9339 if (TARGET_PACKED_STACK)
9340 offset = -2 * UNITS_PER_LONG;
9341 else
9342 offset = RETURN_REGNUM * UNITS_PER_LONG;
9344 addr = plus_constant (Pmode, frame, offset);
9345 addr = memory_address (Pmode, addr);
9346 return gen_rtx_MEM (Pmode, addr);
9349 /* Return an RTL expression representing the back chain stored in
9350 the current stack frame. */
9353 s390_back_chain_rtx (void)
9355 rtx chain;
9357 gcc_assert (TARGET_BACKCHAIN);
9359 if (TARGET_PACKED_STACK)
9360 chain = plus_constant (Pmode, stack_pointer_rtx,
9361 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9362 else
9363 chain = stack_pointer_rtx;
9365 chain = gen_rtx_MEM (Pmode, chain);
9366 return chain;
9369 /* Find the first call-clobbered register unused in a function.
9370 This could be used as a base register in a leaf function
9371 or for holding the return address before the epilogue. */
9373 static int
9374 find_unused_clobbered_reg (void)
9376 int i;
9377 for (i = 0; i < 6; i++)
9378 if (!df_regs_ever_live_p (i))
9379 return i;
9380 return 0;
9384 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9385 clobbered hard regs in SETREG. */
9387 static void
9388 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9390 char *regs_ever_clobbered = (char *)data;
9391 unsigned int i, regno;
9392 machine_mode mode = GET_MODE (setreg);
9394 if (GET_CODE (setreg) == SUBREG)
9396 rtx inner = SUBREG_REG (setreg);
9397 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9398 return;
9399 regno = subreg_regno (setreg);
9401 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9402 regno = REGNO (setreg);
9403 else
9404 return;
9406 for (i = regno;
9407 i < regno + HARD_REGNO_NREGS (regno, mode);
9408 i++)
9409 regs_ever_clobbered[i] = 1;
9412 /* Walks through all basic blocks of the current function looking
9413 for clobbered hard regs using s390_reg_clobbered_rtx. The elements
9414 of the passed char array REGS_EVER_CLOBBERED are set to one for
9415 each of those regs. */
9417 static void
9418 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9420 basic_block cur_bb;
9421 rtx_insn *cur_insn;
9422 unsigned int i;
9424 memset (regs_ever_clobbered, 0, 32);
9426 /* For non-leaf functions we have to consider all call clobbered regs to be
9427 clobbered. */
9428 if (!crtl->is_leaf)
9430 for (i = 0; i < 32; i++)
9431 regs_ever_clobbered[i] = call_really_used_regs[i];
9434 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9435 this work is done by liveness analysis (mark_regs_live_at_end).
9436 Special care is needed for functions containing landing pads. Landing pads
9437 may use the eh registers, but the code which sets these registers is not
9438 contained in that function. Hence s390_regs_ever_clobbered is not able to
9439 deal with this automatically. */
9440 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9441 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9442 if (crtl->calls_eh_return
9443 || (cfun->machine->has_landing_pad_p
9444 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9445 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9447 /* For nonlocal gotos all call-saved registers have to be saved.
9448 This flag is also set for the unwinding code in libgcc.
9449 See expand_builtin_unwind_init. For regs_ever_live this is done by
9450 reload. */
9451 if (crtl->saves_all_registers)
9452 for (i = 0; i < 32; i++)
9453 if (!call_really_used_regs[i])
9454 regs_ever_clobbered[i] = 1;
9456 FOR_EACH_BB_FN (cur_bb, cfun)
9458 FOR_BB_INSNS (cur_bb, cur_insn)
9460 rtx pat;
9462 if (!INSN_P (cur_insn))
9463 continue;
9465 pat = PATTERN (cur_insn);
9467 /* Ignore GPR restore insns. */
9468 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9470 if (GET_CODE (pat) == SET
9471 && GENERAL_REG_P (SET_DEST (pat)))
9473 /* lgdr */
9474 if (GET_MODE (SET_SRC (pat)) == DImode
9475 && FP_REG_P (SET_SRC (pat)))
9476 continue;
9478 /* l / lg */
9479 if (GET_CODE (SET_SRC (pat)) == MEM)
9480 continue;
9483 /* lm / lmg */
9484 if (GET_CODE (pat) == PARALLEL
9485 && load_multiple_operation (pat, VOIDmode))
9486 continue;
9489 note_stores (pat,
9490 s390_reg_clobbered_rtx,
9491 regs_ever_clobbered);
9496 /* Determine the frame area which actually has to be accessed
9497 in the function epilogue. The values are stored at the
9498 given pointers AREA_BOTTOM (address of the lowest used stack
9499 address) and AREA_TOP (address of the first item which does
9500 not belong to the stack frame). */
9502 static void
9503 s390_frame_area (int *area_bottom, int *area_top)
9505 int b, t;
9507 b = INT_MAX;
9508 t = INT_MIN;
9510 if (cfun_frame_layout.first_restore_gpr != -1)
9512 b = (cfun_frame_layout.gprs_offset
9513 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9514 t = b + (cfun_frame_layout.last_restore_gpr
9515 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9518 if (TARGET_64BIT && cfun_save_high_fprs_p)
9520 b = MIN (b, cfun_frame_layout.f8_offset);
9521 t = MAX (t, (cfun_frame_layout.f8_offset
9522 + cfun_frame_layout.high_fprs * 8));
9525 if (!TARGET_64BIT)
9527 if (cfun_fpr_save_p (FPR4_REGNUM))
9529 b = MIN (b, cfun_frame_layout.f4_offset);
9530 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9532 if (cfun_fpr_save_p (FPR6_REGNUM))
9534 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9535 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9538 *area_bottom = b;
9539 *area_top = t;
9541 /* Update gpr_save_slots in the frame layout trying to make use of
9542 FPRs as GPR save slots.
9543 This is a helper routine of s390_register_info. */
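/* The idea, used from z10 on (see the TARGET_Z10 check below), is to
   save a call-saved GPR into an unused call-clobbered FPR with LDGR
   and restore it with LGDR, so that small leaf functions need not
   touch the stack for their register saves.  */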
9545 static void
9546 s390_register_info_gprtofpr ()
9548 int save_reg_slot = FPR0_REGNUM;
9549 int i, j;
9551 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9552 return;
9554 for (i = 15; i >= 6; i--)
9556 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9557 continue;
9559 /* Advance to the next FP register which can be used as a
9560 GPR save slot. */
9561 while ((!call_really_used_regs[save_reg_slot]
9562 || df_regs_ever_live_p (save_reg_slot)
9563 || cfun_fpr_save_p (save_reg_slot))
9564 && FP_REGNO_P (save_reg_slot))
9565 save_reg_slot++;
9566 if (!FP_REGNO_P (save_reg_slot))
9568 /* We only want to use ldgr/lgdr if we can get rid of
9569 stm/lm entirely. So undo the gpr slot allocation in
9570 case we ran out of FPR save slots. */
9571 for (j = 6; j <= 15; j++)
9572 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9573 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9574 break;
9576 cfun_gpr_save_slot (i) = save_reg_slot++;
9580 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9581 stdarg.
9582 This is a helper routine for s390_register_info. */
9584 static void
9585 s390_register_info_stdarg_fpr ()
9587 int i;
9588 int min_fpr;
9589 int max_fpr;
9591 /* Save the FP argument regs for stdarg. f0 and f2 for 31 bit;
9592 f0, f2, f4 and f6 for 64 bit. */
9593 if (!cfun->stdarg
9594 || !TARGET_HARD_FLOAT
9595 || !cfun->va_list_fpr_size
9596 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9597 return;
9599 min_fpr = crtl->args.info.fprs;
9600 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9601 if (max_fpr >= FP_ARG_NUM_REG)
9602 max_fpr = FP_ARG_NUM_REG - 1;
9604 /* FPR argument regs start at f0. */
9605 min_fpr += FPR0_REGNUM;
9606 max_fpr += FPR0_REGNUM;
9608 for (i = min_fpr; i <= max_fpr; i++)
9609 cfun_set_fpr_save (i);
9612 /* Reserve the GPR save slots for GPRs which need to be saved due to
9613 stdarg.
9614 This is a helper routine for s390_register_info. */
9616 static void
9617 s390_register_info_stdarg_gpr ()
9619 int i;
9620 int min_gpr;
9621 int max_gpr;
9623 if (!cfun->stdarg
9624 || !cfun->va_list_gpr_size
9625 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9626 return;
9628 min_gpr = crtl->args.info.gprs;
9629 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9630 if (max_gpr >= GP_ARG_NUM_REG)
9631 max_gpr = GP_ARG_NUM_REG - 1;
9633 /* GPR argument regs start at r2. */
9634 min_gpr += GPR2_REGNUM;
9635 max_gpr += GPR2_REGNUM;
9637 /* If r6 was supposed to be saved into an FPR and now needs to go to
9638 the stack for vararg we have to adjust the restore range to make
9639 sure that the restore is done from stack as well. */
9640 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9641 && min_gpr <= GPR6_REGNUM
9642 && max_gpr >= GPR6_REGNUM)
9644 if (cfun_frame_layout.first_restore_gpr == -1
9645 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9646 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9647 if (cfun_frame_layout.last_restore_gpr == -1
9648 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9649 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9652 if (cfun_frame_layout.first_save_gpr == -1
9653 || cfun_frame_layout.first_save_gpr > min_gpr)
9654 cfun_frame_layout.first_save_gpr = min_gpr;
9656 if (cfun_frame_layout.last_save_gpr == -1
9657 || cfun_frame_layout.last_save_gpr < max_gpr)
9658 cfun_frame_layout.last_save_gpr = max_gpr;
9660 for (i = min_gpr; i <= max_gpr; i++)
9661 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9664 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9665 prologue and epilogue. */
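/* For example, if r6 and r12..r15 end up with stack save slots, the
   range becomes 6..15, so the prologue and epilogue can use a single
   STMG/LMG (or STM/LM) covering it, even though the registers in
   between would not strictly need saving.  */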
9667 static void
9668 s390_register_info_set_ranges ()
9670 int i, j;
9672 /* Find the first and the last save slot supposed to use the stack
9673 to set the restore range.
9674 Vararg regs might be marked to be saved to the stack, but only the
9675 call-saved regs really need restoring (i.e. r6). This code
9676 assumes that the vararg regs have not yet been recorded in
9677 cfun_gpr_save_slot. */
9678 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9679 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9680 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9681 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9682 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9683 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9686 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9687 for registers which need to be saved in function prologue.
9688 This function can be used until the insns emitted for save/restore
9689 of the regs are visible in the RTL stream. */
9691 static void
9692 s390_register_info ()
9694 int i;
9695 char clobbered_regs[32];
9697 gcc_assert (!epilogue_completed);
9699 if (reload_completed)
9700 /* After reload we rely on our own routine to determine which
9701 registers need saving. */
9702 s390_regs_ever_clobbered (clobbered_regs);
9703 else
9704 /* During reload we use regs_ever_live as a base since reload
9705 does changes in there which we otherwise would not be aware
9706 of. */
9707 for (i = 0; i < 32; i++)
9708 clobbered_regs[i] = df_regs_ever_live_p (i);
9710 for (i = 0; i < 32; i++)
9711 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9713 /* Mark the call-saved FPRs which need to be saved.
9714 This needs to be done before checking the special GPRs since the
9715 stack pointer usage depends on whether high FPRs have to be saved
9716 or not. */
9717 cfun_frame_layout.fpr_bitmap = 0;
9718 cfun_frame_layout.high_fprs = 0;
9719 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9720 if (clobbered_regs[i] && !call_really_used_regs[i])
9722 cfun_set_fpr_save (i);
9723 if (i >= FPR8_REGNUM)
9724 cfun_frame_layout.high_fprs++;
9727 /* Register 12 is used for GOT address, but also as temp in prologue
9728 for split-stack stdarg functions (unless r14 is available). */
9729 clobbered_regs[12]
9730 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9731 || (flag_split_stack && cfun->stdarg
9732 && (crtl->is_leaf || TARGET_TPF_PROFILING
9733 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9735 clobbered_regs[BASE_REGNUM]
9736 |= (cfun->machine->base_reg
9737 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9739 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9740 |= !!frame_pointer_needed;
9742 /* On pre-z900 machines this decision might not be made until the
9743 machine dependent reorg pass.
9744 save_return_addr_p will only be set on non-zarch machines so
9745 there is no risk that r14 goes into an FPR instead of a stack
9746 slot. */
9747 clobbered_regs[RETURN_REGNUM]
9748 |= (!crtl->is_leaf
9749 || TARGET_TPF_PROFILING
9750 || cfun->machine->split_branches_pending_p
9751 || cfun_frame_layout.save_return_addr_p
9752 || crtl->calls_eh_return);
9754 clobbered_regs[STACK_POINTER_REGNUM]
9755 |= (!crtl->is_leaf
9756 || TARGET_TPF_PROFILING
9757 || cfun_save_high_fprs_p
9758 || get_frame_size () > 0
9759 || (reload_completed && cfun_frame_layout.frame_size > 0)
9760 || cfun->calls_alloca);
9762 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9764 for (i = 6; i < 16; i++)
9765 if (clobbered_regs[i])
9766 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9768 s390_register_info_stdarg_fpr ();
9769 s390_register_info_gprtofpr ();
9770 s390_register_info_set_ranges ();
9771 /* stdarg functions might need to save GPRs 2 to 6. This might
9772 override the GPR->FPR save decision made by
9773 s390_register_info_gprtofpr for r6 since vararg regs must go to
9774 the stack. */
9775 s390_register_info_stdarg_gpr ();
9778 /* This function is called by s390_optimize_prologue in order to get
9779 rid of unnecessary GPR save/restore instructions. The register info
9780 for the GPRs is re-computed and the ranges are re-calculated. */
9782 static void
9783 s390_optimize_register_info ()
9785 char clobbered_regs[32];
9786 int i;
9788 gcc_assert (epilogue_completed);
9789 gcc_assert (!cfun->machine->split_branches_pending_p);
9791 s390_regs_ever_clobbered (clobbered_regs);
9793 for (i = 0; i < 32; i++)
9794 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9796 /* There is still special treatment needed for cases invisible to
9797 s390_regs_ever_clobbered. */
9798 clobbered_regs[RETURN_REGNUM]
9799 |= (TARGET_TPF_PROFILING
9800 /* When expanding builtin_return_addr in ESA mode we do not
9801 know whether r14 will later be needed as scratch reg when
9802 doing branch splitting. So the builtin always accesses the
9803 r14 save slot and we need to stick to the save/restore
9804 decision for r14 even if it turns out that it didn't get
9805 clobbered. */
9806 || cfun_frame_layout.save_return_addr_p
9807 || crtl->calls_eh_return);
9809 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9811 for (i = 6; i < 16; i++)
9812 if (!clobbered_regs[i])
9813 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9815 s390_register_info_set_ranges ();
9816 s390_register_info_stdarg_gpr ();
9819 /* Fill cfun->machine with info about frame of current function. */
9821 static void
9822 s390_frame_info (void)
9824 HOST_WIDE_INT lowest_offset;
9826 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9827 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9829 /* The va_arg builtin uses a constant distance of 16 *
9830 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9831 pointer. So even if we are going to save the stack pointer in an
9832 FPR we need the stack space in order to keep the offsets
9833 correct. */
9834 if (cfun->stdarg && cfun_save_arg_fprs_p)
9836 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9838 if (cfun_frame_layout.first_save_gpr_slot == -1)
9839 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9842 cfun_frame_layout.frame_size = get_frame_size ();
9843 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9844 fatal_error (input_location,
9845 "total size of local variables exceeds architecture limit");
9847 if (!TARGET_PACKED_STACK)
9849 /* Fixed stack layout. */
9850 cfun_frame_layout.backchain_offset = 0;
9851 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9852 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9853 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9854 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9855 * UNITS_PER_LONG);
9857 else if (TARGET_BACKCHAIN)
9859 /* Kernel stack layout - packed stack, backchain, no float */
9860 gcc_assert (TARGET_SOFT_FLOAT);
9861 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9862 - UNITS_PER_LONG);
9864 /* The distance between the backchain and the return address
9865 save slot must not change. So we always need a slot for the
9866 stack pointer which resides in between. */
9867 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9869 cfun_frame_layout.gprs_offset
9870 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9872 /* FPRs will not be saved. Nevertheless pick sane values to
9873 keep area calculations valid. */
9874 cfun_frame_layout.f0_offset =
9875 cfun_frame_layout.f4_offset =
9876 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9878 else
9880 int num_fprs;
9882 /* Packed stack layout without backchain. */
9884 /* With stdarg FPRs need their dedicated slots. */
9885 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9886 : (cfun_fpr_save_p (FPR4_REGNUM) +
9887 cfun_fpr_save_p (FPR6_REGNUM)));
9888 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9890 num_fprs = (cfun->stdarg ? 2
9891 : (cfun_fpr_save_p (FPR0_REGNUM)
9892 + cfun_fpr_save_p (FPR2_REGNUM)));
9893 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9895 cfun_frame_layout.gprs_offset
9896 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9898 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9899 - cfun_frame_layout.high_fprs * 8);
9902 if (cfun_save_high_fprs_p)
9903 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9905 if (!crtl->is_leaf)
9906 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9908 /* Apart from the case handled by the early return below, we have to
9909 allocate a STACK_POINTER_OFFSET sized area at the bottom of the
9910 stack. This is required even for leaf functions: when GCC generates
9911 a local stack reference it always adds STACK_POINTER_OFFSET to it. */
9912 if (crtl->is_leaf
9913 && !TARGET_TPF_PROFILING
9914 && cfun_frame_layout.frame_size == 0
9915 && !cfun->calls_alloca)
9916 return;
9918 /* Calculate the number of bytes we have used in our own register
9919 save area. With the packed stack layout we can re-use the
9920 remaining bytes for normal stack elements. */
9922 if (TARGET_PACKED_STACK)
9923 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9924 cfun_frame_layout.f4_offset),
9925 cfun_frame_layout.gprs_offset);
9926 else
9927 lowest_offset = 0;
9929 if (TARGET_BACKCHAIN)
9930 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9932 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9934 /* On 31 bit, if an odd number of GPRs has to be saved, we have to
9935 adjust the frame size to sustain the 8-byte alignment of stack
9936 frames. */
9937 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9938 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9939 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9942 /* Generate frame layout. Fills in register and frame data for the current
9943 function in cfun->machine. This routine can be called multiple times;
9944 it will re-do the complete frame layout every time. */
9946 static void
9947 s390_init_frame_layout (void)
9949 HOST_WIDE_INT frame_size;
9950 int base_used;
9952 /* After LRA the frame layout is supposed to be read-only and should
9953 not be re-computed. */
9954 if (reload_completed)
9955 return;
9957 /* On S/390 machines, we may need to perform branch splitting, which
9958 will require both base and return address register. We have no
9959 choice but to assume we're going to need them until right at the
9960 end of the machine dependent reorg phase. */
9961 if (!TARGET_CPU_ZARCH)
9962 cfun->machine->split_branches_pending_p = true;
9966 frame_size = cfun_frame_layout.frame_size;
9968 /* Try to predict whether we'll need the base register. */
9969 base_used = cfun->machine->split_branches_pending_p
9970 || crtl->uses_const_pool
9971 || (!DISP_IN_RANGE (frame_size)
9972 && !CONST_OK_FOR_K (frame_size));
9974 /* Decide which register to use as literal pool base. In small
9975 leaf functions, try to use an unused call-clobbered register
9976 as base register to avoid save/restore overhead. */
9977 if (!base_used)
9978 cfun->machine->base_reg = NULL_RTX;
9979 else
9981 int br = 0;
9983 if (crtl->is_leaf)
9984 /* Prefer r5 (most likely to be free). */
9985 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9987 cfun->machine->base_reg =
9988 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9991 s390_register_info ();
9992 s390_frame_info ();
9994 while (frame_size != cfun_frame_layout.frame_size);
9997 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9998 the TX is nonescaping. A transaction is considered escaping if
9999 there is at least one path from tbegin returning CC0 to the
10000 function exit block without a tend.
10002 The check so far has some limitations:
10003 - only single tbegin/tend BBs are supported
10004 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10005 - when CC is copied to a GPR and the CC0 check is done with the GPR
10006 this is not supported
10009 static void
10010 s390_optimize_nonescaping_tx (void)
10012 const unsigned int CC0 = 1 << 3;
10013 basic_block tbegin_bb = NULL;
10014 basic_block tend_bb = NULL;
10015 basic_block bb;
10016 rtx_insn *insn;
10017 bool result = true;
10018 int bb_index;
10019 rtx_insn *tbegin_insn = NULL;
10021 if (!cfun->machine->tbegin_p)
10022 return;
10024 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10026 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10028 if (!bb)
10029 continue;
10031 FOR_BB_INSNS (bb, insn)
10033 rtx ite, cc, pat, target;
10034 unsigned HOST_WIDE_INT mask;
10036 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10037 continue;
10039 pat = PATTERN (insn);
10041 if (GET_CODE (pat) == PARALLEL)
10042 pat = XVECEXP (pat, 0, 0);
10044 if (GET_CODE (pat) != SET
10045 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10046 continue;
10048 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10050 rtx_insn *tmp;
10052 tbegin_insn = insn;
10054 /* Just return if the tbegin doesn't have clobbers. */
10055 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10056 return;
10058 if (tbegin_bb != NULL)
10059 return;
10061 /* Find the next conditional jump. */
10062 for (tmp = NEXT_INSN (insn);
10063 tmp != NULL_RTX;
10064 tmp = NEXT_INSN (tmp))
10066 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10067 return;
10068 if (!JUMP_P (tmp))
10069 continue;
10071 ite = SET_SRC (PATTERN (tmp));
10072 if (GET_CODE (ite) != IF_THEN_ELSE)
10073 continue;
10075 cc = XEXP (XEXP (ite, 0), 0);
10076 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10077 || GET_MODE (cc) != CCRAWmode
10078 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10079 return;
10081 if (bb->succs->length () != 2)
10082 return;
10084 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10085 if (GET_CODE (XEXP (ite, 0)) == NE)
10086 mask ^= 0xf;
10088 if (mask == CC0)
10089 target = XEXP (ite, 1);
10090 else if (mask == (CC0 ^ 0xf))
10091 target = XEXP (ite, 2);
10092 else
10093 return;
10096 edge_iterator ei;
10097 edge e1, e2;
10099 ei = ei_start (bb->succs);
10100 e1 = ei_safe_edge (ei);
10101 ei_next (&ei);
10102 e2 = ei_safe_edge (ei);
10104 if (e2->flags & EDGE_FALLTHRU)
10106 e2 = e1;
10107 e1 = ei_safe_edge (ei);
10110 if (!(e1->flags & EDGE_FALLTHRU))
10111 return;
10113 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10115 if (tmp == BB_END (bb))
10116 break;
10120 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10122 if (tend_bb != NULL)
10123 return;
10124 tend_bb = bb;
10129 /* Either we successfully remove the FPR clobbers here or we are not
10130 able to do anything for this TX. Both cases don't qualify for
10131 another look. */
10132 cfun->machine->tbegin_p = false;
10134 if (tbegin_bb == NULL || tend_bb == NULL)
10135 return;
10137 calculate_dominance_info (CDI_POST_DOMINATORS);
10138 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10139 free_dominance_info (CDI_POST_DOMINATORS);
10141 if (!result)
10142 return;
10144 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10145 gen_rtvec (2,
10146 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10147 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10148 INSN_CODE (tbegin_insn) = -1;
10149 df_insn_rescan (tbegin_insn);
10151 return;
10154 /* Return true if it is legal to put a value with MODE into REGNO. */
10156 bool
10157 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10159 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10160 return false;
10162 switch (REGNO_REG_CLASS (regno))
10164 case VEC_REGS:
10165 return ((GET_MODE_CLASS (mode) == MODE_INT
10166 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10167 || mode == DFmode
10168 || s390_vector_mode_supported_p (mode));
10169 break;
10170 case FP_REGS:
10171 if (TARGET_VX
10172 && ((GET_MODE_CLASS (mode) == MODE_INT
10173 && s390_class_max_nregs (FP_REGS, mode) == 1)
10174 || mode == DFmode
10175 || s390_vector_mode_supported_p (mode)))
10176 return true;
10178 if (REGNO_PAIR_OK (regno, mode))
10180 if (mode == SImode || mode == DImode)
10181 return true;
10183 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10184 return true;
10186 break;
10187 case ADDR_REGS:
10188 if (FRAME_REGNO_P (regno) && mode == Pmode)
10189 return true;
10191 /* fallthrough */
10192 case GENERAL_REGS:
10193 if (REGNO_PAIR_OK (regno, mode))
10195 if (TARGET_ZARCH
10196 || (mode != TFmode && mode != TCmode && mode != TDmode))
10197 return true;
10199 break;
10200 case CC_REGS:
10201 if (GET_MODE_CLASS (mode) == MODE_CC)
10202 return true;
10203 break;
10204 case ACCESS_REGS:
10205 if (REGNO_PAIR_OK (regno, mode))
10207 if (mode == SImode || mode == Pmode)
10208 return true;
10210 break;
10211 default:
10212 return false;
10215 return false;
10218 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10220 bool
10221 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10223 /* Once we've decided upon a register to use as base register, it must
10224 no longer be used for any other purpose. */
10225 if (cfun->machine->base_reg)
10226 if (REGNO (cfun->machine->base_reg) == old_reg
10227 || REGNO (cfun->machine->base_reg) == new_reg)
10228 return false;
10230 /* Prevent regrename from using call-saved regs which haven't
10231 actually been saved. This is necessary since regrename assumes
10232 the backend save/restore decisions are based on
10233 df_regs_ever_live. Since we have our own routine we have to tell
10234 regrename manually about it. */
10235 if (GENERAL_REGNO_P (new_reg)
10236 && !call_really_used_regs[new_reg]
10237 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10238 return false;
10240 return true;
10243 /* Return nonzero if register REGNO can be used as a scratch register
10244 in peephole2. */
10246 static bool
10247 s390_hard_regno_scratch_ok (unsigned int regno)
10249 /* See s390_hard_regno_rename_ok. */
10250 if (GENERAL_REGNO_P (regno)
10251 && !call_really_used_regs[regno]
10252 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10253 return false;
10255 return true;
10258 /* Maximum number of registers to represent a value of mode MODE
10259 in a register of class RCLASS. */
10262 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10264 int reg_size;
10265 bool reg_pair_required_p = false;
10267 switch (rclass)
10269 case FP_REGS:
10270 case VEC_REGS:
10271 reg_size = TARGET_VX ? 16 : 8;
10273 /* TF and TD modes would fit into a VR but we put them into a
10274 register pair since we do not have 128bit FP instructions on
10275 full VRs. */
10276 if (TARGET_VX
10277 && SCALAR_FLOAT_MODE_P (mode)
10278 && GET_MODE_SIZE (mode) >= 16)
10279 reg_pair_required_p = true;
10281 /* Even if complex types would fit into a single FPR/VR we force
10282 them into a register pair to deal with the parts more easily.
10283 (FIXME: What about complex ints?) */
10284 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10285 reg_pair_required_p = true;
10286 break;
10287 case ACCESS_REGS:
10288 reg_size = 4;
10289 break;
10290 default:
10291 reg_size = UNITS_PER_WORD;
10292 break;
10295 if (reg_pair_required_p)
10296 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10298 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
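/* For example, TFmode (16 bytes) in FP_REGS/VEC_REGS always occupies a
   register pair: without the vector facility reg_size is 8, so
   (16 + 8 - 1) / 8 == 2; with TARGET_VX the reg_pair_required_p path above
   gives 2 * ((16 / 2 + 16 - 1) / 16) == 2 as well, even though a single
   16-byte VR could hold the value.  */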
10301 /* Return TRUE if changing mode from FROM to TO should not be allowed
10302 for register class CLASS. */
10305 s390_cannot_change_mode_class (machine_mode from_mode,
10306 machine_mode to_mode,
10307 enum reg_class rclass)
10309 machine_mode small_mode;
10310 machine_mode big_mode;
10312 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10313 return 0;
10315 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10317 small_mode = from_mode;
10318 big_mode = to_mode;
10320 else
10322 small_mode = to_mode;
10323 big_mode = from_mode;
10326 /* Values residing in VRs are little-endian style. All modes are
10327 placed left-aligned in a VR. This means that we cannot allow
10328 switching between modes with differing sizes. Also if the vector
10329 facility is available we still place TFmode values in VR register
10330 pairs, since the only instructions we have operating on TFmodes
10331 only deal with register pairs. Therefore we have to allow DFmode
10332 subregs of TFmodes to enable the TFmode splitters. */
10333 if (reg_classes_intersect_p (VEC_REGS, rclass)
10334 && (GET_MODE_SIZE (small_mode) < 8
10335 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10336 return 1;
10338 /* Likewise for access registers, since they have only half the
10339 word size on 64-bit. */
10340 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10341 return 1;
10343 return 0;
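/* Two examples of the VR rule above: a DFmode (8-byte) subreg of a TFmode
   value is allowed, since the smaller mode is not below 8 bytes and TFmode
   occupies a VR pair (s390_class_max_nregs != 1), which is exactly what the
   TFmode splitters rely on; an SImode subreg of a DImode value held in a VR
   is rejected because the smaller mode is only 4 bytes wide.  */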
10346 /* Return true if we use LRA instead of reload pass. */
10347 static bool
10348 s390_lra_p (void)
10350 return s390_lra_flag;
10353 /* Return true if register FROM can be eliminated via register TO. */
10355 static bool
10356 s390_can_eliminate (const int from, const int to)
10358 /* On zSeries machines, we have not marked the base register as fixed.
10359 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10360 If a function requires the base register, we say here that this
10361 elimination cannot be performed. This will cause reload to free
10362 up the base register (as if it were fixed). On the other hand,
10363 if the current function does *not* require the base register, we
10364 say here the elimination succeeds, which in turn allows reload
10365 to allocate the base register for any other purpose. */
10366 if (from == BASE_REGNUM && to == BASE_REGNUM)
10368 if (TARGET_CPU_ZARCH)
10370 s390_init_frame_layout ();
10371 return cfun->machine->base_reg == NULL_RTX;
10374 return false;
10377 /* Everything else must point into the stack frame. */
10378 gcc_assert (to == STACK_POINTER_REGNUM
10379 || to == HARD_FRAME_POINTER_REGNUM);
10381 gcc_assert (from == FRAME_POINTER_REGNUM
10382 || from == ARG_POINTER_REGNUM
10383 || from == RETURN_ADDRESS_POINTER_REGNUM);
10385 /* Make sure we actually saved the return address. */
10386 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10387 if (!crtl->calls_eh_return
10388 && !cfun->stdarg
10389 && !cfun_frame_layout.save_return_addr_p)
10390 return false;
10392 return true;
10395 /* Return offset between register FROM and TO initially after prolog. */
10397 HOST_WIDE_INT
10398 s390_initial_elimination_offset (int from, int to)
10400 HOST_WIDE_INT offset;
10402 /* ??? Why are we called for non-eliminable pairs? */
10403 if (!s390_can_eliminate (from, to))
10404 return 0;
10406 switch (from)
10408 case FRAME_POINTER_REGNUM:
10409 offset = (get_frame_size()
10410 + STACK_POINTER_OFFSET
10411 + crtl->outgoing_args_size);
10412 break;
10414 case ARG_POINTER_REGNUM:
10415 s390_init_frame_layout ();
10416 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10417 break;
10419 case RETURN_ADDRESS_POINTER_REGNUM:
10420 s390_init_frame_layout ();
10422 if (cfun_frame_layout.first_save_gpr_slot == -1)
10424 /* If it turns out that for stdarg nothing went into the reg
10425 save area we also do not need the return address
10426 pointer. */
10427 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10428 return 0;
10430 gcc_unreachable ();
10433 /* In order to make the following work it is not necessary for
10434 r14 to have a save slot. It is sufficient if one other GPR
10435 got one. Since the GPRs are always stored without gaps we
10436 are able to calculate where the r14 save slot would
10437 reside. */
10438 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10439 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10440 UNITS_PER_LONG);
10441 break;
10443 case BASE_REGNUM:
10444 offset = 0;
10445 break;
10447 default:
10448 gcc_unreachable ();
10451 return offset;
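/* To illustrate the RETURN_ADDRESS_POINTER_REGNUM case with made-up
   numbers: assuming a 64-bit function with frame_size == 160,
   gprs_offset == 48 and first_save_gpr_slot == 6, the r14 slot is located
   160 + 48 + (14 - 6) * 8 == 272 bytes above the stack pointer.  */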
10454 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10455 to register BASE. Return generated insn. */
10457 static rtx
10458 save_fpr (rtx base, int offset, int regnum)
10460 rtx addr;
10461 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10463 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10464 set_mem_alias_set (addr, get_varargs_alias_set ());
10465 else
10466 set_mem_alias_set (addr, get_frame_alias_set ());
10468 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10471 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10472 to register BASE. Return generated insn. */
10474 static rtx
10475 restore_fpr (rtx base, int offset, int regnum)
10477 rtx addr;
10478 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10479 set_mem_alias_set (addr, get_frame_alias_set ());
10481 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10484 /* Return true if REGNO is a global register, but not one
10485 of the special ones that need to be saved/restored in any case. */
10487 static inline bool
10488 global_not_special_regno_p (int regno)
10490 return (global_regs[regno]
10491 /* These registers are special and need to be
10492 restored in any case. */
10493 && !(regno == STACK_POINTER_REGNUM
10494 || regno == RETURN_REGNUM
10495 || regno == BASE_REGNUM
10496 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10499 /* Generate insn to save registers FIRST to LAST into
10500 the register save area located at offset OFFSET
10501 relative to register BASE. */
10503 static rtx
10504 save_gprs (rtx base, int offset, int first, int last)
10506 rtx addr, insn, note;
10507 int i;
10509 addr = plus_constant (Pmode, base, offset);
10510 addr = gen_rtx_MEM (Pmode, addr);
10512 set_mem_alias_set (addr, get_frame_alias_set ());
10514 /* Special-case single register. */
10515 if (first == last)
10517 if (TARGET_64BIT)
10518 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10519 else
10520 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10522 if (!global_not_special_regno_p (first))
10523 RTX_FRAME_RELATED_P (insn) = 1;
10524 return insn;
10528 insn = gen_store_multiple (addr,
10529 gen_rtx_REG (Pmode, first),
10530 GEN_INT (last - first + 1));
10532 if (first <= 6 && cfun->stdarg)
10533 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10535 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10537 if (first + i <= 6)
10538 set_mem_alias_set (mem, get_varargs_alias_set ());
10541 /* We need to set the FRAME_RELATED flag on all SETs
10542 inside the store-multiple pattern.
10544 However, we must not emit DWARF records for registers 2..5
10545 if they are stored for use by variable arguments ...
10547 ??? Unfortunately, it is not enough to simply not set the
10548 FRAME_RELATED flags for those SETs, because the first SET
10549 of the PARALLEL is always treated as if it had the flag
10550 set, even if it does not. Therefore we emit a new pattern
10551 without those registers as REG_FRAME_RELATED_EXPR note. */
10553 if (first >= 6 && !global_not_special_regno_p (first))
10555 rtx pat = PATTERN (insn);
10557 for (i = 0; i < XVECLEN (pat, 0); i++)
10558 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10559 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10560 0, i)))))
10561 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10563 RTX_FRAME_RELATED_P (insn) = 1;
10565 else if (last >= 6)
10567 int start;
10569 for (start = first >= 6 ? first : 6; start <= last; start++)
10570 if (!global_not_special_regno_p (start))
10571 break;
10573 if (start > last)
10574 return insn;
10576 addr = plus_constant (Pmode, base,
10577 offset + (start - first) * UNITS_PER_LONG);
10579 if (start == last)
10581 if (TARGET_64BIT)
10582 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10583 gen_rtx_REG (Pmode, start));
10584 else
10585 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10586 gen_rtx_REG (Pmode, start));
10587 note = PATTERN (note);
10589 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10590 RTX_FRAME_RELATED_P (insn) = 1;
10592 return insn;
10595 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10596 gen_rtx_REG (Pmode, start),
10597 GEN_INT (last - start + 1));
10598 note = PATTERN (note);
10600 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10602 for (i = 0; i < XVECLEN (note, 0); i++)
10603 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10604 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10605 0, i)))))
10606 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10608 RTX_FRAME_RELATED_P (insn) = 1;
10611 return insn;
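/* For instance, in a stdarg function that stores r2..r15 with a single
   store multiple, the insn itself covers r2..r15, but the
   REG_FRAME_RELATED_EXPR note built above only describes r6..r15, so no
   CFI is emitted for the purely varargs-related saves of r2..r5.  */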
10614 /* Generate insn to restore registers FIRST to LAST from
10615 the register save area located at offset OFFSET
10616 relative to register BASE. */
10618 static rtx
10619 restore_gprs (rtx base, int offset, int first, int last)
10621 rtx addr, insn;
10623 addr = plus_constant (Pmode, base, offset);
10624 addr = gen_rtx_MEM (Pmode, addr);
10625 set_mem_alias_set (addr, get_frame_alias_set ());
10627 /* Special-case single register. */
10628 if (first == last)
10630 if (TARGET_64BIT)
10631 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10632 else
10633 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10635 RTX_FRAME_RELATED_P (insn) = 1;
10636 return insn;
10639 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10640 addr,
10641 GEN_INT (last - first + 1));
10642 RTX_FRAME_RELATED_P (insn) = 1;
10643 return insn;
10646 /* Return insn sequence to load the GOT register. */
10648 static GTY(()) rtx got_symbol;
10649 rtx_insn *
10650 s390_load_got (void)
10652 rtx_insn *insns;
10654 /* We cannot use pic_offset_table_rtx here since we use this
10655 function also for non-pic if __tls_get_offset is called and in
10656 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10657 aren't usable. */
10658 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10660 if (!got_symbol)
10662 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10663 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10666 start_sequence ();
10668 if (TARGET_CPU_ZARCH)
10670 emit_move_insn (got_rtx, got_symbol);
10672 else
10674 rtx offset;
10676 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10677 UNSPEC_LTREL_OFFSET);
10678 offset = gen_rtx_CONST (Pmode, offset);
10679 offset = force_const_mem (Pmode, offset);
10681 emit_move_insn (got_rtx, offset);
10683 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10684 UNSPEC_LTREL_BASE);
10685 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10687 emit_move_insn (got_rtx, offset);
10690 insns = get_insns ();
10691 end_sequence ();
10692 return insns;
10695 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10696 and the change to the stack pointer. */
10698 static void
10699 s390_emit_stack_tie (void)
10701 rtx mem = gen_frame_mem (BLKmode,
10702 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10704 emit_insn (gen_stack_tie (mem));
10707 /* Copy GPRS into FPR save slots. */
10709 static void
10710 s390_save_gprs_to_fprs (void)
10712 int i;
10714 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10715 return;
10717 for (i = 6; i < 16; i++)
10719 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10721 rtx_insn *insn =
10722 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10723 gen_rtx_REG (DImode, i));
10724 RTX_FRAME_RELATED_P (insn) = 1;
10725 /* This prevents dwarf2cfi from interpreting the set. If it did,
10726 it might emit def_cfa_register CFI making an FPR the
10727 new CFA. */
10728 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10733 /* Restore GPRs from FPR save slots. */
10735 static void
10736 s390_restore_gprs_from_fprs (void)
10738 int i;
10740 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10741 return;
10743 for (i = 6; i < 16; i++)
10745 rtx_insn *insn;
10747 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10748 continue;
10750 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10752 if (i == STACK_POINTER_REGNUM)
10753 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10754 else
10755 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10757 df_set_regs_ever_live (i, true);
10758 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10759 if (i == STACK_POINTER_REGNUM)
10760 add_reg_note (insn, REG_CFA_DEF_CFA,
10761 plus_constant (Pmode, stack_pointer_rtx,
10762 STACK_POINTER_OFFSET));
10763 RTX_FRAME_RELATED_P (insn) = 1;
10768 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10769 generation. */
10771 namespace {
10773 const pass_data pass_data_s390_early_mach =
10775 RTL_PASS, /* type */
10776 "early_mach", /* name */
10777 OPTGROUP_NONE, /* optinfo_flags */
10778 TV_MACH_DEP, /* tv_id */
10779 0, /* properties_required */
10780 0, /* properties_provided */
10781 0, /* properties_destroyed */
10782 0, /* todo_flags_start */
10783 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10786 class pass_s390_early_mach : public rtl_opt_pass
10788 public:
10789 pass_s390_early_mach (gcc::context *ctxt)
10790 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10793 /* opt_pass methods: */
10794 virtual unsigned int execute (function *);
10796 }; // class pass_s390_early_mach
10798 unsigned int
10799 pass_s390_early_mach::execute (function *fun)
10801 rtx_insn *insn;
10803 /* Try to get rid of the FPR clobbers. */
10804 s390_optimize_nonescaping_tx ();
10806 /* Re-compute register info. */
10807 s390_register_info ();
10809 /* If we're using a base register, ensure that it is always valid for
10810 the first non-prologue instruction. */
10811 if (fun->machine->base_reg)
10812 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10814 /* Annotate all constant pool references to let the scheduler know
10815 they implicitly use the base register. */
10816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10817 if (INSN_P (insn))
10819 annotate_constant_pool_refs (&PATTERN (insn));
10820 df_insn_rescan (insn);
10822 return 0;
10825 } // anon namespace
10827 /* Expand the prologue into a bunch of separate insns. */
10829 void
10830 s390_emit_prologue (void)
10832 rtx insn, addr;
10833 rtx temp_reg;
10834 int i;
10835 int offset;
10836 int next_fpr = 0;
10838 /* Choose the best register for temporary use within the prologue.
10839 TPF with profiling must avoid register 14 - the tracing function
10840 needs the original contents of r14 to be preserved. */
10842 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10843 && !crtl->is_leaf
10844 && !TARGET_TPF_PROFILING)
10845 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10846 else if (flag_split_stack && cfun->stdarg)
10847 temp_reg = gen_rtx_REG (Pmode, 12);
10848 else
10849 temp_reg = gen_rtx_REG (Pmode, 1);
10851 s390_save_gprs_to_fprs ();
10853 /* Save call saved gprs. */
10854 if (cfun_frame_layout.first_save_gpr != -1)
10856 insn = save_gprs (stack_pointer_rtx,
10857 cfun_frame_layout.gprs_offset +
10858 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10859 - cfun_frame_layout.first_save_gpr_slot),
10860 cfun_frame_layout.first_save_gpr,
10861 cfun_frame_layout.last_save_gpr);
10862 emit_insn (insn);
10865 /* Dummy insn to mark literal pool slot. */
10867 if (cfun->machine->base_reg)
10868 emit_insn (gen_main_pool (cfun->machine->base_reg));
10870 offset = cfun_frame_layout.f0_offset;
10872 /* Save f0 and f2. */
10873 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10875 if (cfun_fpr_save_p (i))
10877 save_fpr (stack_pointer_rtx, offset, i);
10878 offset += 8;
10880 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10881 offset += 8;
10884 /* Save f4 and f6. */
10885 offset = cfun_frame_layout.f4_offset;
10886 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10888 if (cfun_fpr_save_p (i))
10890 insn = save_fpr (stack_pointer_rtx, offset, i);
10891 offset += 8;
10893 /* If f4 and f6 are call clobbered they are saved due to
10894 stdargs and therefore are not frame related. */
10895 if (!call_really_used_regs[i])
10896 RTX_FRAME_RELATED_P (insn) = 1;
10898 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10899 offset += 8;
10902 if (TARGET_PACKED_STACK
10903 && cfun_save_high_fprs_p
10904 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10906 offset = (cfun_frame_layout.f8_offset
10907 + (cfun_frame_layout.high_fprs - 1) * 8);
10909 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10910 if (cfun_fpr_save_p (i))
10912 insn = save_fpr (stack_pointer_rtx, offset, i);
10914 RTX_FRAME_RELATED_P (insn) = 1;
10915 offset -= 8;
10917 if (offset >= cfun_frame_layout.f8_offset)
10918 next_fpr = i;
10921 if (!TARGET_PACKED_STACK)
10922 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10924 if (flag_stack_usage_info)
10925 current_function_static_stack_size = cfun_frame_layout.frame_size;
10927 /* Decrement stack pointer. */
10929 if (cfun_frame_layout.frame_size > 0)
10931 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10932 rtx real_frame_off;
10934 if (s390_stack_size)
10936 HOST_WIDE_INT stack_guard;
10938 if (s390_stack_guard)
10939 stack_guard = s390_stack_guard;
10940 else
10942 /* If no value for stack guard is provided the smallest power of 2
10943 larger than the current frame size is chosen. */
10944 stack_guard = 1;
10945 while (stack_guard < cfun_frame_layout.frame_size)
10946 stack_guard <<= 1;
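/* E.g. a frame size of 5000 bytes makes this loop settle on a stack
   guard of 8192, the smallest power of 2 above the frame size.  */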
10949 if (cfun_frame_layout.frame_size >= s390_stack_size)
10951 warning (0, "frame size of function %qs is %wd"
10952 " bytes exceeding user provided stack limit of "
10953 "%d bytes. "
10954 "An unconditional trap is added.",
10955 current_function_name(), cfun_frame_layout.frame_size,
10956 s390_stack_size);
10957 emit_insn (gen_trap ());
10958 emit_barrier ();
10960 else
10962 /* stack_guard has to be smaller than s390_stack_size.
10963 Otherwise we would emit an AND with zero which would
10964 not match the test under mask pattern. */
10965 if (stack_guard >= s390_stack_size)
10967 warning (0, "frame size of function %qs is %wd"
10968 " bytes which is more than half the stack size. "
10969 "The dynamic check would not be reliable. "
10970 "No check emitted for this function.",
10971 current_function_name(),
10972 cfun_frame_layout.frame_size);
10974 else
10976 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10977 & ~(stack_guard - 1));
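/* With illustrative values s390_stack_size == 65536 and
   stack_guard == 4096 this yields a mask of 0xf000; the conditional
   trap below fires when ANDing the stack pointer with that mask gives
   zero, i.e. when the stack pointer has dropped into the lowest
   guard-sized chunk of the allowed stack.  */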
10979 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10980 GEN_INT (stack_check_mask));
10981 if (TARGET_64BIT)
10982 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10983 t, const0_rtx),
10984 t, const0_rtx, const0_rtx));
10985 else
10986 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10987 t, const0_rtx),
10988 t, const0_rtx, const0_rtx));
10993 if (s390_warn_framesize > 0
10994 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10995 warning (0, "frame size of %qs is %wd bytes",
10996 current_function_name (), cfun_frame_layout.frame_size);
10998 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10999 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11001 /* Save incoming stack pointer into temp reg. */
11002 if (TARGET_BACKCHAIN || next_fpr)
11003 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
11005 /* Subtract frame size from stack pointer. */
11007 if (DISP_IN_RANGE (INTVAL (frame_off)))
11009 insn = gen_rtx_SET (stack_pointer_rtx,
11010 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11011 frame_off));
11012 insn = emit_insn (insn);
11014 else
11016 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11017 frame_off = force_const_mem (Pmode, frame_off);
11019 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
11020 annotate_constant_pool_refs (&PATTERN (insn));
11023 RTX_FRAME_RELATED_P (insn) = 1;
11024 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11025 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11026 gen_rtx_SET (stack_pointer_rtx,
11027 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11028 real_frame_off)));
11030 /* Set backchain. */
11032 if (TARGET_BACKCHAIN)
11034 if (cfun_frame_layout.backchain_offset)
11035 addr = gen_rtx_MEM (Pmode,
11036 plus_constant (Pmode, stack_pointer_rtx,
11037 cfun_frame_layout.backchain_offset));
11038 else
11039 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11040 set_mem_alias_set (addr, get_frame_alias_set ());
11041 insn = emit_insn (gen_move_insn (addr, temp_reg));
11044 /* If we support non-call exceptions (e.g. for Java),
11045 we need to make sure the backchain pointer is set up
11046 before any possibly trapping memory access. */
11047 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11049 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11050 emit_clobber (addr);
11054 /* Save fprs 8 - 15 (64 bit ABI). */
11056 if (cfun_save_high_fprs_p && next_fpr)
11058 /* If the stack might be accessed through a different register
11059 we have to make sure that the stack pointer decrement is not
11060 moved below the use of the stack slots. */
11061 s390_emit_stack_tie ();
11063 insn = emit_insn (gen_add2_insn (temp_reg,
11064 GEN_INT (cfun_frame_layout.f8_offset)));
11066 offset = 0;
11068 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11069 if (cfun_fpr_save_p (i))
11071 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11072 cfun_frame_layout.frame_size
11073 + cfun_frame_layout.f8_offset
11074 + offset);
11076 insn = save_fpr (temp_reg, offset, i);
11077 offset += 8;
11078 RTX_FRAME_RELATED_P (insn) = 1;
11079 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11080 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11081 gen_rtx_REG (DFmode, i)));
11085 /* Set frame pointer, if needed. */
11087 if (frame_pointer_needed)
11089 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11090 RTX_FRAME_RELATED_P (insn) = 1;
11093 /* Set up got pointer, if needed. */
11095 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11097 rtx_insn *insns = s390_load_got ();
11099 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11100 annotate_constant_pool_refs (&PATTERN (insn));
11102 emit_insn (insns);
11105 if (TARGET_TPF_PROFILING)
11107 /* Generate a BAS instruction to serve as a function
11108 entry intercept to facilitate the use of tracing
11109 algorithms located at the branch target. */
11110 emit_insn (gen_prologue_tpf ());
11112 /* Emit a blockage here so that all code
11113 lies between the profiling mechanisms. */
11114 emit_insn (gen_blockage ());
11118 /* Expand the epilogue into a bunch of separate insns. */
11120 void
11121 s390_emit_epilogue (bool sibcall)
11123 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11124 int area_bottom, area_top, offset = 0;
11125 int next_offset;
11126 rtvec p;
11127 int i;
11129 if (TARGET_TPF_PROFILING)
11132 /* Generate a BAS instruction to serve as a function
11133 entry intercept to facilitate the use of tracing
11134 algorithms located at the branch target. */
11136 /* Emit a blockage here so that all code
11137 lies between the profiling mechanisms. */
11138 emit_insn (gen_blockage ());
11140 emit_insn (gen_epilogue_tpf ());
11143 /* Check whether to use frame or stack pointer for restore. */
11145 frame_pointer = (frame_pointer_needed
11146 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11148 s390_frame_area (&area_bottom, &area_top);
11150 /* Check whether we can access the register save area.
11151 If not, increment the frame pointer as required. */
11153 if (area_top <= area_bottom)
11155 /* Nothing to restore. */
11157 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11158 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11160 /* Area is in range. */
11161 offset = cfun_frame_layout.frame_size;
11163 else
11165 rtx insn, frame_off, cfa;
11167 offset = area_bottom < 0 ? -area_bottom : 0;
11168 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11170 cfa = gen_rtx_SET (frame_pointer,
11171 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11172 if (DISP_IN_RANGE (INTVAL (frame_off)))
11174 insn = gen_rtx_SET (frame_pointer,
11175 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11176 insn = emit_insn (insn);
11178 else
11180 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11181 frame_off = force_const_mem (Pmode, frame_off);
11183 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11184 annotate_constant_pool_refs (&PATTERN (insn));
11186 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11187 RTX_FRAME_RELATED_P (insn) = 1;
11190 /* Restore call saved fprs. */
11192 if (TARGET_64BIT)
11194 if (cfun_save_high_fprs_p)
11196 next_offset = cfun_frame_layout.f8_offset;
11197 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11199 if (cfun_fpr_save_p (i))
11201 restore_fpr (frame_pointer,
11202 offset + next_offset, i);
11203 cfa_restores
11204 = alloc_reg_note (REG_CFA_RESTORE,
11205 gen_rtx_REG (DFmode, i), cfa_restores);
11206 next_offset += 8;
11212 else
11214 next_offset = cfun_frame_layout.f4_offset;
11215 /* f4, f6 */
11216 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11218 if (cfun_fpr_save_p (i))
11220 restore_fpr (frame_pointer,
11221 offset + next_offset, i);
11222 cfa_restores
11223 = alloc_reg_note (REG_CFA_RESTORE,
11224 gen_rtx_REG (DFmode, i), cfa_restores);
11225 next_offset += 8;
11227 else if (!TARGET_PACKED_STACK)
11228 next_offset += 8;
11233 /* Return register. */
11235 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11237 /* Restore call saved gprs. */
11239 if (cfun_frame_layout.first_restore_gpr != -1)
11241 rtx insn, addr;
11242 int i;
11244 /* Check for global registers and store them back to the stack
11245 slots they will be reloaded from, so the restore leaves them unchanged. */
11247 for (i = cfun_frame_layout.first_restore_gpr;
11248 i <= cfun_frame_layout.last_restore_gpr;
11249 i++)
11251 if (global_not_special_regno_p (i))
11253 addr = plus_constant (Pmode, frame_pointer,
11254 offset + cfun_frame_layout.gprs_offset
11255 + (i - cfun_frame_layout.first_save_gpr_slot)
11256 * UNITS_PER_LONG);
11257 addr = gen_rtx_MEM (Pmode, addr);
11258 set_mem_alias_set (addr, get_frame_alias_set ());
11259 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11261 else
11262 cfa_restores
11263 = alloc_reg_note (REG_CFA_RESTORE,
11264 gen_rtx_REG (Pmode, i), cfa_restores);
11267 if (! sibcall)
11269 /* Fetch the return address from the stack before the load multiple;
11270 this is good for scheduling.
11272 Only do this if we already decided that r14 needs to be
11273 saved to a stack slot. (And not just because r14 happens to
11274 be in between two GPRs which need saving.) Otherwise it
11275 would be difficult to take that decision back in
11276 s390_optimize_prologue. */
11277 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
11279 int return_regnum = find_unused_clobbered_reg();
11280 if (!return_regnum)
11281 return_regnum = 4;
11282 return_reg = gen_rtx_REG (Pmode, return_regnum);
11284 addr = plus_constant (Pmode, frame_pointer,
11285 offset + cfun_frame_layout.gprs_offset
11286 + (RETURN_REGNUM
11287 - cfun_frame_layout.first_save_gpr_slot)
11288 * UNITS_PER_LONG);
11289 addr = gen_rtx_MEM (Pmode, addr);
11290 set_mem_alias_set (addr, get_frame_alias_set ());
11291 emit_move_insn (return_reg, addr);
11293 /* Once we did that optimization we have to make sure
11294 s390_optimize_prologue does not try to remove the
11295 store of r14 since we will not be able to find the
11296 load issued here. */
11297 cfun_frame_layout.save_return_addr_p = true;
11301 insn = restore_gprs (frame_pointer,
11302 offset + cfun_frame_layout.gprs_offset
11303 + (cfun_frame_layout.first_restore_gpr
11304 - cfun_frame_layout.first_save_gpr_slot)
11305 * UNITS_PER_LONG,
11306 cfun_frame_layout.first_restore_gpr,
11307 cfun_frame_layout.last_restore_gpr);
11308 insn = emit_insn (insn);
11309 REG_NOTES (insn) = cfa_restores;
11310 add_reg_note (insn, REG_CFA_DEF_CFA,
11311 plus_constant (Pmode, stack_pointer_rtx,
11312 STACK_POINTER_OFFSET));
11313 RTX_FRAME_RELATED_P (insn) = 1;
11316 s390_restore_gprs_from_fprs ();
11318 if (! sibcall)
11321 /* Return to caller. */
11323 p = rtvec_alloc (2);
11325 RTVEC_ELT (p, 0) = ret_rtx;
11326 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11327 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11331 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11333 static void
11334 s300_set_up_by_prologue (hard_reg_set_container *regs)
11336 if (cfun->machine->base_reg
11337 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11338 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11341 /* -fsplit-stack support. */
11343 /* A SYMBOL_REF for __morestack. */
11344 static GTY(()) rtx morestack_ref;
11346 /* When using -fsplit-stack, the allocation routines set a field in
11347 the TCB to the bottom of the stack plus this much space, measured
11348 in bytes. */
11350 #define SPLIT_STACK_AVAILABLE 1024
11352 /* Emit -fsplit-stack prologue, which goes before the regular function
11353 prologue. */
11355 void
11356 s390_expand_split_stack_prologue (void)
11358 rtx r1, guard, cc = NULL;
11359 rtx_insn *insn;
11360 /* Offset from thread pointer to __private_ss. */
11361 int psso = TARGET_64BIT ? 0x38 : 0x20;
11362 /* Pointer size in bytes. */
11363 /* Frame size and argument size - the two parameters to __morestack. */
11364 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11365 /* Align argument size to 8 bytes - simplifies __morestack code. */
11366 HOST_WIDE_INT args_size = crtl->args.size >= 0
11367 ? ((crtl->args.size + 7) & ~7)
11368 : 0;
11369 /* Label to be called by __morestack. */
11370 rtx_code_label *call_done = NULL;
11371 rtx_code_label *parm_base = NULL;
11372 rtx tmp;
11374 gcc_assert (flag_split_stack && reload_completed);
11375 if (!TARGET_CPU_ZARCH)
11377 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11378 return;
11381 r1 = gen_rtx_REG (Pmode, 1);
11383 /* If no stack frame will be allocated, don't do anything. */
11384 if (!frame_size)
11386 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11388 /* If va_start is used, just use r15. */
11389 emit_move_insn (r1,
11390 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11391 GEN_INT (STACK_POINTER_OFFSET)));
11394 return;
11397 if (morestack_ref == NULL_RTX)
11399 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11400 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11401 | SYMBOL_FLAG_FUNCTION);
11404 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11406 /* If frame_size will fit in an add instruction, do a stack space
11407 check, and only call __morestack if there's not enough space. */
11409 /* Get thread pointer. r1 is the only register we can always destroy - r0
11410 could contain a static chain (and cannot be used to address memory
11411 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11412 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11413 /* Aim at __private_ss. */
11414 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11416 /* If less than 1 KiB is used, skip the addition and compare directly with
11417 __private_ss. */
11418 if (frame_size > SPLIT_STACK_AVAILABLE)
11420 emit_move_insn (r1, guard);
11421 if (TARGET_64BIT)
11422 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11423 else
11424 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11425 guard = r1;
11428 /* Compare the (maybe adjusted) guard with the stack pointer. */
11429 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11432 call_done = gen_label_rtx ();
11433 parm_base = gen_label_rtx ();
11435 /* Emit the parameter block. */
11436 tmp = gen_split_stack_data (parm_base, call_done,
11437 GEN_INT (frame_size),
11438 GEN_INT (args_size));
11439 insn = emit_insn (tmp);
11440 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11441 LABEL_NUSES (call_done)++;
11442 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11443 LABEL_NUSES (parm_base)++;
11445 /* %r1 = litbase. */
11446 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11447 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11448 LABEL_NUSES (parm_base)++;
11450 /* Now, we need to call __morestack. It has very special calling
11451 conventions: it preserves param/return/static chain registers for
11452 calling main function body, and looks for its own parameters at %r1. */
11454 if (cc != NULL)
11456 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11458 insn = emit_jump_insn (tmp);
11459 JUMP_LABEL (insn) = call_done;
11460 LABEL_NUSES (call_done)++;
11462 /* Mark the jump as very unlikely to be taken. */
11463 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11465 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11467 /* If va_start is used, and __morestack was not called, just use
11468 r15. */
11469 emit_move_insn (r1,
11470 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11471 GEN_INT (STACK_POINTER_OFFSET)));
11474 else
11476 tmp = gen_split_stack_call (morestack_ref, call_done);
11477 insn = emit_jump_insn (tmp);
11478 JUMP_LABEL (insn) = call_done;
11479 LABEL_NUSES (call_done)++;
11480 emit_barrier ();
11483 /* __morestack will call us here. */
11485 emit_label (call_done);
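/* Roughly, when frame_size fits the quick check, the code emitted above
   behaves like this sketch (a simplification, not the literal insn
   stream):

     r1    = thread pointer
     guard = *(r1 + psso)                     ; __private_ss
     if (frame_size > SPLIT_STACK_AVAILABLE)
       guard += frame_size
     if (r15 < guard)                         ; marked very unlikely
       branch to __morestack, which allocates more stack, reads
       frame_size and args_size from the %r1 parameter block and
       returns to call_done
   call_done:
     the regular prologue follows.  */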
11488 /* We may have to tell the dataflow pass that the split stack prologue
11489 is initializing a register. */
11491 static void
11492 s390_live_on_entry (bitmap regs)
11494 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11496 gcc_assert (flag_split_stack);
11497 bitmap_set_bit (regs, 1);
11501 /* Return true if the function can use simple_return to return outside
11502 of a shrink-wrapped region. At present shrink-wrapping is supported
11503 in all cases. */
11505 bool
11506 s390_can_use_simple_return_insn (void)
11508 return true;
11511 /* Return true if the epilogue is guaranteed to contain only a return
11512 instruction and if a direct return can therefore be used instead.
11513 One of the main advantages of using direct return instructions
11514 is that we can then use conditional returns. */
11516 bool
11517 s390_can_use_return_insn (void)
11519 int i;
11521 if (!reload_completed)
11522 return false;
11524 if (crtl->profile)
11525 return false;
11527 if (TARGET_TPF_PROFILING)
11528 return false;
11530 for (i = 0; i < 16; i++)
11531 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11532 return false;
11534 /* For 31 bit this is not covered by the frame_size check below
11535 since f4, f6 are saved in the register save area without needing
11536 additional stack space. */
11537 if (!TARGET_64BIT
11538 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11539 return false;
11541 if (cfun->machine->base_reg
11542 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11543 return false;
11545 return cfun_frame_layout.frame_size == 0;
11548 /* The VX ABI differs for vararg functions. Therefore we need the
11549 prototype of the callee to be available when passing vector type
11550 values. */
11551 static const char *
11552 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11554 return ((TARGET_VX_ABI
11555 && typelist == 0
11556 && VECTOR_TYPE_P (TREE_TYPE (val))
11557 && (funcdecl == NULL_TREE
11558 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11559 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11560 ? N_("vector argument passed to unprototyped function")
11561 : NULL);
11565 /* Return the size in bytes of a function argument of
11566 type TYPE and/or mode MODE. At least one of TYPE or
11567 MODE must be specified. */
11569 static int
11570 s390_function_arg_size (machine_mode mode, const_tree type)
11572 if (type)
11573 return int_size_in_bytes (type);
11575 /* No type info available for some library calls ... */
11576 if (mode != BLKmode)
11577 return GET_MODE_SIZE (mode);
11579 /* If we have neither type nor mode, abort */
11580 gcc_unreachable ();
11583 /* Return true if a function argument of type TYPE and mode MODE
11584 is to be passed in a vector register, if available. */
11586 bool
11587 s390_function_arg_vector (machine_mode mode, const_tree type)
11589 if (!TARGET_VX_ABI)
11590 return false;
11592 if (s390_function_arg_size (mode, type) > 16)
11593 return false;
11595 /* No type info available for some library calls ... */
11596 if (!type)
11597 return VECTOR_MODE_P (mode);
11599 /* The ABI says that record types with a single member are treated
11600 just like that member would be. */
11601 while (TREE_CODE (type) == RECORD_TYPE)
11603 tree field, single = NULL_TREE;
11605 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11607 if (TREE_CODE (field) != FIELD_DECL)
11608 continue;
11610 if (single == NULL_TREE)
11611 single = TREE_TYPE (field);
11612 else
11613 return false;
11616 if (single == NULL_TREE)
11617 return false;
11618 else
11620 /* If the field declaration adds extra bytes due to
11621 e.g. padding, this is not accepted as a vector type. */
11622 if (int_size_in_bytes (single) <= 0
11623 || int_size_in_bytes (single) != int_size_in_bytes (type))
11624 return false;
11625 type = single;
11629 return VECTOR_TYPE_P (type);
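/* For instance, under the vector ABI a single-member wrapper like

     struct wrap { __vector double v; };

   (16 bytes, one field, using the -mzvector __vector keyword) is passed
   exactly like a bare __vector double, i.e. in a vector register, whereas
   a struct whose size differs from that of its single field, e.g. because
   of additional members or padding, is not.  */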
11632 /* Return true if a function argument of type TYPE and mode MODE
11633 is to be passed in a floating-point register, if available. */
11635 static bool
11636 s390_function_arg_float (machine_mode mode, const_tree type)
11638 if (s390_function_arg_size (mode, type) > 8)
11639 return false;
11641 /* Soft-float changes the ABI: no floating-point registers are used. */
11642 if (TARGET_SOFT_FLOAT)
11643 return false;
11645 /* No type info available for some library calls ... */
11646 if (!type)
11647 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11649 /* The ABI says that record types with a single member are treated
11650 just like that member would be. */
11651 while (TREE_CODE (type) == RECORD_TYPE)
11653 tree field, single = NULL_TREE;
11655 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11657 if (TREE_CODE (field) != FIELD_DECL)
11658 continue;
11660 if (single == NULL_TREE)
11661 single = TREE_TYPE (field);
11662 else
11663 return false;
11666 if (single == NULL_TREE)
11667 return false;
11668 else
11669 type = single;
11672 return TREE_CODE (type) == REAL_TYPE;
11675 /* Return true if a function argument of type TYPE and mode MODE
11676 is to be passed in an integer register, or a pair of integer
11677 registers, if available. */
11679 static bool
11680 s390_function_arg_integer (machine_mode mode, const_tree type)
11682 int size = s390_function_arg_size (mode, type);
11683 if (size > 8)
11684 return false;
11686 /* No type info available for some library calls ... */
11687 if (!type)
11688 return GET_MODE_CLASS (mode) == MODE_INT
11689 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11691 /* We accept small integral (and similar) types. */
11692 if (INTEGRAL_TYPE_P (type)
11693 || POINTER_TYPE_P (type)
11694 || TREE_CODE (type) == NULLPTR_TYPE
11695 || TREE_CODE (type) == OFFSET_TYPE
11696 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11697 return true;
11699 /* We also accept structs of size 1, 2, 4, 8 that are not
11700 passed in floating-point registers. */
11701 if (AGGREGATE_TYPE_P (type)
11702 && exact_log2 (size) >= 0
11703 && !s390_function_arg_float (mode, type))
11704 return true;
11706 return false;
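/* E.g. struct { short a; short b; } (size 4) qualifies and is passed in a
   single GPR, while struct { char c[3]; } (size 3, not a power of two)
   does not and is passed by reference instead; see s390_pass_by_reference
   below.  */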
11709 /* Return 1 if a function argument of type TYPE and mode MODE
11710 is to be passed by reference. The ABI specifies that only
11711 structures of size 1, 2, 4, or 8 bytes are passed by value,
11712 all other structures (and complex numbers) are passed by
11713 reference. */
11715 static bool
11716 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11717 machine_mode mode, const_tree type,
11718 bool named ATTRIBUTE_UNUSED)
11720 int size = s390_function_arg_size (mode, type);
11722 if (s390_function_arg_vector (mode, type))
11723 return false;
11725 if (size > 8)
11726 return true;
11728 if (type)
11730 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11731 return true;
11733 if (TREE_CODE (type) == COMPLEX_TYPE
11734 || TREE_CODE (type) == VECTOR_TYPE)
11735 return true;
11738 return false;
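/* Note that _Complex float (8 bytes) is passed by reference because of the
   COMPLEX_TYPE check, even though its size is a power of two, in line with
   the rule stated in the comment above.  */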
11741 /* Update the data in CUM to advance over an argument of mode MODE and
11742 data type TYPE. (TYPE is null for libcalls where that information
11743 may not be available.). The boolean NAMED specifies whether the
11744 argument is a named argument (as opposed to an unnamed argument
11745 matching an ellipsis). */
11747 static void
11748 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11749 const_tree type, bool named)
11751 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11753 if (s390_function_arg_vector (mode, type))
11755 /* We are called for unnamed vector stdarg arguments which are
11756 passed on the stack. In this case this hook does not have to
11757 do anything since stack arguments are tracked by common
11758 code. */
11759 if (!named)
11760 return;
11761 cum->vrs += 1;
11763 else if (s390_function_arg_float (mode, type))
11765 cum->fprs += 1;
11767 else if (s390_function_arg_integer (mode, type))
11769 int size = s390_function_arg_size (mode, type);
11770 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11772 else
11773 gcc_unreachable ();
11776 /* Define where to put the arguments to a function.
11777 Value is zero to push the argument on the stack,
11778 or a hard register in which to store the argument.
11780 MODE is the argument's machine mode.
11781 TYPE is the data type of the argument (as a tree).
11782 This is null for libcalls where that information may
11783 not be available.
11784 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11785 the preceding args and about the function being called.
11786 NAMED is nonzero if this argument is a named parameter
11787 (otherwise it is an extra parameter matching an ellipsis).
11789 On S/390, we use general purpose registers 2 through 6 to
11790 pass integer, pointer, and certain structure arguments, and
11791 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11792 to pass floating point arguments. All remaining arguments
11793 are pushed to the stack. */
11795 static rtx
11796 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11797 const_tree type, bool named)
11799 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11801 if (!named)
11802 s390_check_type_for_vector_abi (type, true, false);
11804 if (s390_function_arg_vector (mode, type))
11806 /* Vector arguments being part of the ellipsis are passed on the
11807 stack. */
11808 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11809 return NULL_RTX;
11811 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11813 else if (s390_function_arg_float (mode, type))
11815 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11816 return NULL_RTX;
11817 else
11818 return gen_rtx_REG (mode, cum->fprs + 16);
11820 else if (s390_function_arg_integer (mode, type))
11822 int size = s390_function_arg_size (mode, type);
11823 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11825 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11826 return NULL_RTX;
11827 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11828 return gen_rtx_REG (mode, cum->gprs + 2);
11829 else if (n_gprs == 2)
11831 rtvec p = rtvec_alloc (2);
11833 RTVEC_ELT (p, 0)
11834 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11835 const0_rtx);
11836 RTVEC_ELT (p, 1)
11837 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11838 GEN_INT (4));
11840 return gen_rtx_PARALLEL (mode, p);
11844 /* After the real arguments, expand_call calls us once again
11845 with a void_type_node type. Whatever we return here is
11846 passed as operand 2 to the call expanders.
11848 We don't need this feature ... */
11849 else if (type == void_type_node)
11850 return const0_rtx;
11852 gcc_unreachable ();
11855 /* Return true if return values of type TYPE should be returned
11856 in a memory buffer whose address is passed by the caller as
11857 hidden first argument. */
11859 static bool
11860 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11862 /* We accept small integral (and similar) types. */
11863 if (INTEGRAL_TYPE_P (type)
11864 || POINTER_TYPE_P (type)
11865 || TREE_CODE (type) == OFFSET_TYPE
11866 || TREE_CODE (type) == REAL_TYPE)
11867 return int_size_in_bytes (type) > 8;
11869 /* vector types which fit into a VR. */
11870 if (TARGET_VX_ABI
11871 && VECTOR_TYPE_P (type)
11872 && int_size_in_bytes (type) <= 16)
11873 return false;
11875 /* Aggregates and similar constructs are always returned
11876 in memory. */
11877 if (AGGREGATE_TYPE_P (type)
11878 || TREE_CODE (type) == COMPLEX_TYPE
11879 || VECTOR_TYPE_P (type))
11880 return true;
11882 /* ??? We get called on all sorts of random stuff from
11883 aggregate_value_p. We can't abort, but it's not clear
11884 what's safe to return. Pretend it's a struct I guess. */
11885 return true;
11888 /* Function arguments and return values are promoted to word size. */
11890 static machine_mode
11891 s390_promote_function_mode (const_tree type, machine_mode mode,
11892 int *punsignedp,
11893 const_tree fntype ATTRIBUTE_UNUSED,
11894 int for_return ATTRIBUTE_UNUSED)
11896 if (INTEGRAL_MODE_P (mode)
11897 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11899 if (type != NULL_TREE && POINTER_TYPE_P (type))
11900 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11901 return Pmode;
11904 return mode;
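/* E.g. on 64-bit a 'short' argument or return value (HImode, smaller than
   UNITS_PER_LONG) is promoted to Pmode, i.e. DImode, before being passed
   in a GPR; for pointer types the signedness of that extension is forced
   to POINTERS_EXTEND_UNSIGNED.  */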
11907 /* Define where to return a (scalar) value of type RET_TYPE.
11908 If RET_TYPE is null, define where to return a (scalar)
11909 value of mode MODE from a libcall. */
11911 static rtx
11912 s390_function_and_libcall_value (machine_mode mode,
11913 const_tree ret_type,
11914 const_tree fntype_or_decl,
11915 bool outgoing ATTRIBUTE_UNUSED)
11917 /* For vector return types it is important to use the RET_TYPE
11918 argument whenever available since the middle-end might have
11919 changed the mode to a scalar mode. */
11920 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11921 || (!ret_type && VECTOR_MODE_P (mode)));
11923 /* For normal functions perform the promotion as
11924 promote_function_mode would do. */
11925 if (ret_type)
11927 int unsignedp = TYPE_UNSIGNED (ret_type);
11928 mode = promote_function_mode (ret_type, mode, &unsignedp,
11929 fntype_or_decl, 1);
11932 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11933 || SCALAR_FLOAT_MODE_P (mode)
11934 || (TARGET_VX_ABI && vector_ret_type_p));
11935 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11937 if (TARGET_VX_ABI && vector_ret_type_p)
11938 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11939 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11940 return gen_rtx_REG (mode, 16);
11941 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11942 || UNITS_PER_LONG == UNITS_PER_WORD)
11943 return gen_rtx_REG (mode, 2);
11944 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11946 /* This case is triggered when returning a 64 bit value with
11947 -m31 -mzarch. Although the value would fit into a single
11948 register it has to be forced into a 32 bit register pair in
11949 order to match the ABI. */
11950 rtvec p = rtvec_alloc (2);
11952 RTVEC_ELT (p, 0)
11953 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11954 RTVEC_ELT (p, 1)
11955 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11957 return gen_rtx_PARALLEL (mode, p);
11960 gcc_unreachable ();
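/* As an example of the last case: a 'long long' value returned with
   -m31 -mzarch uses the r2/r3 pair built above, with r2 holding the four
   bytes at offset 0 (the most significant word on this big-endian target)
   and r3 the four bytes at offset 4.  */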
11963 /* Define where to return a scalar return value of type RET_TYPE. */
11965 static rtx
11966 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11967 bool outgoing)
11969 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11970 fn_decl_or_type, outgoing);
11973 /* Define where to return a scalar libcall return value of mode
11974 MODE. */
11976 static rtx
11977 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11979 return s390_function_and_libcall_value (mode, NULL_TREE,
11980 NULL_TREE, true);
11984 /* Create and return the va_list datatype.
11986 On S/390, va_list is an array type equivalent to
11988 typedef struct __va_list_tag
11990 long __gpr;
11991 long __fpr;
11992 void *__overflow_arg_area;
11993 void *__reg_save_area;
11994 } va_list[1];
11996 where __gpr and __fpr hold the number of general purpose
11997 or floating point arguments used up to now, respectively,
11998 __overflow_arg_area points to the stack location of the
11999 next argument passed on the stack, and __reg_save_area
12000 always points to the start of the register area in the
12001 call frame of the current function. The function prologue
12002 saves all registers used for argument passing into this
12003 area if the function uses variable arguments. */
12005 static tree
12006 s390_build_builtin_va_list (void)
12008 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12010 record = lang_hooks.types.make_type (RECORD_TYPE);
12012 type_decl =
12013 build_decl (BUILTINS_LOCATION,
12014 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12016 f_gpr = build_decl (BUILTINS_LOCATION,
12017 FIELD_DECL, get_identifier ("__gpr"),
12018 long_integer_type_node);
12019 f_fpr = build_decl (BUILTINS_LOCATION,
12020 FIELD_DECL, get_identifier ("__fpr"),
12021 long_integer_type_node);
12022 f_ovf = build_decl (BUILTINS_LOCATION,
12023 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12024 ptr_type_node);
12025 f_sav = build_decl (BUILTINS_LOCATION,
12026 FIELD_DECL, get_identifier ("__reg_save_area"),
12027 ptr_type_node);
12029 va_list_gpr_counter_field = f_gpr;
12030 va_list_fpr_counter_field = f_fpr;
12032 DECL_FIELD_CONTEXT (f_gpr) = record;
12033 DECL_FIELD_CONTEXT (f_fpr) = record;
12034 DECL_FIELD_CONTEXT (f_ovf) = record;
12035 DECL_FIELD_CONTEXT (f_sav) = record;
12037 TYPE_STUB_DECL (record) = type_decl;
12038 TYPE_NAME (record) = type_decl;
12039 TYPE_FIELDS (record) = f_gpr;
12040 DECL_CHAIN (f_gpr) = f_fpr;
12041 DECL_CHAIN (f_fpr) = f_ovf;
12042 DECL_CHAIN (f_ovf) = f_sav;
12044 layout_type (record);
12046 /* The correct type is an array type of one element. */
12047 return build_array_type (record, build_index_type (size_zero_node));
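/* A minimal user-level sketch of code that ends up operating on this
   layout (plain ISO C, nothing s390-specific assumed):

     #include <stdarg.h>

     long
     sum (int n, ...)
     {
       va_list ap;                  // array of one __va_list_tag
       long s = 0;
       va_start (ap, n);            // expanded by s390_va_start below
       for (int i = 0; i < n; i++)
         s += va_arg (ap, long);    // expanded by s390_gimplify_va_arg
       va_end (ap);
       return s;
     }
*/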
12050 /* Implement va_start by filling the va_list structure VALIST.
12051 STDARG_P is always true, and ignored.
12052 NEXTARG points to the first anonymous stack argument.
12054 The following global variables are used to initialize
12055 the va_list structure:
12057 crtl->args.info:
12058 holds number of gprs and fprs used for named arguments.
12059 crtl->args.arg_offset_rtx:
12060 holds the offset of the first anonymous stack argument
12061 (relative to the virtual arg pointer). */
12063 static void
12064 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12066 HOST_WIDE_INT n_gpr, n_fpr;
12067 int off;
12068 tree f_gpr, f_fpr, f_ovf, f_sav;
12069 tree gpr, fpr, ovf, sav, t;
12071 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12072 f_fpr = DECL_CHAIN (f_gpr);
12073 f_ovf = DECL_CHAIN (f_fpr);
12074 f_sav = DECL_CHAIN (f_ovf);
12076 valist = build_simple_mem_ref (valist);
12077 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12078 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12079 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12080 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12082 /* Count number of gp and fp argument registers used. */
12084 n_gpr = crtl->args.info.gprs;
12085 n_fpr = crtl->args.info.fprs;
12087 if (cfun->va_list_gpr_size)
12089 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12090 build_int_cst (NULL_TREE, n_gpr));
12091 TREE_SIDE_EFFECTS (t) = 1;
12092 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12095 if (cfun->va_list_fpr_size)
12097 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12098 build_int_cst (NULL_TREE, n_fpr));
12099 TREE_SIDE_EFFECTS (t) = 1;
12100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12103 if (flag_split_stack
12104 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12105 == NULL)
12106 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12108 rtx reg;
12109 rtx_insn *seq;
12111 reg = gen_reg_rtx (Pmode);
12112 cfun->machine->split_stack_varargs_pointer = reg;
12114 start_sequence ();
12115 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12116 seq = get_insns ();
12117 end_sequence ();
12119 push_topmost_sequence ();
12120 emit_insn_after (seq, entry_of_function ());
12121 pop_topmost_sequence ();
12124 /* Find the overflow area.
12125 FIXME: This currently is too pessimistic when the vector ABI is
12126 enabled. In that case we *always* set up the overflow area
12127 pointer. */
12128 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12129 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12130 || TARGET_VX_ABI)
12132 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12133 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12134 else
12135 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12137 off = INTVAL (crtl->args.arg_offset_rtx);
12138 off = off < 0 ? 0 : off;
12139 if (TARGET_DEBUG_ARG)
12140 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12141 (int)n_gpr, (int)n_fpr, off);
12143 t = fold_build_pointer_plus_hwi (t, off);
12145 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12146 TREE_SIDE_EFFECTS (t) = 1;
12147 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12150 /* Find the register save area. */
12151 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12152 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12154 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12155 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12157 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12158 TREE_SIDE_EFFECTS (t) = 1;
12159 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12163 /* Implement va_arg by updating the va_list structure
12164 VALIST as required to retrieve an argument of type
12165 TYPE, and returning that argument.
12167 Generates code equivalent to:
12169 if (integral value) {
12170 if (size <= 4 && args.gpr < 5 ||
12171 size > 4 && args.gpr < 4 )
12172 ret = args.reg_save_area[args.gpr+8]
12173 else
12174 ret = *args.overflow_arg_area++;
12175 } else if (vector value) {
12176 ret = *args.overflow_arg_area;
12177 args.overflow_arg_area += size / 8;
12178 } else if (float value) {
12179 if (args.fgpr < 2)
12180 ret = args.reg_save_area[args.fpr+64]
12181 else
12182 ret = *args.overflow_arg_area++;
12183 } else if (aggregate value) {
12184 if (args.gpr < 5)
12185 ret = *args.reg_save_area[args.gpr]
12186 else
12187 ret = **args.overflow_arg_area++;
12188 } */
12190 static tree
12191 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12192 gimple_seq *post_p ATTRIBUTE_UNUSED)
12194 tree f_gpr, f_fpr, f_ovf, f_sav;
12195 tree gpr, fpr, ovf, sav, reg, t, u;
12196 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12197 tree lab_false, lab_over = NULL_TREE;
12198 tree addr = create_tmp_var (ptr_type_node, "addr");
12199 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12200 a stack slot. */
12202 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12203 f_fpr = DECL_CHAIN (f_gpr);
12204 f_ovf = DECL_CHAIN (f_fpr);
12205 f_sav = DECL_CHAIN (f_ovf);
12207 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12208 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12209 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12211 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12212 both appear on a lhs. */
12213 valist = unshare_expr (valist);
12214 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12216 size = int_size_in_bytes (type);
12218 s390_check_type_for_vector_abi (type, true, false);
12220 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12222 if (TARGET_DEBUG_ARG)
12224 fprintf (stderr, "va_arg: aggregate type");
12225 debug_tree (type);
12228 /* Aggregates are passed by reference. */
12229 indirect_p = 1;
12230 reg = gpr;
12231 n_reg = 1;
12233 /* kernel stack layout on 31 bit: It is assumed here that no padding
12234 will be added by s390_frame_info because for va_args an even
12235 number of gprs always has to be saved (r15-r2 = 14 regs). */
12236 sav_ofs = 2 * UNITS_PER_LONG;
12237 sav_scale = UNITS_PER_LONG;
12238 size = UNITS_PER_LONG;
12239 max_reg = GP_ARG_NUM_REG - n_reg;
12240 left_align_p = false;
12242 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12244 if (TARGET_DEBUG_ARG)
12246 fprintf (stderr, "va_arg: vector type");
12247 debug_tree (type);
12250 indirect_p = 0;
12251 reg = NULL_TREE;
12252 n_reg = 0;
12253 sav_ofs = 0;
12254 sav_scale = 8;
12255 max_reg = 0;
12256 left_align_p = true;
12258 else if (s390_function_arg_float (TYPE_MODE (type), type))
12260 if (TARGET_DEBUG_ARG)
12262 fprintf (stderr, "va_arg: float type");
12263 debug_tree (type);
12266 /* FP args go in FP registers, if present. */
12267 indirect_p = 0;
12268 reg = fpr;
12269 n_reg = 1;
12270 sav_ofs = 16 * UNITS_PER_LONG;
12271 sav_scale = 8;
12272 max_reg = FP_ARG_NUM_REG - n_reg;
12273 left_align_p = false;
12275 else
12277 if (TARGET_DEBUG_ARG)
12279 fprintf (stderr, "va_arg: other type");
12280 debug_tree (type);
12283 /* Otherwise into GP registers. */
12284 indirect_p = 0;
12285 reg = gpr;
12286 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12288 /* kernel stack layout on 31 bit: It is assumed here that no padding
12289 will be added by s390_frame_info because for va_args an even
12290 number of gprs always has to be saved (r15-r2 = 14 regs). */
12291 sav_ofs = 2 * UNITS_PER_LONG;
12293 if (size < UNITS_PER_LONG)
12294 sav_ofs += UNITS_PER_LONG - size;
12296 sav_scale = UNITS_PER_LONG;
12297 max_reg = GP_ARG_NUM_REG - n_reg;
12298 left_align_p = false;
12301 /* Pull the value out of the saved registers ... */
12303 if (reg != NULL_TREE)
12306 /* if (reg > ((typeof (reg))max_reg))
12307 goto lab_false;
12309 addr = sav + sav_ofs + reg * sav_scale;
12311 goto lab_over;
12313 lab_false:  */
12316 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12317 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12319 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12320 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12321 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12322 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12323 gimplify_and_add (t, pre_p);
12325 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12326 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12327 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12328 t = fold_build_pointer_plus (t, u);
12330 gimplify_assign (addr, t, pre_p);
12332 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12334 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12337 /* ... Otherwise out of the overflow area. */
12339 t = ovf;
12340 if (size < UNITS_PER_LONG && !left_align_p)
12341 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12343 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12345 gimplify_assign (addr, t, pre_p);
12347 if (size < UNITS_PER_LONG && left_align_p)
12348 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12349 else
12350 t = fold_build_pointer_plus_hwi (t, size);
12352 gimplify_assign (ovf, t, pre_p);
12354 if (reg != NULL_TREE)
12355 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12358 /* Increment register save count. */
12360 if (n_reg > 0)
12362 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12363 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12364 gimplify_and_add (u, pre_p);
12367 if (indirect_p)
12369 t = build_pointer_type_for_mode (build_pointer_type (type),
12370 ptr_mode, true);
12371 addr = fold_convert (t, addr);
12372 addr = build_va_arg_indirect_ref (addr);
12374 else
12376 t = build_pointer_type_for_mode (type, ptr_mode, true);
12377 addr = fold_convert (t, addr);
12380 return build_va_arg_indirect_ref (addr);
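/* Rough sketch of the register-save-area access built above, assuming
   the 64-bit ABI (UNITS_PER_LONG == 8): a scalar integer argument that
   arrived in GPR (2 + __gpr) is fetched from

     __reg_save_area + 2 * UNITS_PER_LONG + __gpr * UNITS_PER_LONG

   i.e. sav_ofs skips the unused r0/r1 slots and sav_scale steps through
   the per-register slots; FP arguments instead use a sav_ofs of
   16 * UNITS_PER_LONG and a fixed scale of 8.  */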
12383 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12384 expanders.
12385 DEST - Register location where CC will be stored.
12386 TDB - Pointer to a 256 byte area in which to store the transaction
12387 diagnostic block. NULL if TDB is not needed.
12388 RETRY - Retry count value. If non-NULL, a retry loop for CC2
12389 is emitted.
12390 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
12391 of the tbegin instruction pattern. */
12393 void
12394 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12396 rtx retry_plus_two = gen_reg_rtx (SImode);
12397 rtx retry_reg = gen_reg_rtx (SImode);
12398 rtx_code_label *retry_label = NULL;
12400 if (retry != NULL_RTX)
12402 emit_move_insn (retry_reg, retry);
12403 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12404 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12405 retry_label = gen_label_rtx ();
12406 emit_label (retry_label);
12409 if (clobber_fprs_p)
12411 if (TARGET_VX)
12412 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12413 tdb));
12414 else
12415 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12416 tdb));
12418 else
12419 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12420 tdb));
12422 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12423 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12424 CC_REGNUM)),
12425 UNSPEC_CC_TO_INT));
12426 if (retry != NULL_RTX)
12428 const int CC0 = 1 << 3;
12429 const int CC1 = 1 << 2;
12430 const int CC3 = 1 << 0;
12431 rtx jump;
12432 rtx count = gen_reg_rtx (SImode);
12433 rtx_code_label *leave_label = gen_label_rtx ();
12435 /* Exit for success and permanent failures. */
12436 jump = s390_emit_jump (leave_label,
12437 gen_rtx_EQ (VOIDmode,
12438 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12439 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12440 LABEL_NUSES (leave_label) = 1;
12442 /* CC2 - transient failure. Perform retry with ppa. */
12443 emit_move_insn (count, retry_plus_two);
12444 emit_insn (gen_subsi3 (count, count, retry_reg));
12445 emit_insn (gen_tx_assist (count));
12446 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12447 retry_reg,
12448 retry_reg));
12449 JUMP_LABEL (jump) = retry_label;
12450 LABEL_NUSES (retry_label) = 1;
12451 emit_label (leave_label);
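/* A hedged usage sketch from the user's side (relies on the S/390 HTM
   builtins and the _HTM_TBEGIN_STARTED constant from GCC's htmintrin.h;
   compile with -mhtm; treat the exact names as an assumption):

     #include <htmintrin.h>

     if (__builtin_tbegin_retry ((void *) 0, 5) == _HTM_TBEGIN_STARTED)
       {
         // transactional path
         __builtin_tend ();
       }
     else
       {
         // fallback path
       }

   The retry variant is what supplies the RETRY operand above: on CC2
   (transient abort) the expander loops back to the tbegin and issues a
   ppa (gen_tx_assist) with the remaining retry count.  */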
12456 /* Return the decl for the target specific builtin with the function
12457 code FCODE. */
12459 static tree
12460 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12462 if (fcode >= S390_BUILTIN_MAX)
12463 return error_mark_node;
12465 return s390_builtin_decls[fcode];
12468 /* We call mcount before the function prologue. So a profiled leaf
12469 function should stay a leaf function. */
12471 static bool
12472 s390_keep_leaf_when_profiled ()
12474 return true;
12477 /* Output assembly code for the trampoline template to
12478 stdio stream FILE.
12480 On S/390, we use gpr 1 internally in the trampoline code;
12481 gpr 0 is used to hold the static chain. */
12483 static void
12484 s390_asm_trampoline_template (FILE *file)
12486 rtx op[2];
12487 op[0] = gen_rtx_REG (Pmode, 0);
12488 op[1] = gen_rtx_REG (Pmode, 1);
12490 if (TARGET_64BIT)
12492 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12493 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12494 output_asm_insn ("br\t%1", op); /* 2 byte */
12495 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12497 else
12499 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12500 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12501 output_asm_insn ("br\t%1", op); /* 2 byte */
12502 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12506 /* Emit RTL insns to initialize the variable parts of a trampoline.
12507 FNADDR is an RTX for the address of the function's pure code.
12508 CXT is an RTX for the static chain value for the function. */
12510 static void
12511 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12513 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12514 rtx mem;
12516 emit_block_move (m_tramp, assemble_trampoline_template (),
12517 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12519 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12520 emit_move_insn (mem, cxt);
12521 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12522 emit_move_insn (mem, fnaddr);
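/* Resulting trampoline layout (descriptive only): the first
   2 * UNITS_PER_LONG bytes hold the code template emitted by
   s390_asm_trampoline_template, the following word receives the static
   chain value (picked up into %r0 by the lmg/lm), and the last word the
   target function address (picked up into %r1 and branched to).  */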
12525 /* Output assembler code to FILE to increment profiler label # LABELNO
12526 for profiling a function entry. */
12528 void
12529 s390_function_profiler (FILE *file, int labelno)
12531 rtx op[7];
12533 char label[128];
12534 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12536 fprintf (file, "# function profiler \n");
12538 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12539 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12540 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12542 op[2] = gen_rtx_REG (Pmode, 1);
12543 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12544 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12546 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12547 if (flag_pic)
12549 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12550 op[4] = gen_rtx_CONST (Pmode, op[4]);
12553 if (TARGET_64BIT)
12555 output_asm_insn ("stg\t%0,%1", op);
12556 output_asm_insn ("larl\t%2,%3", op);
12557 output_asm_insn ("brasl\t%0,%4", op);
12558 output_asm_insn ("lg\t%0,%1", op);
12560 else if (TARGET_CPU_ZARCH)
12562 output_asm_insn ("st\t%0,%1", op);
12563 output_asm_insn ("larl\t%2,%3", op);
12564 output_asm_insn ("brasl\t%0,%4", op);
12565 output_asm_insn ("l\t%0,%1", op);
12567 else if (!flag_pic)
12569 op[6] = gen_label_rtx ();
12571 output_asm_insn ("st\t%0,%1", op);
12572 output_asm_insn ("bras\t%2,%l6", op);
12573 output_asm_insn (".long\t%4", op);
12574 output_asm_insn (".long\t%3", op);
12575 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12576 output_asm_insn ("l\t%0,0(%2)", op);
12577 output_asm_insn ("l\t%2,4(%2)", op);
12578 output_asm_insn ("basr\t%0,%0", op);
12579 output_asm_insn ("l\t%0,%1", op);
12581 else
12583 op[5] = gen_label_rtx ();
12584 op[6] = gen_label_rtx ();
12586 output_asm_insn ("st\t%0,%1", op);
12587 output_asm_insn ("bras\t%2,%l6", op);
12588 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12589 output_asm_insn (".long\t%4-%l5", op);
12590 output_asm_insn (".long\t%3-%l5", op);
12591 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12592 output_asm_insn ("lr\t%0,%2", op);
12593 output_asm_insn ("a\t%0,0(%2)", op);
12594 output_asm_insn ("a\t%2,4(%2)", op);
12595 output_asm_insn ("basr\t%0,%0", op);
12596 output_asm_insn ("l\t%0,%1", op);
12600 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12601 into its SYMBOL_REF_FLAGS. */
12603 static void
12604 s390_encode_section_info (tree decl, rtx rtl, int first)
12606 default_encode_section_info (decl, rtl, first);
12608 if (TREE_CODE (decl) == VAR_DECL)
12610 /* Store the alignment to be able to check if we can use
12611 a larl/load-relative instruction. We only handle the cases
12612 that can go wrong (i.e. no FUNC_DECLs). */
12613 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12614 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12615 else if (DECL_ALIGN (decl) % 32)
12616 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12617 else if (DECL_ALIGN (decl) % 64)
12618 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12621 /* Literal pool references don't have a decl so they are handled
12622 differently here. We rely on the information in the MEM_ALIGN
12623 entry to decide upon the alignment. */
12624 if (MEM_P (rtl)
12625 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12626 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12628 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12629 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12630 else if (MEM_ALIGN (rtl) % 32)
12631 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12632 else if (MEM_ALIGN (rtl) % 64)
12633 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
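/* Effect of the checks above (DECL_ALIGN / MEM_ALIGN are in bits): a
   symbol known only to be 1-byte aligned gets NOTALIGN2, one known only
   to be 2-byte aligned gets NOTALIGN4, one known only to be 4-byte
   aligned gets NOTALIGN8, and an 8-byte aligned symbol gets no flag at
   all.  */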
12637 /* Output thunk to FILE that implements a C++ virtual function call (with
12638 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12639 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12640 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12641 relative to the resulting this pointer. */
12643 static void
12644 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12645 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12646 tree function)
12648 rtx op[10];
12649 int nonlocal = 0;
12651 /* Make sure unwind info is emitted for the thunk if needed. */
12652 final_start_function (emit_barrier (), file, 1);
12654 /* Operand 0 is the target function. */
12655 op[0] = XEXP (DECL_RTL (function), 0);
12656 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12658 nonlocal = 1;
12659 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12660 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12661 op[0] = gen_rtx_CONST (Pmode, op[0]);
12664 /* Operand 1 is the 'this' pointer. */
12665 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12666 op[1] = gen_rtx_REG (Pmode, 3);
12667 else
12668 op[1] = gen_rtx_REG (Pmode, 2);
12670 /* Operand 2 is the delta. */
12671 op[2] = GEN_INT (delta);
12673 /* Operand 3 is the vcall_offset. */
12674 op[3] = GEN_INT (vcall_offset);
12676 /* Operand 4 is the temporary register. */
12677 op[4] = gen_rtx_REG (Pmode, 1);
12679 /* Operands 5 to 8 can be used as labels. */
12680 op[5] = NULL_RTX;
12681 op[6] = NULL_RTX;
12682 op[7] = NULL_RTX;
12683 op[8] = NULL_RTX;
12685 /* Operand 9 can be used for temporary register. */
12686 op[9] = NULL_RTX;
12688 /* Generate code. */
12689 if (TARGET_64BIT)
12691 /* Setup literal pool pointer if required. */
12692 if ((!DISP_IN_RANGE (delta)
12693 && !CONST_OK_FOR_K (delta)
12694 && !CONST_OK_FOR_Os (delta))
12695 || (!DISP_IN_RANGE (vcall_offset)
12696 && !CONST_OK_FOR_K (vcall_offset)
12697 && !CONST_OK_FOR_Os (vcall_offset)))
12699 op[5] = gen_label_rtx ();
12700 output_asm_insn ("larl\t%4,%5", op);
12703 /* Add DELTA to this pointer. */
12704 if (delta)
12706 if (CONST_OK_FOR_J (delta))
12707 output_asm_insn ("la\t%1,%2(%1)", op);
12708 else if (DISP_IN_RANGE (delta))
12709 output_asm_insn ("lay\t%1,%2(%1)", op);
12710 else if (CONST_OK_FOR_K (delta))
12711 output_asm_insn ("aghi\t%1,%2", op);
12712 else if (CONST_OK_FOR_Os (delta))
12713 output_asm_insn ("agfi\t%1,%2", op);
12714 else
12716 op[6] = gen_label_rtx ();
12717 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12721 /* Perform vcall adjustment. */
12722 if (vcall_offset)
12724 if (DISP_IN_RANGE (vcall_offset))
12726 output_asm_insn ("lg\t%4,0(%1)", op);
12727 output_asm_insn ("ag\t%1,%3(%4)", op);
12729 else if (CONST_OK_FOR_K (vcall_offset))
12731 output_asm_insn ("lghi\t%4,%3", op);
12732 output_asm_insn ("ag\t%4,0(%1)", op);
12733 output_asm_insn ("ag\t%1,0(%4)", op);
12735 else if (CONST_OK_FOR_Os (vcall_offset))
12737 output_asm_insn ("lgfi\t%4,%3", op);
12738 output_asm_insn ("ag\t%4,0(%1)", op);
12739 output_asm_insn ("ag\t%1,0(%4)", op);
12741 else
12743 op[7] = gen_label_rtx ();
12744 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12745 output_asm_insn ("ag\t%4,0(%1)", op);
12746 output_asm_insn ("ag\t%1,0(%4)", op);
12750 /* Jump to target. */
12751 output_asm_insn ("jg\t%0", op);
12753 /* Output literal pool if required. */
12754 if (op[5])
12756 output_asm_insn (".align\t4", op);
12757 targetm.asm_out.internal_label (file, "L",
12758 CODE_LABEL_NUMBER (op[5]));
12760 if (op[6])
12762 targetm.asm_out.internal_label (file, "L",
12763 CODE_LABEL_NUMBER (op[6]));
12764 output_asm_insn (".long\t%2", op);
12766 if (op[7])
12768 targetm.asm_out.internal_label (file, "L",
12769 CODE_LABEL_NUMBER (op[7]));
12770 output_asm_insn (".long\t%3", op);
12773 else
12775 /* Setup base pointer if required. */
12776 if (!vcall_offset
12777 || (!DISP_IN_RANGE (delta)
12778 && !CONST_OK_FOR_K (delta)
12779 && !CONST_OK_FOR_Os (delta))
12780 || (!DISP_IN_RANGE (delta)
12781 && !CONST_OK_FOR_K (vcall_offset)
12782 && !CONST_OK_FOR_Os (vcall_offset)))
12784 op[5] = gen_label_rtx ();
12785 output_asm_insn ("basr\t%4,0", op);
12786 targetm.asm_out.internal_label (file, "L",
12787 CODE_LABEL_NUMBER (op[5]));
12790 /* Add DELTA to this pointer. */
12791 if (delta)
12793 if (CONST_OK_FOR_J (delta))
12794 output_asm_insn ("la\t%1,%2(%1)", op);
12795 else if (DISP_IN_RANGE (delta))
12796 output_asm_insn ("lay\t%1,%2(%1)", op);
12797 else if (CONST_OK_FOR_K (delta))
12798 output_asm_insn ("ahi\t%1,%2", op);
12799 else if (CONST_OK_FOR_Os (delta))
12800 output_asm_insn ("afi\t%1,%2", op);
12801 else
12803 op[6] = gen_label_rtx ();
12804 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12808 /* Perform vcall adjustment. */
12809 if (vcall_offset)
12811 if (CONST_OK_FOR_J (vcall_offset))
12813 output_asm_insn ("l\t%4,0(%1)", op);
12814 output_asm_insn ("a\t%1,%3(%4)", op);
12816 else if (DISP_IN_RANGE (vcall_offset))
12818 output_asm_insn ("l\t%4,0(%1)", op);
12819 output_asm_insn ("ay\t%1,%3(%4)", op);
12821 else if (CONST_OK_FOR_K (vcall_offset))
12823 output_asm_insn ("lhi\t%4,%3", op);
12824 output_asm_insn ("a\t%4,0(%1)", op);
12825 output_asm_insn ("a\t%1,0(%4)", op);
12827 else if (CONST_OK_FOR_Os (vcall_offset))
12829 output_asm_insn ("iilf\t%4,%3", op);
12830 output_asm_insn ("a\t%4,0(%1)", op);
12831 output_asm_insn ("a\t%1,0(%4)", op);
12833 else
12835 op[7] = gen_label_rtx ();
12836 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12837 output_asm_insn ("a\t%4,0(%1)", op);
12838 output_asm_insn ("a\t%1,0(%4)", op);
12841 /* We had to clobber the base pointer register.
12842 Re-setup the base pointer (with a different base). */
12843 op[5] = gen_label_rtx ();
12844 output_asm_insn ("basr\t%4,0", op);
12845 targetm.asm_out.internal_label (file, "L",
12846 CODE_LABEL_NUMBER (op[5]));
12849 /* Jump to target. */
12850 op[8] = gen_label_rtx ();
12852 if (!flag_pic)
12853 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12854 else if (!nonlocal)
12855 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12856 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12857 else if (flag_pic == 1)
12859 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12860 output_asm_insn ("l\t%4,%0(%4)", op);
12862 else if (flag_pic == 2)
12864 op[9] = gen_rtx_REG (Pmode, 0);
12865 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12866 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12867 output_asm_insn ("ar\t%4,%9", op);
12868 output_asm_insn ("l\t%4,0(%4)", op);
12871 output_asm_insn ("br\t%4", op);
12873 /* Output literal pool. */
12874 output_asm_insn (".align\t4", op);
12876 if (nonlocal && flag_pic == 2)
12877 output_asm_insn (".long\t%0", op);
12878 if (nonlocal)
12880 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12881 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12884 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12885 if (!flag_pic)
12886 output_asm_insn (".long\t%0", op);
12887 else
12888 output_asm_insn (".long\t%0-%5", op);
12890 if (op[6])
12892 targetm.asm_out.internal_label (file, "L",
12893 CODE_LABEL_NUMBER (op[6]));
12894 output_asm_insn (".long\t%2", op);
12896 if (op[7])
12898 targetm.asm_out.internal_label (file, "L",
12899 CODE_LABEL_NUMBER (op[7]));
12900 output_asm_insn (".long\t%3", op);
12903 final_end_function ();
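/* Illustration with hypothetical numbers: a 64-bit thunk with DELTA == 8
   and VCALL_OFFSET == 0 for a function that does not return in memory
   reduces to

     la   %r2,8(%r2)
     jg   function

   i.e. it bumps the this pointer and tail-jumps to FUNCTION.  */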
12906 static bool
12907 s390_valid_pointer_mode (machine_mode mode)
12909 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12912 /* Checks whether the given CALL_EXPR would use a call-saved
12913 register. This is used to decide whether sibling call
12914 optimization could be performed on the respective function
12915 call. */
12917 static bool
12918 s390_call_saved_register_used (tree call_expr)
12920 CUMULATIVE_ARGS cum_v;
12921 cumulative_args_t cum;
12922 tree parameter;
12923 machine_mode mode;
12924 tree type;
12925 rtx parm_rtx;
12926 int reg, i;
12928 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12929 cum = pack_cumulative_args (&cum_v);
12931 for (i = 0; i < call_expr_nargs (call_expr); i++)
12933 parameter = CALL_EXPR_ARG (call_expr, i);
12934 gcc_assert (parameter);
12936 /* For an undeclared variable passed as parameter we will get
12937 an ERROR_MARK node here. */
12938 if (TREE_CODE (parameter) == ERROR_MARK)
12939 return true;
12941 type = TREE_TYPE (parameter);
12942 gcc_assert (type);
12944 mode = TYPE_MODE (type);
12945 gcc_assert (mode);
12947 /* We assume that in the target function all parameters are
12948 named. This only has an impact on vector argument register
12949 usage, none of which is call-saved. */
12950 if (pass_by_reference (&cum_v, mode, type, true))
12952 mode = Pmode;
12953 type = build_pointer_type (type);
12956 parm_rtx = s390_function_arg (cum, mode, type, true);
12958 s390_function_arg_advance (cum, mode, type, true);
12960 if (!parm_rtx)
12961 continue;
12963 if (REG_P (parm_rtx))
12965 for (reg = 0;
12966 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12967 reg++)
12968 if (!call_used_regs[reg + REGNO (parm_rtx)])
12969 return true;
12972 if (GET_CODE (parm_rtx) == PARALLEL)
12974 int i;
12976 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12978 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12980 gcc_assert (REG_P (r));
12982 for (reg = 0;
12983 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12984 reg++)
12985 if (!call_used_regs[reg + REGNO (r)])
12986 return true;
12991 return false;
12994 /* Return true if the given call expression can be
12995 turned into a sibling call.
12996 DECL holds the declaration of the function to be called whereas
12997 EXP is the call expression itself. */
12999 static bool
13000 s390_function_ok_for_sibcall (tree decl, tree exp)
13002 /* The TPF epilogue uses register 1. */
13003 if (TARGET_TPF_PROFILING)
13004 return false;
13006 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13007 which would have to be restored before the sibcall. */
13008 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13009 return false;
13011 /* Register 6 on s390 is available as an argument register but is
13012 unfortunately call-saved. This makes functions needing this register for
13013 arguments not suitable for sibcalls. */
13014 return !s390_call_saved_register_used (exp);
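/* Example of the register 6 restriction (descriptive only): a call that
   passes five word-sized integer arguments uses %r2-%r6 for them; since
   %r6 is call-saved, s390_call_saved_register_used returns true and the
   call is not turned into a sibcall.  */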
13017 /* Return the fixed registers used for condition codes. */
13019 static bool
13020 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13022 *p1 = CC_REGNUM;
13023 *p2 = INVALID_REGNUM;
13025 return true;
13028 /* This function is used by the call expanders of the machine description.
13029 It emits the call insn itself together with the necessary operations
13030 to adjust the target address and returns the emitted insn.
13031 ADDR_LOCATION is the target address rtx
13032 TLS_CALL the location of the thread-local symbol
13033 RESULT_REG the register where the result of the call should be stored
13034 RETADDR_REG the register where the return address should be stored
13035 If this parameter is NULL_RTX the call is considered
13036 to be a sibling call. */
13038 rtx_insn *
13039 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13040 rtx retaddr_reg)
13042 bool plt_call = false;
13043 rtx_insn *insn;
13044 rtx call;
13045 rtx clobber;
13046 rtvec vec;
13048 /* Direct function calls need special treatment. */
13049 if (GET_CODE (addr_location) == SYMBOL_REF)
13051 /* When calling a global routine in PIC mode, we must
13052 replace the symbol itself with the PLT stub. */
13053 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13055 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13057 addr_location = gen_rtx_UNSPEC (Pmode,
13058 gen_rtvec (1, addr_location),
13059 UNSPEC_PLT);
13060 addr_location = gen_rtx_CONST (Pmode, addr_location);
13061 plt_call = true;
13063 else
13064 /* For -fpic code the PLT entries might use r12 which is
13065 call-saved. Therefore we cannot do a sibcall when
13066 calling directly using a symbol ref. When reaching
13067 this point we decided (in s390_function_ok_for_sibcall)
13068 to do a sibcall for a function pointer but one of the
13069 optimizers was able to get rid of the function pointer
13070 by propagating the symbol ref into the call. This
13071 optimization is illegal for S/390, so we turn the direct
13072 call into an indirect call again. */
13073 addr_location = force_reg (Pmode, addr_location);
13076 /* Unless we can use the bras(l) insn, force the
13077 routine address into a register. */
13078 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13080 if (flag_pic)
13081 addr_location = legitimize_pic_address (addr_location, 0);
13082 else
13083 addr_location = force_reg (Pmode, addr_location);
13087 /* If it is already an indirect call or the code above moved the
13088 SYMBOL_REF to somewhere else, make sure the address can be found in
13089 register 1. */
13090 if (retaddr_reg == NULL_RTX
13091 && GET_CODE (addr_location) != SYMBOL_REF
13092 && !plt_call)
13094 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13095 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13098 addr_location = gen_rtx_MEM (QImode, addr_location);
13099 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13101 if (result_reg != NULL_RTX)
13102 call = gen_rtx_SET (result_reg, call);
13104 if (retaddr_reg != NULL_RTX)
13106 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13108 if (tls_call != NULL_RTX)
13109 vec = gen_rtvec (3, call, clobber,
13110 gen_rtx_USE (VOIDmode, tls_call));
13111 else
13112 vec = gen_rtvec (2, call, clobber);
13114 call = gen_rtx_PARALLEL (VOIDmode, vec);
13117 insn = emit_call_insn (call);
13119 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13120 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13122 /* s390_function_ok_for_sibcall should
13123 have denied sibcalls in this case. */
13124 gcc_assert (retaddr_reg != NULL_RTX);
13125 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13127 return insn;
13130 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13132 static void
13133 s390_conditional_register_usage (void)
13135 int i;
13137 if (flag_pic)
13139 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13140 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13142 if (TARGET_CPU_ZARCH)
13144 fixed_regs[BASE_REGNUM] = 0;
13145 call_used_regs[BASE_REGNUM] = 0;
13146 fixed_regs[RETURN_REGNUM] = 0;
13147 call_used_regs[RETURN_REGNUM] = 0;
13149 if (TARGET_64BIT)
13151 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13152 call_used_regs[i] = call_really_used_regs[i] = 0;
13154 else
13156 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13157 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13160 if (TARGET_SOFT_FLOAT)
13162 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13163 call_used_regs[i] = fixed_regs[i] = 1;
13166 /* Disable v16 - v31 for non-vector target. */
13167 if (!TARGET_VX)
13169 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13170 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13174 /* Function corresponding to the eh_return expander. */
13176 static GTY(()) rtx s390_tpf_eh_return_symbol;
13177 void
13178 s390_emit_tpf_eh_return (rtx target)
13180 rtx_insn *insn;
13181 rtx reg, orig_ra;
13183 if (!s390_tpf_eh_return_symbol)
13184 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13186 reg = gen_rtx_REG (Pmode, 2);
13187 orig_ra = gen_rtx_REG (Pmode, 3);
13189 emit_move_insn (reg, target);
13190 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13191 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13192 gen_rtx_REG (Pmode, RETURN_REGNUM));
13193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13194 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13196 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13199 /* Rework the prologue/epilogue to avoid saving/restoring
13200 registers unnecessarily. */
13202 static void
13203 s390_optimize_prologue (void)
13205 rtx_insn *insn, *new_insn, *next_insn;
13207 /* Do a final recompute of the frame-related data. */
13208 s390_optimize_register_info ();
13210 /* If all special registers are in fact used, there's nothing we
13211 can do, so no point in walking the insn list. */
13213 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13214 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13215 && (TARGET_CPU_ZARCH
13216 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13217 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13218 return;
13220 /* Search for prologue/epilogue insns and replace them. */
13222 for (insn = get_insns (); insn; insn = next_insn)
13224 int first, last, off;
13225 rtx set, base, offset;
13226 rtx pat;
13228 next_insn = NEXT_INSN (insn);
13230 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13231 continue;
13233 pat = PATTERN (insn);
13235 /* Remove ldgr/lgdr instructions used for saving and restoring
13236 GPRs if possible. */
13237 if (TARGET_Z10)
13239 rtx tmp_pat = pat;
13241 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13242 tmp_pat = XVECEXP (pat, 0, 0);
13244 if (GET_CODE (tmp_pat) == SET
13245 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13246 && REG_P (SET_SRC (tmp_pat))
13247 && REG_P (SET_DEST (tmp_pat)))
13249 int src_regno = REGNO (SET_SRC (tmp_pat));
13250 int dest_regno = REGNO (SET_DEST (tmp_pat));
13251 int gpr_regno;
13252 int fpr_regno;
13254 if (!((GENERAL_REGNO_P (src_regno)
13255 && FP_REGNO_P (dest_regno))
13256 || (FP_REGNO_P (src_regno)
13257 && GENERAL_REGNO_P (dest_regno))))
13258 continue;
13260 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13261 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13263 /* GPR must be call-saved, FPR must be call-clobbered. */
13264 if (!call_really_used_regs[fpr_regno]
13265 || call_really_used_regs[gpr_regno])
13266 continue;
13268 /* It must not happen that what we once saved in an FPR now
13269 needs a stack slot. */
13270 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13272 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13274 remove_insn (insn);
13275 continue;
13280 if (GET_CODE (pat) == PARALLEL
13281 && store_multiple_operation (pat, VOIDmode))
13283 set = XVECEXP (pat, 0, 0);
13284 first = REGNO (SET_SRC (set));
13285 last = first + XVECLEN (pat, 0) - 1;
13286 offset = const0_rtx;
13287 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13288 off = INTVAL (offset);
13290 if (GET_CODE (base) != REG || off < 0)
13291 continue;
13292 if (cfun_frame_layout.first_save_gpr != -1
13293 && (cfun_frame_layout.first_save_gpr < first
13294 || cfun_frame_layout.last_save_gpr > last))
13295 continue;
13296 if (REGNO (base) != STACK_POINTER_REGNUM
13297 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13298 continue;
13299 if (first > BASE_REGNUM || last < BASE_REGNUM)
13300 continue;
13302 if (cfun_frame_layout.first_save_gpr != -1)
13304 rtx s_pat = save_gprs (base,
13305 off + (cfun_frame_layout.first_save_gpr
13306 - first) * UNITS_PER_LONG,
13307 cfun_frame_layout.first_save_gpr,
13308 cfun_frame_layout.last_save_gpr);
13309 new_insn = emit_insn_before (s_pat, insn);
13310 INSN_ADDRESSES_NEW (new_insn, -1);
13313 remove_insn (insn);
13314 continue;
13317 if (cfun_frame_layout.first_save_gpr == -1
13318 && GET_CODE (pat) == SET
13319 && GENERAL_REG_P (SET_SRC (pat))
13320 && GET_CODE (SET_DEST (pat)) == MEM)
13322 set = pat;
13323 first = REGNO (SET_SRC (set));
13324 offset = const0_rtx;
13325 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13326 off = INTVAL (offset);
13328 if (GET_CODE (base) != REG || off < 0)
13329 continue;
13330 if (REGNO (base) != STACK_POINTER_REGNUM
13331 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13332 continue;
13334 remove_insn (insn);
13335 continue;
13338 if (GET_CODE (pat) == PARALLEL
13339 && load_multiple_operation (pat, VOIDmode))
13341 set = XVECEXP (pat, 0, 0);
13342 first = REGNO (SET_DEST (set));
13343 last = first + XVECLEN (pat, 0) - 1;
13344 offset = const0_rtx;
13345 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13346 off = INTVAL (offset);
13348 if (GET_CODE (base) != REG || off < 0)
13349 continue;
13351 if (cfun_frame_layout.first_restore_gpr != -1
13352 && (cfun_frame_layout.first_restore_gpr < first
13353 || cfun_frame_layout.last_restore_gpr > last))
13354 continue;
13355 if (REGNO (base) != STACK_POINTER_REGNUM
13356 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13357 continue;
13358 if (first > BASE_REGNUM || last < BASE_REGNUM)
13359 continue;
13361 if (cfun_frame_layout.first_restore_gpr != -1)
13363 rtx rpat = restore_gprs (base,
13364 off + (cfun_frame_layout.first_restore_gpr
13365 - first) * UNITS_PER_LONG,
13366 cfun_frame_layout.first_restore_gpr,
13367 cfun_frame_layout.last_restore_gpr);
13369 /* Remove REG_CFA_RESTOREs for registers that we no
13370 longer need to save. */
13371 REG_NOTES (rpat) = REG_NOTES (insn);
13372 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13373 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13374 && ((int) REGNO (XEXP (*ptr, 0))
13375 < cfun_frame_layout.first_restore_gpr))
13376 *ptr = XEXP (*ptr, 1);
13377 else
13378 ptr = &XEXP (*ptr, 1);
13379 new_insn = emit_insn_before (rpat, insn);
13380 RTX_FRAME_RELATED_P (new_insn) = 1;
13381 INSN_ADDRESSES_NEW (new_insn, -1);
13384 remove_insn (insn);
13385 continue;
13388 if (cfun_frame_layout.first_restore_gpr == -1
13389 && GET_CODE (pat) == SET
13390 && GENERAL_REG_P (SET_DEST (pat))
13391 && GET_CODE (SET_SRC (pat)) == MEM)
13393 set = pat;
13394 first = REGNO (SET_DEST (set));
13395 offset = const0_rtx;
13396 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13397 off = INTVAL (offset);
13399 if (GET_CODE (base) != REG || off < 0)
13400 continue;
13402 if (REGNO (base) != STACK_POINTER_REGNUM
13403 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13404 continue;
13406 remove_insn (insn);
13407 continue;
13412 /* On z10 and later the dynamic branch prediction must see the
13413 backward jump within a certain window. If not, it falls back to
13414 the static prediction. This function rearranges the loop backward
13415 branch in a way which makes the static prediction always correct.
13416 The function returns true if it added an instruction. */
13417 static bool
13418 s390_fix_long_loop_prediction (rtx_insn *insn)
13420 rtx set = single_set (insn);
13421 rtx code_label, label_ref;
13422 rtx_insn *uncond_jump;
13423 rtx_insn *cur_insn;
13424 rtx tmp;
13425 int distance;
13427 /* This will exclude branch on count and branch on index patterns
13428 since these are correctly statically predicted. */
13429 if (!set
13430 || SET_DEST (set) != pc_rtx
13431 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13432 return false;
13434 /* Skip conditional returns. */
13435 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13436 && XEXP (SET_SRC (set), 2) == pc_rtx)
13437 return false;
13439 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13440 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13442 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13444 code_label = XEXP (label_ref, 0);
13446 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13447 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13448 || (INSN_ADDRESSES (INSN_UID (insn))
13449 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13450 return false;
13452 for (distance = 0, cur_insn = PREV_INSN (insn);
13453 distance < PREDICT_DISTANCE - 6;
13454 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13455 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13456 return false;
13458 rtx_code_label *new_label = gen_label_rtx ();
13459 uncond_jump = emit_jump_insn_after (
13460 gen_rtx_SET (pc_rtx,
13461 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13462 insn);
13463 emit_label_after (new_label, uncond_jump);
13465 tmp = XEXP (SET_SRC (set), 1);
13466 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13467 XEXP (SET_SRC (set), 2) = tmp;
13468 INSN_CODE (insn) = -1;
13470 XEXP (label_ref, 0) = new_label;
13471 JUMP_LABEL (insn) = new_label;
13472 JUMP_LABEL (uncond_jump) = code_label;
13474 return true;
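/* Shape of the transformation performed above (sketch; jCC stands for a
   conditional branch, jNCC for its inverted form):

     before:                        after:
       code_label:                    code_label:
         ...                            ...
         jCC   code_label               jNCC  new_label
                                        j     code_label
                                      new_label:

   The unconditional backward jump is trivially predicted correctly, and
   the short forward conditional branch is taken only on loop exit.  */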
13477 /* Returns 1 if INSN reads the value of REG for purposes not related
13478 to addressing of memory, and 0 otherwise. */
13479 static int
13480 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13482 return reg_referenced_p (reg, PATTERN (insn))
13483 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13486 /* Starting from INSN find_cond_jump looks downwards in the insn
13487 stream for a single jump insn which is the last user of the
13488 condition code set in INSN. */
13489 static rtx_insn *
13490 find_cond_jump (rtx_insn *insn)
13492 for (; insn; insn = NEXT_INSN (insn))
13494 rtx ite, cc;
13496 if (LABEL_P (insn))
13497 break;
13499 if (!JUMP_P (insn))
13501 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13502 break;
13503 continue;
13506 /* This will be triggered by a return. */
13507 if (GET_CODE (PATTERN (insn)) != SET)
13508 break;
13510 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13511 ite = SET_SRC (PATTERN (insn));
13513 if (GET_CODE (ite) != IF_THEN_ELSE)
13514 break;
13516 cc = XEXP (XEXP (ite, 0), 0);
13517 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13518 break;
13520 if (find_reg_note (insn, REG_DEAD, cc))
13521 return insn;
13522 break;
13525 return NULL;
13528 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13529 the semantics does not change. If NULL_RTX is passed as COND the
13530 function tries to find the conditional jump starting with INSN. */
13531 static void
13532 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13534 rtx tmp = *op0;
13536 if (cond == NULL_RTX)
13538 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13539 rtx set = jump ? single_set (jump) : NULL_RTX;
13541 if (set == NULL_RTX)
13542 return;
13544 cond = XEXP (SET_SRC (set), 0);
13547 *op0 = *op1;
13548 *op1 = tmp;
13549 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13552 /* On z10, instructions of the compare-and-branch family have the
13553 property to access the register occurring as second operand with
13554 its bits complemented. If such a compare is grouped with a second
13555 instruction that accesses the same register non-complemented, and
13556 if that register's value is delivered via a bypass, then the
13557 pipeline recycles, thereby causing significant performance decline.
13558 This function locates such situations and exchanges the two
13559 operands of the compare. The function returns true whenever it
13560 added an insn. */
13561 static bool
13562 s390_z10_optimize_cmp (rtx_insn *insn)
13564 rtx_insn *prev_insn, *next_insn;
13565 bool insn_added_p = false;
13566 rtx cond, *op0, *op1;
13568 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13570 /* Handle compare and branch and branch on count
13571 instructions. */
13572 rtx pattern = single_set (insn);
13574 if (!pattern
13575 || SET_DEST (pattern) != pc_rtx
13576 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13577 return false;
13579 cond = XEXP (SET_SRC (pattern), 0);
13580 op0 = &XEXP (cond, 0);
13581 op1 = &XEXP (cond, 1);
13583 else if (GET_CODE (PATTERN (insn)) == SET)
13585 rtx src, dest;
13587 /* Handle normal compare instructions. */
13588 src = SET_SRC (PATTERN (insn));
13589 dest = SET_DEST (PATTERN (insn));
13591 if (!REG_P (dest)
13592 || !CC_REGNO_P (REGNO (dest))
13593 || GET_CODE (src) != COMPARE)
13594 return false;
13596 /* s390_swap_cmp will try to find the conditional
13597 jump when passing NULL_RTX as condition. */
13598 cond = NULL_RTX;
13599 op0 = &XEXP (src, 0);
13600 op1 = &XEXP (src, 1);
13602 else
13603 return false;
13605 if (!REG_P (*op0) || !REG_P (*op1))
13606 return false;
13608 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13609 return false;
13611 /* Swap the COMPARE arguments and its mask if there is a
13612 conflicting access in the previous insn. */
13613 prev_insn = prev_active_insn (insn);
13614 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13615 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13616 s390_swap_cmp (cond, op0, op1, insn);
13618 /* Check if there is a conflict with the next insn. If there
13619 was no conflict with the previous insn, then swap the
13620 COMPARE arguments and its mask. If we already swapped
13621 the operands, or if swapping them would cause a conflict
13622 with the previous insn, issue a NOP after the COMPARE in
13623 order to separate the two instructions. */
13624 next_insn = next_active_insn (insn);
13625 if (next_insn != NULL_RTX && INSN_P (next_insn)
13626 && s390_non_addr_reg_read_p (*op1, next_insn))
13628 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13629 && s390_non_addr_reg_read_p (*op0, prev_insn))
13631 if (REGNO (*op1) == 0)
13632 emit_insn_after (gen_nop1 (), insn);
13633 else
13634 emit_insn_after (gen_nop (), insn);
13635 insn_added_p = true;
13637 else
13638 s390_swap_cmp (cond, op0, op1, insn);
13640 return insn_added_p;
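/* Hedged example of the swap (extended mnemonics for illustration only):
   if the previous insn has just set %r3 and the group contains

     cgrjh  %r2,%r3,label

   the operands are exchanged (and the condition mirrored) so that %r3 is
   no longer the second, bit-complemented operand delivered via a bypass:

     cgrjl  %r3,%r2,label  */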
13643 /* Number of INSNs to be scanned backward in the last BB of the loop
13644 and forward in the first BB of the loop. This usually should be a
13645 bit more than the number of INSNs which could go into one
13646 group. */
13647 #define S390_OSC_SCAN_INSN_NUM 5
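/* OSC presumably stands for the operand-store-compare hazard: a store
   followed closely by a load from the same address stalls the pipeline.
   The scan below looks for a store in the loop latch whose address
   expression is re-used by a load in the loop header.  */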
13649 /* Scan LOOP for static OSC collisions and return true if an osc_break
13650 should be issued for this loop. */
13651 static bool
13652 s390_adjust_loop_scan_osc (struct loop* loop)
13655 HARD_REG_SET modregs, newregs;
13656 rtx_insn *insn, *store_insn = NULL;
13657 rtx set;
13658 struct s390_address addr_store, addr_load;
13659 subrtx_iterator::array_type array;
13660 int insn_count;
13662 CLEAR_HARD_REG_SET (modregs);
13664 insn_count = 0;
13665 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13667 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13668 continue;
13670 insn_count++;
13671 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13672 return false;
13674 find_all_hard_reg_sets (insn, &newregs, true);
13675 IOR_HARD_REG_SET (modregs, newregs);
13677 set = single_set (insn);
13678 if (!set)
13679 continue;
13681 if (MEM_P (SET_DEST (set))
13682 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13684 store_insn = insn;
13685 break;
13689 if (store_insn == NULL_RTX)
13690 return false;
13692 insn_count = 0;
13693 FOR_BB_INSNS (loop->header, insn)
13695 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13696 continue;
13698 if (insn == store_insn)
13699 return false;
13701 insn_count++;
13702 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13703 return false;
13705 find_all_hard_reg_sets (insn, &newregs, true);
13706 IOR_HARD_REG_SET (modregs, newregs);
13708 set = single_set (insn);
13709 if (!set)
13710 continue;
13712 /* An intermediate store disrupts static OSC checking
13713 anyway. */
13714 if (MEM_P (SET_DEST (set))
13715 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13716 return false;
13718 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13719 if (MEM_P (*iter)
13720 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13721 && rtx_equal_p (addr_load.base, addr_store.base)
13722 && rtx_equal_p (addr_load.indx, addr_store.indx)
13723 && rtx_equal_p (addr_load.disp, addr_store.disp))
13725 if ((addr_load.base != NULL_RTX
13726 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
13727 || (addr_load.indx != NULL_RTX
13728 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
13729 return true;
13732 return false;
13735 /* Look for adjustments which can be done on simple innermost
13736 loops. */
13737 static void
13738 s390_adjust_loops ()
13740 struct loop *loop = NULL;
13742 df_analyze ();
13743 compute_bb_for_insn ();
13745 /* Find the loops. */
13746 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
13748 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
13750 if (dump_file)
13752 flow_loop_dump (loop, dump_file, NULL, 0);
13753 fprintf (dump_file, ";; OSC loop scan Loop: ");
13755 if (loop->latch == NULL
13756 || pc_set (BB_END (loop->latch)) == NULL_RTX
13757 || !s390_adjust_loop_scan_osc (loop))
13759 if (dump_file)
13761 if (loop->latch == NULL)
13762 fprintf (dump_file, " multiple backward jumps\n");
13763 else
13765 fprintf (dump_file, " header insn: %d latch insn: %d ",
13766 INSN_UID (BB_HEAD (loop->header)),
13767 INSN_UID (BB_END (loop->latch)));
13768 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
13769 fprintf (dump_file, " loop does not end with jump\n");
13770 else
13771 fprintf (dump_file, " not instrumented\n");
13775 else
13777 rtx_insn *new_insn;
13779 if (dump_file)
13780 fprintf (dump_file, " adding OSC break insn: ");
13781 new_insn = emit_insn_before (gen_osc_break (),
13782 BB_END (loop->latch));
13783 INSN_ADDRESSES_NEW (new_insn, -1);
13787 loop_optimizer_finalize ();
13789 df_finish_pass (false);
13792 /* Perform machine-dependent processing. */
13794 static void
13795 s390_reorg (void)
13797 bool pool_overflow = false;
13798 int hw_before, hw_after;
13800 if (s390_tune == PROCESSOR_2964_Z13)
13801 s390_adjust_loops ();
13803 /* Make sure all splits have been performed; splits after
13804 machine_dependent_reorg might confuse insn length counts. */
13805 split_all_insns_noflow ();
13807 /* Install the main literal pool and the associated base
13808 register load insns.
13810 In addition, there are two problematic situations we need
13811 to correct:
13813 - the literal pool might be > 4096 bytes in size, so that
13814 some of its elements cannot be directly accessed
13816 - a branch target might be > 64K away from the branch, so that
13817 it is not possible to use a PC-relative instruction.
13819 To fix those, we split the single literal pool into multiple
13820 pool chunks, reloading the pool base register at various
13821 points throughout the function to ensure it always points to
13822 the pool chunk the following code expects, and / or replace
13823 PC-relative branches by absolute branches.
13825 However, the two problems are interdependent: splitting the
13826 literal pool can move a branch further away from its target,
13827 causing the 64K limit to overflow, and on the other hand,
13828 replacing a PC-relative branch by an absolute branch means
13829 we need to put the branch target address into the literal
13830 pool, possibly causing it to overflow.
13832 So, we loop trying to fix up both problems until we manage
13833 to satisfy both conditions at the same time. Note that the
13834 loop is guaranteed to terminate as every pass of the loop
13835 strictly decreases the total number of PC-relative branches
13836 in the function. (This is not completely true as there
13837 might be branch-over-pool insns introduced by chunkify_start.
13838 Those never need to be split however.) */
13840 for (;;)
13842 struct constant_pool *pool = NULL;
13844 /* Collect the literal pool. */
13845 if (!pool_overflow)
13847 pool = s390_mainpool_start ();
13848 if (!pool)
13849 pool_overflow = true;
13852 /* If literal pool overflowed, start to chunkify it. */
13853 if (pool_overflow)
13854 pool = s390_chunkify_start ();
13856 /* Split out-of-range branches. If this has created new
13857 literal pool entries, cancel current chunk list and
13858 recompute it. zSeries machines have large branch
13859 instructions, so we never need to split a branch. */
13860 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13862 if (pool_overflow)
13863 s390_chunkify_cancel (pool);
13864 else
13865 s390_mainpool_cancel (pool);
13867 continue;
13870 /* If we made it up to here, both conditions are satisfied.
13871 Finish up literal pool related changes. */
13872 if (pool_overflow)
13873 s390_chunkify_finish (pool);
13874 else
13875 s390_mainpool_finish (pool);
13877 /* We're done splitting branches. */
13878 cfun->machine->split_branches_pending_p = false;
13879 break;
13882 /* Generate out-of-pool execute target insns. */
13883 if (TARGET_CPU_ZARCH)
13885 rtx_insn *insn, *target;
13886 rtx label;
13888 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13890 label = s390_execute_label (insn);
13891 if (!label)
13892 continue;
13894 gcc_assert (label != const0_rtx);
13896 target = emit_label (XEXP (label, 0));
13897 INSN_ADDRESSES_NEW (target, -1);
13899 target = emit_insn (s390_execute_target (insn));
13900 INSN_ADDRESSES_NEW (target, -1);
13904 /* Try to optimize prologue and epilogue further. */
13905 s390_optimize_prologue ();
13907 /* Walk over the insns and do some >=z10 specific changes. */
13908 if (s390_tune >= PROCESSOR_2097_Z10)
13910 rtx_insn *insn;
13911 bool insn_added_p = false;
13913 /* The insn lengths and addresses have to be up to date for the
13914 following manipulations. */
13915 shorten_branches (get_insns ());
13917 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13919 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13920 continue;
13922 if (JUMP_P (insn))
13923 insn_added_p |= s390_fix_long_loop_prediction (insn);
13925 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13926 || GET_CODE (PATTERN (insn)) == SET)
13927 && s390_tune == PROCESSOR_2097_Z10)
13928 insn_added_p |= s390_z10_optimize_cmp (insn);
13931 /* Adjust branches if we added new instructions. */
13932 if (insn_added_p)
13933 shorten_branches (get_insns ());
13936 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13937 if (hw_after > 0)
13939 rtx_insn *insn;
13941 /* Insert NOPs for hotpatching. */
13942 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13943 /* Emit NOPs
13944 1. inside the area covered by debug information to allow setting
13945 breakpoints at the NOPs,
13946 2. before any insn which results in an asm instruction,
13947 3. before in-function labels to avoid jumping to the NOPs, for
13948 example as part of a loop,
13949 4. before any barrier in case the function is completely empty
13950 (__builtin_unreachable ()) and has neither internal labels nor
13951 active insns.
13953 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13954 break;
13955 /* Output a series of NOPs before the first active insn. */
13956 while (insn && hw_after > 0)
13958 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13960 emit_insn_before (gen_nop_6_byte (), insn);
13961 hw_after -= 3;
13963 else if (hw_after >= 2)
13965 emit_insn_before (gen_nop_4_byte (), insn);
13966 hw_after -= 2;
13968 else
13970 emit_insn_before (gen_nop_2_byte (), insn);
13971 hw_after -= 1;
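   /* Worked example for the NOP emission loop above: with hw_after == 5 on a
      TARGET_CPU_ZARCH target it emits one 6-byte NOP (3 halfwords) followed by
      one 4-byte NOP (2 halfwords), i.e. exactly 5 halfwords of padding before
      the first active insn.  */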
13977 /* Return true if INSN is an fp load insn writing register REGNO. */
13978 static inline bool
13979 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13981 rtx set;
13982 enum attr_type flag = s390_safe_attr_type (insn);
13984 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13985 return false;
13987 set = single_set (insn);
13989 if (set == NULL_RTX)
13990 return false;
13992 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13993 return false;
13995 if (REGNO (SET_DEST (set)) != regno)
13996 return false;
13998 return true;
14001 /* This value describes the distance to be avoided between an
14002 arithmetic fp instruction and an fp load writing the same register.
14003 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14004 fine, but the exact value has to be avoided. Otherwise the FP
14005 pipeline will throw an exception causing a major penalty. */
14006 #define Z10_EARLYLOAD_DISTANCE 7
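/* Illustration: an arithmetic fp instruction writing some %fN, followed exactly
   Z10_EARLYLOAD_DISTANCE (7) active instructions later by an fp load into the
   same %fN, hits the penalty described above; a distance of 6 or 8
   instructions is harmless.  */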
14008 /* Rearrange the ready list in order to avoid the situation described
14009 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14010 moved to the very end of the ready list. */
14011 static void
14012 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14014 unsigned int regno;
14015 int nready = *nready_p;
14016 rtx_insn *tmp;
14017 int i;
14018 rtx_insn *insn;
14019 rtx set;
14020 enum attr_type flag;
14021 int distance;
14023 /* Skip DISTANCE - 1 active insns. */
14024 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14025 distance > 0 && insn != NULL_RTX;
14026 distance--, insn = prev_active_insn (insn))
14027 if (CALL_P (insn) || JUMP_P (insn))
14028 return;
14030 if (insn == NULL_RTX)
14031 return;
14033 set = single_set (insn);
14035 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14036 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14037 return;
14039 flag = s390_safe_attr_type (insn);
14041 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14042 return;
14044 regno = REGNO (SET_DEST (set));
14045 i = nready - 1;
14047 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14048 i--;
14050 if (!i)
14051 return;
14053 tmp = ready[i];
14054 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14055 ready[0] = tmp;
14059 /* The s390_sched_state variable tracks the state of the current or
14060 the last instruction group.
14062 0,1,2 number of instructions scheduled in the current group
14063 3 the last group is complete - normal insns
14064 4 the last group was a cracked/expanded insn */
14066 static int s390_sched_state;
14068 #define S390_SCHED_STATE_NORMAL 3
14069 #define S390_SCHED_STATE_CRACKED 4
14071 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14072 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14073 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14074 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14076 static unsigned int
14077 s390_get_sched_attrmask (rtx_insn *insn)
14079 unsigned int mask = 0;
14081 switch (s390_tune)
14083 case PROCESSOR_2827_ZEC12:
14084 if (get_attr_zEC12_cracked (insn))
14085 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14086 if (get_attr_zEC12_expanded (insn))
14087 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14088 if (get_attr_zEC12_endgroup (insn))
14089 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14090 if (get_attr_zEC12_groupalone (insn))
14091 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14092 break;
14093 case PROCESSOR_2964_Z13:
14094 if (get_attr_z13_cracked (insn))
14095 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14096 if (get_attr_z13_expanded (insn))
14097 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14098 if (get_attr_z13_endgroup (insn))
14099 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14100 if (get_attr_z13_groupalone (insn))
14101 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14102 break;
14103 default:
14104 gcc_unreachable ();
14106 return mask;
14109 static unsigned int
14110 s390_get_unit_mask (rtx_insn *insn, int *units)
14112 unsigned int mask = 0;
14114 switch (s390_tune)
14116 case PROCESSOR_2964_Z13:
14117 *units = 3;
14118 if (get_attr_z13_unit_lsu (insn))
14119 mask |= 1 << 0;
14120 if (get_attr_z13_unit_fxu (insn))
14121 mask |= 1 << 1;
14122 if (get_attr_z13_unit_vfu (insn))
14123 mask |= 1 << 2;
14124 break;
14125 default:
14126 gcc_unreachable ();
14128 return mask;
14131 /* Return the scheduling score for INSN. The higher the score the
14132 better. The score is calculated from the OOO scheduling attributes
14133 of INSN and the scheduling state s390_sched_state. */
14134 static int
14135 s390_sched_score (rtx_insn *insn)
14137 unsigned int mask = s390_get_sched_attrmask (insn);
14138 int score = 0;
14140 switch (s390_sched_state)
14142 case 0:
14143 /* Try to put insns into the first slot which would otherwise
14144 break a group. */
14145 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14146 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14147 score += 5;
14148 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14149 score += 10;
14150 /* fallthrough */
14151 case 1:
14152 /* Prefer not cracked insns while trying to put together a
14153 group. */
14154 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14155 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14156 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14157 score += 10;
14158 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14159 score += 5;
14160 break;
14161 case 2:
14162 /* Prefer not cracked insns while trying to put together a
14163 group. */
14164 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14165 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14166 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14167 score += 10;
14168 /* Prefer endgroup insns in the last slot. */
14169 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14170 score += 10;
14171 break;
14172 case S390_SCHED_STATE_NORMAL:
14173 /* Prefer not cracked insns if the last was not cracked. */
14174 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14175 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14176 score += 5;
14177 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14178 score += 10;
14179 break;
14180 case S390_SCHED_STATE_CRACKED:
14181 /* Try to keep cracked insns together to prevent them from
14182 interrupting groups. */
14183 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14184 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14185 score += 5;
14186 break;
14189 if (s390_tune == PROCESSOR_2964_Z13)
14191 int units, i;
14192 unsigned unit_mask, m = 1;
14194 unit_mask = s390_get_unit_mask (insn, &units);
14195 gcc_assert (units <= MAX_SCHED_UNITS);
14197 /* Add a score in the range 0..MAX_SCHED_MIX_SCORE depending on how long
14198 ago the last insn of this unit type got scheduled. This is
14199 supposed to help provide a proper instruction mix to the
14200 CPU. */
14201 for (i = 0; i < units; i++, m <<= 1)
14202 if (m & unit_mask)
14203 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14204 MAX_SCHED_MIX_DISTANCE);
14206 return score;
14209 /* This function is called via hook TARGET_SCHED_REORDER before
14210 issuing one insn from list READY which contains *NREADYP entries.
14211 For target z10 it reorders load instructions to avoid early load
14212 conflicts in the floating point pipeline */
14213 static int
14214 s390_sched_reorder (FILE *file, int verbose,
14215 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14217 if (s390_tune == PROCESSOR_2097_Z10
14218 && reload_completed
14219 && *nreadyp > 1)
14220 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14222 if (s390_tune >= PROCESSOR_2827_ZEC12
14223 && reload_completed
14224 && *nreadyp > 1)
14226 int i;
14227 int last_index = *nreadyp - 1;
14228 int max_index = -1;
14229 int max_score = -1;
14230 rtx_insn *tmp;
14232 /* Just move the insn with the highest score to the top (the
14233 end) of the list. A full sort is not needed since a conflict
14234 in the hazard recognition cannot happen. So the top insn in
14235 the ready list will always be taken. */
14236 for (i = last_index; i >= 0; i--)
14238 int score;
14240 if (recog_memoized (ready[i]) < 0)
14241 continue;
14243 score = s390_sched_score (ready[i]);
14244 if (score > max_score)
14246 max_score = score;
14247 max_index = i;
14251 if (max_index != -1)
14253 if (max_index != last_index)
14255 tmp = ready[max_index];
14256 ready[max_index] = ready[last_index];
14257 ready[last_index] = tmp;
14259 if (verbose > 5)
14260 fprintf (file,
14261 ";;\t\tBACKEND: move insn %d to the top of list\n",
14262 INSN_UID (ready[last_index]));
14264 else if (verbose > 5)
14265 fprintf (file,
14266 ";;\t\tBACKEND: best insn %d already on top\n",
14267 INSN_UID (ready[last_index]));
14270 if (verbose > 5)
14272 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14273 s390_sched_state);
14275 for (i = last_index; i >= 0; i--)
14277 unsigned int sched_mask;
14278 rtx_insn *insn = ready[i];
14280 if (recog_memoized (insn) < 0)
14281 continue;
14283 sched_mask = s390_get_sched_attrmask (insn);
14284 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14285 INSN_UID (insn),
14286 s390_sched_score (insn));
14287 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14288 ((M) & sched_mask) ? #ATTR : "");
14289 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14290 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14291 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14292 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14293 #undef PRINT_SCHED_ATTR
14294 if (s390_tune == PROCESSOR_2964_Z13)
14296 unsigned int unit_mask, m = 1;
14297 int units, j;
14299 unit_mask = s390_get_unit_mask (insn, &units);
14300 fprintf (file, "(units:");
14301 for (j = 0; j < units; j++, m <<= 1)
14302 if (m & unit_mask)
14303 fprintf (file, " u%d", j);
14304 fprintf (file, ")");
14306 fprintf (file, "\n");
14311 return s390_issue_rate ();
14315 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14316 the scheduler has issued INSN. It stores the last issued insn into
14317 last_scheduled_insn in order to make it available for
14318 s390_sched_reorder. */
14319 static int
14320 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14322 last_scheduled_insn = insn;
14324 if (s390_tune >= PROCESSOR_2827_ZEC12
14325 && reload_completed
14326 && recog_memoized (insn) >= 0)
14328 unsigned int mask = s390_get_sched_attrmask (insn);
14330 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14331 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14332 s390_sched_state = S390_SCHED_STATE_CRACKED;
14333 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14334 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14335 s390_sched_state = S390_SCHED_STATE_NORMAL;
14336 else
14338 /* Only normal insns are left (mask == 0). */
14339 switch (s390_sched_state)
14341 case 0:
14342 case 1:
14343 case 2:
14344 case S390_SCHED_STATE_NORMAL:
14345 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14346 s390_sched_state = 1;
14347 else
14348 s390_sched_state++;
14350 break;
14351 case S390_SCHED_STATE_CRACKED:
14352 s390_sched_state = S390_SCHED_STATE_NORMAL;
14353 break;
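	  /* Example trace of the updates above: issuing a cracked insn sets the
	     state to S390_SCHED_STATE_CRACKED; a subsequent normal insn moves
	     it to S390_SCHED_STATE_NORMAL (the group is considered complete);
	     the next normal insn then starts a new group and sets the state
	     to 1.  */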
14357 if (s390_tune == PROCESSOR_2964_Z13)
14359 int units, i;
14360 unsigned unit_mask, m = 1;
14362 unit_mask = s390_get_unit_mask (insn, &units);
14363 gcc_assert (units <= MAX_SCHED_UNITS);
14365 for (i = 0; i < units; i++, m <<= 1)
14366 if (m & unit_mask)
14367 last_scheduled_unit_distance[i] = 0;
14368 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14369 last_scheduled_unit_distance[i]++;
14372 if (verbose > 5)
14374 unsigned int sched_mask;
14376 sched_mask = s390_get_sched_attrmask (insn);
14378 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14379 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14380 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14381 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14382 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14383 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14384 #undef PRINT_SCHED_ATTR
14386 if (s390_tune == PROCESSOR_2964_Z13)
14388 unsigned int unit_mask, m = 1;
14389 int units, j;
14391 unit_mask = s390_get_unit_mask (insn, &units);
14392 fprintf (file, "(units:");
14393 for (j = 0; j < units; j++, m <<= 1)
14394 if (m & unit_mask)
14395 fprintf (file, " %d", j);
14396 fprintf (file, ")");
14398 fprintf (file, " sched state: %d\n", s390_sched_state);
14400 if (s390_tune == PROCESSOR_2964_Z13)
14402 int units, j;
14404 s390_get_unit_mask (insn, &units);
14406 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14407 for (j = 0; j < units; j++)
14408 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14409 fprintf (file, "\n");
14414 if (GET_CODE (PATTERN (insn)) != USE
14415 && GET_CODE (PATTERN (insn)) != CLOBBER)
14416 return more - 1;
14417 else
14418 return more;
14421 static void
14422 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14423 int verbose ATTRIBUTE_UNUSED,
14424 int max_ready ATTRIBUTE_UNUSED)
14426 last_scheduled_insn = NULL;
14427 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14428 s390_sched_state = 0;
14431 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14432 the number of times struct loop *loop should be unrolled when tuning for
14433 cpus with a built-in stride prefetcher.
14434 The loop is analyzed for memory accesses by walking over each rtx of the
14435 loop body. Depending on the loop_depth and the number of memory
14436 accesses a new number <= nunroll is returned to improve the
14437 behavior of the hardware prefetch unit. */
14438 static unsigned
14439 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14441 basic_block *bbs;
14442 rtx_insn *insn;
14443 unsigned i;
14444 unsigned mem_count = 0;
14446 if (s390_tune < PROCESSOR_2097_Z10)
14447 return nunroll;
14449 /* Count the number of memory references within the loop body. */
14450 bbs = get_loop_body (loop);
14451 subrtx_iterator::array_type array;
14452 for (i = 0; i < loop->num_nodes; i++)
14453 FOR_BB_INSNS (bbs[i], insn)
14454 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14455 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14456 if (MEM_P (*iter))
14457 mem_count += 1;
14458 free (bbs);
14460 /* Prevent division by zero; nunroll does not need to be adjusted in this case. */
14461 if (mem_count == 0)
14462 return nunroll;
14464 switch (loop_depth(loop))
14466 case 1:
14467 return MIN (nunroll, 28 / mem_count);
14468 case 2:
14469 return MIN (nunroll, 22 / mem_count);
14470 default:
14471 return MIN (nunroll, 16 / mem_count);
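/* For instance, a depth-1 loop containing 4 memory references is unrolled at
   most MIN (nunroll, 28 / 4) = 7 times by the code above.  */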
14475 /* Restore the current options. This is a hook function and also called
14476 internally. */
14478 static void
14479 s390_function_specific_restore (struct gcc_options *opts,
14480 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14482 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14485 static void
14486 s390_option_override_internal (bool main_args_p,
14487 struct gcc_options *opts,
14488 const struct gcc_options *opts_set)
14490 const char *prefix;
14491 const char *suffix;
14493 /* Set up prefix/suffix so the error messages refer to either the command
14494 line argument, or the attribute(target). */
14495 if (main_args_p)
14497 prefix = "-m";
14498 suffix = "";
14500 else
14502 prefix = "option(\"";
14503 suffix = "\")";
14507 /* Architecture mode defaults according to ABI. */
14508 if (!(opts_set->x_target_flags & MASK_ZARCH))
14510 if (TARGET_64BIT)
14511 opts->x_target_flags |= MASK_ZARCH;
14512 else
14513 opts->x_target_flags &= ~MASK_ZARCH;
14516 /* Set the march default in case it hasn't been specified on cmdline. */
14517 if (!opts_set->x_s390_arch)
14518 opts->x_s390_arch = PROCESSOR_2064_Z900;
14519 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14520 || opts->x_s390_arch == PROCESSOR_9672_G6)
14521 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14522 "in future releases; use at least %sarch=z900%s",
14523 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14524 suffix, prefix, suffix);
14526 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14528 /* Determine processor to tune for. */
14529 if (!opts_set->x_s390_tune)
14530 opts->x_s390_tune = opts->x_s390_arch;
14531 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14532 || opts->x_s390_tune == PROCESSOR_9672_G6)
14533 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14534 "in future releases; use at least %stune=z900%s",
14535 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14536 suffix, prefix, suffix);
14538 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14540 /* Sanity checks. */
14541 if (opts->x_s390_arch == PROCESSOR_NATIVE
14542 || opts->x_s390_tune == PROCESSOR_NATIVE)
14543 gcc_unreachable ();
14544 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14545 error ("z/Architecture mode not supported on %s",
14546 processor_table[(int)opts->x_s390_arch].name);
14547 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14548 error ("64-bit ABI not supported in ESA/390 mode");
14550 /* Enable hardware transactions if available and not explicitly
14551 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14552 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14554 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14555 opts->x_target_flags |= MASK_OPT_HTM;
14556 else
14557 opts->x_target_flags &= ~MASK_OPT_HTM;
14560 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14562 if (TARGET_OPT_VX_P (opts->x_target_flags))
14564 if (!TARGET_CPU_VX_P (opts))
14565 error ("hardware vector support not available on %s",
14566 processor_table[(int)opts->x_s390_arch].name);
14567 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14568 error ("hardware vector support not available with -msoft-float");
14571 else
14573 if (TARGET_CPU_VX_P (opts))
14574 /* Enable vector support if available and not explicitly disabled
14575 by user. E.g. with -m31 -march=z13 -mzarch */
14576 opts->x_target_flags |= MASK_OPT_VX;
14577 else
14578 opts->x_target_flags &= ~MASK_OPT_VX;
14581 /* Use hardware DFP if available and not explicitly disabled by
14582 user. E.g. with -m31 -march=z10 -mzarch */
14583 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14585 if (TARGET_DFP_P (opts))
14586 opts->x_target_flags |= MASK_HARD_DFP;
14587 else
14588 opts->x_target_flags &= ~MASK_HARD_DFP;
14591 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14593 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14595 if (!TARGET_CPU_DFP_P (opts))
14596 error ("hardware decimal floating point instructions"
14597 " not available on %s",
14598 processor_table[(int)opts->x_s390_arch].name);
14599 if (!TARGET_ZARCH_P (opts->x_target_flags))
14600 error ("hardware decimal floating point instructions"
14601 " not available in ESA/390 mode");
14603 else
14604 opts->x_target_flags &= ~MASK_HARD_DFP;
14607 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14608 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14610 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14611 && TARGET_HARD_DFP_P (opts->x_target_flags))
14612 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14614 opts->x_target_flags &= ~MASK_HARD_DFP;
14617 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14618 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14619 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14620 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14621 "in combination");
14623 if (opts->x_s390_stack_size)
14625 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14626 error ("stack size must be greater than the stack guard value");
14627 else if (opts->x_s390_stack_size > 1 << 16)
14628 error ("stack size must not be greater than 64k");
14630 else if (opts->x_s390_stack_guard)
14631 error ("-mstack-guard implies use of -mstack-size");
14633 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14634 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14635 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14636 #endif
14638 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14640 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14641 opts->x_param_values,
14642 opts_set->x_param_values);
14643 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14644 opts->x_param_values,
14645 opts_set->x_param_values);
14646 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14647 opts->x_param_values,
14648 opts_set->x_param_values);
14649 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14650 opts->x_param_values,
14651 opts_set->x_param_values);
14654 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14655 opts->x_param_values,
14656 opts_set->x_param_values);
14657 /* values for loop prefetching */
14658 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14659 opts->x_param_values,
14660 opts_set->x_param_values);
14661 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14662 opts->x_param_values,
14663 opts_set->x_param_values);
14664 /* s390 has more than 2 levels and the size is much larger. Since
14665 we are always running virtualized, assume that we only get a small
14666 part of the caches above L1. */
14667 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14668 opts->x_param_values,
14669 opts_set->x_param_values);
14670 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14671 opts->x_param_values,
14672 opts_set->x_param_values);
14673 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14674 opts->x_param_values,
14675 opts_set->x_param_values);
14677 /* Use the alternative scheduling-pressure algorithm by default. */
14678 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14679 opts->x_param_values,
14680 opts_set->x_param_values);
14682 /* Call target specific restore function to do post-init work. At the moment,
14683 this just sets opts->x_s390_cost_pointer. */
14684 s390_function_specific_restore (opts, NULL);
14687 static void
14688 s390_option_override (void)
14690 unsigned int i;
14691 cl_deferred_option *opt;
14692 vec<cl_deferred_option> *v =
14693 (vec<cl_deferred_option> *) s390_deferred_options;
14695 if (v)
14696 FOR_EACH_VEC_ELT (*v, i, opt)
14698 switch (opt->opt_index)
14700 case OPT_mhotpatch_:
14702 int val1;
14703 int val2;
14704 char s[256];
14705 char *t;
14707 strncpy (s, opt->arg, 256);
14708 s[255] = 0;
14709 t = strchr (s, ',');
14710 if (t != NULL)
14712 *t = 0;
14713 t++;
14714 val1 = integral_argument (s);
14715 val2 = integral_argument (t);
14717 else
14719 val1 = -1;
14720 val2 = -1;
14722 if (val1 == -1 || val2 == -1)
14724 /* argument is not a plain number */
14725 error ("arguments to %qs should be non-negative integers",
14726 "-mhotpatch=n,m");
14727 break;
14729 else if (val1 > s390_hotpatch_hw_max
14730 || val2 > s390_hotpatch_hw_max)
14732 error ("argument to %qs is too large (max. %d)",
14733 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14734 break;
14736 s390_hotpatch_hw_before_label = val1;
14737 s390_hotpatch_hw_after_label = val2;
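	    /* E.g. -mhotpatch=1,2 stores 1 in s390_hotpatch_hw_before_label and
	       2 in s390_hotpatch_hw_after_label, i.e. 1 halfword of NOP padding
	       before the function label and 2 halfwords after it, matching the
	       2/4/6-byte NOPs emitted during s390_reorg.  */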
14738 break;
14740 default:
14741 gcc_unreachable ();
14745 /* Set up function hooks. */
14746 init_machine_status = s390_init_machine_status;
14748 s390_option_override_internal (true, &global_options, &global_options_set);
14750 /* Save the initial options in case the user does function specific
14751 options. */
14752 target_option_default_node = build_target_option_node (&global_options);
14753 target_option_current_node = target_option_default_node;
14755 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14756 requires the arch flags to be evaluated already. Since prefetching
14757 is beneficial on s390, we enable it if available. */
14758 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14759 flag_prefetch_loop_arrays = 1;
14761 if (TARGET_TPF)
14763 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14764 debuggers do not yet support DWARF 3/4. */
14765 if (!global_options_set.x_dwarf_strict)
14766 dwarf_strict = 1;
14767 if (!global_options_set.x_dwarf_version)
14768 dwarf_version = 2;
14771 /* Register a target-specific optimization-and-lowering pass
14772 to run immediately before prologue and epilogue generation.
14774 Registering the pass must be done at start up. It's
14775 convenient to do it here. */
14776 opt_pass *new_pass = new pass_s390_early_mach (g);
14777 struct register_pass_info insert_pass_s390_early_mach =
14779 new_pass, /* pass */
14780 "pro_and_epilogue", /* reference_pass_name */
14781 1, /* ref_pass_instance_number */
14782 PASS_POS_INSERT_BEFORE /* po_op */
14784 register_pass (&insert_pass_s390_early_mach);
14787 #if S390_USE_TARGET_ATTRIBUTE
14788 /* Inner function to process the attribute((target(...))); it takes an argument and
14789 sets the current options from the argument. If we have a list, recursively go
14790 over the list. */
14792 static bool
14793 s390_valid_target_attribute_inner_p (tree args,
14794 struct gcc_options *opts,
14795 struct gcc_options *new_opts_set,
14796 bool force_pragma)
14798 char *next_optstr;
14799 bool ret = true;
14801 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14802 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14803 static const struct
14805 const char *string;
14806 size_t len;
14807 int opt;
14808 int has_arg;
14809 int only_as_pragma;
14810 } attrs[] = {
14811 /* enum options */
14812 S390_ATTRIB ("arch=", OPT_march_, 1),
14813 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14814 /* uinteger options */
14815 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14816 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14817 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14818 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14819 /* flag options */
14820 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14821 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14822 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14823 S390_ATTRIB ("htm", OPT_mhtm, 0),
14824 S390_ATTRIB ("vx", OPT_mvx, 0),
14825 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14826 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14827 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14828 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14829 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14830 /* boolean options */
14831 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14833 #undef S390_ATTRIB
14834 #undef S390_PRAGMA
14836 /* If this is a list, recurse to get the options. */
14837 if (TREE_CODE (args) == TREE_LIST)
14839 bool ret = true;
14840 int num_pragma_values;
14841 int i;
14843 /* Note: attribs.c:decl_attributes prepends the values from
14844 current_target_pragma to the list of target attributes. To determine
14845 whether we're looking at a value of the attribute or the pragma we
14846 assume that the first [list_length (current_target_pragma)] values in
14847 the list are the values from the pragma. */
14848 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14849 ? list_length (current_target_pragma) : 0;
14850 for (i = 0; args; args = TREE_CHAIN (args), i++)
14852 bool is_pragma;
14854 is_pragma = (force_pragma || i < num_pragma_values);
14855 if (TREE_VALUE (args)
14856 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14857 opts, new_opts_set,
14858 is_pragma))
14860 ret = false;
14863 return ret;
14866 else if (TREE_CODE (args) != STRING_CST)
14868 error ("attribute %<target%> argument not a string");
14869 return false;
14872 /* Handle multiple arguments separated by commas. */
14873 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14875 while (next_optstr && *next_optstr != '\0')
14877 char *p = next_optstr;
14878 char *orig_p = p;
14879 char *comma = strchr (next_optstr, ',');
14880 size_t len, opt_len;
14881 int opt;
14882 bool opt_set_p;
14883 char ch;
14884 unsigned i;
14885 int mask = 0;
14886 enum cl_var_type var_type;
14887 bool found;
14889 if (comma)
14891 *comma = '\0';
14892 len = comma - next_optstr;
14893 next_optstr = comma + 1;
14895 else
14897 len = strlen (p);
14898 next_optstr = NULL;
14901 /* Recognize no-xxx. */
14902 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14904 opt_set_p = false;
14905 p += 3;
14906 len -= 3;
14908 else
14909 opt_set_p = true;
14911 /* Find the option. */
14912 ch = *p;
14913 found = false;
14914 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14916 opt_len = attrs[i].len;
14917 if (ch == attrs[i].string[0]
14918 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14919 && memcmp (p, attrs[i].string, opt_len) == 0)
14921 opt = attrs[i].opt;
14922 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14923 continue;
14924 mask = cl_options[opt].var_value;
14925 var_type = cl_options[opt].var_type;
14926 found = true;
14927 break;
14931 /* Process the option. */
14932 if (!found)
14934 error ("attribute(target(\"%s\")) is unknown", orig_p);
14935 return false;
14937 else if (attrs[i].only_as_pragma && !force_pragma)
14939 /* Value is not allowed for the target attribute. */
14940 error ("value %qs is not supported by attribute %<target%>",
14941 attrs[i].string);
14942 return false;
14945 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14947 if (var_type == CLVC_BIT_CLEAR)
14948 opt_set_p = !opt_set_p;
14950 if (opt_set_p)
14951 opts->x_target_flags |= mask;
14952 else
14953 opts->x_target_flags &= ~mask;
14954 new_opts_set->x_target_flags |= mask;
14957 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14959 int value;
14961 if (cl_options[opt].cl_uinteger)
14963 /* Unsigned integer argument. Code based on the function
14964 decode_cmdline_option () in opts-common.c. */
14965 value = integral_argument (p + opt_len);
14967 else
14968 value = (opt_set_p) ? 1 : 0;
14970 if (value != -1)
14972 struct cl_decoded_option decoded;
14974 /* Value range check; only implemented for numeric and boolean
14975 options at the moment. */
14976 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14977 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14978 set_option (opts, new_opts_set, opt, value,
14979 p + opt_len, DK_UNSPECIFIED, input_location,
14980 global_dc);
14982 else
14984 error ("attribute(target(\"%s\")) is unknown", orig_p);
14985 ret = false;
14989 else if (cl_options[opt].var_type == CLVC_ENUM)
14991 bool arg_ok;
14992 int value;
14994 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14995 if (arg_ok)
14996 set_option (opts, new_opts_set, opt, value,
14997 p + opt_len, DK_UNSPECIFIED, input_location,
14998 global_dc);
14999 else
15001 error ("attribute(target(\"%s\")) is unknown", orig_p);
15002 ret = false;
15006 else
15007 gcc_unreachable ();
15009 return ret;
15012 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15014 tree
15015 s390_valid_target_attribute_tree (tree args,
15016 struct gcc_options *opts,
15017 const struct gcc_options *opts_set,
15018 bool force_pragma)
15020 tree t = NULL_TREE;
15021 struct gcc_options new_opts_set;
15023 memset (&new_opts_set, 0, sizeof (new_opts_set));
15025 /* Process each of the options on the chain. */
15026 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15027 force_pragma))
15028 return error_mark_node;
15030 /* If some option was set (even if it has not changed), rerun
15031 s390_option_override_internal, and then save the options away. */
15032 if (new_opts_set.x_target_flags
15033 || new_opts_set.x_s390_arch
15034 || new_opts_set.x_s390_tune
15035 || new_opts_set.x_s390_stack_guard
15036 || new_opts_set.x_s390_stack_size
15037 || new_opts_set.x_s390_branch_cost
15038 || new_opts_set.x_s390_warn_framesize
15039 || new_opts_set.x_s390_warn_dynamicstack_p)
15041 const unsigned char *src = (const unsigned char *)opts_set;
15042 unsigned char *dest = (unsigned char *)&new_opts_set;
15043 unsigned int i;
15045 /* Merge the original option flags into the new ones. */
15046 for (i = 0; i < sizeof(*opts_set); i++)
15047 dest[i] |= src[i];
15049 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15050 s390_option_override_internal (false, opts, &new_opts_set);
15051 /* Save the current options unless we are validating options for
15052 #pragma. */
15053 t = build_target_option_node (opts);
15055 return t;
15058 /* Hook to validate attribute((target("string"))). */
15060 static bool
15061 s390_valid_target_attribute_p (tree fndecl,
15062 tree ARG_UNUSED (name),
15063 tree args,
15064 int ARG_UNUSED (flags))
15066 struct gcc_options func_options;
15067 tree new_target, new_optimize;
15068 bool ret = true;
15070 /* attribute((target("default"))) does nothing, beyond
15071 affecting multi-versioning. */
15072 if (TREE_VALUE (args)
15073 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15074 && TREE_CHAIN (args) == NULL_TREE
15075 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15076 return true;
15078 tree old_optimize = build_optimization_node (&global_options);
15080 /* Get the optimization options of the current function. */
15081 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15083 if (!func_optimize)
15084 func_optimize = old_optimize;
15086 /* Init func_options. */
15087 memset (&func_options, 0, sizeof (func_options));
15088 init_options_struct (&func_options, NULL);
15089 lang_hooks.init_options_struct (&func_options);
15091 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15093 /* Initialize func_options to the default before its target options can
15094 be set. */
15095 cl_target_option_restore (&func_options,
15096 TREE_TARGET_OPTION (target_option_default_node));
15098 new_target = s390_valid_target_attribute_tree (args, &func_options,
15099 &global_options_set,
15100 (args ==
15101 current_target_pragma));
15102 new_optimize = build_optimization_node (&func_options);
15103 if (new_target == error_mark_node)
15104 ret = false;
15105 else if (fndecl && new_target)
15107 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15108 if (old_optimize != new_optimize)
15109 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15111 return ret;
15114 /* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl
15115 cache. */
15117 void
15118 s390_activate_target_options (tree new_tree)
15120 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15121 if (TREE_TARGET_GLOBALS (new_tree))
15122 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15123 else if (new_tree == target_option_default_node)
15124 restore_target_globals (&default_target_globals);
15125 else
15126 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15127 s390_previous_fndecl = NULL_TREE;
15130 /* Establish appropriate back-end context for processing the function
15131 FNDECL. The argument might be NULL to indicate processing at top
15132 level, outside of any function scope. */
15133 static void
15134 s390_set_current_function (tree fndecl)
15136 /* Only change the context if the function changes. This hook is called
15137 several times in the course of compiling a function, and we don't want to
15138 slow things down too much or call target_reinit when it isn't safe. */
15139 if (fndecl == s390_previous_fndecl)
15140 return;
15142 tree old_tree;
15143 if (s390_previous_fndecl == NULL_TREE)
15144 old_tree = target_option_current_node;
15145 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15146 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15147 else
15148 old_tree = target_option_default_node;
15150 if (fndecl == NULL_TREE)
15152 if (old_tree != target_option_current_node)
15153 s390_activate_target_options (target_option_current_node);
15154 return;
15157 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15158 if (new_tree == NULL_TREE)
15159 new_tree = target_option_default_node;
15161 if (old_tree != new_tree)
15162 s390_activate_target_options (new_tree);
15163 s390_previous_fndecl = fndecl;
15165 #endif
15167 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15169 static bool
15170 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15171 unsigned int align ATTRIBUTE_UNUSED,
15172 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15173 bool speed_p ATTRIBUTE_UNUSED)
15175 return (size == 1 || size == 2
15176 || size == 4 || (TARGET_ZARCH && size == 8));
15179 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15181 static void
15182 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15184 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15185 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15186 tree call_efpc = build_call_expr (efpc, 0);
15187 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15189 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15190 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15191 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15192 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15193 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15194 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
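/* Implied layout of the FPC register, read off the masks above: the IEEE
   exception mask occupies the bits under 0xf8000000, the exception flags the
   bits under 0x00f80000, and the data-exception code (DXC) the byte under
   0x0000ff00; the *_SHIFT values move each field down to the low-order
   bits.  */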
15196 /* Generates the equivalent of feholdexcept (&fenv_var)
15198 fenv_var = __builtin_s390_efpc ();
15199 __builtin_s390_sfpc (fenv_var & mask) */
15200 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15201 tree new_fpc =
15202 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15203 build_int_cst (unsigned_type_node,
15204 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15205 FPC_EXCEPTION_MASK)));
15206 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15207 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15209 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15211 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15212 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15213 build_int_cst (unsigned_type_node,
15214 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15215 *clear = build_call_expr (sfpc, 1, new_fpc);
15217 /* Generates the equivalent of feupdateenv (fenv_var)
15219 old_fpc = __builtin_s390_efpc ();
15220 __builtin_s390_sfpc (fenv_var);
15221 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15223 old_fpc = create_tmp_var_raw (unsigned_type_node);
15224 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15225 old_fpc, call_efpc);
15227 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15229 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15230 build_int_cst (unsigned_type_node,
15231 FPC_FLAGS_MASK));
15232 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15233 build_int_cst (unsigned_type_node,
15234 FPC_FLAGS_SHIFT));
15235 tree atomic_feraiseexcept
15236 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15237 raise_old_except = build_call_expr (atomic_feraiseexcept,
15238 1, raise_old_except);
15240 *update = build2 (COMPOUND_EXPR, void_type_node,
15241 build2 (COMPOUND_EXPR, void_type_node,
15242 store_old_fpc, set_new_fpc),
15243 raise_old_except);
15245 #undef FPC_EXCEPTION_MASK
15246 #undef FPC_FLAGS_MASK
15247 #undef FPC_DXC_MASK
15248 #undef FPC_EXCEPTION_MASK_SHIFT
15249 #undef FPC_FLAGS_SHIFT
15250 #undef FPC_DXC_SHIFT
15253 /* Return the vector mode to be used for inner mode MODE when doing
15254 vectorization. */
15255 static machine_mode
15256 s390_preferred_simd_mode (machine_mode mode)
15258 if (TARGET_VX)
15259 switch (mode)
15261 case DFmode:
15262 return V2DFmode;
15263 case DImode:
15264 return V2DImode;
15265 case SImode:
15266 return V4SImode;
15267 case HImode:
15268 return V8HImode;
15269 case QImode:
15270 return V16QImode;
15271 default:;
15273 return word_mode;
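/* Note that every vector mode returned above is a full 16-byte vector
   register: 2 x DF/DI, 4 x SI, 8 x HI or 16 x QI elements.  */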
15276 /* Our hardware does not require vectors to be strictly aligned. */
15277 static bool
15278 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15279 const_tree type ATTRIBUTE_UNUSED,
15280 int misalignment ATTRIBUTE_UNUSED,
15281 bool is_packed ATTRIBUTE_UNUSED)
15283 if (TARGET_VX)
15284 return true;
15286 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15287 is_packed);
15290 /* The vector ABI requires vector types to be aligned on an 8 byte
15291 boundary (our stack alignment). However, we allow this to be
15292 overridden by the user, while this definitely breaks the ABI. */
15293 static HOST_WIDE_INT
15294 s390_vector_alignment (const_tree type)
15296 if (!TARGET_VX_ABI)
15297 return default_vector_alignment (type);
15299 if (TYPE_USER_ALIGN (type))
15300 return TYPE_ALIGN (type);
15302 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
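/* TYPE_SIZE is measured in bits here, so e.g. a 16-byte (128-bit) vector type
   is given only 64-bit alignment under the vector ABI, while an 8-byte vector
   keeps its natural 64-bit alignment.  */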
15305 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15306 /* Implement TARGET_ASM_FILE_START. */
15307 static void
15308 s390_asm_file_start (void)
15310 default_file_start ();
15311 s390_asm_output_machine_for_arch (asm_out_file);
15313 #endif
15315 /* Implement TARGET_ASM_FILE_END. */
15316 static void
15317 s390_asm_file_end (void)
15319 #ifdef HAVE_AS_GNU_ATTRIBUTE
15320 varpool_node *vnode;
15321 cgraph_node *cnode;
15323 FOR_EACH_VARIABLE (vnode)
15324 if (TREE_PUBLIC (vnode->decl))
15325 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15327 FOR_EACH_FUNCTION (cnode)
15328 if (TREE_PUBLIC (cnode->decl))
15329 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15332 if (s390_vector_abi != 0)
15333 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15334 s390_vector_abi);
15335 #endif
15336 file_end_indicate_exec_stack ();
15338 if (flag_split_stack)
15339 file_end_indicate_split_stack ();
15342 /* Return true if TYPE is a vector bool type. */
15343 static inline bool
15344 s390_vector_bool_type_p (const_tree type)
15346 return TYPE_VECTOR_OPAQUE (type);
15349 /* Return the diagnostic message string if the binary operation OP is
15350 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15351 static const char*
15352 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15354 bool bool1_p, bool2_p;
15355 bool plusminus_p;
15356 bool muldiv_p;
15357 bool compare_p;
15358 machine_mode mode1, mode2;
15360 if (!TARGET_ZVECTOR)
15361 return NULL;
15363 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15364 return NULL;
15366 bool1_p = s390_vector_bool_type_p (type1);
15367 bool2_p = s390_vector_bool_type_p (type2);
15369 /* Mixing signed and unsigned types is forbidden for all
15370 operators. */
15371 if (!bool1_p && !bool2_p
15372 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15373 return N_("types differ in signedness");
15375 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15376 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15377 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15378 || op == ROUND_DIV_EXPR);
15379 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15380 || op == EQ_EXPR || op == NE_EXPR);
15382 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15383 return N_("binary operator does not support two vector bool operands");
15385 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15386 return N_("binary operator does not support vector bool operand");
15388 mode1 = TYPE_MODE (type1);
15389 mode2 = TYPE_MODE (type2);
15391 if (bool1_p != bool2_p && plusminus_p
15392 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15393 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15394 return N_("binary operator does not support mixing vector "
15395 "bool with floating point vector operands");
15397 return NULL;
15400 /* Implement TARGET_C_EXCESS_PRECISION.
15402 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15403 double on s390, causing operations on float_t to operate in a higher
15404 precision than is necessary. However, it is not the case that SFmode
15405 operations have implicit excess precision, and we generate more optimal
15406 code if we let the compiler know no implicit extra precision is added.
15408 That means when we are compiling with -fexcess-precision=fast, the value
15409 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15410 float_t (though they would be correct for -fexcess-precision=standard).
15412 A complete fix would modify glibc to remove the unnecessary typedef
15413 of float_t to double. */
15415 static enum flt_eval_method
15416 s390_excess_precision (enum excess_precision_type type)
15418 switch (type)
15420 case EXCESS_PRECISION_TYPE_IMPLICIT:
15421 case EXCESS_PRECISION_TYPE_FAST:
15422 /* The fastest type to promote to will always be the native type,
15423 whether that occurs with implicit excess precision or
15424 otherwise. */
15425 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15426 case EXCESS_PRECISION_TYPE_STANDARD:
15427 /* Otherwise, when we are in a standards compliant mode, to
15428 ensure consistency with the implementation in glibc, report that
15429 float is evaluated to the range and precision of double. */
15430 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15431 default:
15432 gcc_unreachable ();
15434 return FLT_EVAL_METHOD_UNPREDICTABLE;
15437 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
15439 static unsigned HOST_WIDE_INT
15440 s390_asan_shadow_offset (void)
15442 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
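/* AddressSanitizer typically maps an address to shadow memory as
   (addr >> 3) + offset, so the values above place the shadow region at
   1 << 52 for 64-bit code and at 0x20000000 for 31-bit code.  */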
15445 /* Initialize GCC target structure. */
15447 #undef TARGET_ASM_ALIGNED_HI_OP
15448 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15449 #undef TARGET_ASM_ALIGNED_DI_OP
15450 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15451 #undef TARGET_ASM_INTEGER
15452 #define TARGET_ASM_INTEGER s390_assemble_integer
15454 #undef TARGET_ASM_OPEN_PAREN
15455 #define TARGET_ASM_OPEN_PAREN ""
15457 #undef TARGET_ASM_CLOSE_PAREN
15458 #define TARGET_ASM_CLOSE_PAREN ""
15460 #undef TARGET_OPTION_OVERRIDE
15461 #define TARGET_OPTION_OVERRIDE s390_option_override
15463 #ifdef TARGET_THREAD_SSP_OFFSET
15464 #undef TARGET_STACK_PROTECT_GUARD
15465 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15466 #endif
15468 #undef TARGET_ENCODE_SECTION_INFO
15469 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15471 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15472 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15474 #ifdef HAVE_AS_TLS
15475 #undef TARGET_HAVE_TLS
15476 #define TARGET_HAVE_TLS true
15477 #endif
15478 #undef TARGET_CANNOT_FORCE_CONST_MEM
15479 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15481 #undef TARGET_DELEGITIMIZE_ADDRESS
15482 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15484 #undef TARGET_LEGITIMIZE_ADDRESS
15485 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15487 #undef TARGET_RETURN_IN_MEMORY
15488 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15490 #undef TARGET_INIT_BUILTINS
15491 #define TARGET_INIT_BUILTINS s390_init_builtins
15492 #undef TARGET_EXPAND_BUILTIN
15493 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15494 #undef TARGET_BUILTIN_DECL
15495 #define TARGET_BUILTIN_DECL s390_builtin_decl
15497 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15498 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15500 #undef TARGET_ASM_OUTPUT_MI_THUNK
15501 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15502 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15503 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15505 #undef TARGET_C_EXCESS_PRECISION
15506 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
15508 #undef TARGET_SCHED_ADJUST_PRIORITY
15509 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15510 #undef TARGET_SCHED_ISSUE_RATE
15511 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15512 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15513 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15515 #undef TARGET_SCHED_VARIABLE_ISSUE
15516 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15517 #undef TARGET_SCHED_REORDER
15518 #define TARGET_SCHED_REORDER s390_sched_reorder
15519 #undef TARGET_SCHED_INIT
15520 #define TARGET_SCHED_INIT s390_sched_init
15522 #undef TARGET_CANNOT_COPY_INSN_P
15523 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15524 #undef TARGET_RTX_COSTS
15525 #define TARGET_RTX_COSTS s390_rtx_costs
15526 #undef TARGET_ADDRESS_COST
15527 #define TARGET_ADDRESS_COST s390_address_cost
15528 #undef TARGET_REGISTER_MOVE_COST
15529 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15530 #undef TARGET_MEMORY_MOVE_COST
15531 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15532 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15533 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15534 s390_builtin_vectorization_cost
15536 #undef TARGET_MACHINE_DEPENDENT_REORG
15537 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15539 #undef TARGET_VALID_POINTER_MODE
15540 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15542 #undef TARGET_BUILD_BUILTIN_VA_LIST
15543 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15544 #undef TARGET_EXPAND_BUILTIN_VA_START
15545 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15546 #undef TARGET_ASAN_SHADOW_OFFSET
15547 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
15548 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15549 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15551 #undef TARGET_PROMOTE_FUNCTION_MODE
15552 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15553 #undef TARGET_PASS_BY_REFERENCE
15554 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15556 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15557 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15558 #undef TARGET_FUNCTION_ARG
15559 #define TARGET_FUNCTION_ARG s390_function_arg
15560 #undef TARGET_FUNCTION_ARG_ADVANCE
15561 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15562 #undef TARGET_FUNCTION_VALUE
15563 #define TARGET_FUNCTION_VALUE s390_function_value
15564 #undef TARGET_LIBCALL_VALUE
15565 #define TARGET_LIBCALL_VALUE s390_libcall_value
15566 #undef TARGET_STRICT_ARGUMENT_NAMING
15567 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15569 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15570 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15572 #undef TARGET_FIXED_CONDITION_CODE_REGS
15573 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15575 #undef TARGET_CC_MODES_COMPATIBLE
15576 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15578 #undef TARGET_INVALID_WITHIN_DOLOOP
15579 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15581 #ifdef HAVE_AS_TLS
15582 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15583 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15584 #endif
15586 #undef TARGET_DWARF_FRAME_REG_MODE
15587 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15589 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15590 #undef TARGET_MANGLE_TYPE
15591 #define TARGET_MANGLE_TYPE s390_mangle_type
15592 #endif
15594 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15595 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15597 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15598 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15600 #undef TARGET_PREFERRED_RELOAD_CLASS
15601 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15603 #undef TARGET_SECONDARY_RELOAD
15604 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15606 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15607 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15609 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15610 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15612 #undef TARGET_LEGITIMATE_ADDRESS_P
15613 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15615 #undef TARGET_LEGITIMATE_CONSTANT_P
15616 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15618 #undef TARGET_LRA_P
15619 #define TARGET_LRA_P s390_lra_p
15621 #undef TARGET_CAN_ELIMINATE
15622 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15624 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15625 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15627 #undef TARGET_LOOP_UNROLL_ADJUST
15628 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15630 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15631 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15632 #undef TARGET_TRAMPOLINE_INIT
15633 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15635 #undef TARGET_UNWIND_WORD_MODE
15636 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15638 #undef TARGET_CANONICALIZE_COMPARISON
15639 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15641 #undef TARGET_HARD_REGNO_SCRATCH_OK
15642 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15644 #undef TARGET_ATTRIBUTE_TABLE
15645 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15647 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15648 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15650 #undef TARGET_SET_UP_BY_PROLOGUE
15651 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15653 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15654 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15656 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15657 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15658 s390_use_by_pieces_infrastructure_p
15660 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15661 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15663 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15664 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15666 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15667 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15669 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15670 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15672 #undef TARGET_VECTOR_ALIGNMENT
15673 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15675 #undef TARGET_INVALID_BINARY_OP
15676 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15678 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15679 #undef TARGET_ASM_FILE_START
15680 #define TARGET_ASM_FILE_START s390_asm_file_start
15681 #endif
15683 #undef TARGET_ASM_FILE_END
15684 #define TARGET_ASM_FILE_END s390_asm_file_end
15686 #if S390_USE_TARGET_ATTRIBUTE
15687 #undef TARGET_SET_CURRENT_FUNCTION
15688 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15690 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15691 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15692 #endif
15694 #undef TARGET_OPTION_RESTORE
15695 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15697 struct gcc_target targetm = TARGET_INITIALIZER;
15699 #include "gt-s390.h"